summary refs log tree commit diff
path: root/intl
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 1999-08-20 19:52:54 +0000
committer Ulrich Drepper <drepper@redhat.com> 1999-08-20 19:52:54 +0000
commit 6570e194e60822d81d6df31e260985e6a13b0f2a (patch)
tree 505bc4144874d98bb1a19417ac3e69fe95debae9 /intl
parent be7d999a0931203c5541714a255635459ee6dde2 (diff)
download glibc-6570e194e60822d81d6df31e260985e6a13b0f2a.tar.gz
glibc-6570e194e60822d81d6df31e260985e6a13b0f2a.tar.xz
glibc-6570e194e60822d81d6df31e260985e6a13b0f2a.zip
Update.
	* intl/gettextP.h (struct loaded_domain): Add conv element.
	* intl/dcgettext.c (find_msg): Rename to _nl_find_msg and make public.
	Instead of returning found message directly convert it using iconv
	if a conversion was found when opening the file.
	* intl/loadinfo.h: Protect against multiple inclusion.
	Declare _nl_find_msg.
	* intl/loadmsgcat.c (_nl_load_domain): Try to determine charset used
	in the message file and if necessary find appropriate
	conversion to match currently selected charset.
Diffstat (limited to 'intl')
-rw-r--r-- intl/dcgettext.c 103
-rw-r--r-- intl/gettextP.h 12
-rw-r--r-- intl/loadinfo.h 12
-rw-r--r-- intl/loadmsgcat.c 55
4 files changed, 168 insertions, 14 deletions
diff --git a/intl/dcgettext.c b/intl/dcgettext.c
index cc5299e460..0429cc904e 100644
--- a/intl/dcgettext.c
+++ b/intl/dcgettext.c
@@ -83,6 +83,10 @@ void free ();
 # include <locale.h>
 #endif
 
+#if defined HAVE_SYS_PARAM_H || defined _LIBC
+# include <sys/param.h>
+#endif
+
 #include "gettext.h"
 #include "gettextP.h"
 #ifdef _LIBC
@@ -92,6 +96,11 @@ void free ();
 #endif
 #include "hash-string.h"
 
+/* Thread safety.  */
+#ifdef _LIBC
+# include <bits/libc-lock.h>
+#endif
+
 /* @@ end of prolog @@ */
 
 #ifdef _LIBC
@@ -171,8 +180,6 @@ const char _nl_default_dirname[] = GNULOCALEDIR;
 struct binding *_nl_domain_bindings;
 
 /* Prototypes for local functions.  */
-static char *find_msg PARAMS ((struct loaded_l10nfile *domain_file,
-			       const char *msgid)) internal_function;
 static const char *category_to_name PARAMS ((int category)) internal_function;
 static const char *guess_category_value PARAMS ((int category,
 						 const char *categoryname))
@@ -396,7 +403,7 @@ DCGETTEXT (domainname, msgid, category)
 
       if (domain != NULL)
 	{
-	  retval = find_msg (domain, msgid);
+	  retval = _nl_find_msg (domain, msgid);
 
 	  if (retval == NULL)
 	    {
@@ -404,7 +411,7 @@ DCGETTEXT (domainname, msgid, category)
 
 	      for (cnt = 0; domain->successor[cnt] != NULL; ++cnt)
 		{
-		  retval = find_msg (domain->successor[cnt], msgid);
+		  retval = _nl_find_msg (domain->successor[cnt], msgid);
 
 		  if (retval != NULL)
 		    break;
@@ -428,9 +435,9 @@ weak_alias (__dcgettext, dcgettext);
 #endif
 
 
-static char *
+char *
 internal_function
-find_msg (domain_file, msgid)
+_nl_find_msg (domain_file, msgid)
      struct loaded_l10nfile *domain_file;
      const char *msgid;
 {
@@ -464,8 +471,88 @@ find_msg (domain_file, msgid)
 	  && strcmp (msgid,
 		     domain->data + W (domain->must_swap,
 				       domain->orig_tab[nstr - 1].offset)) == 0)
-	return (char *) domain->data + W (domain->must_swap,
-					  domain->trans_tab[nstr - 1].offset);
+	{
+	  /* We found an entry.  If we have to convert the string to use
+	     a different character set this is the time.  */
+	  char *result =
+	    (char *) domain->data + W (domain->must_swap,
+				       domain->trans_tab[nstr - 1].offset);
+
+	  if (
+#if HAVE_ICONV || defined _LIBC
+	      domain->conv != (iconv_t) -1
+#endif
+	      )
+	    {
+	      /* We are supposed to do a conversion.  First allocate an
+		 appropriate table with the same structure as the hash
+		 table in the file where we can put the pointers to the
+		 converted strings in.  */
+	      if (domain->conv_tab == NULL
+		  && ((domain->conv_tab = (char **) calloc (domain->hash_size,
+							    sizeof (char *)))
+		      == NULL))
+		/* Mark that we didn't succeed allocating a table.  */
+		domain->conv_tab = (char **) -1;
+
+	      if (domain->conv_tab == (char **) -1)
+		/* Nothing we can do, no more memory.  */
+		return NULL;
+
+	      if (domain->conv_tab[idx] == NULL)
+		{
+		  /* We haven't used this string so far, so it is not
+		     translated yet.  Do this now.  */
+#ifdef _LIBC
+		  /* For glibc we use a bit more efficient memory handling.
+		     We allocate always larger blocks which get used over
+		     time.  This is faster than many small allocations.   */
+		  __libc_lock_define_initialized (static, lock)
+		  static char *freemem;
+		  static size_t freemem_size;
+		  /* Note that we include the NUL byte.  */
+		  size_t resultlen = strlen (result) + 1;
+		  const char *inbuf = result;
+		  size_t inbytesleft = resultlen;
+		  char *outbuf = freemem;
+		  size_t outbytesleft = freemem_size;
+
+		  __libc_lock_lock (lock);
+
+		  while (iconv (domain->conv, &inbuf, &inbytesleft, &outbuf,
+				&outbytesleft) == (size_t) -1L)
+		    {
+		      if (errno != E2BIG)
+			goto out;
+
+		      /* We must resize the buffer.  */
+		      freemem_size = MAX (2 * freemem_size, 4064);
+		      freemem = (char *) malloc (freemem_size);
+		      if (freemem == NULL)
+			goto out;
+
+		      inbuf = result;
+		      inbytesleft = resultlen;
+		      outbuf = freemem;
+		      outbytesleft = freemem_size;
+		    }
+
+		  /* We have now in our buffer a converted string.  Put this
+		     in the hash table.  */
+		  domain->conv_tab[idx] = freemem;
+		  freemem = outbuf;
+		  freemem_size = outbytesleft;
+
+		out:
+		  __libc_lock_unlock (lock);
+#endif
+		}
+
+	      result = domain->conv_tab[idx];
+	    }
+
+	  return result;
+	}
 
       while (1)
 	{
diff --git a/intl/gettextP.h b/intl/gettextP.h
index bcbe2720a7..bea4404167 100644
--- a/intl/gettextP.h
+++ b/intl/gettextP.h
@@ -1,6 +1,6 @@
 /* Header describing internals of gettext library
-   Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
-   Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
+   Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
+   Written by Ulrich Drepper <drepper@cygnus.com>, 1995.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public License as
@@ -20,6 +20,10 @@
 #ifndef _GETTEXTP_H
 #define _GETTEXTP_H
 
+#if defined HAVE_ICONV || defined _LIBC
+# include <iconv.h>
+#endif
+
 #include "loadinfo.h"
 
 /* @@ end of prolog @@ */
@@ -67,6 +71,10 @@ struct loaded_domain
   struct string_desc *trans_tab;
   nls_uint32 hash_size;
   nls_uint32 *hash_tab;
+#if defined HAVE_ICONV || defined _LIBC
+  iconv_t conv;
+#endif
+  char **conv_tab;
 };
 
 struct binding
diff --git a/intl/loadinfo.h b/intl/loadinfo.h
index 35d98f0d9b..ea1bf05806 100644
--- a/intl/loadinfo.h
+++ b/intl/loadinfo.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
 
@@ -17,6 +17,9 @@
    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
    Boston, MA 02111-1307, USA.  */
 
+#ifndef _LOADINFO_H
+#define _LOADINFO_H	1
+
 #ifndef PARAMS
 # if __STDC__
 #  define PARAMS(args) args
@@ -77,3 +80,10 @@ extern int _nl_explode_name PARAMS ((char *name, const char **language,
 				     const char **revision));
 
 extern char *_nl_find_language PARAMS ((const char *name));
+
+
+extern char *_nl_find_msg PARAMS ((struct loaded_l10nfile *domain_file,
+				   const char *msgid))
+     internal_function;
+
+#endif	/* loadinfo.h */
diff --git a/intl/loadmsgcat.c b/intl/loadmsgcat.c
index 76887e8b4f..23d738882b 100644
--- a/intl/loadmsgcat.c
+++ b/intl/loadmsgcat.c
@@ -31,10 +31,23 @@
 # include <stdlib.h>
 #endif
 
+#if defined HAVE_STRING_H || defined _LIBC
+# ifndef _GNU_SOURCE
+#  define _GNU_SOURCE	1
+# endif
+# include <string.h>
+#else
+# include <strings.h>
+#endif
+
 #if defined HAVE_UNISTD_H || defined _LIBC
 # include <unistd.h>
 #endif
 
+#ifdef _LIBC
+# include <langinfo.h>
+#endif
+
 #if (defined HAVE_MMAP && defined HAVE_MUNMAP && !defined DISALLOW_MMAP) \
     || (defined _LIBC && defined _POSIX_MAPPED_FILES)
 # include <sys/mman.h>
@@ -47,6 +60,10 @@
 #include "gettext.h"
 #include "gettextP.h"
 
+#ifdef _LIBC
+# include "../locale/localeinfo.h"
+#endif
+
 /* @@ end of prolog @@ */
 
 #ifdef _LIBC
@@ -79,6 +96,7 @@ _nl_load_domain (domain_file)
   struct mo_file_header *data = (struct mo_file_header *) -1;
   int use_mmap = 0;
   struct loaded_domain *domain;
+  char *nullentry;
 
   domain_file->decided = 1;
   domain_file->data = NULL;
@@ -200,9 +218,40 @@ _nl_load_domain (domain_file)
       return;
     }
 
-  /* Show that one domain is changed.  This might make some cached
-     translations invalid.  */
-  ++_nl_msg_cat_cntr;
+  /* Now find out about the character set the file is encoded with.
+     This can be found (in textual form) in the entry "".  If this
+     entry does not exist or if this does not contain the `charset='
+     information, we will assume the charset matches the one of the
+     current locale and we don't have to perform any conversion.  */
+#if HAVE_ICONV || defined _LIBC
+  domain->conv = (iconv_t) -1;
+#endif
+  nullentry = _nl_find_msg (domain_file, "");
+  if (nullentry != NULL)
+    {
+      char *charsetstr = strstr (nullentry, "charset=");
+
+      if (charsetstr != NULL)
+	{
+	  size_t len;
+	  char *charset;
+
+	  charsetstr += strlen ("charset=");
+	  len = strcspn (charsetstr, " \t\n");
+
+	  charset = (char *) alloca (len + 1);
+#if defined _LIBC || HAVE_MEMPCPY
+	  *((char *) mempcpy (charset, charsetstr, len)) = '\0';
+#else
+	  memcpy (charset, charsetstr, len);
+	  charset[len] = '\0';
+#endif
+
+#if HAVE_ICONV || defined _LIBC
+	  domain->conv = iconv_open ((*_nl_current[LC_CTYPE])->values[_NL_ITEM_INDEX (CODESET)].string, charset);
+#endif
+	}
+    }
 }