about summary refs log tree commit diff
path: root/intl/dcigettext.c
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2005-04-04 22:37:38 +0000
committerUlrich Drepper <drepper@redhat.com>2005-04-04 22:37:38 +0000
commit4e82c610255f6a186c20c73e74f8e71dcda98efc (patch)
treeb55a3e6692a0261c576330950793bde88524e0e1 /intl/dcigettext.c
parent7a50b1f6d1bd959ae5dfb5539d9cd8935eb8d926 (diff)
downloadglibc-4e82c610255f6a186c20c73e74f8e71dcda98efc.tar.gz
glibc-4e82c610255f6a186c20c73e74f8e71dcda98efc.tar.xz
glibc-4e82c610255f6a186c20c73e74f8e71dcda98efc.zip
* intl/tst-gettext3.c: New file.
	* intl/tst-gettext3.sh: New file.
	* intl/Makefile (distribute): Add tst-gettext3.sh.
	(test-srcs): Add tst-gettext3.
	(tests): Depend on tst-gettext3.out.
	(tst-gettext3.out): New rule.
	(CFLAGS-tst-gettext3.c): New variable.

	Fix bug exposed by tst-gettext3.
	* intl/gettextP.h (struct converted_domain): New type.
	(struct loaded_domain): Remove the conv, conv_tab fields. Add
	conversions, nconversions fields.
	(_nl_init_domain_conv): Remove declaration.
	(_nl_free_domain_conv): Remove declaration.
	(_nl_find_msg): Add convert argument.
	* intl/dcigettext.c (DCIGETTEXT): Call _nl_find_msg with convert=1.
	(_nl_find_msg): Add convert argument. When a conversion to a different
	charset is needed, create a new converted_domain element, instead of
	throwing away the old converted translations.
	(get_output_charset): New function.
	* intl/loadmsgcat.c (_nl_init_domain_conv): Remove function.
	(_nl_free_domain_conv): Remove function.
	(_nl_load_domain): Initialize the conversions array to empty. Use
	_nl_find_msg instead of _nl_init_domain_conv to retrieve the header
	entry.
	(_nl_unload_domain): Free the conversions array and its contents.

	* intl/gettextP.h (struct loaded_domain): Remove codeset_cntr field.
	(struct binding): Likewise.
	* intl/bindtextdom.c (set_binding_values): Drop codeset_cntr
	modifications.
Diffstat (limited to 'intl/dcigettext.c')
-rw-r--r--intl/dcigettext.c485
1 files changed, 335 insertions, 150 deletions
diff --git a/intl/dcigettext.c b/intl/dcigettext.c
index 8163064edc..c73c719d94 100644
--- a/intl/dcigettext.c
+++ b/intl/dcigettext.c
@@ -326,6 +326,10 @@ static struct transmem_list *transmem_list;
 #else
 typedef unsigned char transmem_block_t;
 #endif
+#if defined _LIBC || HAVE_ICONV
+static const char *get_output_charset PARAMS ((struct binding *domainbinding))
+     internal_function;
+#endif
 
 
 /* Names for the libintl functions are a problem.  They must not clash
@@ -597,7 +601,7 @@ DCIGETTEXT (domainname, msgid1, msgid2, plural, n, category)
 
       if (domain != NULL)
 	{
-	  retval = _nl_find_msg (domain, binding, msgid1, &retlen);
+	  retval = _nl_find_msg (domain, binding, msgid1, 1, &retlen);
 
 	  if (retval == NULL)
 	    {
@@ -606,7 +610,7 @@ DCIGETTEXT (domainname, msgid1, msgid2, plural, n, category)
 	      for (cnt = 0; domain->successor[cnt] != NULL; ++cnt)
 		{
 		  retval = _nl_find_msg (domain->successor[cnt], binding,
-					 msgid1, &retlen);
+					 msgid1, 1, &retlen);
 
 		  if (retval != NULL)
 		    {
@@ -683,10 +687,11 @@ DCIGETTEXT (domainname, msgid1, msgid2, plural, n, category)
 
 char *
 internal_function
-_nl_find_msg (domain_file, domainbinding, msgid, lengthp)
+_nl_find_msg (domain_file, domainbinding, msgid, convert, lengthp)
      struct loaded_l10nfile *domain_file;
      struct binding *domainbinding;
      const char *msgid;
+     int convert;
      size_t *lengthp;
 {
   struct loaded_domain *domain;
@@ -793,192 +798,317 @@ _nl_find_msg (domain_file, domainbinding, msgid, lengthp)
     }
 
 #if defined _LIBC || HAVE_ICONV
-  if (domain->codeset_cntr
-      != (domainbinding != NULL ? domainbinding->codeset_cntr : 0))
+  if (convert)
     {
-      /* The domain's codeset has changed through bind_textdomain_codeset()
-	 since the message catalog was initialized or last accessed.  We
-	 have to reinitialize the converter.  */
-      _nl_free_domain_conv (domain);
-      _nl_init_domain_conv (domain_file, domain, domainbinding);
-    }
+      /* We are supposed to do a conversion.  */
+      const char *encoding = get_output_charset (domainbinding);
+
+      /* Search whether a table with converted translations for this
+	 encoding has already been allocated.  */
+      size_t nconversions = domain->nconversions;
+      struct converted_domain *convd = NULL;
+      size_t i;
+
+      for (i = nconversions; i > 0; )
+	{
+	  i--;
+	  if (strcmp (domain->conversions[i].encoding, encoding) == 0)
+	    {
+	      convd = &domain->conversions[i];
+	      break;
+	    }
+	}
 
-  if (
+      if (convd == NULL)
+	{
+	  /* Allocate a table for the converted translations for this
+	     encoding.  */
+	  struct converted_domain *new_conversions =
+	    (struct converted_domain *)
+	    (domain->conversions != NULL
+	     ? realloc (domain->conversions,
+			(nconversions + 1) * sizeof (struct converted_domain))
+	     : malloc ((nconversions + 1) * sizeof (struct converted_domain)));
+
+	  if (__builtin_expect (new_conversions == NULL, 0))
+	    /* Nothing we can do, no more memory.  */
+	    goto converted;
+	  domain->conversions = new_conversions;
+
+	  /* Copy the 'encoding' string to permanent storage.  */
+	  encoding = strdup (encoding);
+	  if (__builtin_expect (encoding == NULL, 0))
+	    /* Nothing we can do, no more memory.  */
+	    goto converted;
+
+	  convd = &new_conversions[nconversions];
+	  convd->encoding = encoding;
+
+	  /* Find out about the character set the file is encoded with.
+	     This can be found (in textual form) in the entry "".  If this
+	     entry does not exist or if this does not contain the 'charset='
+	     information, we will assume the charset matches the one the
+	     current locale and we don't have to perform any conversion.  */
 # ifdef _LIBC
-      domain->conv != (__gconv_t) -1
+	  convd->conv = (__gconv_t) -1;
 # else
 #  if HAVE_ICONV
-      domain->conv != (iconv_t) -1
+	  convd->conv = (iconv_t) -1;
 #  endif
 # endif
-      )
-    {
-      /* We are supposed to do a conversion.  First allocate an
-	 appropriate table with the same structure as the table
-	 of translations in the file, where we can put the pointers
-	 to the converted strings in.
-	 There is a slight complication with plural entries.  They
-	 are represented by consecutive NUL terminated strings.  We
-	 handle this case by converting RESULTLEN bytes, including
-	 NULs.  */
-
-      if (domain->conv_tab == NULL
-	  && ((domain->conv_tab =
-		 (char **) calloc (nstrings + domain->n_sysdep_strings,
-				   sizeof (char *)))
-	      == NULL))
-	/* Mark that we didn't succeed allocating a table.  */
-	domain->conv_tab = (char **) -1;
-
-      if (__builtin_expect (domain->conv_tab == (char **) -1, 0))
-	/* Nothing we can do, no more memory.  */
-	goto converted;
-
-      if (domain->conv_tab[act] == NULL)
+	  {
+	    char *nullentry;
+	    size_t nullentrylen;
+
+	    /* Get the header entry.  This is a recursion, but it doesn't
+	       reallocate domain->conversions because we pass convert = 0.  */
+	    nullentry =
+	      _nl_find_msg (domain_file, domainbinding, "", 0, &nullentrylen);
+
+	    if (nullentry != NULL)
+	      {
+		const char *charsetstr;
+
+		charsetstr = strstr (nullentry, "charset=");
+		if (charsetstr != NULL)
+		  {
+		    size_t len;
+		    char *charset;
+		    const char *outcharset;
+
+		    charsetstr += strlen ("charset=");
+		    len = strcspn (charsetstr, " \t\n");
+
+		    charset = (char *) alloca (len + 1);
+# if defined _LIBC || HAVE_MEMPCPY
+		    *((char *) mempcpy (charset, charsetstr, len)) = '\0';
+# else
+		    memcpy (charset, charsetstr, len);
+		    charset[len] = '\0';
+# endif
+
+		    outcharset = encoding;
+
+# ifdef _LIBC
+		    /* We always want to use transliteration.  */
+		    outcharset = norm_add_slashes (outcharset, "TRANSLIT");
+		    charset = norm_add_slashes (charset, "");
+		    if (__gconv_open (outcharset, charset, &convd->conv,
+				      GCONV_AVOID_NOCONV)
+			!= __GCONV_OK)
+		      convd->conv = (__gconv_t) -1;
+# else
+#  if HAVE_ICONV
+		    /* When using GNU libc >= 2.2 or GNU libiconv >= 1.5,
+		       we want to use transliteration.  */
+#   if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 \
+       || _LIBICONV_VERSION >= 0x0105
+		    if (strchr (outcharset, '/') == NULL)
+		      {
+			char *tmp;
+
+			len = strlen (outcharset);
+			tmp = (char *) alloca (len + 10 + 1);
+			memcpy (tmp, outcharset, len);
+			memcpy (tmp + len, "//TRANSLIT", 10 + 1);
+			outcharset = tmp;
+
+			convd->conv = iconv_open (outcharset, charset);
+
+			freea (outcharset);
+		      }
+		    else
+#   endif
+		      convd->conv = iconv_open (outcharset, charset);
+#  endif
+# endif
+
+		    freea (charset);
+		  }
+	      }
+	  }
+	  convd->conv_tab = NULL;
+	  /* Here domain->conversions is still == new_conversions.  */
+	  domain->nconversions++;
+	}
+
+      if (
+# ifdef _LIBC
+	  convd->conv != (__gconv_t) -1
+# else
+#  if HAVE_ICONV
+	  convd->conv != (iconv_t) -1
+#  endif
+# endif
+	  )
 	{
-	  /* We haven't used this string so far, so it is not
-	     translated yet.  Do this now.  */
-	  /* We use a bit more efficient memory handling.
-	     We allocate always larger blocks which get used over
-	     time.  This is faster than many small allocations.   */
-	  __libc_lock_define_initialized (static, lock)
+	  /* We are supposed to do a conversion.  First allocate an
+	     appropriate table with the same structure as the table
+	     of translations in the file, where we can put the pointers
+	     to the converted strings in.
+	     There is a slight complication with plural entries.  They
+	     are represented by consecutive NUL terminated strings.  We
+	     handle this case by converting RESULTLEN bytes, including
+	     NULs.  */
+
+	  if (convd->conv_tab == NULL
+	      && ((convd->conv_tab =
+		    (char **) calloc (nstrings + domain->n_sysdep_strings,
+				      sizeof (char *)))
+		  == NULL))
+	    /* Mark that we didn't succeed allocating a table.  */
+	    convd->conv_tab = (char **) -1;
+
+	  if (__builtin_expect (convd->conv_tab == (char **) -1, 0))
+	    /* Nothing we can do, no more memory.  */
+	    goto converted;
+
+	  if (convd->conv_tab[act] == NULL)
+	    {
+	      /* We haven't used this string so far, so it is not
+		 translated yet.  Do this now.  */
+	      /* We use a bit more efficient memory handling.
+		 We allocate always larger blocks which get used over
+		 time.  This is faster than many small allocations.   */
+	      __libc_lock_define_initialized (static, lock)
 # define INITIAL_BLOCK_SIZE	4080
-	  static unsigned char *freemem;
-	  static size_t freemem_size;
+	      static unsigned char *freemem;
+	      static size_t freemem_size;
 
-	  const unsigned char *inbuf;
-	  unsigned char *outbuf;
-	  int malloc_count;
+	      const unsigned char *inbuf;
+	      unsigned char *outbuf;
+	      int malloc_count;
 # ifndef _LIBC
-	  transmem_block_t *transmem_list = NULL;
+	      transmem_block_t *transmem_list = NULL;
 # endif
 
-	  __libc_lock_lock (lock);
+	      __libc_lock_lock (lock);
 
-	  inbuf = (const unsigned char *) result;
-	  outbuf = freemem + sizeof (size_t);
+	      inbuf = (const unsigned char *) result;
+	      outbuf = freemem + sizeof (size_t);
 
-	  malloc_count = 0;
-	  while (1)
-	    {
-	      transmem_block_t *newmem;
+	      malloc_count = 0;
+	      while (1)
+		{
+		  transmem_block_t *newmem;
 # ifdef _LIBC
-	      size_t non_reversible;
-	      int res;
+		  size_t non_reversible;
+		  int res;
 
-	      if (freemem_size < sizeof (size_t))
-		goto resize_freemem;
+		  if (freemem_size < sizeof (size_t))
+		    goto resize_freemem;
 
-	      res = __gconv (domain->conv,
-			     &inbuf, inbuf + resultlen,
-			     &outbuf,
-			     outbuf + freemem_size - sizeof (size_t),
-			     &non_reversible);
+		  res = __gconv (convd->conv,
+				 &inbuf, inbuf + resultlen,
+				 &outbuf,
+				 outbuf + freemem_size - sizeof (size_t),
+				 &non_reversible);
 
-	      if (res == __GCONV_OK || res == __GCONV_EMPTY_INPUT)
-		break;
+		  if (res == __GCONV_OK || res == __GCONV_EMPTY_INPUT)
+		    break;
 
-	      if (res != __GCONV_FULL_OUTPUT)
-		{
-		  __libc_lock_unlock (lock);
-		  goto converted;
-		}
+		  if (res != __GCONV_FULL_OUTPUT)
+		    {
+		      __libc_lock_unlock (lock);
+		      goto converted;
+		    }
 
-	      inbuf = (const unsigned char *) result;
+		  inbuf = (const unsigned char *) result;
 # else
 #  if HAVE_ICONV
-	      const char *inptr = (const char *) inbuf;
-	      size_t inleft = resultlen;
-	      char *outptr = (char *) outbuf;
-	      size_t outleft;
-
-	      if (freemem_size < sizeof (size_t))
-		goto resize_freemem;
-
-	      outleft = freemem_size - sizeof (size_t);
-	      if (iconv (domain->conv,
-			 (ICONV_CONST char **) &inptr, &inleft,
-			 &outptr, &outleft)
-		  != (size_t) (-1))
-		{
-		  outbuf = (unsigned char *) outptr;
-		  break;
-		}
-	      if (errno != E2BIG)
-		{
-		  __libc_lock_unlock (lock);
-		  goto converted;
-		}
+		  const char *inptr = (const char *) inbuf;
+		  size_t inleft = resultlen;
+		  char *outptr = (char *) outbuf;
+		  size_t outleft;
+
+		  if (freemem_size < sizeof (size_t))
+		    goto resize_freemem;
+
+		  outleft = freemem_size - sizeof (size_t);
+		  if (iconv (convd->conv,
+			     (ICONV_CONST char **) &inptr, &inleft,
+			     &outptr, &outleft)
+		      != (size_t) (-1))
+		    {
+		      outbuf = (unsigned char *) outptr;
+		      break;
+		    }
+		  if (errno != E2BIG)
+		    {
+		      __libc_lock_unlock (lock);
+		      goto converted;
+		    }
 #  endif
 # endif
 
-	    resize_freemem:
-	      /* We must allocate a new buffer or resize the old one.  */
-	      if (malloc_count > 0)
-		{
-		  ++malloc_count;
-		  freemem_size = malloc_count * INITIAL_BLOCK_SIZE;
-		  newmem = (transmem_block_t *) realloc (transmem_list,
-							 freemem_size);
+		resize_freemem:
+		  /* We must allocate a new buffer or resize the old one.  */
+		  if (malloc_count > 0)
+		    {
+		      ++malloc_count;
+		      freemem_size = malloc_count * INITIAL_BLOCK_SIZE;
+		      newmem = (transmem_block_t *) realloc (transmem_list,
+							     freemem_size);
 # ifdef _LIBC
-		  if (newmem != NULL)
-		    transmem_list = transmem_list->next;
+		      if (newmem != NULL)
+			transmem_list = transmem_list->next;
+		      else
+			{
+			  struct transmem_list *old = transmem_list;
+
+			  transmem_list = transmem_list->next;
+			  free (old);
+			}
+# endif
+		    }
 		  else
 		    {
-		      struct transmem_list *old = transmem_list;
-
-		      transmem_list = transmem_list->next;
-		      free (old);
+		      malloc_count = 1;
+		      freemem_size = INITIAL_BLOCK_SIZE;
+		      newmem = (transmem_block_t *) malloc (freemem_size);
+		    }
+		  if (__builtin_expect (newmem == NULL, 0))
+		    {
+		      freemem = NULL;
+		      freemem_size = 0;
+		      __libc_lock_unlock (lock);
+		      goto converted;
 		    }
-# endif
-		}
-	      else
-		{
-		  malloc_count = 1;
-		  freemem_size = INITIAL_BLOCK_SIZE;
-		  newmem = (transmem_block_t *) malloc (freemem_size);
-		}
-	      if (__builtin_expect (newmem == NULL, 0))
-		{
-		  freemem = NULL;
-		  freemem_size = 0;
-		  __libc_lock_unlock (lock);
-		  goto converted;
-		}
 
 # ifdef _LIBC
-	      /* Add the block to the list of blocks we have to free
-                 at some point.  */
-	      newmem->next = transmem_list;
-	      transmem_list = newmem;
+		  /* Add the block to the list of blocks we have to free
+		     at some point.  */
+		  newmem->next = transmem_list;
+		  transmem_list = newmem;
 
-	      freemem = (unsigned char *) newmem->data;
-	      freemem_size -= offsetof (struct transmem_list, data);
+		  freemem = (unsigned char *) newmem->data;
+		  freemem_size -= offsetof (struct transmem_list, data);
 # else
-	      transmem_list = newmem;
-	      freemem = newmem;
+		  transmem_list = newmem;
+		  freemem = newmem;
 # endif
 
-	      outbuf = freemem + sizeof (size_t);
+		  outbuf = freemem + sizeof (size_t);
+		}
+
+	      /* We have now in our buffer a converted string.  Put this
+		 into the table of conversions.  */
+	      *(size_t *) freemem = outbuf - freemem - sizeof (size_t);
+	      convd->conv_tab[act] = (char *) freemem;
+	      /* Shrink freemem, but keep it aligned.  */
+	      freemem_size -= outbuf - freemem;
+	      freemem = outbuf;
+	      freemem += freemem_size & (alignof (size_t) - 1);
+	      freemem_size = freemem_size & ~ (alignof (size_t) - 1);
+
+	      __libc_lock_unlock (lock);
 	    }
 
-	  /* We have now in our buffer a converted string.  Put this
-	     into the table of conversions.  */
-	  *(size_t *) freemem = outbuf - freemem - sizeof (size_t);
-	  domain->conv_tab[act] = (char *) freemem;
-	  /* Shrink freemem, but keep it aligned.  */
-	  freemem_size -= outbuf - freemem;
-	  freemem = outbuf;
-	  freemem += freemem_size & (alignof (size_t) - 1);
-	  freemem_size = freemem_size & ~ (alignof (size_t) - 1);
-
-	  __libc_lock_unlock (lock);
+	  /* Now convd->conv_tab[act] contains the translation of all
+	     the plural variants.  */
+	  result = convd->conv_tab[act] + sizeof (size_t);
+	  resultlen = *(size_t *) convd->conv_tab[act];
 	}
-
-      /* Now domain->conv_tab[act] contains the translation of all
-	 the plural variants.  */
-      result = domain->conv_tab[act] + sizeof (size_t);
-      resultlen = *(size_t *) domain->conv_tab[act];
     }
 
  converted:
@@ -1122,6 +1252,61 @@ guess_category_value (category, categoryname)
   return language != NULL && strcmp (retval, "C") != 0 ? language : retval;
 }
 
+#if defined _LIBC || HAVE_ICONV
+/* Returns the output charset.  */
+static const char *
+internal_function
+get_output_charset (domainbinding)
+     struct binding *domainbinding;
+{
+  /* The output charset should normally be determined by the locale.  But
+     sometimes the locale is not used or not correctly set up, so we provide
+     a possibility for the user to override this: the OUTPUT_CHARSET
+     environment variable.  Moreover, the value specified through
+     bind_textdomain_codeset overrides both.  */
+  if (domainbinding != NULL && domainbinding->codeset != NULL)
+    return domainbinding->codeset;
+  else
+    {
+      /* For speed reasons, we look at the value of OUTPUT_CHARSET only
+	 once.  This is a user variable that is not supposed to change
+	 during a program run.  */
+      static char *output_charset_cache;
+      static int output_charset_cached;
+
+      if (!output_charset_cached)
+	{
+	  const char *value = getenv ("OUTPUT_CHARSET");
+
+	  if (value != NULL && value[0] != '\0')
+	    {
+	      size_t len = strlen (value) + 1;
+	      char *value_copy = (char *) malloc (len);
+
+	      if (value_copy != NULL)
+		memcpy (value_copy, value, len);
+	      output_charset_cache = value_copy;
+	    }
+	  output_charset_cached = 1;
+	}
+
+      if (output_charset_cache != NULL)
+	return output_charset_cache;
+      else
+	{
+# ifdef _LIBC
+	  return _NL_CURRENT (LC_CTYPE, CODESET);
+# else
+#  if HAVE_ICONV
+	  extern const char *locale_charset PARAMS ((void);
+	  return locale_charset ();
+#  endif
+# endif
+	}
+    }
+}
+#endif
+
 /* @@ begin of epilog @@ */
 
 /* We don't want libintl.a to depend on any other library.  So we