diff options
author | Arjun Shankar <arjun@redhat.com> | 2020-09-25 14:47:06 +0200 |
---|---|---|
committer | Arjun Shankar <arjun@redhat.com> | 2020-09-25 14:57:32 +0200 |
commit | fe62c4d173f3cc1ac64f01e75a8f421b2f092cdb (patch) | |
tree | 42540b3bededd8e65dce1a9d5992f275ab560489 | |
parent | 386543bc4495f658dcce6cd4d11e4ba6574a46f5 (diff) | |
download | glibc-fe62c4d173f3cc1ac64f01e75a8f421b2f092cdb.tar.gz glibc-fe62c4d173f3cc1ac64f01e75a8f421b2f092cdb.tar.xz glibc-fe62c4d173f3cc1ac64f01e75a8f421b2f092cdb.zip |
intl: Handle translation output codesets with suffixes [BZ #26383]
Commit 91927b7c7643 (Rewrite iconv option parsing [BZ #19519]) did not handle cases where the output codeset for translations (via the `gettext' family of functions) might have a caller-specified encoding suffix such as TRANSLIT or IGNORE. This led to a regression where translations did not work when the codeset had a suffix. This commit fixes the above issue by parsing any suffixes passed to __dcigettext and adds two new test cases to intl/tst-codeset.c to verify correct behaviour. The iconv-internal function __gconv_create_spec and the static iconv-internal function gconv_destroy_spec (now renamed __gconv_destroy_spec) are now visible internally within glibc and used in intl/dcigettext.c. (cherry picked from commit 7d4ec75e111291851620c6aa2c4460647b7fd50d)
-rw-r--r-- | iconv/Versions | 4 | ||||
-rw-r--r-- | iconv/gconv_charset.c | 10 | ||||
-rw-r--r-- | iconv/gconv_charset.h | 27 | ||||
-rw-r--r-- | iconv/gconv_int.h | 21 | ||||
-rw-r--r-- | iconv/iconv_open.c | 2 | ||||
-rw-r--r-- | iconv/iconv_prog.c | 2 | ||||
-rw-r--r-- | intl/dcigettext.c | 17 | ||||
-rw-r--r-- | intl/tst-codeset.c | 34 |
8 files changed, 60 insertions, 57 deletions
diff --git a/iconv/Versions b/iconv/Versions index 8a5f4cf780..d51af52fa3 100644 --- a/iconv/Versions +++ b/iconv/Versions @@ -6,7 +6,9 @@ libc { GLIBC_PRIVATE { # functions shared with iconv program __gconv_get_alias_db; __gconv_get_cache; __gconv_get_modules_db; - __gconv_open; __gconv_create_spec; + + # functions used elsewhere in glibc + __gconv_open; __gconv_create_spec; __gconv_destroy_spec; # function used by the gconv modules __gconv_transliterate; diff --git a/iconv/gconv_charset.c b/iconv/gconv_charset.c index 6ccd0773cc..4ba0aa99f5 100644 --- a/iconv/gconv_charset.c +++ b/iconv/gconv_charset.c @@ -216,3 +216,13 @@ out: return ret; } libc_hidden_def (__gconv_create_spec) + + +void +__gconv_destroy_spec (struct gconv_spec *conv_spec) +{ + free (conv_spec->fromcode); + free (conv_spec->tocode); + return; +} +libc_hidden_def (__gconv_destroy_spec) diff --git a/iconv/gconv_charset.h b/iconv/gconv_charset.h index b39b09aea1..e9c122cf7e 100644 --- a/iconv/gconv_charset.h +++ b/iconv/gconv_charset.h @@ -48,33 +48,6 @@ #define GCONV_IGNORE_ERRORS_SUFFIX "IGNORE" -/* This function accepts the charset names of the source and destination of the - conversion and populates *conv_spec with an equivalent conversion - specification that may later be used by __gconv_open. The charset names - might contain options in the form of suffixes that alter the conversion, - e.g. "ISO-10646/UTF-8/TRANSLIT". It processes the charset names, ignoring - and truncating any suffix options in fromcode, and processing and truncating - any suffix options in tocode. Supported suffix options ("TRANSLIT" or - "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec - to be set to true. Unrecognized suffix options are silently discarded. If - the function succeeds, it returns conv_spec back to the caller. It returns - NULL upon failure. 
*/ -struct gconv_spec * -__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode, - const char *tocode); -libc_hidden_proto (__gconv_create_spec) - - -/* This function frees all heap memory allocated by __gconv_create_spec. */ -static void __attribute__ ((unused)) -gconv_destroy_spec (struct gconv_spec *conv_spec) -{ - free (conv_spec->fromcode); - free (conv_spec->tocode); - return; -} - - /* This function copies in-order, characters from the source 's' that are either alpha-numeric or one in one of these: "_-.,:/" - into the destination 'wp' while dropping all other characters. In the process, it converts all diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h index e86938dae7..f721ce30ff 100644 --- a/iconv/gconv_int.h +++ b/iconv/gconv_int.h @@ -152,6 +152,27 @@ extern int __gconv_open (struct gconv_spec *conv_spec, __gconv_t *handle, int flags); libc_hidden_proto (__gconv_open) +/* This function accepts the charset names of the source and destination of the + conversion and populates *conv_spec with an equivalent conversion + specification that may later be used by __gconv_open. The charset names + might contain options in the form of suffixes that alter the conversion, + e.g. "ISO-10646/UTF-8/TRANSLIT". It processes the charset names, ignoring + and truncating any suffix options in fromcode, and processing and truncating + any suffix options in tocode. Supported suffix options ("TRANSLIT" or + "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec + to be set to true. Unrecognized suffix options are silently discarded. If + the function succeeds, it returns conv_spec back to the caller. It returns + NULL upon failure. */ +extern struct gconv_spec * +__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode, + const char *tocode); +libc_hidden_proto (__gconv_create_spec) + +/* This function frees all heap memory allocated by __gconv_create_spec. 
*/ +extern void +__gconv_destroy_spec (struct gconv_spec *conv_spec); +libc_hidden_proto (__gconv_destroy_spec) + /* Free resources associated with transformation descriptor CD. */ extern int __gconv_close (__gconv_t cd) attribute_hidden; diff --git a/iconv/iconv_open.c b/iconv/iconv_open.c index dd54bc12e0..5b30055c04 100644 --- a/iconv/iconv_open.c +++ b/iconv/iconv_open.c @@ -39,7 +39,7 @@ iconv_open (const char *tocode, const char *fromcode) int res = __gconv_open (&conv_spec, &cd, 0); - gconv_destroy_spec (&conv_spec); + __gconv_destroy_spec (&conv_spec); if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK) { diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c index b4334faa57..d59979759c 100644 --- a/iconv/iconv_prog.c +++ b/iconv/iconv_prog.c @@ -184,7 +184,7 @@ main (int argc, char *argv[]) /* Let's see whether we have these coded character sets. */ res = __gconv_open (&conv_spec, &cd, 0); - gconv_destroy_spec (&conv_spec); + __gconv_destroy_spec (&conv_spec); if (res != __GCONV_OK) { diff --git a/intl/dcigettext.c b/intl/dcigettext.c index 2e7c662bc7..bd332e71da 100644 --- a/intl/dcigettext.c +++ b/intl/dcigettext.c @@ -1120,15 +1120,18 @@ _nl_find_msg (struct loaded_l10nfile *domain_file, # ifdef _LIBC - struct gconv_spec conv_spec - = { .fromcode = norm_add_slashes (charset, ""), - .tocode = norm_add_slashes (outcharset, ""), - /* We always want to use transliteration. */ - .translit = true, - .ignore = false - }; + struct gconv_spec conv_spec; + + __gconv_create_spec (&conv_spec, charset, outcharset); + + /* We always want to use transliteration. 
*/ + conv_spec.translit = true; + int r = __gconv_open (&conv_spec, &convd->conv, GCONV_AVOID_NOCONV); + + __gconv_destroy_spec (&conv_spec); + if (__builtin_expect (r != __GCONV_OK, 0)) { /* If the output encoding is the same there is diff --git a/intl/tst-codeset.c b/intl/tst-codeset.c index fd70432eca..e9f6e5e09f 100644 --- a/intl/tst-codeset.c +++ b/intl/tst-codeset.c @@ -22,13 +22,11 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <support/check.h> static int do_test (void) { - char *s; - int result = 0; - unsetenv ("LANGUAGE"); unsetenv ("OUTPUT_CHARSET"); setlocale (LC_ALL, "de_DE.ISO-8859-1"); @@ -36,25 +34,21 @@ do_test (void) bindtextdomain ("codeset", OBJPFX "domaindir"); /* Here we expect output in ISO-8859-1. */ - s = gettext ("cheese"); - if (strcmp (s, "K\344se")) - { - printf ("call 1 returned: %s\n", s); - result = 1; - } + TEST_COMPARE_STRING (gettext ("cheese"), "K\344se"); + /* Here we expect output in UTF-8. */ bind_textdomain_codeset ("codeset", "UTF-8"); + TEST_COMPARE_STRING (gettext ("cheese"), "K\303\244se"); - /* Here we expect output in UTF-8. */ - s = gettext ("cheese"); - if (strcmp (s, "K\303\244se")) - { - printf ("call 2 returned: %s\n", s); - result = 1; - } - - return result; + /* `a with umlaut' is transliterated to `ae'. */ + bind_textdomain_codeset ("codeset", "ASCII//TRANSLIT"); + TEST_COMPARE_STRING (gettext ("cheese"), "Kaese"); + + /* Transliteration also works by default even if not set. */ + bind_textdomain_codeset ("codeset", "ASCII"); + TEST_COMPARE_STRING (gettext ("cheese"), "Kaese"); + + return 0; } -#define TEST_FUNCTION do_test () -#include "../test-skeleton.c" +#include <support/test-driver.c> |