/* Implementation of the internal dcigettext function. Copyright (C) 1995-2023 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this program. If not, see <https://www.gnu.org/licenses/>. */ /* Tell glibc's <string.h> to provide a prototype for mempcpy(). This must come before <config.h> because <config.h> may include <features.h>, and once <features.h> has been included, it's too late. */ #ifndef _GNU_SOURCE # define _GNU_SOURCE 1 #endif #ifdef HAVE_CONFIG_H # include <config.h> #endif #include <sys/types.h> #ifdef __GNUC__ # define alloca __builtin_alloca # define HAVE_ALLOCA 1 #else # ifdef _MSC_VER # include <malloc.h> # define alloca _alloca # else # if defined HAVE_ALLOCA_H || defined _LIBC # include <alloca.h> # else # ifdef _AIX #pragma alloca # else # ifndef alloca char *alloca (); # endif # endif # endif # endif #endif #include <errno.h> #ifndef errno extern int errno; #endif #ifndef __set_errno # define __set_errno(val) errno = (val) #endif #include <stddef.h> #include <stdlib.h> #include <stdio.h> #include <string.h> #if defined HAVE_UNISTD_H || defined _LIBC # include <unistd.h> #endif #include <locale.h> #ifdef _LIBC /* Guess whether integer division by zero raises signal SIGFPE. Set to 1 only if you know for sure. In case of doubt, set to 0. */ # if defined __alpha__ || defined __arm__ || defined __i386__ \ || defined __m68k__ || defined __s390__ # define INTDIV0_RAISES_SIGFPE 1 # else # define INTDIV0_RAISES_SIGFPE 0 # endif #endif #if !INTDIV0_RAISES_SIGFPE # include <signal.h> #endif #if defined HAVE_SYS_PARAM_H || defined _LIBC # include <sys/param.h> #endif #if !defined _LIBC # include "localcharset.h" #endif #include "gettextP.h" #include "plural-exp.h" #ifdef _LIBC # include <libintl.h> #else # ifdef IN_LIBGLOCALE # include <libintl.h> # endif # include "libgnuintl.h" #endif #include "hash-string.h" /* Handle multi-threaded applications. */ #ifdef _LIBC # include <libc-lock.h> # define gl_rwlock_define_initialized __libc_rwlock_define_initialized # define gl_rwlock_rdlock __libc_rwlock_rdlock # define gl_rwlock_wrlock __libc_rwlock_wrlock # define gl_rwlock_unlock __libc_rwlock_unlock #else # include "lock.h" #endif /* Alignment of types. */ #if defined __GNUC__ && __GNUC__ >= 2 # define alignof(TYPE) __alignof__ (TYPE) #else # define alignof(TYPE) \ ((int) &((struct { char dummy1; TYPE dummy2; } *) 0)->dummy2) #endif /* Some compilers, like SunOS4 cc, don't have offsetof in <stddef.h>. */ #ifndef offsetof # define offsetof(type,ident) ((size_t)&(((type*)0)->ident)) #endif /* @@ end of prolog @@ */ #ifdef _LIBC /* Rename the non ANSI C functions. This is required by the standard because some ANSI C functions will require linking with this object file and the name space must not be polluted. */ # define strdup __strdup # define getcwd __getcwd # ifndef stpcpy # define stpcpy __stpcpy # endif # define tfind __tfind #else # if !defined HAVE_GETCWD char *getwd (); # define getcwd(buf, max) getwd (buf) # else # if VMS # define getcwd(buf, max) (getcwd) (buf, max, 0) # else char *getcwd (); # endif # endif # ifndef HAVE_STPCPY static char *stpcpy (char *dest, const char *src); # endif # ifndef HAVE_MEMPCPY static void *mempcpy (void *dest, const void *src, size_t n); # endif #endif /* Use a replacement if the system does not provide the `tsearch' function family. */ #if defined HAVE_TSEARCH || defined _LIBC # include <search.h> #else # define tsearch libintl_tsearch # define tfind libintl_tfind # define tdelete libintl_tdelete # define twalk libintl_twalk # include "tsearch.h" #endif #ifdef _LIBC # define tsearch __tsearch #endif /* Amount to increase buffer size by in each try. */ #define PATH_INCR 32 /* The following is from pathmax.h. */ /* Non-POSIX BSD systems might have gcc's limits.h, which doesn't define PATH_MAX but might cause redefinition warnings when sys/param.h is later included (as on MORE/BSD 4.3). */ #if defined _POSIX_VERSION || (defined HAVE_LIMITS_H && !defined __GNUC__) # include <limits.h> #endif #ifndef _POSIX_PATH_MAX # define _POSIX_PATH_MAX 255 #endif #if !defined PATH_MAX && defined _PC_PATH_MAX # define PATH_MAX (__pathconf ("/", _PC_PATH_MAX) < 1 ? 1024 : __pathconf ("/", _PC_PATH_MAX)) #endif /* Don't include sys/param.h if it already has been. */ #if defined HAVE_SYS_PARAM_H && !defined PATH_MAX && !defined MAXPATHLEN # include <sys/param.h> #endif #if !defined PATH_MAX && defined MAXPATHLEN # define PATH_MAX MAXPATHLEN #endif #ifndef PATH_MAX # define PATH_MAX _POSIX_PATH_MAX #endif /* Pathname support. ISSLASH(C) tests whether C is a directory separator character. IS_ABSOLUTE_PATH(P) tests whether P is an absolute path. If it is not, it may be concatenated to a directory pathname. IS_PATH_WITH_DIR(P) tests whether P contains a directory specification. */ #if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__ /* Win32, Cygwin, OS/2, DOS */ # define ISSLASH(C) ((C) == '/' || (C) == '\\') # define HAS_DEVICE(P) \ ((((P)[0] >= 'A' && (P)[0] <= 'Z') || ((P)[0] >= 'a' && (P)[0] <= 'z')) \ && (P)[1] == ':') # define IS_ABSOLUTE_PATH(P) (ISSLASH ((P)[0]) || HAS_DEVICE (P)) # define IS_PATH_WITH_DIR(P) \ (strchr (P, '/') != NULL || strchr (P, '\\') != NULL || HAS_DEVICE (P)) #else /* Unix */ # define ISSLASH(C) ((C) == '/') # define IS_ABSOLUTE_PATH(P) ISSLASH ((P)[0]) # define IS_PATH_WITH_DIR(P) (strchr (P, '/') != NULL) #endif /* Whether to support different locales in different threads. */ #if defined _LIBC || HAVE_USELOCALE || defined IN_LIBGLOCALE # define HAVE_PER_THREAD_LOCALE #endif /* This is the type used for the search tree where known translations are stored. */ struct known_translation_t { /* Domain in which to search. */ const char *domainname; /* The category. */ int category; #ifdef HAVE_PER_THREAD_LOCALE /* Name of the relevant locale category, or "" for the global locale. */ const char *localename; #endif #ifdef IN_LIBGLOCALE /* The character encoding. */ const char *encoding; #endif /* State of the catalog counter at the point the string was found. */ int counter; /* Catalog where the string was found. */ struct loaded_l10nfile *domain; /* And finally the translation. */ const char *translation; size_t translation_length; /* Pointer to the string in question. */ union { char appended[ZERO]; /* used if domain != NULL */ const char *ptr; /* used if domain == NULL */ } msgid; }; gl_rwlock_define_initialized (static, tree_lock) /* Root of the search tree with known translations. */ static void *root; /* Function to compare two entries in the table of known translations. */ static int transcmp (const void *p1, const void *p2) { const struct known_translation_t *s1; const struct known_translation_t *s2; int result; s1 = (const struct known_translation_t *) p1; s2 = (const struct known_translation_t *) p2; result = strcmp (s1->domain != NULL ? s1->msgid.appended : s1->msgid.ptr, s2->domain != NULL ? s2->msgid.appended : s2->msgid.ptr); if (result == 0) { result = strcmp (s1->domainname, s2->domainname); if (result == 0) { #ifdef HAVE_PER_THREAD_LOCALE result = strcmp (s1->localename, s2->localename); if (result == 0) #endif { #ifdef IN_LIBGLOCALE result = strcmp (s1->encoding, s2->encoding); if (result == 0) #endif /* We compare the category last (though this is the cheapest operation) since it is hopefully always the same (namely LC_MESSAGES). */ result = s1->category - s2->category; } } } return result; } /* Name of the default domain used for gettext(3) prior any call to textdomain(3). The default value for this is "messages". */ const char _nl_default_default_domain[] attribute_hidden = "messages"; #ifndef IN_LIBGLOCALE /* Value used as the default domain for gettext(3). */ const char *_nl_current_default_domain attribute_hidden = _nl_default_default_domain; #endif /* Contains the default location of the message catalogs. */ #if defined __EMX__ extern const char _nl_default_dirname[]; #else # ifdef _LIBC extern const char _nl_default_dirname[]; libc_hidden_proto (_nl_default_dirname) # endif const char _nl_default_dirname[] = LOCALEDIR; # ifdef _LIBC libc_hidden_data_def (_nl_default_dirname) # endif #endif #ifndef IN_LIBGLOCALE /* List with bindings of specific domains created by bindtextdomain() calls. */ struct binding *_nl_domain_bindings; #endif /* Prototypes for local functions. */ static char *plural_lookup (struct loaded_l10nfile *domain, unsigned long int n, const char *translation, size_t translation_len); #ifdef IN_LIBGLOCALE static const char *guess_category_value (int category, const char *categoryname, const char *localename); #else static const char *guess_category_value (int category, const char *categoryname); #endif #ifdef _LIBC # include "../locale/localeinfo.h" # define category_to_name(category) _nl_category_names_get (category) #else static const char *category_to_name (int category); #endif #if (defined _LIBC || HAVE_ICONV) && !defined IN_LIBGLOCALE static const char *get_output_charset (struct binding *domainbinding); #endif /* For those losing systems which don't have `alloca' we have to add some additional code emulating it. */ #ifdef HAVE_ALLOCA /* Nothing has to be done. */ # define freea(p) /* nothing */ # define ADD_BLOCK(list, address) /* nothing */ # define FREE_BLOCKS(list) /* nothing */ #else struct block_list { void *address; struct block_list *next; }; # define ADD_BLOCK(list, addr) \ do { \ struct block_list *newp = (struct block_list *) malloc (sizeof (*newp)); \ /* If we cannot get a free block we cannot add the new element to \ the list. */ \ if (newp != NULL) { \ newp->address = (addr); \ newp->next = (list); \ (list) = newp; \ } \ } while (0) # define FREE_BLOCKS(list) \ do { \ while (list != NULL) { \ struct block_list *old = list; \ list = list->next; \ free (old->address); \ free (old); \ } \ } while (0) # undef alloca # define alloca(size) (malloc (size)) # define freea(p) free (p) #endif /* have alloca */ #ifdef _LIBC /* List of blocks allocated for translations. */ typedef struct transmem_list { struct transmem_list *next; char data[ZERO]; } transmem_block_t; static struct transmem_list *transmem_list; #else typedef unsigned char transmem_block_t; #endif /* Names for the libintl functions are a problem. They must not clash with existing names and they should follow ANSI C. But this source code is also used in GNU C Library where the names have a __ prefix. So we have to make a difference here. */ #ifdef _LIBC # define DCIGETTEXT __dcigettext #else # define DCIGETTEXT libintl_dcigettext #endif /* Lock variable to protect the global data in the gettext implementation. */ gl_rwlock_define_initialized (, _nl_state_lock attribute_hidden) /* Checking whether the binaries runs SUID must be done and glibc provides easier methods therefore we make a difference here. */ #ifdef _LIBC # define ENABLE_SECURE __libc_enable_secure # define DETERMINE_SECURE #else # ifndef HAVE_GETUID # define getuid() 0 # endif # ifndef HAVE_GETGID # define getgid() 0 # endif # ifndef HAVE_GETEUID # define geteuid() getuid() # endif # ifndef HAVE_GETEGID # define getegid() getgid() # endif static int enable_secure; # define ENABLE_SECURE (enable_secure == 1) # define DETERMINE_SECURE \ if (enable_secure == 0) \ { \ if (getuid () != geteuid () || getgid () != getegid ()) \ enable_secure = 1; \ else \ enable_secure = -1; \ } #endif /* Get the function to evaluate the plural expression. */ #include "eval-plural.h" /* Look up MSGID in the DOMAINNAME message catalog for the current CATEGORY locale and, if PLURAL is nonzero, search over string depending on the plural form determined by N. */ #ifdef IN_LIBGLOCALE char * gl_dcigettext (const char *domainname, const char *msgid1, const char *msgid2, int plural, unsigned long int n, int category, const char *localename, const char *encoding) #else char * DCIGETTEXT (const char *domainname, const char *msgid1, const char *msgid2, int plural, unsigned long int n, int category) #endif { #ifndef HAVE_ALLOCA struct block_list *block_list = NULL; #endif struct loaded_l10nfile *domain; struct binding *binding; const char *categoryname; const char *categoryvalue; const char *dirname; char *xdirname = NULL; char *xdomainname; char *single_locale; char *retval; size_t retlen; int saved_errno; struct known_translation_t search; struct known_translation_t **foundp = NULL; #if defined HAVE_PER_THREAD_LOCALE && !defined IN_LIBGLOCALE const char *localename; #endif size_t domainname_len; /* If no real MSGID is given return NULL. */ if (msgid1 == NULL) return NULL; #ifdef _LIBC if (category < 0 || category >= __LC_LAST || category == LC_ALL) /* Bogus. */ return (plural == 0 ? (char *) msgid1 /* Use the Germanic plural rule. */ : n == 1 ? (char *) msgid1 : (char *) msgid2); #endif /* Preserve the `errno' value. */ saved_errno = errno; #ifdef _LIBC __libc_rwlock_define (extern, __libc_setlocale_lock attribute_hidden) __libc_rwlock_rdlock (__libc_setlocale_lock); #endif gl_rwlock_rdlock (_nl_state_lock); /* If DOMAINNAME is NULL, we are interested in the default domain. If CATEGORY is not LC_MESSAGES this might not make much sense but the definition left this undefined. */ if (domainname == NULL) domainname = _nl_current_default_domain; /* OS/2 specific: backward compatibility with older libintl versions */ #ifdef LC_MESSAGES_COMPAT if (category == LC_MESSAGES_COMPAT) category = LC_MESSAGES; #endif /* Try to find the translation among those which we found at some time. */ search.domain = NULL; search.msgid.ptr = msgid1; search.domainname = domainname; search.category = category; #ifdef HAVE_PER_THREAD_LOCALE # ifndef IN_LIBGLOCALE # ifdef _LIBC localename = __current_locale_name (category); # else categoryname = category_to_name (category); # define CATEGORYNAME_INITIALIZED localename = _nl_locale_name_thread_unsafe (category, categoryname); if (localename == NULL) localename = ""; # endif # endif search.localename = localename; # ifdef IN_LIBGLOCALE search.encoding = encoding; # endif /* Since tfind/tsearch manage a balanced tree, concurrent tfind and tsearch calls can be fatal. */ gl_rwlock_rdlock (tree_lock); foundp = (struct known_translation_t **) tfind (&search, &root, transcmp); gl_rwlock_unlock (tree_lock); if (foundp != NULL && (*foundp)->counter == _nl_msg_cat_cntr) { /* Now deal with plural. */ if (plural) retval = plural_lookup ((*foundp)->domain, n, (*foundp)->translation, (*foundp)->translation_length); else retval = (char *) (*foundp)->translation; gl_rwlock_unlock (_nl_state_lock); # ifdef _LIBC __libc_rwlock_unlock (__libc_setlocale_lock); # endif __set_errno (saved_errno); return retval; } #endif /* See whether this is a SUID binary or not. */ DETERMINE_SECURE; /* First find matching binding. */ #ifdef IN_LIBGLOCALE /* We can use a trivial binding, since _nl_find_msg will ignore it anyway, and _nl_load_domain and _nl_find_domain just pass it through. */ binding = NULL; dirname = bindtextdomain (domainname, NULL); #else for (binding = _nl_domain_bindings; binding != NULL; binding = binding->next) { int compare = strcmp (domainname, binding->domainname); if (compare == 0) /* We found it! */ break; if (compare < 0) { /* It is not in the list. */ binding = NULL; break; } } if (binding == NULL) dirname = _nl_default_dirname; else { dirname = binding->dirname; #endif if (!IS_ABSOLUTE_PATH (dirname)) { /* We have a relative path. Make it absolute now. */ char *cwd = getcwd (NULL, 0); if (cwd == NULL) /* We cannot get the current working directory. Don't signal an error but simply return the default string. */ goto return_untranslated; int ret = __asprintf (&xdirname, "%s/%s", cwd, dirname); free (cwd); if (ret < 0) goto return_untranslated; dirname = xdirname; } #ifndef IN_LIBGLOCALE } #endif /* Now determine the symbolic name of CATEGORY and its value. */ #ifndef CATEGORYNAME_INITIALIZED categoryname = category_to_name (category); #endif #ifdef IN_LIBGLOCALE categoryvalue = guess_category_value (category, categoryname, localename); #else categoryvalue = guess_category_value (category, categoryname); #endif domainname_len = strlen (domainname); xdomainname = (char *) alloca (strlen (categoryname) + domainname_len + 5); ADD_BLOCK (block_list, xdomainname); stpcpy ((char *) mempcpy (stpcpy (stpcpy (xdomainname, categoryname), "/"), domainname, domainname_len), ".mo"); /* Creating working area. */ single_locale = (char *) alloca (strlen (categoryvalue) + 1); ADD_BLOCK (block_list, single_locale); /* Search for the given string. This is a loop because we perhaps got an ordered list of languages to consider for the translation. */ while (1) { /* Make CATEGORYVALUE point to the next element of the list. */ while (categoryvalue[0] != '\0' && categoryvalue[0] == ':') ++categoryvalue; if (categoryvalue[0] == '\0') { /* The whole contents of CATEGORYVALUE has been searched but no valid entry has been found. We solve this situation by implicitly appending a "C" entry, i.e. no translation will take place. */ single_locale[0] = 'C'; single_locale[1] = '\0'; } else { char *cp = single_locale; while (categoryvalue[0] != '\0' && categoryvalue[0] != ':') *cp++ = *categoryvalue++; *cp = '\0'; /* When this is a SUID binary we must not allow accessing files outside the dedicated directories. */ if (ENABLE_SECURE && IS_PATH_WITH_DIR (single_locale)) /* Ingore this entry. */ continue; } /* If the current locale value is C (or POSIX) we don't load a domain. Return the MSGID. */ if (strcmp (single_locale, "C") == 0 || strcmp (single_locale, "POSIX") == 0) break; /* Find structure describing the message catalog matching the DOMAINNAME and CATEGORY. */ domain = _nl_find_domain (dirname, single_locale, xdomainname, binding); if (domain != NULL) { #if defined IN_LIBGLOCALE retval = _nl_find_msg (domain, binding, encoding, msgid1, &retlen); #else retval = _nl_find_msg (domain, binding, msgid1, 1, &retlen); #endif if (retval == NULL) { int cnt; for (cnt = 0; domain->successor[cnt] != NULL; ++cnt) { #if defined IN_LIBGLOCALE retval = _nl_find_msg (domain->successor[cnt], binding, encoding, msgid1, &retlen); #else retval = _nl_find_msg (domain->successor[cnt], binding, msgid1, 1, &retlen); #endif /* Resource problems are not fatal, instead we return no translation. */ if (__builtin_expect (retval == (char *) -1, 0)) goto return_untranslated; if (retval != NULL) { domain = domain->successor[cnt]; break; } } } /* Returning -1 means that some resource problem exists (likely memory) and that the strings could not be converted. Return the original strings. */ if (__builtin_expect (retval == (char *) -1, 0)) break; if (retval != NULL) { /* Found the translation of MSGID1 in domain DOMAIN: starting at RETVAL, RETLEN bytes. */ free (xdirname); FREE_BLOCKS (block_list); if (foundp == NULL) { /* Create a new entry and add it to the search tree. */ size_t msgid_len; size_t size; struct known_translation_t *newp; msgid_len = strlen (msgid1) + 1; size = offsetof (struct known_translation_t, msgid) + msgid_len + domainname_len + 1; #ifdef HAVE_PER_THREAD_LOCALE size += strlen (localename) + 1; #endif newp = (struct known_translation_t *) malloc (size); if (newp != NULL) { char *new_domainname; #ifdef HAVE_PER_THREAD_LOCALE char *new_localename; #endif new_domainname = (char *) mempcpy (newp->msgid.appended, msgid1, msgid_len); memcpy (new_domainname, domainname, domainname_len + 1); #ifdef HAVE_PER_THREAD_LOCALE new_localename = new_domainname + domainname_len + 1; strcpy (new_localename, localename); #endif newp->domainname = new_domainname; newp->category = category; #ifdef HAVE_PER_THREAD_LOCALE newp->localename = new_localename; #endif #ifdef IN_LIBGLOCALE newp->encoding = encoding; #endif newp->counter = _nl_msg_cat_cntr; newp->domain = domain; newp->translation = retval; newp->translation_length = retlen; gl_rwlock_wrlock (tree_lock); /* Insert the entry in the search tree. */ foundp = (struct known_translation_t **) tsearch (newp, &root, transcmp); gl_rwlock_unlock (tree_lock); if (foundp == NULL || __builtin_expect (*foundp != newp, 0)) /* The insert failed. */ free (newp); } } else { /* We can update the existing entry. */ (*foundp)->counter = _nl_msg_cat_cntr; (*foundp)->domain = domain; (*foundp)->translation = retval; (*foundp)->translation_length = retlen; } __set_errno (saved_errno); /* Now deal with plural. */ if (plural) retval = plural_lookup (domain, n, retval, retlen); gl_rwlock_unlock (_nl_state_lock); #ifdef _LIBC __libc_rwlock_unlock (__libc_setlocale_lock); #endif return retval; } } } return_untranslated: /* Return the untranslated MSGID. */ free (xdirname); FREE_BLOCKS (block_list); gl_rwlock_unlock (_nl_state_lock); #ifdef _LIBC __libc_rwlock_unlock (__libc_setlocale_lock); #endif #ifndef _LIBC if (!ENABLE_SECURE) { extern void _nl_log_untranslated (const char *logfilename, const char *domainname, const char *msgid1, const char *msgid2, int plural); const char *logfilename = getenv ("GETTEXT_LOG_UNTRANSLATED"); if (logfilename != NULL && logfilename[0] != '\0') _nl_log_untranslated (logfilename, domainname, msgid1, msgid2, plural); } #endif __set_errno (saved_errno); return (plural == 0 ? (char *) msgid1 /* Use the Germanic plural rule. */ : n == 1 ? (char *) msgid1 : (char *) msgid2); } /* Look up the translation of msgid within DOMAIN_FILE and DOMAINBINDING. Return it if found. Return NULL if not found or in case of a conversion failure (problem in the particular message catalog). Return (char *) -1 in case of a memory allocation failure during conversion (only if ENCODING != NULL resp. CONVERT == true). */ char * #ifdef IN_LIBGLOCALE _nl_find_msg (struct loaded_l10nfile *domain_file, struct binding *domainbinding, const char *encoding, const char *msgid, size_t *lengthp) #else _nl_find_msg (struct loaded_l10nfile *domain_file, struct binding *domainbinding, const char *msgid, int convert, size_t *lengthp) #endif { struct loaded_domain *domain; nls_uint32 nstrings; size_t act; char *result; size_t resultlen; if (domain_file->decided <= 0) _nl_load_domain (domain_file, domainbinding); if (domain_file->data == NULL) return NULL; domain = (struct loaded_domain *) domain_file->data; nstrings = domain->nstrings; /* Locate the MSGID and its translation. */ if (domain->hash_tab != NULL) { /* Use the hashing table. */ nls_uint32 len = strlen (msgid); nls_uint32 hash_val = __hash_string (msgid); nls_uint32 idx = hash_val % domain->hash_size; nls_uint32 incr = 1 + (hash_val % (domain->hash_size - 2)); while (1) { nls_uint32 nstr = W (domain->must_swap_hash_tab, domain->hash_tab[idx]); if (nstr == 0) /* Hash table entry is empty. */ return NULL; nstr--; /* Compare msgid with the original string at index nstr. We compare the lengths with >=, not ==, because plural entries are represented by strings with an embedded NUL. */ if (nstr < nstrings ? W (domain->must_swap, domain->orig_tab[nstr].length) >= len && (strcmp (msgid, domain->data + W (domain->must_swap, domain->orig_tab[nstr].offset)) == 0) : domain->orig_sysdep_tab[nstr - nstrings].length > len && (strcmp (msgid, domain->orig_sysdep_tab[nstr - nstrings].pointer) == 0)) { act = nstr; goto found; } if (idx >= domain->hash_size - incr) idx -= domain->hash_size - incr; else idx += incr; } /* NOTREACHED */ } else { /* Try the default method: binary search in the sorted array of messages. */ size_t top, bottom; bottom = 0; top = nstrings; while (bottom < top) { int cmp_val; act = (bottom + top) / 2; cmp_val = strcmp (msgid, (domain->data + W (domain->must_swap, domain->orig_tab[act].offset))); if (cmp_val < 0) top = act; else if (cmp_val > 0) bottom = act + 1; else goto found; } /* No translation was found. */ return NULL; } found: /* The translation was found at index ACT. If we have to convert the string to use a different character set, this is the time. */ if (act < nstrings) { result = (char *) (domain->data + W (domain->must_swap, domain->trans_tab[act].offset)); resultlen = W (domain->must_swap, domain->trans_tab[act].length) + 1; } else { result = (char *) domain->trans_sysdep_tab[act - nstrings].pointer; resultlen = domain->trans_sysdep_tab[act - nstrings].length; } #if defined _LIBC || HAVE_ICONV # ifdef IN_LIBGLOCALE if (encoding != NULL) # else if (convert) # endif { /* We are supposed to do a conversion. */ # ifndef IN_LIBGLOCALE const char *encoding = get_output_charset (domainbinding); # endif size_t nconversions; struct converted_domain *convd; size_t i; /* Protect against reallocation of the table. */ gl_rwlock_rdlock (domain->conversions_lock); /* Search whether a table with converted translations for this encoding has already been allocated. */ nconversions = domain->nconversions; convd = NULL; for (i = nconversions; i > 0; ) { i--; if (strcmp (domain->conversions[i].encoding, encoding) == 0) { convd = &domain->conversions[i]; break; } } gl_rwlock_unlock (domain->conversions_lock); if (convd == NULL) { /* We have to allocate a new conversions table. */ gl_rwlock_wrlock (domain->conversions_lock); nconversions = domain->nconversions; /* Maybe in the meantime somebody added the translation. Recheck. */ for (i = nconversions; i > 0; ) { i--; if (strcmp (domain->conversions[i].encoding, encoding) == 0) { convd = &domain->conversions[i]; goto found_convd; } } { /* Allocate a table for the converted translations for this encoding. */ struct converted_domain *new_conversions = (struct converted_domain *) (domain->conversions != NULL ? realloc (domain->conversions, (nconversions + 1) * sizeof (struct converted_domain)) : malloc ((nconversions + 1) * sizeof (struct converted_domain))); if (__builtin_expect (new_conversions == NULL, 0)) { /* Nothing we can do, no more memory. We cannot use the translation because it might be encoded incorrectly. */ unlock_fail: gl_rwlock_unlock (domain->conversions_lock); return (char *) -1; } domain->conversions = new_conversions; /* Copy the 'encoding' string to permanent storage. */ encoding = strdup (encoding); if (__builtin_expect (encoding == NULL, 0)) /* Nothing we can do, no more memory. We cannot use the translation because it might be encoded incorrectly. */ goto unlock_fail; convd = &new_conversions[nconversions]; convd->encoding = encoding; /* Find out about the character set the file is encoded with. This can be found (in textual form) in the entry "". If this entry does not exist or if this does not contain the 'charset=' information, we will assume the charset matches the one the current locale and we don't have to perform any conversion. */ # ifdef _LIBC convd->conv = (__gconv_t) -1; # else # if HAVE_ICONV convd->conv = (iconv_t) -1; # endif # endif { char *nullentry; size_t nullentrylen; /* Get the header entry. This is a recursion, but it doesn't reallocate domain->conversions because we pass encoding = NULL or convert = 0, respectively. */ nullentry = # ifdef IN_LIBGLOCALE _nl_find_msg (domain_file, domainbinding, NULL, "", &nullentrylen); # else _nl_find_msg (domain_file, domainbinding, "", 0, &nullentrylen); # endif /* Resource problems are fatal. If we continue onwards we will only attempt to calloc a new conv_tab and fail later. */ if (__builtin_expect (nullentry == (char *) -1, 0)) return (char *) -1; if (nullentry != NULL) { const char *charsetstr; charsetstr = strstr (nullentry, "charset="); if (charsetstr != NULL) { size_t len; char *charset; const char *outcharset; charsetstr += strlen ("charset="); len = strcspn (charsetstr, " \t\n"); charset = (char *) alloca (len + 1); # if defined _LIBC || HAVE_MEMPCPY *((char *) mempcpy (charset, charsetstr, len)) = '\0'; # else memcpy (charset, charsetstr, len); charset[len] = '\0'; # endif outcharset = encoding; # ifdef _LIBC struct gconv_spec conv_spec; __gconv_create_spec (&conv_spec, charset, outcharset); /* We always want to use transliteration. */ conv_spec.translit = true; int r = __gconv_open (&conv_spec, &convd->conv, GCONV_AVOID_NOCONV); __gconv_destroy_spec (&conv_spec); if (__builtin_expect (r != __GCONV_OK, 0)) { /* If the output encoding is the same there is nothing to do. Otherwise do not use the translation at all. */ if (__builtin_expect (r != __GCONV_NULCONV, 1)) { gl_rwlock_unlock (domain->conversions_lock); free ((char *) encoding); return NULL; } convd->conv = (__gconv_t) -1; } # else # if HAVE_ICONV /* When using GNU libc >= 2.2 or GNU libiconv >= 1.5, we want to use transliteration. */ # if (((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2) \ && !defined __UCLIBC__) \ || _LIBICONV_VERSION >= 0x0105 if (strchr (outcharset, '/') == NULL) { char *tmp; len = strlen (outcharset); tmp = (char *) alloca (len + 10 + 1); memcpy (tmp, outcharset, len); memcpy (tmp + len, "//TRANSLIT", 10 + 1); outcharset = tmp; convd->conv = iconv_open (outcharset, charset); freea (outcharset); } else # endif convd->conv = iconv_open (outcharset, charset); # endif # endif freea (charset); } } } convd->conv_tab = NULL; /* Here domain->conversions is still == new_conversions. */ domain->nconversions++; } found_convd: gl_rwlock_unlock (domain->conversions_lock); } if ( # ifdef _LIBC convd->conv != (__gconv_t) -1 # else # if HAVE_ICONV convd->conv != (iconv_t) -1 # endif # endif ) { /* We are supposed to do a conversion. First allocate an appropriate table with the same structure as the table of translations in the file, where we can put the pointers to the converted strings in. There is a slight complication with plural entries. They are represented by consecutive NUL terminated strings. We handle this case by converting RESULTLEN bytes, including NULs. */ /* This lock primarily protects the memory management variables freemem, freemem_size. It also protects write accesses to convd->conv_tab. It's not worth using a separate lock (such as domain->conversions_lock) for this purpose, because when modifying convd->conv_tab, we also need to lock freemem, freemem_size for most of the time. */ __libc_lock_define_initialized (static, lock) if (__builtin_expect (convd->conv_tab == NULL, 0)) { __libc_lock_lock (lock); if (convd->conv_tab == NULL) { convd->conv_tab = (char **) calloc (nstrings + domain->n_sysdep_strings, sizeof (char *)); if (convd->conv_tab != NULL) goto not_translated_yet; /* Mark that we didn't succeed allocating a table. */ convd->conv_tab = (char **) -1; } __libc_lock_unlock (lock); } if (__builtin_expect (convd->conv_tab == (char **) -1, 0)) /* Nothing we can do, no more memory. We cannot use the translation because it might be encoded incorrectly. */ return (char *) -1; if (convd->conv_tab[act] == NULL) { /* We haven't used this string so far, so it is not translated yet. Do this now. */ /* We use a bit more efficient memory handling. We allocate always larger blocks which get used over time. This is faster than many small allocations. */ # define INITIAL_BLOCK_SIZE 4080 static unsigned char *freemem; static size_t freemem_size; const unsigned char *inbuf; unsigned char *outbuf; int malloc_count; # ifndef _LIBC transmem_block_t *transmem_list; # endif __libc_lock_lock (lock); not_translated_yet: inbuf = (const unsigned char *) result; outbuf = freemem + sizeof (size_t); # ifndef _LIBC transmem_list = NULL; # endif malloc_count = 0; while (1) { transmem_block_t *newmem; # ifdef _LIBC size_t non_reversible; int res; if (freemem_size < sizeof (size_t)) goto resize_freemem; res = __gconv (convd->conv, &inbuf, inbuf + resultlen, &outbuf, outbuf + freemem_size - sizeof (size_t), &non_reversible); if (res == __GCONV_OK || res == __GCONV_EMPTY_INPUT) break; if (res != __GCONV_FULL_OUTPUT) { /* We should not use the translation at all, it is incorrectly encoded. */ __libc_lock_unlock (lock); return NULL; } inbuf = (const unsigned char *) result; # else # if HAVE_ICONV const char *inptr = (const char *) inbuf; size_t inleft = resultlen; char *outptr = (char *) outbuf; size_t outleft; if (freemem_size < sizeof (size_t)) goto resize_freemem; outleft = freemem_size - sizeof (size_t); if (iconv (convd->conv, (ICONV_CONST char **) &inptr, &inleft, &outptr, &outleft) != (size_t) (-1)) { outbuf = (unsigned char *) outptr; break; } if (errno != E2BIG) { __libc_lock_unlock (lock); return NULL; } # endif # endif resize_freemem: /* We must allocate a new buffer or resize the old one. */ if (malloc_count > 0) { ++malloc_count; freemem_size = malloc_count * INITIAL_BLOCK_SIZE; newmem = (transmem_block_t *) realloc (transmem_list, freemem_size); # ifdef _LIBC if (newmem != NULL) transmem_list = newmem; else { struct transmem_list *old = transmem_list; transmem_list = transmem_list->next; free (old); } # endif } else { malloc_count = 1; freemem_size = INITIAL_BLOCK_SIZE; newmem = (transmem_block_t *) malloc (freemem_size); # ifdef _LIBC if (newmem != NULL) { /* Add the block to the list of blocks we have to free at some point. */ newmem->next = transmem_list; transmem_list = newmem; } /* Fall through and return -1. */ # endif } if (__builtin_expect (newmem == NULL, 0)) { freemem = NULL; freemem_size = 0; __libc_lock_unlock (lock); return (char *) -1; } # ifdef _LIBC freemem = (unsigned char *) newmem->data; freemem_size -= offsetof (struct transmem_list, data); # else transmem_list = newmem; freemem = newmem; # endif outbuf = freemem + sizeof (size_t); } /* We have now in our buffer a converted string. Put this into the table of conversions. */ *(size_t *) freemem = outbuf - freemem - sizeof (size_t); convd->conv_tab[act] = (char *) freemem; /* Shrink freemem, but keep it aligned. */ freemem_size -= outbuf - freemem; freemem = outbuf; freemem += freemem_size & (alignof (size_t) - 1); freemem_size = freemem_size & ~ (alignof (size_t) - 1); __libc_lock_unlock (lock); } /* Now convd->conv_tab[act] contains the translation of all the plural variants. */ result = convd->conv_tab[act] + sizeof (size_t); resultlen = *(size_t *) convd->conv_tab[act]; } } /* The result string is converted. */ #endif /* _LIBC || HAVE_ICONV */ *lengthp = resultlen; return result; } /* Look up a plural variant. */ static char * plural_lookup (struct loaded_l10nfile *domain, unsigned long int n, const char *translation, size_t translation_len) { struct loaded_domain *domaindata = (struct loaded_domain *) domain->data; unsigned long int index; const char *p; index = plural_eval (domaindata->plural, n); if (index >= domaindata->nplurals) /* This should never happen. It means the plural expression and the given maximum value do not match. */ index = 0; /* Skip INDEX strings at TRANSLATION. */ p = translation; while (index-- > 0) { p = strchr (p, '\0'); /* And skip over the NUL byte. */ p++; if (p >= translation + translation_len) /* This should never happen. It means the plural expression evaluated to a value larger than the number of variants available for MSGID1. */ return (char *) translation; } return (char *) p; } #ifndef _LIBC /* Return string representation of locale CATEGORY. */ static const char * category_to_name (int category) { const char *retval; switch (category) { #ifdef LC_COLLATE case LC_COLLATE: retval = "LC_COLLATE"; break; #endif #ifdef LC_CTYPE case LC_CTYPE: retval = "LC_CTYPE"; break; #endif #ifdef LC_MONETARY case LC_MONETARY: retval = "LC_MONETARY"; break; #endif #ifdef LC_NUMERIC case LC_NUMERIC: retval = "LC_NUMERIC"; break; #endif #ifdef LC_TIME case LC_TIME: retval = "LC_TIME"; break; #endif #ifdef LC_MESSAGES case LC_MESSAGES: retval = "LC_MESSAGES"; break; #endif #ifdef LC_RESPONSE case LC_RESPONSE: retval = "LC_RESPONSE"; break; #endif #ifdef LC_ALL case LC_ALL: /* This might not make sense but is perhaps better than any other value. */ retval = "LC_ALL"; break; #endif default: /* If you have a better idea for a default value let me know. */ retval = "LC_XXX"; } return retval; } #endif /* Guess value of current locale from value of the environment variables or system-dependent defaults. */ static const char * #ifdef IN_LIBGLOCALE guess_category_value (int category, const char *categoryname, const char *locale) #else guess_category_value (int category, const char *categoryname) #endif { const char *language; #ifndef IN_LIBGLOCALE const char *locale; # ifndef _LIBC const char *language_default; int locale_defaulted; # endif #endif /* We use the settings in the following order: 1. The value of the environment variable 'LANGUAGE'. This is a GNU extension. Its value can be a colon-separated list of locale names. 2. The value of the environment variable 'LC_ALL', 'LC_xxx', or 'LANG'. More precisely, the first among these that is set to a non-empty value. This is how POSIX specifies it. The value is a single locale name. 3. A system-dependent preference list of languages. Its value can be a colon-separated list of locale names. 4. A system-dependent default locale name. This way: - System-dependent settings can be overridden by environment variables. - If the system provides both a list of languages and a default locale, the former is used. */ #ifndef IN_LIBGLOCALE /* Fetch the locale name, through the POSIX method of looking to `LC_ALL', `LC_xxx', and `LANG'. On some systems this can be done by the `setlocale' function itself. */ # ifdef _LIBC locale = __current_locale_name (category); # else locale_defaulted = 0; # if HAVE_USELOCALE locale = _nl_locale_name_thread_unsafe (category, categoryname); if (locale == NULL) # endif { locale = _nl_locale_name_posix (category, categoryname); if (locale == NULL) { locale = _nl_locale_name_default (); locale_defaulted = 1; } } # endif #endif /* Ignore LANGUAGE and its system-dependent analogon if the locale is set to "C" because 1. "C" locale usually uses the ASCII encoding, and most international messages use non-ASCII characters. These characters get displayed as question marks (if using glibc's iconv()) or as invalid 8-bit characters (because other iconv()s refuse to convert most non-ASCII characters to ASCII). In any case, the output is ugly. 2. The precise output of some programs in the "C" locale is specified by POSIX and should not depend on environment variables like "LANGUAGE" or system-dependent information. We allow such programs to use gettext(). Ignore LANGUAGE and its system-dependent analogon also if the locale is set to "C.UTF-8" or, more generally, to "C.<encoding>", because that's the by-design behaviour for glibc, see <https://sourceware.org/glibc/wiki/Proposals/C.UTF-8>. */ if (locale[0] == 'C' && (locale[1] == '\0' || locale[1] == '.')) return locale; /* The highest priority value is the value of the 'LANGUAGE' environment variable. */ language = getenv ("LANGUAGE"); if (language != NULL && language[0] != '\0') return language; #if !defined IN_LIBGLOCALE && !defined _LIBC /* The next priority value is the locale name, if not defaulted. */ if (locale_defaulted) { /* The next priority value is the default language preferences list. */ language_default = _nl_language_preferences_default (); if (language_default != NULL) return language_default; } /* The least priority value is the locale name, if defaulted. */ #endif return locale; } #if (defined _LIBC || HAVE_ICONV) && !defined IN_LIBGLOCALE /* Returns the output charset. */ static const char * get_output_charset (struct binding *domainbinding) { /* The output charset should normally be determined by the locale. But sometimes the locale is not used or not correctly set up, so we provide a possibility for the user to override this: the OUTPUT_CHARSET environment variable. Moreover, the value specified through bind_textdomain_codeset overrides both. */ if (domainbinding != NULL && domainbinding->codeset != NULL) return domainbinding->codeset; else { /* For speed reasons, we look at the value of OUTPUT_CHARSET only once. This is a user variable that is not supposed to change during a program run. */ static char *output_charset_cache; static int output_charset_cached; if (!output_charset_cached) { const char *value = getenv ("OUTPUT_CHARSET"); if (value != NULL && value[0] != '\0') { size_t len = strlen (value) + 1; char *value_copy = (char *) malloc (len); if (value_copy != NULL) memcpy (value_copy, value, len); output_charset_cache = value_copy; } output_charset_cached = 1; } if (output_charset_cache != NULL) return output_charset_cache; else { # ifdef _LIBC return _NL_CURRENT (LC_CTYPE, CODESET); # else # if HAVE_ICONV return locale_charset (); # endif # endif } } } #endif /* @@ begin of epilog @@ */ /* We don't want libintl.a to depend on any other library. So we avoid the non-standard function stpcpy. In GNU C Library this function is available, though. Also allow the symbol HAVE_STPCPY to be defined. */ #if !_LIBC && !HAVE_STPCPY static char * stpcpy (char *dest, const char *src) { while ((*dest++ = *src++) != '\0') /* Do nothing. */ ; return dest - 1; } #endif #if !_LIBC && !HAVE_MEMPCPY static void * mempcpy (void *dest, const void *src, size_t n) { return (void *) ((char *) memcpy (dest, src, n) + n); } #endif #if !_LIBC && !HAVE_TSEARCH # include "tsearch.c" #endif #ifdef _LIBC /* If we want to free all resources we have to do some work at program's end. */ void __intl_freemem (void) { void *old; while (_nl_domain_bindings != NULL) { struct binding *oldp = _nl_domain_bindings; _nl_domain_bindings = _nl_domain_bindings->next; if (oldp->dirname != _nl_default_dirname) /* Yes, this is a pointer comparison. */ free (oldp->dirname); free (oldp->codeset); free (oldp); } if (_nl_current_default_domain != _nl_default_default_domain) /* Yes, again a pointer comparison. */ free ((char *) _nl_current_default_domain); /* Remove the search tree with the known translations. */ __tdestroy (root, free); root = NULL; while (transmem_list != NULL) { old = transmem_list; transmem_list = transmem_list->next; free (old); } } #endif