about summary refs log tree commit diff
diff options
context:
space:
mode:
authorRich Felker <dalias@aerifal.cx>2017-03-21 08:59:48 -0400
committerRich Felker <dalias@aerifal.cx>2017-03-21 10:56:08 -0400
commite4fc9ad780e36c84e1ed6b0fc01b3c53ae65ff9d (patch)
tree82d15347988c014c8784a1065f5748575dbf6f34
parent16319a5df9d50cfc642ffc8db76bc36562d4b3dd (diff)
downloadmusl-e4fc9ad780e36c84e1ed6b0fc01b3c53ae65ff9d.tar.gz
musl-e4fc9ad780e36c84e1ed6b0fc01b3c53ae65ff9d.tar.xz
musl-e4fc9ad780e36c84e1ed6b0fc01b3c53ae65ff9d.zip
search locale name variants for gettext translations
often translations will be named only by language, whereas locale
names may also include a territory code, modifier, and codeset
portion. previously, only translations exactly matching the locale
name were loaded. this was a major usability issue, requiring
workarounds like symlinks or tweaking of the locale name.

with these changes, gettext now searches for translations by first
removing the codeset portion of the locale name, then trying the
remainder in full, with modifier (@mod) removed, with territory code
(_XX) removed, and with both removed.

part of the reason gettext lacked support for searching fallbacks
before is that the candidate pathname for a translation file was
constructed on each call and used as the key to lookup an
already-mapped translation file. this was very costly/inefficient. we
now use the tuple of textdomain binding pointer, locale map pointer,
and integer category id as the key for looking up a translation file
mapping.

based on patch by He X.
-rw-r--r--src/locale/dcngettext.c87
1 files changed, 55 insertions, 32 deletions
diff --git a/src/locale/dcngettext.c b/src/locale/dcngettext.c
index b68e24bc..b79b7010 100644
--- a/src/locale/dcngettext.c
+++ b/src/locale/dcngettext.c
@@ -100,7 +100,9 @@ struct msgcat {
 	size_t map_size;
 	void *volatile plural_rule;
 	volatile int nplurals;
-	char name[];
+	struct binding *binding;
+	const struct __locale_map *lm;
+	int cat;
 };
 
 static char *dummy_gettextdomain()
@@ -120,8 +122,8 @@ char *dcngettext(const char *domainname, const char *msgid1, const char *msgid2,
 	struct msgcat *p;
 	struct __locale_struct *loc = CURRENT_LOCALE;
 	const struct __locale_map *lm;
-	const char *dirname, *locname, *catname;
-	size_t dirlen, loclen, catlen, domlen;
+	size_t domlen;
+	struct binding *q;
 
 	if ((unsigned)category >= LC_ALL) goto notrans;
 
@@ -130,55 +132,76 @@ char *dcngettext(const char *domainname, const char *msgid1, const char *msgid2,
 	domlen = strnlen(domainname, NAME_MAX+1);
 	if (domlen > NAME_MAX) goto notrans;
 
-	dirname = gettextdir(domainname, &dirlen);
-	if (!dirname) goto notrans;
+	for (q=bindings; q; q=q->next)
+		if (!strcmp(q->domainname, domainname) && q->active)
+			break;
+	if (!q) goto notrans;
 
 	lm = loc->cat[category];
 	if (!lm) {
 notrans:
 		return (char *) ((n == 1) ? msgid1 : msgid2);
 	}
-	locname = lm->name;
-
-	catname = catnames[category];
-	catlen = catlens[category];
-	loclen = strlen(locname);
-
-	size_t namelen = dirlen+1 + loclen+1 + catlen+1 + domlen+3;
-	char name[namelen+1], *s = name;
-
-	memcpy(s, dirname, dirlen);
-	s[dirlen] = '/';
-	s += dirlen + 1;
-	memcpy(s, locname, loclen);
-	s[loclen] = '/';
-	s += loclen + 1;
-	memcpy(s, catname, catlen);
-	s[catlen] = '/';
-	s += catlen + 1;
-	memcpy(s, domainname, domlen);
-	s[domlen] = '.';
-	s[domlen+1] = 'm';
-	s[domlen+2] = 'o';
-	s[domlen+3] = 0;
 
 	for (p=cats; p; p=p->next)
-		if (!strcmp(p->name, name))
+		if (p->binding == q && p->lm == lm && p->cat == category)
 			break;
 
 	if (!p) {
+		const char *dirname, *locname, *catname, *modname, *locp;
+		size_t dirlen, loclen, catlen, modlen, alt_modlen;
 		void *old_cats;
 		size_t map_size;
-		const void *map = __map_file(name, &map_size);
+
+		dirname = q->dirname;
+		locname = lm->name;
+		catname = catnames[category];
+
+		dirlen = q->dirlen;
+		loclen = strlen(locname);
+		catlen = catlens[category];
+
+		/* Logically split @mod suffix from locale name. */
+		modname = memchr(locname, '@', loclen);
+		if (!modname) modname = locname + loclen;
+		alt_modlen = modlen = loclen - (modname-locname);
+		loclen = modname-locname;
+
+		/* Drop .charset identifier; it is not used. */
+		const char *csp = memchr(locname, '.', loclen);
+		if (csp) loclen = csp-locname;
+
+		char name[dirlen+1 + loclen+modlen+1 + catlen+1 + domlen+3 + 1];
+		const void *map;
+
+		for (;;) {
+			snprintf(name, sizeof name, "%s/%.*s%.*s/%s/%s.mo\0",
+				dirname, (int)loclen, locname,
+				(int)alt_modlen, modname, catname, domainname);
+			if (map = __map_file(name, &map_size)) break;
+
+			/* Try dropping @mod, _YY, then both. */
+			if (alt_modlen) {
+				alt_modlen = 0;
+			} else if ((locp = memchr(locname, '_', loclen))) {
+				loclen = locp-locname;
+				alt_modlen = modlen;
+			} else {
+				break;
+			}
+		}
 		if (!map) goto notrans;
-		p = calloc(sizeof *p + namelen + 1, 1);
+
+		p = calloc(sizeof *p, 1);
 		if (!p) {
 			__munmap((void *)map, map_size);
 			goto notrans;
 		}
+		p->cat = category;
+		p->binding = q;
+		p->lm = lm;
 		p->map = map;
 		p->map_size = map_size;
-		memcpy(p->name, name, namelen+1);
 		do {
 			old_cats = cats;
 			p->next = old_cats;