about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--gen-locales.mk13
-rw-r--r--locale/findlocale.c19
-rw-r--r--locale/loadlocale.c2
-rw-r--r--localedata/Makefile4
-rw-r--r--localedata/gen-locale.sh24
-rw-r--r--localedata/tst-locale-loadlocale.c67
6 files changed, 119 insertions, 10 deletions
diff --git a/gen-locales.mk b/gen-locales.mk
index 9c523d2a05..b005a72866 100644
--- a/gen-locales.mk
+++ b/gen-locales.mk
@@ -1,8 +1,19 @@
 # defines target $(gen-locales) that generates the locales given in $(LOCALES)
 
 LOCALE_SRCS := $(shell echo "$(LOCALES)"|sed 's/\([^ .]*\)[^@ ]*\(@[^ ]*\)\?/\1\2/g')
+# The CHARMAPS dependency handling must be able to process:
+# 1. No character map e.g. eo, en_US
+# 2. Character maps e.g. en_US.UTF-8
+# 3. Character maps with modifier e.g. tt_RU.UTF-8@iqtelif
+# This complicates the processing slightly so we do it in multiple edits,
+# the first captures the character map with the anchoring period while
+# the rest of the edits remove the period to get a valid file or adjust
+# the name to match the true name.
 CHARMAPS := $(shell echo "$(LOCALES)" | \
-		    sed -e 's/[^ .]*[.]\([^@ ]*\)\(@[^@ ]*\)*/\1/g' -e s/SJIS/SHIFT_JIS/g)
+		    sed -e 's/\([^ .]*\)\([^@ ]*\)\(@[^@ ]*\)*/\2/g' \
+			-e 's/^\./ /g' \
+			-e 's/ \./ /g' \
+			-e s/SJIS/SHIFT_JIS/g)
 CTYPE_FILES = $(addsuffix /LC_CTYPE,$(LOCALES))
 gen-locales := $(addprefix $(common-objpfx)localedata/,$(CTYPE_FILES))
 
diff --git a/locale/findlocale.c b/locale/findlocale.c
index 8d6e4e33e3..43ff7201c1 100644
--- a/locale/findlocale.c
+++ b/locale/findlocale.c
@@ -244,7 +244,14 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len,
       locale_file = locale_file->successor[cnt];
 
       if (locale_file == NULL)
-	return NULL;
+	{
+	  /* If this is the second time we tried to load a failed
+	     locale then the locale_file value comes from the cache
+	     and we will not carry out any actual filesystem
+	     operations so we must set ENOENT here.  */
+	  __set_errno (ENOENT);
+	  return NULL;
+	}
     }
 
   /* The LC_CTYPE category allows to check whether a locale is really
@@ -291,8 +298,14 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len,
       if (__gconv_compare_alias (upstr (ccodeset, ccodeset),
 				 upstr (clocale_codeset,
 					clocale_codeset)) != 0)
-	/* The codesets are not identical, don't use the locale.  */
-	return NULL;
+	{
+	  /* The codesets are not identical, don't use the locale.
+	     If this is the second time we tried to load a locale
+	     whose codeset doesn't match then the result came from
+	     the cache and must set ENOENT here.  */
+	  __set_errno (ENOENT);
+	  return NULL;
+	}
     }
 
   /* Determine the locale name for which loading succeeded.  This
diff --git a/locale/loadlocale.c b/locale/loadlocale.c
index 1e5f93e927..991c0591e9 100644
--- a/locale/loadlocale.c
+++ b/locale/loadlocale.c
@@ -237,7 +237,6 @@ _nl_load_locale (struct loaded_l10nfile *file, int category)
   int save_err;
   int alloc = ld_mapped;
 
-  file->decided = 1;
   file->data = NULL;
 
   fd = __open_nocancel (file->filename, O_RDONLY | O_CLOEXEC);
@@ -345,6 +344,7 @@ _nl_load_locale (struct loaded_l10nfile *file, int category)
   newdata->alloc = alloc;
 
   file->data = newdata;
+  file->decided = 1;
 }
 
 void
diff --git a/localedata/Makefile b/localedata/Makefile
index 713e7aebad..6d0bac225b 100644
--- a/localedata/Makefile
+++ b/localedata/Makefile
@@ -239,6 +239,7 @@ tests = \
   tst-iconv-emojis-trans \
   tst-iconv-math-trans \
   tst-leaks \
+  tst-locale-loadlocale \
   tst-mbswcs1 \
   tst-mbswcs2 \
   tst-mbswcs3 \
@@ -325,6 +326,7 @@ LOCALES := \
   dsb_DE.UTF-8 \
   dz_BT.UTF-8 \
   en_GB.UTF-8 \
+  en_US \
   en_US.ANSI_X3.4-1968 \
   en_US.ISO-8859-1\
   en_US.UTF-8 \
@@ -406,6 +408,8 @@ include ../gen-locales.mk
 $(objpfx)tst-iconv-emojis-trans.out: $(gen-locales)
 
 $(objpfx)tst-iconv-math-trans.out: $(gen-locales)
+# tst-locale-loadlocale: Needs an en_US-named locale for the test.
+$(objpfx)tst-locale-loadlocale.out: $(gen-locales)
 endif
 
 include ../Rules
diff --git a/localedata/gen-locale.sh b/localedata/gen-locale.sh
index b8c422ad13..e400d02b82 100644
--- a/localedata/gen-locale.sh
+++ b/localedata/gen-locale.sh
@@ -48,9 +48,9 @@ generate_locale ()
 }
 
 locfile=`echo $locfile|sed 's|.*/\([^/]*/LC_CTYPE\)|\1|'`
-locale=`echo $locfile|sed 's|\([^.]*\)[.].*/LC_CTYPE|\1|'`
-charmap=`echo $locfile|sed 's|[^.]*[.]\([^@ ]*\)\(@[^ ]*\)\?/LC_CTYPE|\1|'`
-modifier=`echo $locfile|sed 's|[^.]*[.]\([^@ ]*\)\(@[^ ]*\)\?/LC_CTYPE|\2|'`
+locale=`echo $locfile|sed 's|\([^.]*\).*/LC_CTYPE|\1|'`
+charmap=`echo $locfile|sed -e 's|[^.]*\([^@ ]*\)\(@[^ ]*\)\?/LC_CTYPE|\1|' -e 's|^\.||g'`
+modifier=`echo $locfile|sed 's|[^.]*\([^@ ]*\)\(@[^ ]*\)\?/LC_CTYPE|\2|'`
 
 echo "Generating locale $locale.$charmap: this might take a while..."
 
@@ -63,10 +63,16 @@ echo "Generating locale $locale.$charmap: this might take a while..."
 # you to develop in-progress locales.
 flags=""
 
+charmap_real="$charmap"
+
+# If no character map is specified then we fall back to UTF-8.
+if [ -z "$charmap" ]; then
+  charmap_real="UTF-8"
+fi
+
 # For SJIS the charmap is SHIFT_JIS. We just want the locale to have
 # a slightly nicer name instead of using "*.SHIFT_SJIS", but that
 # means we need a mapping here.
-charmap_real="$charmap"
 if [ "$charmap" = "SJIS" ]; then
   charmap_real="SHIFT_JIS"
 fi
@@ -80,4 +86,12 @@ if [ "$charmap_real" = 'SHIFT_JIS' ] \
   flags="$flags --no-warnings=ascii"
 fi
 
-generate_locale $charmap_real $locale$modifier $locale.$charmap$modifier "$flags"
+# If the character map is not specified then we output a locale
+# with the just the name of the locale e.g. eo, en_US. This is
+# used for test locales that act as fallbacks.
+output_file="$locale.$charmap$modifier"
+if [ -z "$charmap" ]; then
+  output_file="$locale"
+fi
+
+generate_locale $charmap_real $locale$modifier $output_file "$flags"
diff --git a/localedata/tst-locale-loadlocale.c b/localedata/tst-locale-loadlocale.c
new file mode 100644
index 0000000000..64796e975d
--- /dev/null
+++ b/localedata/tst-locale-loadlocale.c
@@ -0,0 +1,67 @@
+/* Test for locale loading error handling (Bug 14247)
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <stdio.h>
+#include <errno.h>
+#include <locale.h>
+#include <support/check.h>
+#include <support/support.h>
+
+static int
+do_test (void)
+{
+  locale_t loc = NULL;
+
+  /* We must load an en_US-based locale for the final test to fail.
+     The locale loading code will find the en_US locale already loaded
+     but the codesets won't match.  */
+  xsetlocale (LC_ALL, "en_US.UTF-8");
+
+  /* Call newlocale with an invalid locale.  We expect the test system
+     does not have "invalidlocale" locale.  */
+  errno = 0;
+  loc = newlocale (LC_COLLATE_MASK | LC_CTYPE_MASK, "invalidlocale.utf8", 0);
+  TEST_VERIFY (loc == NULL && errno != 0);
+
+  /* Call newlocale again with the same name. This triggers bug 14247 where
+     the second call fails to set errno correctly.  */
+  errno = 0;
+  loc = newlocale (LC_COLLATE_MASK | LC_CTYPE_MASK, "invalidlocale.utf8", 0);
+  TEST_VERIFY (loc == NULL && errno != 0);
+
+  /* Next we attempt to load a locale that exists but whose codeset
+     does not match the codeset of the locale on the system.
+     This is more difficult to test but we rely on the fact that locale
+     processing will normalize the locale name and attempt to open
+     "en_US" with no codeset as a fallback and this will allow us to
+     compare a loaded "en_US" locale with a UTF-8 codeset to the
+     ficiticious "en_US.utf99" and get a codeset match failure.  */
+  errno = 0;
+  loc = newlocale (LC_COLLATE_MASK | LC_CTYPE_MASK, "en_US.utf99", 0);
+  TEST_VERIFY (loc == NULL && errno != 0);
+
+  /* Call newlocale again with the same name. This triggers bug 14247 where
+     the second call fails to set errno correctly.  */
+  errno = 0;
+  loc = newlocale (LC_COLLATE_MASK | LC_CTYPE_MASK, "en_US.utf99", 0);
+  TEST_VERIFY (loc == NULL && errno != 0);
+
+  return 0;
+}
+
+#include <support/test-driver.c>