about summary refs log tree commit diff
path: root/locale
diff options
context:
space:
mode:
author: Ulrich Drepper <drepper@redhat.com> 2000-11-08 23:08:32 +0000
committer: Ulrich Drepper <drepper@redhat.com> 2000-11-08 23:08:32 +0000
commit: bb39c4ef8f3f54d36462ea271cdcbc648b7409d7 (patch)
tree: e879c86e09b3c93852564379842b74c196c3730d /locale
parent: d3d99893eeedbb681f6b9b76427c33129d69834d (diff)
download: glibc-bb39c4ef8f3f54d36462ea271cdcbc648b7409d7.tar.gz
glibc-bb39c4ef8f3f54d36462ea271cdcbc648b7409d7.tar.xz
glibc-bb39c4ef8f3f54d36462ea271cdcbc648b7409d7.zip
Update.
2000-10-27  Bruno Haible  <haible@clisp.cons.org>

	* locale/programs/charmap.c (charmap_read): Verify ASCII
	compatibility of charmap.
Diffstat (limited to 'locale')
-rw-r--r-- locale/programs/charmap.c 59
1 files changed, 59 insertions, 0 deletions
diff --git a/locale/programs/charmap.c b/locale/programs/charmap.c
index 776d6ff92c..59f317a699 100644
--- a/locale/programs/charmap.c
+++ b/locale/programs/charmap.c
@@ -26,6 +26,7 @@
 #include <libintl.h>
 #include <limits.h>
 #include <obstack.h>
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
@@ -192,6 +193,64 @@ charmap_read (const char *filename)
 	       DEFAULT_CHARMAP);
     }
 
+  /* Test of ASCII compatibility of locale encoding.
+
+     Verify that the encoding to be used in a locale is ASCII compatible,
+     at least for the graphic characters, excluding the control characters,
+     '$' and '@'.  This constraint comes from an ISO C 99 restriction.
+
+     ISO C 99 section 7.17.(2) (about wchar_t):
+       the null character shall have the code value zero and each member of
+       the basic character set shall have a code value equal to its value
+       when used as the lone character in an integer character constant.
+     ISO C 99 section 5.2.1.(3):
+       Both the basic source and basic execution character sets shall have
+       the following members: the 26 uppercase letters of the Latin alphabet
+            A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
+       the 26 lowercase letters of the Latin alphabet
+            a b c d e f g h i j k l m n o p q r s t u v w x y z
+       the 10 decimal digits
+            0 1 2 3 4 5 6 7 8 9
+       the following 29 graphic characters
+            ! " # % & ' ( ) * + , - . / : ; < = > ? [ \ ] ^ _ { | } ~
+       the space character, and control characters representing horizontal
+       tab, vertical tab, and form feed.
+
+     Therefore, for all members of the "basic character set", the 'char' code
+     must have the same value as the 'wchar_t' code, which in glibc is the
+     same as the Unicode code, which for all of the enumerated characters
+     is identical to the ASCII code. */
+  if (result != NULL)
+    {
+      static const char basic_charset[] =
+	{
+	  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+	  'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+	  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+	  'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+	  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+	  '!', '"', '#', '%', '&', '\'', '(', ')', '*', '+', ',', '-',
+	  '.', '/', ':', ';', '<', '=', '>', '?', '[', '\\', ']', '^',
+	  '_', '{', '|', '}', '~', ' ', '\t', '\v', '\f', '\0'
+	};
+      int failed = 0;
+      const char *p = basic_charset;
+
+      do
+	{
+	  struct charseq * seq = charmap_find_symbol (result, p, 1);
+
+	  if (seq == NULL || seq->ucs4 != *p)
+	    failed = 1;
+	}
+      while (*p++ != '\0');
+
+      if (failed)
+	fprintf (stderr, _("\
+character map `%s' is not ASCII compatible, locale not ISO C compliant\n"),
+		 result->code_set_name);
+    }
+
   return result;
 }