about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 16
-rw-r--r-- iconv/gconv_trans.c 8
-rw-r--r-- locale/C-ctype.c 13
-rw-r--r-- locale/C-translit.h 21
-rw-r--r-- locale/C-translit.h.in 97
-rw-r--r-- locale/Makefile 8
-rw-r--r-- locale/categories.def 3
-rw-r--r-- locale/gen-translit.pl 142
-rw-r--r-- locale/langinfo.h 3
-rw-r--r-- locale/programs/ld-ctype.c 18
10 files changed, 303 insertions, 26 deletions
diff --git a/ChangeLog b/ChangeLog
index d24d3ea16b..e3fc79e77e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,21 @@
 2000-07-22  Ulrich Drepper  <drepper@redhat.com>
 
+	* iconv/gconv_trans.c: Correct a few bugs in the search loop.  Remove
+	remainders of hash table.
+	* locale/categories.def: Remove remainders of transliteration
+	hash table.
+	* locale/langinfo.h: Likewise.
+	* locale/programs/ld-ctype.c: Likewise.  Fix code to write out
+	transliteration tables.
+
+	* locale/gen-translit.pl: New file.
+	* locale/C-translit.h.in: New file.
+	* locale/C-ctype.c: Include C-translit.h.  Initialize transliteration
+	data pointers with data from this file.
+	* locale/Makefile (distribute): Add C-translit.h.in, C-translit.h,
+	and gen-translit.pl.
+	Add rule to generate C-translit.h.
+
 	* stdio-common/vfscanf.c: Handle input -- with format %f correctly
 	(it's no input error).
 	* stdio-common/tstscanf.c: Add test case for format %f with input --.
diff --git a/iconv/gconv_trans.c b/iconv/gconv_trans.c
index db7c567f1c..bb908176e6 100644
--- a/iconv/gconv_trans.c
+++ b/iconv/gconv_trans.c
@@ -41,7 +41,6 @@ __gconv_transliterate (struct __gconv_step *step,
 {
   /* Find out about the locale's transliteration.  */
   uint_fast32_t size;
-  uint_fast32_t layers;
   uint32_t *from_idx;
   uint32_t *from_tbl;
   uint32_t *to_idx;
@@ -57,12 +56,11 @@ __gconv_transliterate (struct __gconv_step *step,
 
   /* If there is no transliteration information in the locale don't do
      anything and return the error.  */
-  size = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_HASH_SIZE);
+  size = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_TAB_SIZE);
   if (size == 0)
     goto no_rules;
 
   /* Get the rest of the values.  */
-  layers = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_HASH_LAYERS);
   from_idx = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_IDX);
   from_tbl = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_TBL);
   to_idx = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_IDX);
@@ -148,9 +146,9 @@ __gconv_transliterate (struct __gconv_step *step,
 	return __GCONV_INCOMPLETE_INPUT;
 
       if (winbuf + cnt >= winbufend || from_tbl[idx + cnt] < winbuf[cnt])
-	low = idx;
+	low = med + 1;
       else
-	high = idx;
+	high = med;
     }
 
  no_rules:
diff --git a/locale/C-ctype.c b/locale/C-ctype.c
index e93a585538..fe1e8ac1fb 100644
--- a/locale/C-ctype.c
+++ b/locale/C-ctype.c
@@ -20,6 +20,8 @@
 #include "localeinfo.h"
 #include <endian.h>
 
+#include "C-translit.h"
+
 /* This table's entries are taken from POSIX.2 Table 2-6
    ``LC_CTYPE Category Definition in the POSIX Locale''.
 
@@ -420,12 +422,11 @@ const struct locale_data _nl_C_LC_CTYPE =
     { word: L'7' },
     { word: L'8' },
     { word: L'9' },
-    { word: 0 },
-    { word: 0 },
-    { string: NULL },
-    { string: NULL },
-    { string: NULL },
-    { string: NULL },
+    { word: NTRANSLIT },
+    { wstr: translit_from_idx },
+    { wstr: (uint32_t *) translit_from_tbl },
+    { wstr: translit_to_idx },
+    { wstr: (uint32_t *) translit_to_tbl },
     { word: 1 },
     { wstr: (uint32_t *) L"?" },
     { word: 0 },
diff --git a/locale/C-translit.h b/locale/C-translit.h
new file mode 100644
index 0000000000..2d42133b0e
--- /dev/null
+++ b/locale/C-translit.h
@@ -0,0 +1,21 @@
+#define NTRANSLIT 20
+static const uint32_t translit_from_idx[] =
+{
+     0,    2,    4,    6,    8,   10,   12,   14,   16,   18,   20,   22,
+    24,   26,   28,   30,   32,   34,   36,   38
+};
+static const wchar_t translit_from_tbl[] =
+  L"\xa9" L"\0" L"\xab" L"\0" L"\xae" L"\0" L"\xbb" L"\0" L"\xbc" L"\0"
+  L"\xbd" L"\0" L"\xbe" L"\0" L"\xc4" L"\0" L"\xc5" L"\0" L"\xc6" L"\0"
+  L"\xd6" L"\0" L"\xdc" L"\0" L"\xdf" L"\0" L"\xe4" L"\0" L"\xe5" L"\0"
+  L"\xe6" L"\0" L"\xf6" L"\0" L"\xfc" L"\0" L"\x201c" L"\0" L"\x201d";
+static const uint32_t translit_to_idx[] =
+{
+     0,    5,    9,   14,   18,   23,   28,   33,   37,   41,   45,   49,
+    53,   57,   61,   65,   69,   73,   77,   80
+};
+static const wchar_t translit_to_tbl[] =
+  L"(C)\0" L"\0" L"<<\0" L"\0" L"(R)\0" L"\0" L">>\0" L"\0" L"1/4\0" L"\0"
+  L"1/2\0" L"\0" L"3/4\0" L"\0" L"AE\0" L"\0" L"AA\0" L"\0" L"AE\0" L"\0"
+  L"OE\0" L"\0" L"UE\0" L"\0" L"ss\0" L"\0" L"ae\0" L"\0" L"aa\0" L"\0"
+  L"ae\0" L"\0" L"oe\0" L"\0" L"ue\0" L"\0" L"\"\0" L"\0" L"\"\0";
diff --git a/locale/C-translit.h.in b/locale/C-translit.h.in
new file mode 100644
index 0000000000..e2f711ea59
--- /dev/null
+++ b/locale/C-translit.h.in
@@ -0,0 +1,97 @@
+/* Transliteration for the C locale.
+   Copyright (C) 2000 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@redhat.com>, 2000.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+/* The entries here have to be sorted relative to the input string.  */
+
+/* <U00A9> COPYRIGHT SIGN.  */
+"\xa9"   "(C)"
+
+/* <U00AB> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK.  */
+"\xab"   "<<"
+
+/* <U00AE> REGISTERED SIGN.  */
+"\xae"   "(R)"
+
+/* <U00BB> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK.  */
+"\xbb"   ">>"
+
+/* <U00BC> VULGAR FRACTION ONE QUARTER.  */
+"\xbc"   "1/4"
+
+/* <U00BD> VULGAR FRACTION ONE HALF.  */
+"\xbd"   "1/2"
+
+/* <U00BE> VULGAR FRACTION THREE QUARTERS.  */
+"\xbe"   "3/4"
+
+/* <U00C4> LATIN CAPITAL LETTER A WITH DIAERESIS.  */
+/* XXX It is not clear whether this is the best transliteration for
+   all locales.  If not, we probably have to take it out completely.  */
+"\xc4"   "AE"
+
+/* <U00C5> LATIN CAPITAL LETTER A WITH RING ABOVE.  */
+/* XXX It is not clear whether this is the best transliteration for
+   all locales.  If not, we probably have to take it out completely.  */
+"\xc5"   "AA"
+
+/* <U00C6> LATIN CAPITAL LETTER AE.  */
+"\xc6"   "AE"
+
+/* <U00D6> LATIN CAPITAL LETTER O WITH DIAERESIS.  */
+/* XXX It is not clear whether this is the best transliteration for
+   all locales.  If not, we probably have to take it out completely.  */
+"\xd6"   "OE"
+
+/* <U00DC> LATIN CAPITAL LETTER U WITH DIAERESIS.  */
+/* XXX It is not clear whether this is the best transliteration for
+   all locales.  If not, we probably have to take it out completely.  */
+"\xdc"   "UE"
+
+/* <U00DF> LATIN SMALL LETTER SHARP S.  */
+"\xdf"   "ss"
+
+/* <U00E4> LATIN SMALL LETTER A WITH DIAERESIS.  */
+/* XXX It is not clear whether this is the best transliteration for
+   all locales.  If not, we probably have to take it out completely.  */
+"\xe4"   "ae"
+
+/* <U00E5> LATIN SMALL LETTER A WITH RING ABOVE.  */
+/* XXX It is not clear whether this is the best transliteration for
+   all locales.  If not, we probably have to take it out completely.  */
+"\xe5"   "aa"
+
+/* <U00E6> LATIN SMALL LETTER AE.  */
+"\xe6"   "ae"
+
+/* <U00F6> LATIN SMALL LETTER O WITH DIAERESIS.  */
+/* XXX It is not clear whether this is the best transliteration for
+   all locales.  If not, we probably have to take it out completely.  */
+"\xf6"   "oe"
+
+/* <U00FC> LATIN SMALL LETTER U WITH DIAERESIS.  */
+/* XXX It is not clear whether this is the best transliteration for
+   all locales.  If not, we probably have to take it out completely.  */
+"\xfc"   "ue"
+
+/* <U201C> LEFT DOUBLE QUOTATION MARK.  */
+"\x201c" "\""
+
+/* <U201D> RIGHT DOUBLE QUOTATION MARK.  */
+"\x201d" "\""
diff --git a/locale/Makefile b/locale/Makefile
index db71cc2422..2825a697c2 100644
--- a/locale/Makefile
+++ b/locale/Makefile
@@ -25,6 +25,7 @@ headers		= locale.h langinfo.h xlocale.h
 distribute	= localeinfo.h categories.def iso-639.def iso-3166.def \
 		  iso-4217.def weight.h weightwc.h strlen-hash.h elem-hash.h \
 		  indigits.h indigitswc.h outdigits.h outdigitswc.h \
+		  C-translit.h.in C-translit.h gen-translit.pl \
 		  $(addprefix programs/, \
 			      locale.c localedef.c \
 			      $(localedef-modules:=.c) $(locale-modules:=.c) \
@@ -73,6 +74,13 @@ $(objpfx)localedef: $(localedef-modules:%=$(objpfx)%.o)
 $(objpfx)locale: $(locale-modules:%=$(objpfx)%.o)
 $(objpfx)localedef $(objpfx)locale: $(lib-modules:%=$(objpfx)%.o)
 
+C-translit.h: C-translit.h.in gen-translit.pl
+	$(PERL) gen-translit.pl < $< > $@.tmp
+	$(move-if-change) $@.tmp $@
+ifeq ($(with-cvs),yes)
+	test ! -d CVS || cvs $(CVSOPTS) commit -mRegenerated $@
+endif
+
 localepath = "$(localedir):$(i18ndir)"
 
 locale-CPPFLAGS := -DLOCALE_PATH='$(localepath)' \
diff --git a/locale/categories.def b/locale/categories.def
index 8e5e65a878..a8fa30e575 100644
--- a/locale/categories.def
+++ b/locale/categories.def
@@ -126,8 +126,7 @@ DEFINE_CATEGORY
   DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT7_WC, "ctype-outdigit7_wc", std, word)
   DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT8_WC, "ctype-outdigit8_wc", std, word)
   DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT9_WC, "ctype-outdigit9_wc", std, word)
-  DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_HASH_SIZE, "ctype-translit-hash-size", std, word)
-  DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_HASH_LAYERS, "ctype-translit-hash-layers", std, word)
+  DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_TAB_SIZE, "ctype-translit-tab-size", std, word)
   DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_FROM_IDX, "ctype-translit-from-idx", std, string)
   DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_FROM_TBL, "ctype-translit-from-tbl", std, string)
   DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_TO_IDX, "ctype-translit-to-idx", std, string)
diff --git a/locale/gen-translit.pl b/locale/gen-translit.pl
new file mode 100644
index 0000000000..b6fba77c80
--- /dev/null
+++ b/locale/gen-translit.pl
@@ -0,0 +1,142 @@
+#! /usr/bin/perl -w
+# Use `or': with `||' the die binds to the (always true) command string.
+open F, "cat C-translit.h.in | gcc -E - |" or die "Cannot preprocess input file";
+
+# Return the number of characters denoted by STR, the body of a C string
+# literal.  A `\x' escape (at most eight hex digits are consumed) and any
+# other two-character backslash escape each count as one character.
+sub cstrlen {
+  my($str) = @_;
+  my($len) = length($str);
+  my($cnt);
+  my($res) = 0;
+
+  for ($cnt = 0; $cnt < $len; ++$cnt) {
+    # Each iteration consumes exactly one (possibly escaped) character.
+    ++$res;
+    next if (substr($str, $cnt, 1) ne '\\');
+
+    if (substr($str, $cnt + 1, 1) eq 'x') {
+      my($inner) = $cnt + 2;
+      while ($inner < $len && $inner < $cnt + 10
+	     && substr($str, $inner, 1) =~ /[0-9a-fA-F]/) {
+	++$inner;
+      }
+      # $inner is the first position after the escape.  Step back by one
+      # since the loop increment advances again; otherwise the character
+      # following the escape would be skipped and not counted.
+      $cnt = $inner - 1;
+    } else {
+      die "invalid input" if ($cnt + 1 >= $len);
+      ++$cnt;
+    }
+  }
+
+  return $res;
+}
+
+while (<F>) {				# Collect every "from" "to" pair.
+  next if (/^#/);			# Skip lines starting with `#'.
+  next if (/^[ 	]*$/);			# Skip empty lines.
+  chop;					# Remove the line's last character.
+
+  if (/"([^\"]*)"[ 	]*"(.*)"/) {	# $2 is greedy, so it may contain \".
+    my($from) = $1;
+    my($to) = $2;
+    my($fromlen) = cstrlen($from);	# Lengths as computed by cstrlen.
+    my($tolen) = cstrlen($to);
+
+    push(@froms, $from);		# Parallel arrays, one slot per pair.
+    push(@fromlens, $fromlen);
+    push(@tos, $to);
+    push(@tolens, $tolen);
+  }
+}
+
+printf "#define NTRANSLIT %d\n", $#froms + 1;	# Number of pairs read.
+
+printf "static const uint32_t translit_from_idx[] =\n{\n  ";
+$col = 2;				# Output column, used for wrapping.
+$total = 0;				# Offset of the entry in the from table.
+for ($cnt = 0; $cnt <= $#fromlens; ++$cnt) {
+  if ($cnt != 0) {
+    if ($col + 7 >= 79) {		# Line is full: continue on a new one.
+      printf(",\n  ");
+      $col = 2;
+    } else {
+      printf(", ");
+      $col += 2;
+    }
+  }
+  printf("%4d", $total);
+  $total += $fromlens[$cnt] + 1;	# +1 for the L"\0" between entries.
+  $col += 4;
+}
+printf("\n};\n");
+
+printf "static const wchar_t translit_from_tbl[] =\n ";
+$col = 1;				# Output column, used for wrapping.
+for ($cnt = 0; $cnt <= $#froms; ++$cnt) {
+  if ($cnt != 0) {
+    if ($col + 6 >= 79) {
+      printf("\n ");
+      $col = 1;
+    }
+    printf(" L\"\\0\"");		# Separator between two entries.
+    $col += 6;
+  }
+  if ($col > 2 && $col + length($froms[$cnt]) + 4 >= 79) {
+    printf("\n  ");
+    $col = 2;
+  } else {
+    printf(" ");
+    ++$col;
+  }
+  print "L\"$froms[$cnt]\"";		# print: a `%' in the data is no format.
+  $col += length($froms[$cnt]) + 3;
+}
+printf(";\n");
+
+printf "static const uint32_t translit_to_idx[] =\n{\n  ";
+$col = 2;				# Output column, used for wrapping.
+$total = 0;				# Offset of the entry in the to table.
+for ($cnt = 0; $cnt <= $#tolens; ++$cnt) {
+  if ($cnt != 0) {
+    if ($col + 7 >= 79) {		# Line is full: continue on a new one.
+      printf(",\n  ");
+      $col = 2;
+    } else {
+      printf(", ");
+      $col += 2;
+    }
+  }
+  printf("%4d", $total);
+  $total += $tolens[$cnt] + 2;		# +2: the entry's \0 and the L"\0".
+  $col += 4;
+}
+printf("\n};\n");
+
+printf "static const wchar_t translit_to_tbl[] =\n ";
+$col = 1;				# Output column, used for wrapping.
+for ($cnt = 0; $cnt <= $#tos; ++$cnt) {
+  if ($cnt != 0) {
+    if ($col + 6 >= 79) {
+      printf("\n ");
+      $col = 1;
+    }
+    printf(" L\"\\0\"");		# Separator between two entries.
+    $col += 6;
+  }
+  if ($col > 2 && $col + length($tos[$cnt]) + 6 >= 79) {
+    printf("\n  ");
+    $col = 2;
+  } else {
+    printf(" ");
+    ++$col;
+  }
+  print "L\"$tos[$cnt]\\0\"";		# print: a `%' in the data is no format.
+  $col += length($tos[$cnt]) + 5;
+}
+printf(";\n");
+
+exit 0;
diff --git a/locale/langinfo.h b/locale/langinfo.h
index d2cc2a8672..69d7292765 100644
--- a/locale/langinfo.h
+++ b/locale/langinfo.h
@@ -316,8 +316,7 @@ enum
   _NL_CTYPE_OUTDIGIT7_WC,
   _NL_CTYPE_OUTDIGIT8_WC,
   _NL_CTYPE_OUTDIGIT9_WC,
-  _NL_CTYPE_TRANSLIT_HASH_SIZE,
-  _NL_CTYPE_TRANSLIT_HASH_LAYERS,
+  _NL_CTYPE_TRANSLIT_TAB_SIZE,
   _NL_CTYPE_TRANSLIT_FROM_IDX,
   _NL_CTYPE_TRANSLIT_FROM_TBL,
   _NL_CTYPE_TRANSLIT_TO_IDX,
diff --git a/locale/programs/ld-ctype.c b/locale/programs/ld-ctype.c
index e297aeb254..5dfcec3339 100644
--- a/locale/programs/ld-ctype.c
+++ b/locale/programs/ld-ctype.c
@@ -173,13 +173,11 @@ struct locale_ctype_t
   unsigned char *width;
   uint32_t mb_cur_max;
   const char *codeset_name;
-  uint32_t translit_hash_size;
-  uint32_t translit_hash_layers;
   uint32_t *translit_from_idx;
   uint32_t *translit_from_tbl;
   uint32_t *translit_to_idx;
   uint32_t *translit_to_tbl;
-  size_t translit_idx_size;
+  uint32_t translit_idx_size;
   size_t translit_from_tbl_size;
   size_t translit_to_tbl_size;
 
@@ -866,7 +864,7 @@ ctype_output (struct localedef_t *locale, struct charmap_t *charmap,
 	  {
 #define CTYPE_EMPTY(name) \
 	  case name:							      \
-	    iov[2 + elem + offset].iov_base = "";			      \
+	    iov[2 + elem + offset].iov_base = (void *) "";		      \
 	    iov[2 + elem + offset].iov_len = 0;				      \
 	    idx[elem + 1] = idx[elem];					      \
 	    break
@@ -911,14 +909,12 @@ ctype_output (struct localedef_t *locale, struct charmap_t *charmap,
 		      ctype->names, (ctype->plane_size * ctype->plane_cnt
 				     * sizeof (uint32_t)));
 
-	  CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_SIZE,
-		      &ctype->translit_hash_size, sizeof (uint32_t));
-	  CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_LAYERS,
-		      &ctype->translit_hash_layers, sizeof (uint32_t));
+	  CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE,
+		      &ctype->translit_idx_size, sizeof (uint32_t));
 
 	  CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
 		      ctype->translit_from_idx,
-		      ctype->translit_idx_size);
+		      ctype->translit_idx_size * sizeof (uint32_t));
 
 	  CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
 		      ctype->translit_from_tbl,
@@ -926,7 +922,7 @@ ctype_output (struct localedef_t *locale, struct charmap_t *charmap,
 
 	  CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
 		      ctype->translit_to_idx,
-		      ctype->translit_idx_size);
+		      ctype->translit_idx_size * sizeof (uint32_t));
 
 	  CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
 		      ctype->translit_to_tbl, ctype->translit_to_tbl_size);
@@ -3664,7 +3660,7 @@ Computing table size for character classes might take a while..."),
 	}
 
       /* Store the information about the length.  */
-      ctype->translit_idx_size = number * sizeof (uint32_t);
+      ctype->translit_idx_size = number;
       ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
       ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
     }