Thu Mar 28 03:25:10 1996 Roland McGrath <roland@charlie-brown.gnu.ai.mit.edu>

* intl/Makefile (copysrc): Add missing > in sed cmd. Sat Mar 23 17:52:49 1996 Ulrich Drepper <drepper@gnu.ai.mit.edu> * Makeconfig: Rename Makefile variable nlsdir to i18ndir and change value to $(datadir)/i18n. `nls' is not an appropriate name. * Makefile (subdirs): Add new subdir wctype. * ctype/ctype-info.c: Add new global variable __ctype_names and initialize from _nl_C_LC_CTYPE. * ctype/ctype.h: In P1003.3b/D11 `alnum' is a separate character class. Use bit 11. [_ISbit]: Protect definition of bitmasks because they are also used in wctype.h. * libio/genops.c (_IO_sputbackc, _IO_sungetc): Clear EOF flag after successfully pushing back a character. Fundamental changes in locale implementation. Almost nothing from the old code is used anymore. * locale/charmap.c, locale/collate.c, locale/config.h, locale/ctypedump.c, locale/hash.h, locale/keyword.gperf, locale/keyword.h, locale/loadlocale.c, locale/locale-ctype.c, locale/locale.c locale/localeconv.c, locale/localedef.c, locale/localedef.h, locale/locfile-hash.c, locale/locfile-lex.c, locale/locfile-parse.c, locale/messages.c, locale/monetary.c, locale/numeric.c, locale/setlocale.c, locale/token.h, locale/xmalloc.c: Removed. * locale/Makefile: Update for new locale implementation with program source code distributed in subdir. * locale/categories.def, locale/iso-4217.def: Updated file for new locale implementation. * locale/langinfo.h: Updated for new locale implementation. (ERA_D_T_FMT, ERA_T_FMT): New official values according to P1003.2b/D11. (_NL_COLLATE_NRULES, _NL_COLLATE_RULES, _NL_COLLATE_HASH_SIZE, _NL_COLLATE_HASH_LAYERS, _NL_COLLATE_TABLE_EB, _NL_COLLATE_TABLE_EL, _NL_COLLATE_UNDEFINED, _NL_COLLATE_EXTRA_EB, _NL_COLLATE_EXTRA_EL, _NL_CTYPE_NAMES_EB, _NL_CTYPE_NAMES_EL, _NL_CTYPE_HASH_SIZE, _NL_CTYPE_HASH_LAYERS, _NL_CTYPE_CLASS_NAMES, _NL_CTYPE_MAP_NAMES, _NL_CTYPE_WIDTH): New internal values for extended LC_CTYPE and LC_COLLATE implementation. * locale/simple-hash.c, locale/simple-hash.h, locale/xmalloc.c, locale/xstrdup.c: Helper functions for locale related programs. * locale/C-collate.c, locale/C-ctype.c, locale/C-messages.c, locale/C-monetary.c, locale/C-numeric.c, locale/C-time.c, locale/lc-collate.c, locale/lc-ctype.c, locale/lc-messages.c, locale/lc-monetary.c, locale/lc-numeric.c, locale/lc-time.c: New implementation of locale functions, and new generated "C" locale data. * locale/loadlocale.c: Now handles word fields in locale binary automatically by changing the endianess if necessary. * locale/localeinfo.h (LIMAGIC): Changed magic number because of incompatible changes. (locale_data): Changed definition to allow word as a value type. (coll_sort_rule): Values for collation sorting mode. (_NL_CURRENT_WORD): New macro to access word value of locale entry. (__collate_table, __collate_extra): Declare new global variables for collation tables. * locale/programs/charmap-kw.gperf, locale/programs/charmap-kw.h, locale/programs/charmap.c, locale/programs/charset.c, locale/programs/charset.h, locale/programs/config.h, locale/programs/ctypedump.c, locale/programs/ld-collate.c, locale/programs/ld-ctype.c, locale/programs/ld-messages.c, locale/programs/ld-monetary.c, locale/programs/ld-numeric.c, locale/programs/ld-time.c, locale/programs/linereader.c, locale/programs/linereader.h, locale/programs/locale.c, locale/programs/localedef.c, locale/programs/locales.h, locale/programs/locfile-kw.gperf, locale/programs/locfile-kw.h, locale/programs/locfile-token.h, locale/programs/locfile.c, locale/programs/locfile.h, locale/programs/stringtrans.c, locale/programs/stringtrans.h: Implementation of locale related programs. * locale/weight.h: Functions to access collation tables. * posix/unistd.h: Define _POSIX2_LOCALEDEF. * stdio-common/printf_fp.c: Fix bug with printing certain numbers < 10^-1. Reported by Bill Metzenthen. * stdio-common/tfformat.c: Add new test for above bug. * string/strcoll.c, string/strxfrm.c: Real implementation of string collation according to ISO C. * wctype/Makefile, wctype/cname-lookup.h, wctype/iswctype.c, wctype/test_wctype.c, wctype/towctrans.c, wctype/wcfuncs.c, wctype/wctrans.c, wctype/wctype.c, wctype/wctype.h: New files. Implementation of wide character classes and mapping.
author: Roland McGrath <roland@gnu.org> 1996-03-28 08:30:38 +0000
committer: Roland McGrath <roland@gnu.org> 1996-03-28 08:30:38 +0000
commit: 19bc17a90548ee427035994bbc4b14395723ff1f (patch)
tree: e7a17eda196c2610ca4be26c9e7985815162eafb /locale/programs/ld-collate.c
parent: 53f770e0f9d405ea8d1888254c6f7ce431b04c6e (diff)
download: glibc-19bc17a90548ee427035994bbc4b14395723ff1f.tar.gz
glibc-19bc17a90548ee427035994bbc4b14395723ff1f.tar.xz
glibc-19bc17a90548ee427035994bbc4b14395723ff1f.zip
1 files changed, 1549 insertions, 0 deletions
diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c
new file mode 100644
index 0000000000..0f3bcbca33
--- /dev/null
+++ b/locale/programs/ld-collate.c
@@ -0,0 +1,1549 @@
+/* Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB.  If
+not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <endian.h>
+#include <errno.h>
+#include <limits.h>
+#include <locale.h>
+#include <obstack.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wcstr.h>
+
+#include "localeinfo.h"
+#include "locales.h"
+#include "simple-hash.h"
+#include "stringtrans.h"
+
+/* Uncomment the following line in the production version.  */
+/* define NDEBUG 1 */
+#include <assert.h>
+
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+#define SWAPU32(w) \
+  (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
+
+
+/* What kind of symbols get defined?  */
+enum coll_symbol
+{
+  undefined,
+  ellipsis,
+  character,
+  element,
+  symbol
+};
+
+
+typedef struct patch_t
+{
+  const char *fname;
+  size_t lineno;
+  const char *token;
+  union
+  {
+    unsigned int *pos;
+    size_t idx;
+  } where;
+  struct patch_t *next;
+} patch_t;
+
+
+typedef struct element_t
+{
+  const wchar_t *name;
+  unsigned int this_weight;
+
+  struct element_t *next;
+
+  unsigned int *ordering;
+  size_t ordering_len;
+} element_t;
+
+
+/* The real definition of the struct for the LC_CTYPE locale.  */
+struct locale_collate_t
+{
+  /* Collate symbol table.  Simple mapping to number.  */
+  hash_table symbols;
+
+  /* The collation elements.  */
+  hash_table elements;
+  struct obstack element_mem;
+
+  /* The result table.  */
+  hash_table result;
+
+  /* Sorting rules given in order_start line.  */
+  int nrules;
+  int nrules_max;
+  enum coll_sort_rule *rules;
+
+  /* Used while recognizing symbol composed of multiple tokens
+     (collating-element).  */
+  const char *combine_token;
+  size_t combine_token_len;
+
+  /* How many sorting order specifications so far.  */
+  unsigned int order_cnt;
+
+  /* Was lastline ellipsis?  */
+  int was_ellipsis;
+  /* Value of last entry if was character.  */
+  wchar_t last_char;
+  /* Current element.  */
+  element_t *current_element;
+  /* What kind of symbol is current element.  */
+  enum coll_symbol kind;
+
+  /* While collecting the weigths we need some temporary space.  */
+  unsigned int current_order;
+  int *weight_cnt;
+  int weight_idx;
+  unsigned int *weight;
+  int nweight;
+  int nweight_max;
+
+  /* Patch lists.  */
+  patch_t *current_patch;
+  patch_t *all_patches;
+
+  /* Room for the UNDEFINED information.  */
+  element_t undefined;
+  unsigned int undefined_len;
+};
+
+
+/* Be verbose?  Defined in localedef.c.  */
+extern int verbose;
+
+
+void *xmalloc (size_t __n);
+void *xrealloc (void *__p, size_t __n);
+
+
+#define obstack_chunk_alloc xmalloc
+#define obstack_chunk_free free
+
+
+void
+collate_startup (struct linereader *lr, struct localedef_t *locale,
+		 struct charset_t *charset)
+{
+  struct locale_collate_t *collate;
+
+  /* It is important that we always use UCS4 encoding for strings now.  */
+  encoding_method = ENC_UCS4;
+
+  /* Allocate the needed room.  */
+  locale->categories[LC_COLLATE].collate = collate =
+    (struct locale_collate_t *) xmalloc (sizeof (struct locale_collate_t));
+
+  /* Allocate hash table for collating elements.  */
+  if (init_hash (&collate->elements, 512))
+    error (4, 0, _("memory exhausted"));
+  collate->combine_token = NULL;
+  obstack_init (&collate->element_mem);
+
+  /* Allocate hash table for collating elements.  */
+  if (init_hash (&collate->symbols, 64))
+    error (4, 0, _("memory exhausted"));
+
+  /* Allocate hash table for result.  */
+  if (init_hash (&collate->result, 512))
+    error (4, 0, _("memory exhausted"));
+
+  collate->nrules = 0;
+  collate->nrules_max = 10;
+  collate->rules
+    = (enum coll_sort_rule *) xmalloc (collate->nrules_max
+				       * sizeof (enum coll_sort_rule));
+
+  collate->order_cnt = 1;	/* The smallest weight is 2.  */
+
+  collate->was_ellipsis = 0;
+  collate->last_char = L'\0';	/* 0 because leading ellipsis is allowed.  */
+
+  collate->all_patches = NULL;
+
+  /* This tells us no UNDEFINED entry was found until now.  */
+  collate->undefined.this_weight = 0;
+
+  lr->translate_strings = 0;
+}
+
+
+void
+collate_finish (struct localedef_t *locale, struct charset_t *charset)
+{
+  struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
+  patch_t *patch;
+  size_t cnt;
+
+  /* Patch the constructed table so that forward references are
+     correctly filled.  */
+  for (patch = collate->all_patches; patch != NULL; patch = patch->next)
+    {
+      wchar_t wch;
+      size_t toklen = strlen (patch->token);
+      void *ptmp;
+      unsigned int value = 0;
+
+      wch = charset_find_value (charset, patch->token, toklen);
+      if (wch != ILLEGAL_CHAR_VALUE)
+	{
+	  element_t *runp;
+
+	  if (find_entry (&collate->result, &wch, sizeof (wchar_t),
+			  (void *) &runp) < 0)
+	    runp = NULL;
+	  for (; runp != NULL; runp = runp->next)
+	    if (runp->name[0] == wch && runp->name[1] == L'\0')
+	      break;
+
+	  value = runp == NULL ? 0 : runp->this_weight;
+	}
+      else if (find_entry (&collate->elements, patch->token, toklen, &ptmp)
+	       >= 0)
+	{
+	  value = ((element_t *) ptmp)->this_weight;
+	}
+      else if (find_entry (&collate->symbols, patch->token, toklen, &ptmp)
+	       >= 0)
+	{
+	  value = (unsigned int) ptmp;
+	}
+      else
+	value = 0;
+
+      if (value == 0)
+	error_with_loc (0, 0, patch->fname, patch->lineno,
+			_("no weight defined for symbol `%s'"), patch->token);
+      else
+	*patch->where.pos = value;
+    }
+
+  /* If no definition for UNDEFINED is given, all characters in the
+     given charset must be specified.  */
+  if (collate->undefined.ordering == NULL)
+    {
+      /**************************************************************\
+      |* XXX We should test whether really an unspecified character *|
+      |* exists before giving the message.			    *|
+      \**************************************************************/
+      u32_t weight;
+
+      error (0, 0, _("no definition of `UNDEFINED'"));
+
+      collate->undefined.ordering_len = collate->nrules;
+      weight = ++collate->order_cnt;
+
+      for (cnt = 0; cnt < collate->nrules; ++cnt)
+	{
+	  u32_t one = 1;
+	  obstack_grow (&collate->element_mem, &one, sizeof (one));
+	}
+
+      for (cnt = 0; cnt < collate->nrules; ++cnt)
+	obstack_grow (&collate->element_mem, &weight, sizeof (weight));
+
+      collate->undefined.ordering = obstack_finish (&collate->element_mem);
+    }
+
+  collate->undefined_len = 2;	/* For the name: 1 x wchar_t + L'\0'.  */
+  for (cnt = 0; cnt < collate->nrules; ++cnt)
+    collate->undefined_len += 1 + collate->undefined.ordering[cnt];
+
+  /* Collating symbols are not used anymore.  */
+  (void) delete_hash (&collate->symbols);
+}
+
+
+
+void
+collate_output (struct localedef_t *locale, const char *output_path)
+{
+  struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
+  u32_t table_size, table_best, level_best, sum_best;
+  void *last;
+  element_t *pelem;
+  wchar_t *name;
+  size_t len;
+  const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
+  struct iovec iov[2 + nelems];
+  struct locale_file data;
+  u32_t idx[nelems];
+  struct obstack non_simple;
+  size_t cnt, entry_size;
+  u32_t undefined_offset = UINT_MAX;
+  u32_t *table, *extra, *table2, *extra2;
+  size_t extra_len;
+
+  sum_best = UINT_MAX;
+  table_best = 0xffff;
+  level_best = 0xffff;
+
+  /* Compute table size.  */
+  fputs (_("\
+Computing table size for collation information might take a while..."),
+	 stderr);
+  for (table_size = 256; table_size < sum_best; ++table_size)
+    {
+      size_t hits[table_size];
+      unsigned int worst = 1;
+      size_t cnt;
+
+      last = NULL;
+
+      for (cnt = 0; cnt < 256; ++cnt)
+	hits[cnt] = 1;
+      memset (&hits[256], '\0', sizeof (hits) - 256 * sizeof (size_t));
+
+      while (iterate_table (&collate->result, &last, (const void **) &name,
+			    &len, (void **) &pelem) >= 0)
+	if (pelem->ordering != NULL && pelem->name[0] > 0xff)
+	  if (++hits[(unsigned int) pelem->name[0] % table_size] > worst)
+	    {
+	      worst = hits[(unsigned int) pelem->name[0] % table_size];
+	      if (table_size * worst > sum_best)
+		break;
+	    }
+
+      if (table_size * worst < sum_best)
+	{
+	  sum_best = table_size * worst;
+	  table_best = table_size;
+	  level_best = worst;
+	}
+    }
+  assert (table_best != 0xffff || level_best != 0xffff);
+  fputs (_(" done\n"), stderr);
+
+  obstack_init (&non_simple);
+
+  data.magic = LIMAGIC (LC_COLLATE);
+  data.n = nelems;
+  iov[0].iov_base = (void *) &data;
+  iov[0].iov_len = sizeof (data);
+
+  iov[1].iov_base = (void *) idx;
+  iov[1].iov_len = sizeof (idx);
+
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES)].iov_base = &collate->nrules;
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES)].iov_len = sizeof (u32_t);
+
+  table = (u32_t *) alloca (collate->nrules * sizeof (u32_t));
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES)].iov_base = table;
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES)].iov_len
+    = collate->nrules * sizeof (u32_t);
+  /* Another trick here.  Describing the collation method needs only a
+     few bits (3, to be exact).  But the binary file should be
+     accessible by maschines with both endianesses and so we store both
+     information in the same word.  */
+  for (cnt = 0; cnt < collate->nrules; ++cnt)
+    table[cnt] = collate->rules[cnt] | SWAPU32 (collate->rules[cnt]);
+
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].iov_base = &table_best;
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].iov_len = sizeof (u32_t);
+
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].iov_base = &level_best;
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].iov_len = sizeof (u32_t);
+
+  entry_size = 1 + MAX (collate->nrules, 2);
+
+  table = (u32_t *) alloca (table_best * level_best * entry_size
+			    * sizeof (table[0]));
+  memset (table, '\0', table_best * level_best * entry_size
+	  * sizeof (table[0]));
+
+
+  /* Macros for inserting in output table.  */
+#define ADD_VALUE(expr)							      \
+  do {									      \
+    u32_t to_write = (u32_t) expr;					      \
+    obstack_grow (&non_simple, &to_write, sizeof (to_write));		      \
+  } while (0)
+
+#define ADD_ELEMENT(pelem, len)						      \
+  do {									      \
+    size_t cnt, idx;							      \
+									      \
+    ADD_VALUE (len);							      \
+									      \
+    wlen = wcslen (pelem->name);					      \
+    obstack_grow (&non_simple, pelem->name, (wlen + 1) * sizeof (u32_t));     \
+									      \
+    idx = collate->nrules;						      \
+    for (cnt = 0; cnt < collate->nrules; ++cnt)				      \
+      {									      \
+	size_t disp;							      \
+									      \
+	ADD_VALUE (pelem->ordering[cnt]);				      \
+	for (disp = 0; disp < pelem->ordering[cnt]; ++disp)		      \
+	  ADD_VALUE (pelem->ordering[idx++]);				      \
+      }									      \
+  } while (0)
+
+#define ADD_FORWARD(pelem)						      \
+  do {									      \
+    /* We leave a reference in the main table and put all		      \
+       information in the table for the extended entries.  */		      \
+    element_t *runp;							      \
+    element_t *has_simple = NULL;					      \
+    size_t wlen;							      \
+									      \
+    table[(level * table_best + slot) * entry_size + 1]			      \
+      = FORWARD_CHAR;							      \
+    table[(level * table_best + slot) * entry_size + 2]			      \
+      = obstack_object_size (&non_simple) / sizeof (u32_t);		      \
+									      \
+    /* Here we have to construct the non-simple table entry.  First	      \
+       compute the total length of this entry.  */			      \
+    for (runp = (pelem); runp != NULL; runp = runp->next)		      \
+      if (runp->ordering != NULL)					      \
+	{								      \
+	  u32_t value;							      \
+	  size_t cnt;							      \
+									      \
+	  value = 1 + wcslen (runp->name) + 1;				      \
+									      \
+	  for (cnt = 0; cnt < collate->nrules; ++cnt)			      \
+	    /* We have to take care for entries without ordering	      \
+	       information.  While reading them they get inserted in the      \
+	       table and later not removed when something goes wrong with     \
+	       reading its weights.  */					      \
+	    {								      \
+	      value += 1 + runp->ordering[cnt];				      \
+									      \
+	      if (runp->name[1] == L'\0')				      \
+		has_simple = runp;					      \
+	    }								      \
+									      \
+	  ADD_ELEMENT (runp, value);					      \
+	}								      \
+									      \
+    if (has_simple == NULL)						      \
+      {									      \
+	size_t idx, cnt;						      \
+									      \
+	ADD_VALUE (collate->undefined_len + 1);				      \
+									      \
+	/* Add the name.  */						      \
+	ADD_VALUE ((pelem)->name[0]);					      \
+	ADD_VALUE (0);							      \
+									      \
+	idx = collate->nrules;						      \
+	for (cnt = 0; cnt < collate->nrules; ++cnt)			      \
+	  {								      \
+	    size_t disp;						      \
+									      \
+	    ADD_VALUE (collate->undefined.ordering[cnt]);		      \
+	    for (disp = 0; disp < collate->undefined.ordering[cnt]; ++disp)   \
+	      {								      \
+		if (collate->undefined.ordering[idx] == ELLIPSIS_CHAR)	      \
+		  ADD_VALUE ((pelem)->name[0]);				      \
+		else							      \
+		  ADD_VALUE (collate->undefined.ordering[idx++]);	      \
+		++idx;							      \
+	      }								      \
+	  }								      \
+      }									      \
+  } while (0)
+
+
+
+  /* Fill the table now.  First we look for all the characters which
+     fit into one single byte.  This speeds up the 8-bit string
+     functions.  */
+  last = NULL;
+  while (iterate_table (&collate->result, &last, (const void **) &name,
+			&len, (void **) &pelem) >= 0)
+    if (pelem->name[0] <= 0xff)
+      {
+	/* We have a single byte name.  Now we must distinguish
+	   between entries in simple form (i.e., only one value per
+	   weight and no collation element starting with the same
+	   character) and those which are not.  */
+	size_t slot = ((size_t) pelem->name[0]);
+	const size_t level = 0;
+
+	table[slot * entry_size] = pelem->name[0];
+
+	if (pelem->name[1] == L'\0' && pelem->next == NULL
+	    && pelem->ordering_len == collate->nrules)
+	  {
+	    /* Yes, we have a simple one.  Lucky us.  */
+	    size_t cnt;
+
+	    for (cnt = 0; cnt < collate->nrules; ++cnt)
+	      table[slot * entry_size + 1 + cnt]
+		= pelem->ordering[collate->nrules + cnt];
+	  }
+	else
+	  ADD_FORWARD (pelem);
+      }
+
+  /* Now check for missing single byte entries.  If one exist we fill
+     with the UNDEFINED entry.  */
+  for (cnt = 0; cnt < 256; ++cnt)
+    /* The first weight is never 0 for existing entries.  */
+    if (table[cnt * entry_size + 1] == 0)
+      {
+	/* We have to fill in the information from the UNDEFINED
+	   entry.  */
+	table[cnt * entry_size] = (u32_t) cnt;
+
+	if (collate->undefined.ordering_len == collate->nrules)
+	  {
+	    size_t inner;
+
+	    for (inner = 0; inner < collate->nrules; ++inner)
+	      if (collate->undefined.ordering[collate->nrules + inner]
+		  == ELLIPSIS_CHAR)
+		table[cnt * entry_size + 1 + inner] = cnt;
+	      else
+		table[cnt * entry_size + 1 + inner]
+		  = collate->undefined.ordering[collate->nrules + inner];
+	  }
+	else
+	  {
+	    if (undefined_offset != UINT_MAX)
+	      {
+		table[cnt * entry_size + 1] = FORWARD_CHAR;
+		table[cnt * entry_size + 2] = undefined_offset;
+	      }
+	    else
+	      {
+		const size_t slot = cnt;
+		const size_t level = 0;
+
+		ADD_FORWARD (&collate->undefined);
+		undefined_offset = table[cnt * entry_size + 2];
+	      }
+	  }
+      }
+
+  /* Now we are ready for inserting the whole rest.   */
+  last = NULL;
+  while (iterate_table (&collate->result, &last, (const void **) &name,
+			&len, (void **) &pelem) >= 0)
+    if (pelem->name[0] > 0xff)
+      {
+	/* Find the position.  */
+	size_t slot = ((size_t) pelem->name[0]) % table_best;
+	size_t level = 0;
+
+	while (table[(level * table_best + slot) * entry_size + 1] != 0)
+	  ++level;
+	assert (level < level_best);
+
+	if (pelem->name[1] == L'\0' && pelem->next == NULL
+	    && pelem->ordering_len == collate->nrules)
+	  {
+	    /* Again a simple entry.  */
+	    size_t inner;
+
+	    for (inner = 0; inner < collate->nrules; ++inner)
+	      table[(level * table_best + slot) * entry_size + 1 + inner]
+		= pelem->ordering[collate->nrules + inner];
+	  }
+	else
+	  ADD_FORWARD (pelem);
+      }
+
+  /* Add the UNDEFINED entry.  */
+  {
+    /* Here we have to construct the non-simple table entry.  */
+    size_t idx, cnt;
+
+    undefined_offset = obstack_object_size (&non_simple);
+
+    idx = collate->nrules;
+    for (cnt = 0; cnt < collate->nrules; ++cnt)
+      {
+	size_t disp;
+
+	ADD_VALUE (collate->undefined.ordering[cnt]);
+	for (disp = 0; disp < collate->undefined.ordering[cnt]; ++disp)
+	  ADD_VALUE (collate->undefined.ordering[idx++]);
+      }
+  }
+
+  /* Finish the extra block.  */
+  extra_len = obstack_object_size (&non_simple);
+  extra = (u32_t *) obstack_finish (&non_simple);
+  assert ((extra_len % sizeof (u32_t)) == 0);
+
+  /* Now we have to build the two array for the other byte ordering.  */
+  table2 = (u32_t *) alloca (table_best * level_best * entry_size
+			     * sizeof (table[0]));
+  extra2 = (u32_t *) alloca (extra_len);
+
+  for (cnt = 0; cnt < table_best * level_best * entry_size; ++cnt)
+    table2[cnt] = SWAPU32 (table[cnt]);
+
+  for (cnt = 0; cnt < extra_len / sizeof (u32_t); ++cnt)
+    extra2[cnt] = SWAPU32 (extra2[cnt]);
+
+  /* Store table adresses and lengths.   */
+#if __BYTE_ORDER == __BIG_ENDIAN
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_base = table;
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_len
+    = table_best * level_best * entry_size * sizeof (table[0]);
+
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_base = table2;
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_len
+    = table_best * level_best * entry_size * sizeof (table[0]);
+
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_base = extra;
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_len = extra_len;
+
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_base = extra2;
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_len = extra_len;
+#else
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_base = table2;
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_len
+    = table_best * level_best * entry_size * sizeof (table[0]);
+
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_base = table;
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_len
+    = table_best * level_best * entry_size * sizeof (table[0]);
+
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_base = extra2;
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_len = extra_len;
+
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_base = extra;
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_len = extra_len;
+#endif
+
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED)].iov_base = &undefined_offset;
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED)].iov_len = sizeof (u32_t);
+
+  /* Update idx array.  */
+  idx[0] = iov[0].iov_len + iov[1].iov_len;
+  for (cnt = 1; cnt < nelems; ++cnt)
+    idx[cnt] = idx[cnt - 1] + iov[1 + cnt].iov_len;
+
+  write_locale_data (output_path, "LC_COLLATE", 2 + nelems, iov);
+}
+
+
+void
+collate_element_to (struct linereader *lr, struct localedef_t *locale,
+		    struct token *code, struct charset_t *charset)
+{
+  struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
+  unsigned int value;
+  void *not_used;
+
+  if (collate->combine_token != NULL)
+    {
+      free ((void *) collate->combine_token);
+      collate->combine_token = NULL;
+    }
+
+  value = charset_find_value (charset, code->val.str.start, code->val.str.len);
+  if (value != ILLEGAL_CHAR_VALUE)
+    {
+      lr_error (lr, _("symbol for multicharacter collating element "
+		      "`%.*s' duplicates symbolic name in charset"),
+		code->val.str.len, code->val.str.start);
+      return;
+    }
+
+  if (find_entry (&collate->elements, code->val.str.start, code->val.str.len,
+		  &not_used) >= 0)
+    {
+      lr_error (lr, _("symbol for multicharacter collating element "
+		      "`%.*s' duplicates other element definition"),
+		code->val.str.len, code->val.str.start);
+      return;
+    }
+
+  if (find_entry (&collate->elements, code->val.str.start, code->val.str.len,
+		  &not_used) >= 0)
+    {
+      lr_error (lr, _("symbol for multicharacter collating element "
+		      "`%.*s' duplicates symbol definition"),
+		code->val.str.len, code->val.str.start);
+      return;
+    }
+
+  collate->combine_token = code->val.str.start;
+  collate->combine_token_len = code->val.str.len;
+}
+
+
+void
+collate_element_from (struct linereader *lr, struct localedef_t *locale,
+		      struct token *code, struct charset_t *charset)
+{
+  struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
+  element_t *elemp, *runp;
+
+  /* CODE is a string.  */
+  elemp = (element_t *) obstack_alloc (&collate->element_mem,
+				       sizeof (element_t));
+
+  /* We have to translate the string.  It may contain <...> character
+     names.  */
+  elemp->name = (wchar_t *) translate_string (code->val.str.start, charset);
+  elemp->this_weight = 0;
+  elemp->ordering = NULL;
+  elemp->ordering_len = 0;
+
+  free (code->val.str.start);
+
+  if (elemp->name == NULL)
+    {
+      /* At least one character in the string is not defined.  We simply
+	 do nothing.  */
+      if (verbose)
+	lr_error (lr, _("\
+`from' string in collation element declaration contains unknown character"));
+      return;
+    }
+
+  if (elemp->name[0] == L'\0' || elemp->name[1] == L'\0')
+    {
+      lr_error (lr, _("illegal colltion element"));
+      return;
+    }
+
+  /* The entries in the linked lists of RESULT are sorting in
+     descending order.  The order is important for the `strcoll' and
+     `wcscoll' functions.  */
+  if (find_entry (&collate->result, elemp->name, sizeof (wchar_t),
+		  (void *) &runp) >= 0)
+    {
+      /* We already have an entry with this key.  Check whether it is
+	 identical.  */
+      element_t *prevp = NULL;
+      int cmpres;
+
+      do
+	{
+	  cmpres = wcscmp (elemp->name, runp->name);
+	  if (cmpres <= 0)
+	    break;
+	  prevp = runp;
+	}
+      while ((runp = runp->next) != NULL);
+
+      if (cmpres == 0)
+	lr_error (lr, _("duplicate collating element definition"));
+      else
+	{
+	  elemp->next = runp;
+	  if (prevp == NULL)
+	    {
+	      if (set_entry (&collate->result, elemp->name, sizeof (wchar_t),
+			     elemp) < 0)
+		error (EXIT_FAILURE, 0,
+		       _("\
+error while inserting collation element into hash table"));
+	    }
+	  else
+	    prevp->next = elemp;
+	}
+    }
+  else
+    {
+      elemp->next = NULL;
+      if (insert_entry (&collate->result, elemp->name, sizeof (wchar_t), elemp)
+	  < 0)
+	error (EXIT_FAILURE, errno, _("error while inserting to hash table"));
+    }
+
+  if (insert_entry (&collate->elements, collate->combine_token,
+		    collate->combine_token_len, (void *) elemp) < 0)
+    lr_error (lr, _("cannot insert new collating symbol definition: %s"),
+	      strerror (errno));
+}
+
+
+void
+collate_symbol (struct linereader *lr, struct localedef_t *locale,
+		struct token *code, struct charset_t *charset)
+{
+  struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
+  wchar_t value;
+  void *not_used;
+
+  value = charset_find_value (charset, code->val.str.start, code->val.str.len);
+  if (value != ILLEGAL_CHAR_VALUE)
+    {
+      lr_error (lr, _("symbol for multicharacter collating element "
+		      "`%.*s' duplicates symbolic name in charset"),
+		code->val.str.len, code->val.str.start);
+      return;
+    }
+
+  if (find_entry (&collate->elements, code->val.str.start, code->val.str.len,
+		  &not_used) >= 0)
+    {
+      lr_error (lr, _("symbol for multicharacter collating element "
+		      "`%.*s' duplicates element definition"),
+		code->val.str.len, code->val.str.start);
+      return;
+    }
+
+  if (find_entry (&collate->symbols, code->val.str.start, code->val.str.len,
+		  &not_used) >= 0)
+    {
+      lr_error (lr, _("symbol for multicharacter collating element "
+		      "`%.*s' duplicates other symbol definition"),
+		code->val.str.len, code->val.str.start);
+      return;
+    }
+
+  if (insert_entry (&collate->symbols, code->val.str.start, code->val.str.len,
+		    (void *) 0) < 0)
+    lr_error (lr, _("cannot insert new collating symbol definition: %s"),
+	      strerror (errno));
+}
+
+
+void
+collate_new_order (struct linereader *lr, struct localedef_t *locale,
+		   enum coll_sort_rule sort_rule)
+{
+  struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
+
+  if (collate->nrules >= collate->nrules_max)
+    {
+      collate->nrules_max *= 2;
+      collate->rules
+	= (enum coll_sort_rule *) xrealloc (collate->rules,
+					    collate->nrules_max
+					    * sizeof (enum coll_sort_rule));
+    }
+
+  collate->rules[collate->nrules++] = sort_rule;
+}
+
+
+void
+collate_build_arrays (struct linereader *lr, struct localedef_t *locale)
+{
+  struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
+
+  collate->rules
+    = (enum coll_sort_rule *) xrealloc (collate->rules,
+					collate->nrules
+					* sizeof (enum coll_sort_rule));
+
+  /* Allocate arrays for temporary weights.  */
+  collate->weight_cnt = (int *) xmalloc (collate->nrules * sizeof (int));
+
+  /* Choose arbitrary start value for table size.  */
+  collate->nweight_max = 5 * collate->nrules;
+  collate->weight = (int *) xmalloc (collate->nweight_max * sizeof (int));
+}
+
+
+int
+collate_order_elem (struct linereader *lr, struct localedef_t *locale,
+		    struct token *code, struct charset_t *charset)
+{
+  const wchar_t zero = L'\0';
+  struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
+  int result = 0;
+  wchar_t value;
+  void *tmp;
+  int i;
+
+  switch (code->tok)
+    {
+    case tok_bsymbol:
+      /* We have a string to find in one of the three hashing tables.  */
+      value = charset_find_value (charset, code->val.str.start,
+				  code->val.str.len);
+      if (value != ILLEGAL_CHAR_VALUE)
+	{
+	  element_t *lastp, *firstp;
+
+	  collate->kind = character;
+
+	  if (find_entry (&collate->result, &value, sizeof (wchar_t),
+			  (void *) &firstp) < 0)
+	    firstp = lastp = NULL;
+	  else
+	    {
+	      /* The entry for the simple character is always found at
+		 the end.  */
+	      lastp = firstp;
+	      while (lastp->next != NULL)
+		lastp = lastp->next;
+
+	      if (lastp->name[0] == value && lastp->name[1] == L'\0')
+		{
+		  lr_error (lr, _("duplicate definition for character `%.*s'"),
+			    code->val.str.len, code->val.str.start);
+		  lr_ignore_rest (lr, 0);
+		  result = -1;
+		  break;
+		}
+	    }
+
+	  collate->current_element
+	    = (element_t *) obstack_alloc (&collate->element_mem,
+					   sizeof (element_t));
+
+	  obstack_grow (&collate->element_mem, &value, sizeof (value));
+	  obstack_grow (&collate->element_mem, &zero, sizeof (zero));
+
+	  collate->current_element->name =
+	    (const wchar_t *) obstack_finish (&collate->element_mem);
+
+	  collate->current_element->this_weight = ++collate->order_cnt;
+
+	  collate->current_element->next = NULL;
+
+	  if (firstp == NULL)
+	    {
+	      if (insert_entry (&collate->result, &value, sizeof (wchar_t),
+				(void *) collate->current_element) < 0)
+		{
+		  lr_error (lr, _("cannot insert collation element `%.*s'"),
+			    code->val.str.len, code->val.str.start);
+		  exit (4);
+		}
+	    }
+	  else
+	    lastp->next = collate->current_element;
+	}
+      else if (find_entry (&collate->elements, code->val.str.start,
+			   code->val.str.len, &tmp) >= 0)
+	{
+	  collate->current_element = (element_t *) tmp;
+
+	  if (collate->current_element->this_weight != 0)
+	    {
+	      lr_error (lr, _("\
+collation element `%.*s' appears more than once: ignore line"),
+			code->val.str.len, code->val.str.start);
+	      lr_ignore_rest (lr, 0);
+	      result = -1;
+	      break;
+	    }
+
+	  collate->kind = element;
+	  collate->current_element->this_weight = ++collate->order_cnt;
+	}
+      else if (find_entry (&collate->symbols, code->val.str.start,
+			   code->val.str.len, &tmp) >= 0)
+	{
+	  unsigned int order = ++collate->order_cnt;
+
+	  if ((unsigned int) tmp != 0)
+	    {
+	      lr_error (lr, _("\
+collation symbol `.*s' appears more than once: ignore line"),
+			code->val.str.len, code->val.str.start);
+	      lr_ignore_rest (lr, 0);
+	      result = -1;
+	      break;
+	    }
+
+	  collate->kind = symbol;
+
+	  if (set_entry (&collate->symbols, code->val.str.start,
+			 code->val.str.len, (void *) order) < 0)
+	    {
+	      lr_error (lr, _("cannot process order specification"));
+	      exit (4);
+	    }
+	}
+      else
+	{
+	  if (verbose)
+	    lr_error (lr, _("unknown symbol `%.*s': line ignored"),
+		      code->val.str.len, code->val.str.start);
+          lr_ignore_rest (lr, 0);
+
+          result = -1;
+	}
+      break;
+
+    case tok_undefined:
+      collate->kind = undefined;
+      collate->current_element = &collate->undefined;
+      break;
+
+    case tok_ellipsis:
+      if (collate->was_ellipsis)
+	{
+	  lr_error (lr, _("\
+two lines in a row containing `...' are not allowed"));
+	  result = -1;
+	}
+      else if (collate->kind != character)
+	{
+	  /* An ellipsis requires the previous line to be an
+	     character definition.  */
+	  lr_error (lr, _("\
+line before ellipsis does not contain definition for character constant"));
+	  lr_ignore_rest (lr, 0);
+	  result = -1;
+	}
+      else
+	collate->kind = ellipsis;
+      break;
+
+    default:
+      assert (! "illegal token in `collate_order_elem'");
+    }
+
+  /* Now it's time to handle the ellipsis in the previous line.  We do
+     this only when the last line contained an definition for an
+     character, the current line also defines an character, the
+     character code for the later is bigger than the former.  */
+  if (collate->was_ellipsis)
+    {
+      if (collate->kind != character)
+	{
+	  lr_error (lr, _("\
+line after ellipsis must contain character definition"));
+	  lr_ignore_rest (lr, 0);
+	  result = -1;
+	}
+      else if (collate->last_char > value)
+	{
+	  lr_error (lr, _("end point of ellipsis range is bigger then start"));
+	  lr_ignore_rest (lr, 0);
+	  result = -1;
+	}
+      else
+	{
+	  /* We can fill the arrays with the information we need.  */
+	  wchar_t name[2];
+	  unsigned int *data;
+	  size_t *ptr;
+	  size_t cnt;
+
+	  name[0] = collate->last_char + 1;
+	  name[1] = L'\0';
+
+	  data = (unsigned int *) alloca ((collate->nrules + collate->nweight)
+					  * sizeof (unsigned int));
+	  ptr = (size_t *) alloca (collate->nrules * sizeof (size_t));
+
+	  if (data == NULL || ptr == NULL)
+	    error (4, 0, _("memory exhausted"));
+
+	  /* Prepare data.  Because the characters covered by an
+	     ellipsis all have equal values we prepare the data once
+	     and only change the variable number (if there are any).
+	     PTR[...] will point to the entries which will have to be
+	     fixed during the output loop.  */
+	  for (cnt = 0; cnt < collate->nrules; ++cnt)
+	    {
+	      data[cnt] = collate->weight_cnt[cnt];
+	      ptr[cnt] = (cnt == 0
+			  ? collate->nweight
+			  : ptr[cnt - 1] + collate->weight_cnt[cnt - 1]);
+	    }
+
+	  for (cnt = 0; cnt < collate->nweight; ++cnt)
+	    data[collate->nrules + cnt] = collate->weight[cnt];
+
+	  for (cnt = 0; cnt < collate->nrules; ++cnt)
+	    if (data[ptr[cnt]] != ELLIPSIS_CHAR)
+	      ptr[cnt] = 0;
+
+	  while (name[0] <= value)
+	    {
+	      element_t *pelem;
+
+	      pelem = (element_t *) obstack_alloc (&collate->element_mem,
+						   sizeof (element_t));
+	      if (pelem == NULL)
+		error (4, 0, _("memory exhausted"));
+
+	      pelem->name
+		= (const wchar_t *) obstack_copy (&collate->element_mem,
+						  name, 2 * sizeof (wchar_t));
+	      pelem->this_weight = ++collate->order_cnt;
+
+	      pelem->ordering_len = collate->nweight;
+	      pelem->ordering
+		= (unsigned int *) obstack_copy (&collate->element_mem, data,
+						 (collate->nrules
+						  * pelem->ordering_len)
+						 * sizeof (unsigned int));
+
+	      /* `...' weights need to be adjusted.  */
+	      for (cnt = 0; cnt < collate->nrules; ++cnt)
+		if (ptr[cnt] != 0)
+		  pelem->ordering[ptr[cnt]] = pelem->this_weight;
+
+	      /* Insert new entry into result table.  */
+	      if (find_entry (&collate->result, name, sizeof (wchar_t),
+			      (void *) &pelem->next) >= 0)
+		{
+		  if (set_entry (&collate->result, name, sizeof (wchar_t),
+				 (void *) pelem->next) < 0)
+		    error (4, 0, _("cannot insert into result table"));
+		}
+	      else
+		if (insert_entry (&collate->result, name, sizeof (wchar_t),
+				  (void *) pelem->next) < 0)
+		  error (4, 0, _("cannot insert into result table"));
+
+	      /* Increment counter.  */
+	      ++name[0];
+	    }
+	}
+    }
+
+  /* Reset counters for weights.  */
+  collate->weight_idx = 0;
+  collate->nweight = 0;
+  for (i = 0; i < collate->nrules; ++i)
+    collate->weight_cnt[i] = 0;
+  collate->current_patch = NULL;
+
+  return result;
+}
+
+
+int
+collate_weight_bsymbol (struct linereader *lr, struct localedef_t *locale,
+			struct token *code, struct charset_t *charset)
+{
+  struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
+  unsigned int here_weight;
+  wchar_t value;
+  void *tmp;
+
+  assert (code->tok == tok_bsymbol);
+
+  value = charset_find_value (charset, code->val.str.start, code->val.str.len);
+  if (value != ILLEGAL_CHAR_VALUE)
+    {
+      element_t *runp;
+
+      if (find_entry (&collate->result, &value, sizeof (wchar_t),
+		      (void *)&runp) < 0)
+	runp = NULL;
+
+      while (runp != NULL
+	     && (runp->name[0] != value || runp->name[1] != L'\0'))
+	runp = runp->next;
+
+      here_weight = runp == NULL ? 0 : runp->this_weight;
+    }
+  else if (find_entry (&collate->elements, code->val.str.start,
+		       code->val.str.len, &tmp) >= 0)
+    {
+      element_t *runp = (element_t *) tmp;
+
+      here_weight = runp->this_weight;
+    }
+  else if (find_entry (&collate->symbols, code->val.str.start,
+		       code->val.str.len, &tmp) >= 0)
+    {
+      here_weight = (unsigned int) tmp;
+    }
+  else
+    {
+      if (verbose)
+	lr_error (lr, _("unknown symbol `%.*s': line ignored"),
+		  code->val.str.len, code->val.str.start);
+      lr_ignore_rest (lr, 0);
+      return -1;
+    }
+
+  /* When we currently work on a collation symbol we do not expect any
+     weight.  */
+  if (collate->kind == symbol)
+    {
+      lr_error (lr, _("\
+specification of sorting weight for collation symbol does not make sense"));
+      lr_ignore_rest (lr, 0);
+      return -1;
+    }
+
+  /* Add to the current collection of weights.  */
+  if (collate->nweight >= collate->nweight_max)
+    {
+      collate->nweight_max *= 2;
+      collate->weight = (unsigned int *) xrealloc (collate->weight,
+						   collate->nweight_max);
+    }
+
+  /* If the weight is currently not known, we remember to patch the
+     resulting tables.  */
+  if (here_weight == 0)
+    {
+      patch_t *newp;
+
+      newp = (patch_t *) obstack_alloc (&collate->element_mem,
+					sizeof (patch_t));
+      newp->fname = lr->fname;
+      newp->lineno = lr->lineno;
+      newp->token = (const char *) obstack_copy0 (&collate->element_mem,
+						  code->val.str.start,
+						  code->val.str.len);
+      newp->where.idx = collate->nweight++;
+      newp->next = collate->current_patch;
+      collate->current_patch = newp;
+    }
+  else
+    collate->weight[collate->nweight++] = here_weight;
+  ++collate->weight_cnt[collate->weight_idx];
+
+  return 0;
+}
+
+
+int
+collate_next_weight (struct linereader *lr, struct localedef_t *locale)
+{
+  struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
+
+  if (collate->kind == symbol)
+    {
+      lr_error (lr, _("\
+specification of sorting weight for collation symbol does not make sense"));
+      lr_ignore_rest (lr, 0);
+      return -1;
+    }
+
+  ++collate->weight_idx;
+  if (collate->weight_idx >= collate->nrules)
+    {
+      lr_error (lr, _("too many weights"));
+      lr_ignore_rest (lr, 0);
+      return -1;
+    }
+
+  return 0;
+}
+
+
+int
+collate_simple_weight (struct linereader *lr, struct localedef_t *locale,
+		       struct token *code, struct charset_t *charset)
+{
+  struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
+  unsigned int value = 0;
+
+  /* There current tokens can be `IGNORE', `...', or a string.  */
+  switch (code->tok)
+    {
+    case tok_ignore:
+      /* This token is allowed in all situations.  */
+      value = IGNORE_CHAR;
+      break;
+
+    case tok_ellipsis:
+      /* The ellipsis is only allowed for the `...' or `UNDEFINED'
+	 entry.  */
+      if (collate->kind != ellipsis && collate->kind != undefined)
+	{
+	  lr_error (lr, _("\
+`...' must only be used in `...' and `UNDEFINED' entries"));
+	  lr_ignore_rest (lr, 0);
+	  return -1;
+	}
+      value = ELLIPSIS_CHAR;
+      break;
+
+    case tok_string:
+      /* This can become difficult.  We have to get the weights which
+	 correspind the the single wide chars in the string.  But some
+	 of the `chars' might not be real characters, but collation
+	 elements or symbols.  And so the string decoder might have
+	 signaled errors.  The string at this point is not translated.
+	 I.e., all <...> sequences are still there.  */
+      {
+	char *runp = code->val.str.start;
+	void *tmp;
+
+	while (*runp != '\0')
+	  {
+	    char *startp = (char *) runp;
+	    char *putp = (char *) runp;
+	    wchar_t wch;
+
+	    /* Lookup weight for char and store it.  */
+	    if (*runp == '<')
+	      {
+		while (*++runp != '\0' && *runp != '>')
+		  {
+		    if (*runp == lr->escape_char)
+		      if (*++runp == '\0')
+			{
+			  lr_error (lr, _("unterminated weight name"));
+			  lr_ignore_rest (lr, 0);
+			  return -1;
+			}
+		    *putp++ = *runp;
+		  }
+		if (*runp == '>')
+		  ++runp;
+
+		if (putp == startp)
+		  {
+		    lr_error (lr, _("empty weight name: line ignored"));
+		    lr_ignore_rest (lr, 0);
+		    return -1;
+		  }
+
+		wch = charset_find_value (charset, startp, putp - startp);
+		if (wch != ILLEGAL_CHAR_VALUE)
+		  {
+		    element_t *pelem;
+
+		    if (find_entry (&collate->result, &wch, sizeof (wchar_t),
+				    (void *)&pelem) < 0)
+		      pelem = NULL;
+
+		    while (pelem != NULL
+			   && (pelem->name[0] != wch
+			       || pelem->name[1] != L'\0'))
+		      pelem = pelem->next;
+
+		    value = pelem == NULL ? 0 : pelem->this_weight;
+		  }
+		else if (find_entry (&collate->elements, startp, putp - startp,
+				     &tmp) >= 0)
+		  {
+		    element_t *pelem = (element_t *) tmp;
+
+		    value = pelem->this_weight;
+		  }
+		else if (find_entry (&collate->symbols, startp, putp - startp,
+				     &tmp) >= 0)
+		  {
+		    value = (unsigned int) tmp;
+		  }
+		else
+		  {
+		    if (verbose)
+		      lr_error (lr, _("unknown symbol `%.*s': line ignored"),
+				putp - startp, startp);
+		    lr_ignore_rest (lr, 0);
+		    return -1;
+		  }
+	      }
+	    else
+	      {
+		element_t *wp;
+		wchar_t wch;
+
+		if (*runp == lr->escape_char)
+		  {
+		    static char digits[] = "0123456789abcdef";
+		    char *dp;
+		    int base;
+
+		    ++runp;
+		    if (tolower (*runp) == 'x')
+		      {
+			++runp;
+			base = 16;
+		      }
+		    else if (tolower (*runp) == 'd')
+		      {
+			++runp;
+			base = 10;
+		      }
+		    else
+		      base = 8;
+
+		    dp = strchr (digits, tolower (*runp));
+		    if (dp == NULL || (dp - digits) >= base)
+		      {
+		      illegal_char:
+			lr_error (lr, _("\
+illegal character constant in string"));
+			lr_ignore_rest (lr, 0);
+			return -1;
+		      }
+		    wch = dp - digits;
+		    ++runp;
+
+		    dp = strchr (digits, tolower (*runp));
+		    if (dp == NULL || (dp - digits) >= base)
+		      goto illegal_char;
+		    wch *= base;
+		    wch += dp - digits;
+		    ++runp;
+
+		    if (base != 16)
+		      {
+			dp = strchr (digits, tolower (*runp));
+			if (dp != NULL && (dp - digits < base))
+			  {
+			    wch *= base;
+			    wch += dp - digits;
+			    ++runp;
+			  }
+		      }
+		  }
+		else
+		  wch = (wchar_t) *runp++;
+
+		/* Lookup the weight for WCH.  */
+		if (find_entry (&collate->result, &wch, sizeof (wch),
+				(void *)&wp) < 0)
+		  wp = NULL;
+
+		while (wp != NULL
+		       && (wp->name[0] != wch || wp->name[1] != L'\0'))
+		  wp = wp->next;
+
+		value = wp == NULL ? 0 : wp->this_weight;
+
+		/* To get the correct name for the error message.  */
+		putp = runp;
+
+		/**************************************************\
+		|* I know here is something wrong.  Characters in *|
+		|* the string which are not in the <...> form	  *|
+		|* cannot be declared forward for now!!!	  *|
+		\**************************************************/
+	      }
+
+	    /* Store in weight array.  */
+	    if (collate->nweight >= collate->nweight_max)
+	      {
+		collate->nweight_max *= 2;
+		collate->weight
+		  = (unsigned int *) xrealloc (collate->weight,
+					       collate->nweight_max);
+	      }
+
+	    if (value == 0)
+	      {
+		patch_t *newp;
+
+		newp = (patch_t *) obstack_alloc (&collate->element_mem,
+						  sizeof (patch_t));
+		newp->fname = lr->fname;
+		newp->lineno = lr->lineno;
+		newp->token
+		  = (const char *) obstack_copy0 (&collate->element_mem,
+						  startp, putp - startp);
+		newp->where.idx = collate->nweight++;
+		newp->next = collate->current_patch;
+		collate->current_patch = newp;
+	      }
+	    else
+	      collate->weight[collate->nweight++] = value;
+	    ++collate->weight_cnt[collate->weight_idx];
+	  }
+      }
+      return 0;
+
+    default:
+      assert (! "should not happen");
+    }
+
+
+  if (collate->nweight >= collate->nweight_max)
+    {
+      collate->nweight_max *= 2;
+      collate->weight = (unsigned int *) xrealloc (collate->weight,
+						   collate->nweight_max);
+    }
+
+  collate->weight[collate->nweight++] = value;
+  ++collate->weight_cnt[collate->weight_idx];
+
+  return 0;
+}
+
+
+void
+collate_end_weight (struct linereader *lr, struct localedef_t *locale)
+{
+  struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
+  element_t *pelem = collate->current_element;
+
+  if (collate->kind == symbol)
+    {
+      /* We don't have to do anything.  */
+      collate->was_ellipsis = 0;
+      return;
+    }
+
+  if (collate->kind == ellipsis)
+    {
+      /* Before the next line is processed the ellipsis is handled.  */
+      collate->was_ellipsis = 1;
+      return;
+    }
+
+  assert (collate->kind == character || collate->kind == element
+	  || collate->kind == undefined);
+
+  /* Fill in the missing weights.  */
+  while (++collate->weight_idx < collate->nrules)
+    {
+      collate->weight[collate->nweight++] = pelem->this_weight;
+      ++collate->weight_cnt[collate->weight_idx];
+    }
+
+  /* Now we know how many ordering weights the current
+     character/element has.  Allocate room in the element structure
+     and copy information.  */
+  pelem->ordering_len = collate->nweight;
+
+  /* First we write an array with the number of values for each
+     weight.  */
+  obstack_grow (&collate->element_mem, collate->weight_cnt,
+		collate->nrules * sizeof (unsigned int));
+
+  /* Now the weights itselves.  */
+  obstack_grow (&collate->element_mem, collate->weight,
+		collate->nweight * sizeof (unsigned int));
+
+  /* Get result.  */
+  pelem->ordering = obstack_finish (&collate->element_mem);
+
+  /* Now we handle the "patches".  */
+  while (collate->current_patch != NULL)
+    {
+      patch_t *this_patch;
+
+      this_patch = collate->current_patch;
+
+      this_patch->where.pos = &pelem->ordering[collate->nrules
+					      + this_patch->where.idx];
+
+      collate->current_patch = this_patch->next;
+      this_patch->next = collate->all_patches;
+      collate->all_patches = this_patch;
+    }
+
+  /* Set information for next round.  */
+  collate->was_ellipsis = 0;
+  if (collate->kind != undefined)
+    collate->last_char = pelem->name[0];
+}
author	Roland McGrath <roland@gnu.org>	1996-03-28 08:30:38 +0000
committer	Roland McGrath <roland@gnu.org>	1996-03-28 08:30:38 +0000
commit	19bc17a90548ee427035994bbc4b14395723ff1f (patch)
tree	e7a17eda196c2610ca4be26c9e7985815162eafb /locale/programs/ld-collate.c
parent	53f770e0f9d405ea8d1888254c6f7ce431b04c6e (diff)
download	glibc-19bc17a90548ee427035994bbc4b14395723ff1f.tar.gz glibc-19bc17a90548ee427035994bbc4b14395723ff1f.tar.xz glibc-19bc17a90548ee427035994bbc4b14395723ff1f.zip