5 files changed, 477 insertions, 16 deletions
diff --git a/locale/C-collate.c b/locale/C-collate.c
index 7875f5de22..7302682b30 100644
--- a/locale/C-collate.c
+++ b/locale/C-collate.c
@@ -150,13 +150,20 @@ const struct locale_data _nl_C_LC_COLLATE =
   _nl_C_name,
   NULL, 0, 0, /* no file mapped */
   UNDELETABLE,
-  6,
+  13,
   {
     { word: 0 },
     { string: NULL },
     { string: NULL },
     { string: NULL },
     { string: NULL },
+    { string: NULL },
+    { word: 0 },
+    { word: 0 },
+    { string: NULL },
+    { string: NULL },
+    { string: NULL },
+    { string: NULL },
     { string: NULL }
   }
 };
diff --git a/locale/categories.def b/locale/categories.def
index 40fc74213c..7ebb8536a5 100644
--- a/locale/categories.def
+++ b/locale/categories.def
@@ -48,6 +48,13 @@ DEFINE_CATEGORY
   DEFINE_ELEMENT (_NL_COLLATE_WEIGHTMB,       "collate-weightmb",         std, string)
   DEFINE_ELEMENT (_NL_COLLATE_EXTRAMB,        "collate-extramb",          std, string)
   DEFINE_ELEMENT (_NL_COLLATE_INDIRECTMB,     "collate-indirectmb",       std, string)
+  DEFINE_ELEMENT (_NL_COLLATE_HASH_SIZE,      "collate-hash-size",        std, word)
+  DEFINE_ELEMENT (_NL_COLLATE_HASH_LAYERS,    "collate-hash-layers",      std, word)
+  DEFINE_ELEMENT (_NL_COLLATE_NAMES,          "collate-names",            std, string)
+  DEFINE_ELEMENT (_NL_COLLATE_TABLEWC,        "collate-tablewc",          std, string)
+  DEFINE_ELEMENT (_NL_COLLATE_WEIGHTWC,       "collate-weightwc",         std, string)
+  DEFINE_ELEMENT (_NL_COLLATE_EXTRAWC,        "collate-extrawc",          std, string)
+  DEFINE_ELEMENT (_NL_COLLATE_INDIRECTWC,     "collate-indirectwc",       std, string)
   ), NO_POSTLOAD)
 
 
diff --git a/locale/langinfo.h b/locale/langinfo.h
index 3f39298c17..e46fc65184 100644
--- a/locale/langinfo.h
+++ b/locale/langinfo.h
@@ -236,6 +236,13 @@ enum
   _NL_COLLATE_WEIGHTMB,
   _NL_COLLATE_EXTRAMB,
   _NL_COLLATE_INDIRECTMB,
+  _NL_COLLATE_HASH_SIZE,
+  _NL_COLLATE_HASH_LAYERS,
+  _NL_COLLATE_NAMES,
+  _NL_COLLATE_TABLEWC,
+  _NL_COLLATE_WEIGHTWC,
+  _NL_COLLATE_EXTRAWC,
+  _NL_COLLATE_INDIRECTWC,
   _NL_NUM_LC_COLLATE,
 
   /* LC_CTYPE category: character classification.
diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c
index a7eb8083a4..ae132b4da5 100644
--- a/locale/programs/ld-collate.c
+++ b/locale/programs/ld-collate.c
@@ -106,6 +106,9 @@ struct element_t
 
   /* Next element in multibyte output list.  */
   struct element_t *mbnext;
+
+  /* Next element in wide character output list.  */
+  struct element_t *wcnext;
 };
 
 /* Special element value.  */
@@ -171,6 +174,14 @@ struct locale_collate_t
   /* Arrays with heads of the list for each of the leading bytes in
      the multibyte sequences.  */
   struct element_t *mbheads[256];
+
+  /* Table size of wide character hash table.  */
+  size_t plane_size;
+  size_t plane_cnt;
+
+  /* Arrays with heads of the list for each of the leading bytes in
+     the multibyte sequences.  */
+  struct element_t **wcheads;
 };
 
 
@@ -1381,6 +1392,9 @@ collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
   int need_undefined = 0;
   struct section_list *sect;
   int ruleidx;
+  int nr_wide_elems = 0;
+  size_t min_total;
+  size_t act_size;
 
   if (collate == NULL)
     {
@@ -1457,8 +1471,8 @@ collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
      be encoded to make it possible to emit the value as a byte
      string.  */
   for (i = 0; i < nrules; ++i)
-    mbact[i] = 3;
-  wcact = 3;
+    mbact[i] = 2;
+  wcact = 2;
   runp = collate->start;
   while (runp != NULL)
     {
@@ -1518,7 +1532,13 @@ collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
 	}
 
       if (runp->wcs != NULL)
-	runp->wcorder = wcact++;
+	{
+	  runp->wcorder = wcact++;
+
+	  /* We take the opportunity to count the elements which have
+	     wide characters.  */
+	  ++nr_wide_elems;
+	}
 
       /* Up to the next entry.  */
       runp = runp->next;
@@ -1533,6 +1553,165 @@ collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
 	collate->mbheads[i] = &collate->undefined;
       }
 
+  /* Now to the wide character case.  Here we have to find first a good
+     mapping function to get the wide range of wide character values
+     (0x00000000 to 0x7fffffff) to a managable table.  This might take
+     some time so we issue a warning.
+
+     We use a very trivial hashing function to store the sparse
+     table.  CH % TABSIZE is used as an index.  To solve multiple hits
+     we have N planes.  This guarantees a fixed search time for a
+     character [N / 2].  In the following code we determine the minimum
+     value for TABSIZE * N, where TABSIZE >= 256.
+
+     Some people complained that this algorithm takes too long.  Well,
+     go on, improve it.  But changing the step size is *not* an
+     option.  Some people changed this to use only sizes of prime
+     numbers.  Think again, do some math.  We are looking for the
+     optimal solution, not something which works in general.  Unless
+     somebody can provide a dynamic programming solution I think this
+     implementation is as good as it can get.  */
+  if (nr_wide_elems > 512 && !be_quiet)
+    fputs (_("\
+Computing table size for collation table might take a while..."),
+	   stderr);
+
+  min_total = UINT_MAX;
+  act_size = 256;
+
+  /* While we want to have a small total size we are willing to use a
+     little bit larger table if this reduces the number of layers.
+     Therefore we add a little penalty to the number of planes.
+     Maybe this constant has to be adjusted a bit.  */
+#define PENALTY 128
+  do
+    {
+      size_t cnt[act_size];
+      struct element_t *elem[act_size];
+      size_t act_planes = 1;
+
+      memset (cnt, '\0', sizeof cnt);
+      memset (elem, '\0', sizeof elem);
+
+      runp = collate->start;
+      while (runp != NULL)
+	{
+	  if (runp->wcs != NULL)
+	    {
+	      size_t nr = runp->wcs[0] % act_size;
+	      struct element_t *elemp = elem[nr];
+
+	      while (elemp != NULL)
+		{
+		  if (elemp->wcs[0] == runp->wcs[0])
+		    break;
+		  elemp = elemp->wcnext;
+		}
+
+	      if (elemp == NULL && ++cnt[nr] > act_planes)
+		{
+		  act_planes = cnt[nr];
+
+		  runp->wcnext = elem[nr];
+		  elem[nr] = runp;
+
+		  if ((act_size + PENALTY) * act_planes >= min_total)
+		    break;
+		}
+	    }
+
+	  /* Up to the next entry.  */
+	  runp = runp->next;
+	}
+
+      if ((act_size + PENALTY) * act_planes < min_total)
+	{
+	  min_total = (act_size + PENALTY) * act_planes;
+	  collate->plane_size = act_size;
+	  collate->plane_cnt = act_planes;
+	}
+
+      ++act_size;
+    }
+  while (act_size < min_total);
+
+  if (nr_wide_elems > 512 && !be_quiet)
+    fputs (_(" done\n"), stderr);
+
+  /* Now that we know how large the table has to be we are able to
+     allocate the array and start adding the characters to the lists
+     in the same way we did it for the multibyte characters.  */
+  collate->wcheads = (struct element_t **)
+    obstack_alloc (&collate->mempool, (collate->plane_size
+				       * collate->plane_cnt
+				       * sizeof (struct element_t *)));
+  memset (collate->wcheads, '\0', (collate->plane_size
+				   * collate->plane_cnt
+				   * sizeof (struct element_t *)));
+
+  /* Start adding.  */
+  runp = collate->start;
+  while (runp != NULL)
+    {
+      if (runp->wcs != NULL)
+	{
+	  struct element_t **eptr;
+	  size_t idx;
+
+	  /* Find a free index.  */
+	  idx = runp->wcs[0] % collate->plane_size;
+	  while (collate->wcheads[idx] != NULL)
+	    {
+	      /* Stop if this is an entry with the same starting character.  */
+	      if (collate->wcheads[idx]->wcs[0] == runp->wcs[0])
+		break;
+
+	      idx += collate->plane_size;
+	    }
+
+	  /* Find the point where to insert in the list.  */
+	  eptr = &collate->wcheads[idx];
+	  while (*eptr != NULL)
+	    {
+	      if ((*eptr)->nwcs < runp->nwcs)
+		break;
+
+	      if ((*eptr)->nwcs == runp->nwcs)
+		{
+		  int c = wmemcmp ((wchar_t *) (*eptr)->wcs,
+				   (wchar_t *) runp->wcs, runp->nwcs);
+
+		  if (c == 0)
+		    {
+		      /* This should not happen.  It means that we have
+			 to symbols with the same byte sequence.  It is
+			 of course an error.  */
+		      error_at_line (0, 0, (*eptr)->file, (*eptr)->line,
+				     _("symbol `%s' has same encoding as"),
+				     (*eptr)->name);
+		      error_at_line (0, 0, runp->file, runp->line,
+				     _("symbol `%s'"), runp->name);
+		      goto dont_insertwc;
+		    }
+		  else if (c < 0)
+		    /* Insert it here.  */
+		    break;
+		}
+
+	      /* To the next entry.  */
+	      eptr = &(*eptr)->wcnext;
+	    }
+
+	  /* Set the pointers.  */
+	  runp->wcnext = *eptr;
+	  *eptr = runp;
+	dont_insertwc:
+	}
+
+      /* Up to the next entry.  */
+      runp = runp->next;
+    }
+
   /* Now determine whether the UNDEFINED entry is needed and if yes,
      whether it was defined.  */
   collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
@@ -1620,6 +1799,45 @@ output_weight (struct obstack *pool, struct locale_collate_t *collate,
 }
 
 
+static int32_t
+output_weightwc (struct obstack *pool, struct locale_collate_t *collate,
+		 struct element_t *elem)
+{
+  size_t cnt;
+  int32_t retval;
+
+  /* Optimize the use of UNDEFINED.  */
+  if (elem == &collate->undefined)
+    /* The weights are already inserted.  */
+    return 0;
+
+  /* This byte can start exactly one collation element and this is
+     a single byte.  We can directly give the index to the weights.  */
+  retval = obstack_object_size (pool);
+
+  /* Construct the weight.  */
+  for (cnt = 0; cnt < nrules; ++cnt)
+    {
+      int32_t buf[elem->weights[cnt].cnt];
+      int32_t i;
+
+      for (i = 0; i < elem->weights[cnt].cnt; ++i)
+	if (elem->weights[cnt].w[i] != NULL)
+	  buf[i] = elem->weights[cnt].w[i]->wcorder;
+
+      /* And add the buffer content.  */
+      if (sizeof (int) == sizeof (int32_t))
+	obstack_int_grow (pool, i);
+      else
+	obstack_grow (pool, &i, sizeof (int32_t));
+
+      obstack_grow (pool, buf, i * sizeof (int32_t));
+    }
+
+  return retval | ((elem->section->ruleidx & 0x7f) << 24);
+}
+
+
 void
 collate_output (struct localedef_t *locale, struct charmap_t *charmap,
 		const char *output_path)
@@ -1636,6 +1854,9 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
   struct obstack extrapool;
   struct obstack indirectpool;
   struct section_list *sect;
+  uint32_t *names;
+  uint32_t *tablewc;
+  size_t table_size;
   int i;
 
   data.magic = LIMAGIC (LC_COLLATE);
@@ -1783,7 +2004,7 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
 		int i;
 
 		/* Now add first the initial byte sequence.  */
-		added = ((sizeof (int32_t) + 1 + 1 + 2 * (runp->nmbs - 1)
+		added = ((sizeof (int32_t) + 1 + 2 * (runp->nmbs - 1)
 			  + __alignof__ (int32_t) - 1)
 			 & ~(__alignof__ (int32_t) - 1));
 		obstack_make_room (&extrapool, added);
@@ -1809,7 +2030,7 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
 		while (1)
 		  {
 		    if (sizeof (int32_t) == sizeof (int))
-		      obstack_int_grow_fast (&extrapool, weightidx);
+		      obstack_int_grow (&extrapool, weightidx);
 		    else
 		      obstack_grow (&extrapool, &weightidx, sizeof (int32_t));
 
@@ -1833,7 +2054,7 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
 
 		weightidx = output_weight (&weightpool, collate, runp);
 		if (sizeof (int32_t) == sizeof (int))
-		  obstack_int_grow_fast (&extrapool, weightidx);
+		  obstack_int_grow (&extrapool, weightidx);
 		else
 		  obstack_grow (&extrapool, &weightidx, sizeof (int32_t));
 	      }
@@ -1844,7 +2065,7 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
 		   tested for).  */
 		int i;
 
-		added = ((sizeof (int32_t) + 1 + 1 + runp->nmbs - 1
+		added = ((sizeof (int32_t) + 1 + runp->nmbs - 1
 			  + __alignof__ (int32_t) - 1)
 			 & ~(__alignof__ (int32_t) - 1));
 		obstack_make_room (&extrapool, added);
@@ -1900,7 +2121,7 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
 	  }
       }
 
-  /* Now add the three tables.  */
+  /* Now add the four tables.  */
   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEMB));
   iov[2 + cnt].iov_base = tablemb;
   iov[2 + cnt].iov_len = sizeof (tablemb);
@@ -1926,6 +2147,211 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
   ++cnt;
 
 
+  /* Now the same for the wide character table.  We need to store some
+     more information here.  */
+  assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE));
+  iov[2 + cnt].iov_base = &collate->plane_size;
+  iov[2 + cnt].iov_len = sizeof (collate->plane_size);
+  idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
+  ++cnt;
+
+  assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS));
+  iov[2 + cnt].iov_base = &collate->plane_cnt;
+  iov[2 + cnt].iov_len = sizeof (collate->plane_cnt);
+  idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
+  ++cnt;
+
+  /* Construct a table with the names.  The size of the table is the same
+     as the table with the pointers.  */
+  table_size = collate->plane_size * collate->plane_cnt;
+  names = (uint32_t *) alloca (table_size * sizeof (uint32_t));
+  for (ch = 0; ch < table_size; ++ch)
+    if (collate->wcheads[ch] == NULL)
+      names[ch] = 0;
+    else
+      names[ch] = collate->wcheads[ch]->wcs[0];
+
+  assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_NAMES));
+  iov[2 + cnt].iov_base = names;
+  iov[2 + cnt].iov_len = table_size * sizeof (uint32_t);
+  idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
+  ++cnt;
+
+  /* Generate the table.  Walk through the lists of sequences
+     starting with the same byte and add them one after the other to
+     the table.  In case we have more than one sequence starting with
+     the same byte we have to use extra indirection.  */
+  tablewc = (uint32_t *) alloca (table_size * sizeof (uint32_t));
+  for (ch = 0; ch < table_size; ++ch)
+    if (collate->wcheads[ch] == NULL)
+      {
+	/* Set the entry to zero.  */
+	tablewc[ch] = 0;
+      }
+    else if (collate->wcheads[ch]->wcnext == NULL
+	&& collate->wcheads[ch]->nwcs == 1)
+      {
+	tablewc[ch] = output_weightwc (&weightpool, collate,
+				       collate->wcheads[ch]);
+      }
+    else
+      {
+	/* As for the singlebyte table, we recognize sequences and
+	   compress them.  */
+	struct element_t *runp = collate->wcheads[ch];
+	struct element_t *lastp;
+
+	tablewc[ch] = -obstack_object_size (&extrapool);
+
+	do
+	  {
+	    /* Store the current index in the weight table.  We know that
+	       the current position in the `extrapool' is aligned on a
+	       32-bit address.  */
+	    int32_t weightidx;
+	    int added;
+
+	    /* Output the weight info.  */
+	    weightidx = output_weightwc (&weightpool, collate, runp);
+
+	    /* Find out wether this is a single entry or we have more than
+	       one consecutive entry.  */
+	    if (runp->wcnext != NULL
+		&& runp->nwcs == runp->wcnext->nwcs
+		&& wmemcmp ((wchar_t *) runp->wcs,
+			    (wchar_t *)runp->wcnext->wcs, runp->nwcs - 1) == 0
+		&& (runp->wcs[runp->nwcs - 1] + 1
+		    == runp->wcnext->wcs[runp->nwcs - 1]))
+	      {
+		int i;
+
+		/* Now add first the initial byte sequence.  */
+		added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t);
+		if (sizeof (int32_t) == sizeof (int))
+		  obstack_make_room (&extrapool, added);
+
+		/* More than one consecutive entry.  We mark this by having
+		   a negative index into the indirect table.  */
+		if (sizeof (int32_t) == sizeof (int))
+		  {
+		    obstack_int_grow_fast (&extrapool,
+					   obstack_object_size (&indirectpool)
+					   / sizeof (int32_t));
+		    obstack_int_grow_fast (&extrapool, runp->nwcs - 1);
+		  }
+		else
+		  {
+		    int32_t i = (obstack_object_size (&indirectpool)
+				 / sizeof (int32_t));
+		    obstack_grow (&extrapool, &i, sizeof (int32_t));
+		    i = runp->nwcs - 1;
+		    obstack_grow (&extrapool, &i, sizeof (int32_t));
+		  }
+		for (i = 1; i < runp->nwcs; ++i)
+		  if (sizeof (int32_t) == sizeof (int))
+		    obstack_int_grow_fast (&extrapool, runp->wcs[i]);
+		  else
+		    obstack_grow (&extrapool, &runp->wcs[i], sizeof (int32_t));
+
+		/* Now find the end of the consecutive sequence and
+                   add all the indeces in the indirect pool.  */
+		while (1)
+		  {
+		    if (sizeof (int32_t) == sizeof (int))
+		      obstack_int_grow (&extrapool, weightidx);
+		    else
+		      obstack_grow (&extrapool, &weightidx, sizeof (int32_t));
+
+		    runp = runp->next;
+		    if (runp->wcnext == NULL
+			|| runp->nwcs != runp->wcnext->nwcs
+			|| wmemcmp ((wchar_t *) runp->wcs,
+				    (wchar_t *) runp->wcnext->wcs,
+				    runp->nwcs - 1) != 0
+			|| (runp->wcs[runp->nwcs - 1] + 1
+			    != runp->wcnext->wcs[runp->nwcs - 1]))
+		      break;
+
+		    /* Insert the weight.  */
+		    weightidx = output_weightwc (&weightpool, collate, runp);
+		  }
+
+		/* And add the end byte sequence.  Without length this
+                   time.  */
+		for (i = 1; i < runp->nwcs; ++i)
+		  if (sizeof (int32_t) == sizeof (int))
+		    obstack_int_grow (&extrapool, runp->wcs[i]);
+		  else
+		    obstack_grow (&extrapool, &runp->wcs[i], sizeof (int32_t));
+
+		weightidx = output_weightwc (&weightpool, collate, runp);
+		if (sizeof (int32_t) == sizeof (int))
+		  obstack_int_grow (&extrapool, weightidx);
+		else
+		  obstack_grow (&extrapool, &weightidx, sizeof (int32_t));
+	      }
+	    else
+	      {
+		/* A single entry.  Simply add the index and the length and
+		   string (except for the first character which is already
+		   tested for).  */
+		int i;
+
+		added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t);
+		if (sizeof (int) == sizeof (int32_t))
+		  obstack_make_room (&extrapool, added);
+
+		if (sizeof (int32_t) == sizeof (int))
+		  {
+		    obstack_int_grow_fast (&extrapool, weightidx);
+		    obstack_int_grow_fast (&extrapool, runp->nwcs - 1);
+		  }
+		else
+		  {
+		    int32_t l = runp->nwcs - 1;
+		    obstack_grow (&extrapool, &weightidx, sizeof (int32_t));
+		    obstack_grow (&extrapool, &l, sizeof (int32_t));
+		  }
+		for (i = 1; i < runp->nwcs; ++i)
+		  if (sizeof (int32_t) == sizeof (int))
+		    obstack_int_grow_fast (&extrapool, runp->wcs[i]);
+		  else
+		    obstack_grow (&extrapool, &runp->wcs[i], sizeof (int32_t));
+	      }
+
+	    /* Next entry.  */
+	    lastp = runp;
+	    runp = runp->wcnext;
+	  }
+	while (runp != NULL);
+      }
+
+  /* Now add the four tables.  */
+  assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEWC));
+  iov[2 + cnt].iov_base = tablewc;
+  iov[2 + cnt].iov_len = table_size * sizeof (int32_t);
+  idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
+  ++cnt;
+
+  assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTWC));
+  iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
+  iov[2 + cnt].iov_base = obstack_finish (&weightpool);
+  idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
+  ++cnt;
+
+  assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAWC));
+  iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
+  iov[2 + cnt].iov_base = obstack_finish (&extrapool);
+  idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
+  ++cnt;
+
+  assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTWC));
+  iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
+  iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
+  idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
+  ++cnt;
+
+
   assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
 
   write_locale_data (output_path, "LC_COLLATE", 2 + cnt, iov);
diff --git a/locale/programs/ld-ctype.c b/locale/programs/ld-ctype.c
index d68f618d0d..bfaf6c7d09 100644
--- a/locale/programs/ld-ctype.c
+++ b/locale/programs/ld-ctype.c
@@ -2937,16 +2937,29 @@ allocate_arrays (struct locale_ctype_t *ctype, struct charmap_t *charmap,
      table.  CH % TABSIZE is used as an index.  To solve multiple hits
      we have N planes.  This guarantees a fixed search time for a
      character [N / 2].  In the following code we determine the minimum
-     value for TABSIZE * N, where TABSIZE >= 256.  */
+     value for TABSIZE * N, where TABSIZE >= 256.
+
+     Some people complained that this algorithm takes too long.  Well,
+     go on, improve it.  But changing the step size is *not* an
+     option.  Some people changed this to use only sizes of prime
+     numbers.  Think again, do some math.  We are looking for the
+     optimal solution, not something which works in general.  Unless
+     somebody can provide a dynamic programming solution I think this
+     implementation is as good as it can get.  */
   size_t min_total = UINT_MAX;
   size_t act_size = 256;
 
-  if (!be_quiet)
+  if (!be_quiet && ctype->charnames_act > 512)
     fputs (_("\
 Computing table size for character classes might take a while..."),
 	   stderr);
 
-  while (act_size < min_total)
+  /* While we want to have a small total size we are willing to use a
+     little bit larger table if this reduces the number of layers.
+     Therefore we add a little penalty to the number of planes.
+     Maybe this constant has to be adjusted a bit.  */
+#define PENALTY 128
+  do
     {
       size_t cnt[act_size];
       size_t act_planes = 1;
@@ -2964,22 +2977,23 @@ Computing table size for character classes might take a while..."),
 	    if (++cnt[nr] > act_planes)
 	      {
 		act_planes = cnt[nr];
-		if (act_size * act_planes >= min_total)
+		if ((act_size + PENALTY) * act_planes >= min_total)
 		  break;
 	      }
 	  }
 
-      if (act_size * act_planes < min_total)
+      if ((act_size + PENALTY) * act_planes < min_total)
 	{
-	  min_total = act_size * act_planes;
+	  min_total = (act_size + PENALTY) * act_planes;
 	  ctype->plane_size = act_size;
 	  ctype->plane_cnt = act_planes;
 	}
 
       ++act_size;
     }
+  while (act_size < min_total);
 
-  if (!be_quiet)
+  if (!be_quiet && ctype->charnames_act > 512)
     fputs (_(" done\n"), stderr);