about summary refs log tree commit diff
path: root/locale/weight.h
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 1999-12-25 23:41:39 +0000
committer Ulrich Drepper <drepper@redhat.com> 1999-12-25 23:41:39 +0000
commit 450bf66ef223ad83e7032920652445817865770b (patch)
tree 1bfd6848a2453f4ad2c9cdca8e4e4c817e995798 /locale/weight.h
parent ce40141c6b68a40687f460450e1d07a0a78e1559 (diff)
download glibc-450bf66ef223ad83e7032920652445817865770b.tar.gz
glibc-450bf66ef223ad83e7032920652445817865770b.tar.xz
glibc-450bf66ef223ad83e7032920652445817865770b.zip
Update.
1999-12-25  Ulrich Drepper  <drepper@cygnus.com>

	* locale/C-collate.c (_nl_C_LC_COLLATE): Add one more entry for the
	indirect table.
	* locale/langinfo.h: Likewise.
	* locale/categories.def: Likewise.  Remove reference to postload
	functions.
	* locale/lc-collate.c (_nl_postload_collate): Removed.  Also remove
	__collate_tablemb, __collate_weightmb, and __collate_extramb.
	* locale/localeinfo.h: Remove declaration for removed variables above.
	Remove prototype for _nl_get_era_entry.
	* locale/weight.h: Complete rewrite for new collate implementation.
	* locale/programs/ld-collate.c: Many changes to make output file
	usable in strxfrm/strcoll.
	* string/strxfrm.c: Complete rewrite for new collate implementation.
	* wcsmbs/wcsxfrm.c: Don't use strxfrm.c, implement dummy implementation
	locally.

1999-12-25  Shinya Hanataka  <hanataka@abyss.rim.or.jp>

	* locale/programs/ld-ctype.c (allocate_arrays): Correctly assign
	transformation values for chars >255.
	* wctype/wctrans.c: Return pointer unmodified.
Diffstat (limited to 'locale/weight.h')
-rw-r--r-- locale/weight.h 251
1 files changed, 83 insertions, 168 deletions
diff --git a/locale/weight.h b/locale/weight.h
index 6e31e2d495..356ee57855 100644
--- a/locale/weight.h
+++ b/locale/weight.h
@@ -17,191 +17,106 @@
    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
    Boston, MA 02111-1307, USA.  */
 
-#include <alloca.h>
-#include <errno.h>
-#include <langinfo.h>
-#include "localeinfo.h"
-
-#ifndef STRING_TYPE
-# error STRING_TYPE not defined
-#endif
+/* Find index of weight.  */
+static inline int32_t
+findidx (const unsigned char **cpp)
+{
+  int_fast32_t i = table[*(*cpp)++];
+  const unsigned char *cp;
 
-#ifndef USTRING_TYPE
-# error USTRING_TYPE not defined
-#endif
+  if (i >= 0)
+    /* This is an index into the weight table.  Cool.  */
+    return i;
 
-typedef struct weight_t
-{
-  struct weight_t *prev;
-  struct weight_t *next;
-  struct data_pair
+  /* Oh well, more than one sequence starting with this byte.
+     Search for the correct one.  */
+  cp = &extra[-i];
+  while (1)
     {
-      int number;
-      const uint32_t *value;
-    } data[0];
-} weight_t;
-
-
-/* The following five macros grant access to the values in the
-   collate locale file that do not depend on byte order.  */
-#ifndef USE_IN_EXTENDED_LOCALE_MODEL
-# define collate_nrules \
-  (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES))
-# define collate_hash_size \
-  (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_SIZE))
-# define collate_hash_layers \
-  (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_LAYERS))
-# define collate_undefined \
-  (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_UNDEFINED_WC))
-# define collate_rules \
-  ((uint32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_RULES))
-
-static __inline void get_weight (const STRING_TYPE **str, weight_t *result);
-static __inline void
-get_weight (const STRING_TYPE **str, weight_t *result)
-#else
-# define collate_nrules \
-  current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word
-# define collate_hash_size \
-  current->values[_NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].word
-# define collate_hash_layers \
-  current->values[_NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].word
-# define collate_undefined \
-  current->values[_NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED_WC)].word
-# define collate_rules \
-  ((uint32_t *) current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULES)].string)
-
-static __inline void get_weight (const STRING_TYPE **str, weight_t *result,
-				 struct locale_data *current,
-				 const uint32_t *__collate_tablewc,
-				 const uint32_t *__collate_extrawc);
-static __inline void
-get_weight (const STRING_TYPE **str, weight_t *result,
-	    struct locale_data *current, const uint32_t *__collate_tablewc,
-	    const uint32_t *__collate_extrawc)
-#endif
-{
-  unsigned int ch = *((USTRING_TYPE *) (*str))++;
-  size_t slot;
+      size_t nhere;
+      const unsigned char *usrc = *cpp;
 
-  if (sizeof (STRING_TYPE) == 1)
-    slot = ch * (collate_nrules + 1);
-  else
-    {
-      const size_t level_size = collate_hash_size * (collate_nrules + 1);
-      size_t level;
+      /* The first thing is the index.  */
+      i = *((int32_t *) cp);
+      cp += sizeof (int32_t);
 
-      slot = (ch % collate_hash_size) * (collate_nrules + 1);
+      /* Next is the length of the byte sequence.  These are always
+	 short byte sequences so there is no reason to call any
+	 function (even if they are inlined).  */
+      nhere = *cp++;
 
-      level = 0;
-      while (__collate_tablewc[slot] != (uint32_t) ch)
+      if (i >= 0)
 	{
-	  if (__collate_tablewc[slot + 1] == 0
-	      || ++level >= collate_hash_layers)
-	    {
-	      size_t idx = collate_undefined;
-	      size_t cnt;
+	  /* It is a single character.  If it matches we found our
+	     index.  Note that at the end of each list there is an
+	     entry of length zero which represents the single byte
+	     sequence.  The first (and here only) byte was tested
+	     already.  */
+	  size_t cnt;
 
-	      for (cnt = 0; cnt < collate_nrules; ++cnt)
-		{
-		  result->data[cnt].number = __collate_extrawc[idx++];
-		  result->data[cnt].value = &__collate_extrawc[idx];
-		  idx += result->data[cnt].number;
-		}
-	      /* The Unix standard requires that a character outside
-		 the domain is signalled by setting `errno'.  */
-	      __set_errno (EINVAL);
-	      return;
-	    }
-	  slot += level_size;
-	}
-    }
+	  for (cnt = 0; cnt < nhere; ++cnt)
+	    if (cp[cnt] != usrc[cnt])
+	      break;
 
-  if (__collate_tablewc[slot + 1] != (uint32_t) FORWARD_CHAR)
-    {
-      /* We have a simple form.  One value for each weight.  */
-      size_t cnt;
+	  if (cnt == nhere)
+	    {
+	      /* Found it.  */
+	      *cpp += nhere;
+	      return i;
+	    }
 
-      for (cnt = 0; cnt < collate_nrules; ++cnt)
-	{
-	  result->data[cnt].number = 1;
-	  result->data[cnt].value = &__collate_tablewc[slot + 1 + cnt];
+	  /* Up to the next entry.  */
+	  cp += nhere;
 	}
-      return;
-    }
+      else
+	{
+	  /* This is a range of characters.  First decide whether the
+	     current byte sequence lies in the range.  */
+	  size_t cnt;
+	  size_t offset = 0;
 
-  /* We now look for any collation element which starts with CH.
-     There might none, but the last list member is a catch-all case
-     because it is simple the character CH.  The value of this entry
-     might be the same as UNDEFINED.  */
-  slot = __collate_tablewc[slot + 2];
+	  for (cnt = 0; cnt < nhere; ++cnt)
+	    if (cp[cnt] != usrc[cnt])
+	      break;
 
-  while (1)
-    {
-      size_t idx;
+	  if (cnt != nhere)
+	    {
+	      if (cp[cnt] > usrc[cnt])
+		{
+		  /* Cannot be in this range.  */
+		  cp += 2 * nhere;
+		  continue;
+		}
 
-      /* This is a comparison between a uint32_t array (aka wchar_t) and
-	 an 8-bit string.  */
-      for (idx = 0; __collate_extrawc[slot + 2 + idx] != 0; ++idx)
-	if (__collate_extrawc[slot + 2 + idx] != (uint32_t) (*str)[idx])
-	  break;
+	      /* Test against the end of the range.  */
+	      for (cnt = 0; cnt < nhere; ++cnt)
+		if (cp[nhere + cnt] != usrc[cnt])
+		  break;
 
-      /* When the loop finished with all character of the collation
-	 element used, we found the longest prefix.  */
-      if (__collate_extrawc[slot + 2 + idx] == 0)
-	{
-	  size_t cnt;
+	      if (cnt != nhere && cp[nhere + cnt] < usrc[cnt])
+		{
+		  /* Cannot be in this range.  */
+		  cp += 2 * nhere;
+		  continue;
+		}
 
-	  *str += idx;
-	  idx += slot + 3;
-	  for (cnt = 0; cnt < collate_nrules; ++cnt)
-	    {
-	      result->data[cnt].number = __collate_extrawc[idx++];
-	      result->data[cnt].value = &__collate_extrawc[idx];
-	      idx += result->data[cnt].number;
+	      /* This range matches the next characters.  Now find
+		 the offset in the indirect table.  */
+	      for (cnt = 0; cp[cnt] == usrc[cnt]; ++cnt);
+
+	      do
+		{
+		  offset <<= 8;
+		  offset += usrc[cnt] - cp[cnt];
+		}
+	      while (++cnt < nhere);
 	    }
-	  return;
-	}
 
-      /* To next entry in list.  */
-      slot += __collate_extrawc[slot];
+	  *cpp += nhere;
+	  return offset;
+	}
     }
-}
 
-
-/* To process a string efficiently we retrieve all information about
-   the string at once.  The following macro constructs a double linked
-   list of this information.  It is a macro because we use `alloca'
-   and we use a double linked list because of the backward collation
-   order.
-
-   We have this strange extra macro since the functions which use the
-   given locale (not the global one) cannot use the global tables.  */
-#ifndef USE_IN_EXTENDED_LOCALE_MODEL
-# define call_get_weight(strp, newp) get_weight ((strp), (newp))
-#else
-# define call_get_weight(strp, newp) \
-  get_weight ((strp), (newp), current, collate_table, collate_extra)
-#endif
-
-#define get_string(str, forw, backw) \
-  do									      \
-    {									      \
-      weight_t *newp;							      \
-      while (*str != '\0')						      \
-	{								      \
-	  newp = (weight_t *) alloca (sizeof (weight_t)			      \
-				      + (collate_nrules			      \
-					 * sizeof (struct data_pair)));	      \
-									      \
-	  newp->prev = backw;						      \
-	  if (backw == NULL)						      \
-	    forw = newp;						      \
-	  else								      \
-	    backw->next = newp;						      \
-	  newp->next = NULL;						      \
-	  backw = newp;							      \
-	  call_get_weight (&str, newp);					      \
-	}								      \
-    }									      \
-  while (0)
+  /* NOTREACHED */
+  return 0x43219876;
+}