summary refs log tree commit diff
path: root/string
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 1999-12-31 00:04:07 +0000
committer Ulrich Drepper <drepper@redhat.com> 1999-12-31 00:04:07 +0000
commit 83d660c76fb1287f2cd9e6b94ddccb7069a6fae5 (patch)
tree 487671d11bec5118f65a145ef262b2a49b11d098 /string
parent 9eb157c8750f70adc4c35f09127055444672b63d (diff)
download glibc-83d660c76fb1287f2cd9e6b94ddccb7069a6fae5.tar.gz
glibc-83d660c76fb1287f2cd9e6b94ddccb7069a6fae5.tar.xz
glibc-83d660c76fb1287f2cd9e6b94ddccb7069a6fae5.zip
Update.
1999-12-30  Ulrich Drepper  <drepper@cygnus.com>

	* wcsmbs/wcscoll.c: Use multibyte character version.
	* wcsmbs/wcsxfrm.c: Likewise.
	* string/strcoll.c: Prepare to be used for the wide character version.
	* string/strxfrm.c: Likewise.
	* locale/weightwc.h: New file.
Diffstat (limited to 'string')
-rw-r--r-- string/strcoll.c 101
-rw-r--r-- string/strxfrm.c 141
2 files changed, 169 insertions, 73 deletions
diff --git a/string/strcoll.c b/string/strcoll.c
index 0f0a45a2d1..32d9124421 100644
--- a/string/strcoll.c
+++ b/string/strcoll.c
@@ -24,24 +24,36 @@
 #include <stdlib.h>
 #include <string.h>
 
-#include "../locale/localeinfo.h"
-
-#ifdef USE_IN_EXTENDED_LOCALE_MODEL
-# define STRCOLL __strcoll_l
-#else
-# define STRCOLL strcoll
+#ifndef STRING_TYPE
+# define STRING_TYPE char
+# define USTRING_TYPE unsigned char
+# ifdef USE_IN_EXTENDED_LOCALE_MODEL
+#  define STRCOLL __strcoll_l
+# else
+#  define STRCOLL strcoll
+# endif
+# define STRCMP strcmp
+# define STRLEN strlen
+# define WEIGHT_H "../locale/weight.h"
+# define SUFFIX	MB
+# define L(arg) arg
 #endif
 
+#define CONCAT(a,b) CONCAT1(a,b)
+#define CONCAT1(a,b) a##b
+
+#include "../locale/localeinfo.h"
+
 #ifndef USE_IN_EXTENDED_LOCALE_MODEL
 int
 STRCOLL (s1, s2)
-     const char *s1;
-     const char *s2;
+     const STRING_TYPE *s1;
+     const STRING_TYPE *s2;
 #else
 int
 STRCOLL (s1, s2, l)
-     const char *s1;
-     const char *s2;
+     const STRING_TYPE *s1;
+     const STRING_TYPE *s2;
      __locale_t l;
 #endif
 {
@@ -49,19 +61,19 @@ STRCOLL (s1, s2, l)
   struct locale_data *current = l->__locales[LC_COLLATE];
   uint_fast32_t nrules = *((uint32_t *) current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].string);
 #else
-  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+  uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 #endif
   /* We don't assign the following values right away since it might be
      unnecessary in case there are no rules.  */
   const unsigned char *rulesets;
   const int32_t *table;
-  const unsigned char *weights;
-  const unsigned char *extra;
+  const USTRING_TYPE *weights;
+  const USTRING_TYPE *extra;
   const int32_t *indirect;
   uint_fast32_t pass;
   int result = 0;
-  const unsigned char *us1;
-  const unsigned char *us2;
+  const USTRING_TYPE *us1;
+  const USTRING_TYPE *us2;
   size_t s1len;
   size_t s2len;
   int32_t *idx1arr;
@@ -83,45 +95,62 @@ STRCOLL (s1, s2, l)
   int position;
   int seq1len;
   int seq2len;
-  int use_malloc = 0;
+  int use_malloc;
+#ifdef WIDE_CHAR_VERSION
+  size_t size;
+  size_t layers;
+  const wint_t *names;
+#endif
 
-#include "../locale/weight.h"
+#include WEIGHT_H
 
   if (nrules == 0)
-    return strcmp (s1, s2);
+    return STRCMP (s1, s2);
 
 #ifdef USE_IN_EXTENDED_LOCALE_MODEL
   rulesets = (const unsigned char *)
     current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string;
   table = (const int32_t *)
-    current->values[_NL_ITEM_INDEX (_NL_COLLATE_TABLEMB)].string;
-  weights = (const unsigned char *)
-    current->values[_NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB)].string;
-  extra = (const unsigned char *)
-    current->values[_NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB)].string;
+    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string;
+  weights = (const USTRING_TYPE *)
+    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string;
+  extra = (const USTRING_TYPE *)
+    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string;
   indirect = (const int32_t *)
-    current->values[_NL_ITEM_INDEX (_NL_COLLATE_INDIRECTMB)].string;
+    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string;
+# ifdef WIDE_CHAR_VERSION
+  names = (const wint_t *)
+    current->values[_NL_ITEM_INDEX (_NL_COLLATE_NAMES)].string;
+  size = current->values[_NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].word;
+  layers = current->values[_NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].word;
+# endif
 #else
   rulesets = (const unsigned char *)
     _NL_CURRENT (LC_COLLATE, _NL_COLLATE_RULESETS);
   table = (const int32_t *)
-    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
-  weights = (const unsigned char *)
-    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
-  extra = (const unsigned char *)
-    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
+    _NL_CURRENT (LC_COLLATE, CONCAT(_NL_COLLATE_TABLE,SUFFIX));
+  weights = (const USTRING_TYPE *)
+    _NL_CURRENT (LC_COLLATE, CONCAT(_NL_COLLATE_WEIGHT,SUFFIX));
+  extra = (const USTRING_TYPE *)
+    _NL_CURRENT (LC_COLLATE, CONCAT(_NL_COLLATE_EXTRA,SUFFIX));
   indirect = (const int32_t *)
-    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
+    _NL_CURRENT (LC_COLLATE, CONCAT(_NL_COLLATE_INDIRECT,SUFFIX));
+# ifdef WIDE_CHAR_VERSION
+  names = (const wint_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_NAMES);
+  size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_SIZE);
+  layers = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_LAYERS);
+# endif
 #endif
+  use_malloc = 0;
 
   /* We need this a few times.  */
-  s1len = strlen (s1);
-  s2len = strlen (s2);
+  s1len = STRLEN (s1);
+  s2len = STRLEN (s2);
 
   /* We need the elements of the strings as unsigned values since they
      are used as indeces.  */
-  us1 = (const unsigned char *) s1;
-  us2 = (const unsigned char *) s2;
+  us1 = (const USTRING_TYPE *) s1;
+  us2 = (const USTRING_TYPE *) s2;
 
   /* Perform the first pass over the string and while doing this find
      and store the weights for each character.  Since we want this to
@@ -204,7 +233,7 @@ STRCOLL (s1, s2, l)
 	      {
 		backw1_stop = idx1max;
 
-		while (*us1 != '\0')
+		while (*us1 != L('\0'))
 		  {
 		    int32_t tmp = findidx (&us1);
 		    rule1arr[idx1max] = tmp >> 24;
@@ -263,7 +292,7 @@ STRCOLL (s1, s2, l)
 	      {
 		backw2_stop = idx2max;
 
-		while (*us2 != '\0')
+		while (*us2 != L('\0'))
 		  {
 		    int32_t tmp = findidx (&us2);
 		    rule2arr[idx2max] = tmp >> 24;
diff --git a/string/strxfrm.c b/string/strxfrm.c
index 6fc795f9c8..9fd9526008 100644
--- a/string/strxfrm.c
+++ b/string/strxfrm.c
@@ -1,6 +1,6 @@
 /* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
-   Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
+   Written by Ulrich Drepper <drepper@cygnus.com>, 1995.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public License as
@@ -23,15 +23,29 @@
 #include <stdlib.h>
 #include <string.h>
 
-#include "../locale/localeinfo.h"
-
-#ifdef USE_IN_EXTENDED_LOCALE_MODEL
-# define STRXFRM __strxfrm_l
-#else
-# define STRXFRM strxfrm
+#ifndef STRING_TYPE
+# define STRING_TYPE char
+# define USTRING_TYPE unsigned char
+# ifdef USE_IN_EXTENDED_LOCALE_MODEL
+#  define STRXFRM __strxfrm_l
+# else
+#  define STRXFRM strxfrm
+# endif
+# define STRCMP strcmp
+# define STRLEN strlen
+# define STPNCPY __stpncpy
+# define WEIGHT_H "../locale/weight.h"
+# define SUFFIX	MB
+# define L(arg) arg
 #endif
 
+#define CONCAT(a,b) CONCAT1(a,b)
+#define CONCAT1(a,b) a##b
 
+#include "../locale/localeinfo.h"
+
+
+#ifndef WIDE_CHAR_VERSION
 /* These are definitions used by some of the functions for handling
    UTF-8 encoding below.  */
 static const uint32_t encoding_mask[] =
@@ -79,14 +93,15 @@ utf8_encode (char *buf, int val)
 
   return buf - startp;
 }
+#endif
 
 
 #ifndef USE_IN_EXTENDED_LOCALE_MODEL
 size_t
-STRXFRM (char *dest, const char *src, size_t n)
+STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n)
 #else
 size_t
-STRXFRM (char *dest, const char *src, size_t n, __locale_t l)
+STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l)
 #endif
 {
 #ifdef USE_IN_EXTENDED_LOCALE_MODEL
@@ -99,25 +114,30 @@ STRXFRM (char *dest, const char *src, size_t n, __locale_t l)
      unnecessary in case there are no rules.  */
   const unsigned char *rulesets;
   const int32_t *table;
-  const unsigned char *weights;
-  const unsigned char *extra;
+  const USTRING_TYPE *weights;
+  const USTRING_TYPE *extra;
   const int32_t *indirect;
   uint_fast32_t pass;
   size_t needed;
-  const unsigned char *usrc;
-  size_t srclen = strlen (src);
+  const USTRING_TYPE *usrc;
+  size_t srclen = STRLEN (src);
   int32_t *idxarr;
   unsigned char *rulearr;
   size_t idxmax;
   size_t idxcnt;
-  int use_malloc = 0;
+  int use_malloc;
+#ifdef WIDE_CHAR_VERSION
+  size_t size;
+  size_t layers;
+  const wint_t *names;
+#endif
 
-#include "../locale/weight.h"
+#include WEIGHT_H
 
   if (nrules == 0)
     {
       if (n != 0)
-	__stpncpy (dest, src, n);
+	STPNCPY (dest, src, n);
 
       return srclen;
     }
@@ -126,37 +146,49 @@ STRXFRM (char *dest, const char *src, size_t n, __locale_t l)
   rulesets = (const unsigned char *)
     current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string;
   table = (const int32_t *)
-    current->values[_NL_ITEM_INDEX (_NL_COLLATE_TABLEMB)].string;
-  weights = (const unsigned char *)
-    current->values[_NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB)].string;
-  extra = (const unsigned char *)
-    current->values[_NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB)].string;
+    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string;
+  weights = (const USTRING_TYPE *)
+    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string;
+  extra = (const USTRING_TYPE *)
+    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string;
   indirect = (const int32_t *)
-    current->values[_NL_ITEM_INDEX (_NL_COLLATE_INDIRECTMB)].string;
+    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string;
+# ifdef WIDE_CHAR_VERSION
+  names = (const wint_t *)
+    current->values[_NL_ITEM_INDEX (_NL_COLLATE_NAMES)].string;
+  size = current->values[_NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].word;
+  layers = current->values[_NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].word;
+# endif
 #else
   rulesets = (const unsigned char *)
     _NL_CURRENT (LC_COLLATE, _NL_COLLATE_RULESETS);
   table = (const int32_t *)
-    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
-  weights = (const unsigned char *)
-    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
-  extra = (const unsigned char *)
-    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
+    _NL_CURRENT (LC_COLLATE, CONCAT(_NL_COLLATE_TABLE,SUFFIX));
+  weights = (const USTRING_TYPE *)
+    _NL_CURRENT (LC_COLLATE, CONCAT(_NL_COLLATE_WEIGHT,SUFFIX));
+  extra = (const USTRING_TYPE *)
+    _NL_CURRENT (LC_COLLATE, CONCAT(_NL_COLLATE_EXTRA,SUFFIX));
   indirect = (const int32_t *)
-    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
+    _NL_CURRENT (LC_COLLATE, CONCAT(_NL_COLLATE_INDIRECT,SUFFIX));
+# ifdef WIDE_CHAR_VERSION
+  names = (const wint_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_NAMES);
+  size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_SIZE);
+  layers = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_LAYERS);
+# endif
 #endif
+  use_malloc = 0;
 
   /* Handle an empty string as a special case.  */
   if (srclen == 0)
     {
       if (n != 0)
-        *dest = '\0';
+        *dest = L('\0');
       return 1;
     }
 
   /* We need the elements of the string as unsigned values since they
      are used as indeces.  */
-  usrc = (const unsigned char *) src;
+  usrc = (const USTRING_TYPE *) src;
 
   /* Perform the first pass over the string and while doing this find
      and store the weights for each character.  Since we want this to
@@ -195,7 +227,7 @@ STRXFRM (char *dest, const char *src, size_t n, __locale_t l)
 
       ++idxmax;
     }
-  while (*usrc != '\0');
+  while (*usrc != L('\0'));
 
   /* Now the passes over the weights.  We now use the indeces we found
      before.  */
@@ -287,8 +319,10 @@ STRXFRM (char *dest, const char *src, size_t n, __locale_t l)
       else
 	{
 	  int val = 1;
+#ifndef WIDE_CHAR_VERSION
 	  char buf[7];
 	  size_t buflen;
+#endif
 	  size_t i;
 
 	  for (idxcnt = 0; idxcnt < idxmax; ++idxcnt)
@@ -307,6 +341,16 @@ STRXFRM (char *dest, const char *src, size_t n, __locale_t l)
 			  len = weights[idxarr[backw]++];
 			  if (len != 0)
 			    {
+#ifdef WIDE_CHAR_VERSION
+			      if (needed + 1 + len < n)
+				{
+				  dest[needed] = val;
+				  for (i = 0; i < len; ++i)
+				    dest[needed + 1 + i] =
+				      weights[idxarr[backw] + i];
+				}
+			      needed += 1 + len;
+#else
 			      buflen = utf8_encode (buf, val);
 			      if (needed + buflen + len < n)
 				{
@@ -316,8 +360,9 @@ STRXFRM (char *dest, const char *src, size_t n, __locale_t l)
 				    dest[needed + buflen + i] =
 				      weights[idxarr[backw] + i];
 				}
-			      idxarr[backw] += len;
 			      needed += buflen + len;
+#endif
+			      idxarr[backw] += len;
 			      val = 1;
 			    }
 			  else
@@ -331,6 +376,16 @@ STRXFRM (char *dest, const char *src, size_t n, __locale_t l)
 		  len = weights[idxarr[idxcnt]++];
 		  if (len != 0)
 		    {
+#ifdef WIDE_CHAR_VERSION
+		      if (needed + 1+ len < n)
+			{
+			  dest[needed] = val;
+			  for (i = 0; i < len; ++i)
+			    dest[needed + 1 + i] =
+			      weights[idxarr[idxcnt] + i];
+			}
+		      needed += 1 + len;
+#else
 		      buflen = utf8_encode (buf, val);
 		      if (needed + buflen + len < n)
 			{
@@ -340,8 +395,9 @@ STRXFRM (char *dest, const char *src, size_t n, __locale_t l)
 			    dest[needed + buflen + i] =
 			      weights[idxarr[idxcnt] + i];
 			}
-		      idxarr[idxcnt] += len;
 		      needed += buflen + len;
+#endif
+		      idxarr[idxcnt] += len;
 		      val = 1;
 		    }
 		  else
@@ -370,6 +426,16 @@ STRXFRM (char *dest, const char *src, size_t n, __locale_t l)
 		  size_t len = weights[idxarr[--backw]++];
 		  if (len != 0)
 		    {
+#ifdef WIDE_CHAR_VERSION
+		      if (needed + 1 + len < n)
+			{
+			  dest[needed] = val;
+			  for (i = 0; i < len; ++i)
+			    dest[needed + 1 + i] =
+			      weights[idxarr[backw] + i];
+			}
+		      needed += 1 + len;
+#else
 		      buflen = utf8_encode (buf, val);
 		      if (needed + buflen + len < n)
 			{
@@ -379,8 +445,9 @@ STRXFRM (char *dest, const char *src, size_t n, __locale_t l)
 			    dest[needed + buflen + i] =
 			      weights[idxarr[backw] + i];
 			}
-		      idxarr[backw] += len;
 		      needed += buflen + len;
+#endif
+		      idxarr[backw] += len;
 		      val = 1;
 		    }
 		  else
@@ -392,7 +459,7 @@ STRXFRM (char *dest, const char *src, size_t n, __locale_t l)
       /* Finally store the byte to separate the passes or terminate
 	 the string.  */
       if (needed < n)
-	dest[needed] = pass + 1 < nrules ? '\1' : '\0';
+	dest[needed] = pass + 1 < nrules ? L('\1') : L('\0');
       ++needed;
     }
 
@@ -400,11 +467,11 @@ STRXFRM (char *dest, const char *src, size_t n, __locale_t l)
      a `position' rule at the end and if no non-ignored character
      is found the last \1 byte is immediately followed by a \0 byte
      signalling this.  We can avoid the \1 byte(s).  */
-  if (needed <= n && needed > 2 && dest[needed - 2] == '\1')
+  if (needed <= n && needed > 2 && dest[needed - 2] == L('\1'))
     {
       /* Remove the \1 byte.  */
       --needed;
-      dest[needed - 1] = '\0';
+      dest[needed - 1] = L('\0');
     }
 
   /* Free the memory if needed.  */