about summary refs log tree commit diff
path: root/posix
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 2000-05-24 20:22:51 +0000
committer Ulrich Drepper <drepper@redhat.com> 2000-05-24 20:22:51 +0000
commit acb5ee2e561276d64c6e26ef4b82f59a4db5ae90 (patch)
tree 1f7ebfcaf8bf2874ae5cdb6348205dccfd9499c2 /posix
parent b7cbee1cb029f6471aa069552a69f04a3d1b4d70 (diff)
download glibc-acb5ee2e561276d64c6e26ef4b82f59a4db5ae90.tar.gz
glibc-acb5ee2e561276d64c6e26ef4b82f59a4db5ae90.tar.xz
glibc-acb5ee2e561276d64c6e26ef4b82f59a4db5ae90.zip
Update.
2000-05-24  Ulrich Drepper  <drepper@redhat.com>

	* locale/programs/ld-collate.c (struct element_t): Add mbseqorder
	and wcseqorder members.
	(struct locale_collate_t): Likewise.
	(collate_finish): Assign collation sequence value to each character.
	Create tables for output.
	(collate_output): Write out tables with collation sequence information.
	* locale/C-collate.c: Provide C locale data for collation sequence
	table.
	* locale/langinfo.h: Add _NL_COLLATE_COLLSEQMB and
	_NL_COLLATE_COLLSEQWC.
	* locale/categories.def: Add entries for _NL_COLLATE_COLLSEQMB and
	_NL_COLLATE_COLLSEQWC.
	* posix/fnmatch.c: Define SUFFIX and WIDE_CHAR_VERSION before
	including fnmatch_loop.c.
	* posix/fnmatch_loop.c: Don't use strcoll while determining whether
	character is matched by range expression.  Use collation sequence
	table.  Outside glibc fall back on simple character value comparison.
Diffstat (limited to 'posix')
-rw-r--r-- posix/fnmatch.c 15
-rw-r--r-- posix/fnmatch_loop.c 135
2 files changed, 132 insertions, 18 deletions
diff --git a/posix/fnmatch.c b/posix/fnmatch.c
index 18abf5da27..c4b11080fe 100644
--- a/posix/fnmatch.c
+++ b/posix/fnmatch.c
@@ -48,6 +48,15 @@
 # include <wctype.h>
 #endif
 
+/* We need some of the locale data (the collation sequence information)
+   but there is no interface to get this information in general.  Therefore
+   we support a correct implementation only in glibc.  */
+#ifdef _LIBC
+# include "../locale/localeinfo.h"
+
+# define CONCAT(a,b) __CONCAT(a,b)
+#endif
+
 /* Comment out all this code if we are using the GNU C Library, and are not
    actually compiling the library itself.  This code is part of the GNU C
    Library, but also included in many other GNU distributions.  Compiling
@@ -192,6 +201,7 @@ __wcschrnul (s, c)
 # define STRCHR(S, C)	strchr (S, C)
 # define STRCHRNUL(S, C) __strchrnul (S, C)
 # define STRCOLL(S1, S2) strcoll (S1, S2)
+# define SUFFIX MB
 # include "fnmatch_loop.c"
 
 
@@ -209,7 +219,10 @@ __wcschrnul (s, c)
 #  define BTOWC(C)	(C)
 #  define STRCHR(S, C)	wcschr (S, C)
 #  define STRCHRNUL(S, C) __wcschrnul (S, C)
-# define STRCOLL(S1, S2) wcscoll (S1, S2)
+#  define STRCOLL(S1, S2) wcscoll (S1, S2)
+#  define SUFFIX WC
+#  define WIDE_CHAR_VERSION 1
+
 
 #  undef IS_CHAR_CLASS
 #  ifdef _LIBC
diff --git a/posix/fnmatch_loop.c b/posix/fnmatch_loop.c
index 5f6c05710e..831bd0631a 100644
--- a/posix/fnmatch_loop.c
+++ b/posix/fnmatch_loop.c
@@ -31,6 +31,16 @@ FCT (pattern, string, no_leading_period, flags)
 {
   register const CHAR *p = pattern, *n = string;
   register UCHAR c;
+#ifdef _LIBC
+  const UCHAR *collseq = (const UCHAR *)
+    _NL_CURRENT(LC_COLLATE, CONCAT(_NL_COLLATE_COLLSEQ,SUFFIX));
+# ifdef WIDE_CHAR_VERSION
+  const wint_t *names = (const wint_t *)
+    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_NAMES);
+  size_t size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_SIZE);
+  size_t layers = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_LAYERS);
+# endif
+#endif
 
   while ((c = *p++) != L('\0'))
     {
@@ -210,9 +220,9 @@ FCT (pattern, string, no_leading_period, flags)
 		    /* Leave room for the null.  */
 		    CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
 		    size_t c1 = 0;
-# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
+#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
 		    wctype_t wt;
-# endif
+#endif
 		    const CHAR *startp = p;
 
 		    for (;;)
@@ -240,7 +250,7 @@ FCT (pattern, string, no_leading_period, flags)
 		      }
 		    str[c1] = L('\0');
 
-# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
+#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
 		    wt = IS_CHAR_CLASS (str);
 		    if (wt == 0)
 		      /* Invalid character class name.  */
@@ -248,7 +258,7 @@ FCT (pattern, string, no_leading_period, flags)
 
 		    if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
 		      goto matched;
-# else
+#else
 		    if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
 			|| (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
 			|| (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
@@ -262,7 +272,7 @@ FCT (pattern, string, no_leading_period, flags)
 			|| (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
 			|| (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
 		      goto matched;
-# endif
+#endif
 		  }
 		else if (c == L('\0'))
 		  /* [ (unterminated) loses.  */
@@ -279,27 +289,117 @@ FCT (pattern, string, no_leading_period, flags)
 
 		    if (c == L('-') && *p != L(']'))
 		      {
-			/* It is a range.  */
-			CHAR lo[2];
-			CHAR fc[2];
+#if _LIBC
+			/* We have to find the collation sequence
+			   value for C.  Collation sequence is nothing
+			   we can regularly access.  The sequence
+			   value is defined by the order in which the
+			   definitions of the collation values for the
+			   various characters appear in the source
+			   file.  A strange concept, nowhere
+			   documented.  */
+			int32_t fseqidx;
+			int32_t lseqidx;
 			UCHAR cend = *p++;
+# ifdef WIDE_CHAR_VERSION
+			size_t cnt;
+# endif
+
 			if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
 			  cend = *p++;
 			if (cend == L('\0'))
 			  return FNM_NOMATCH;
 
-			lo[0] = cold;
-			lo[1] = L('\0');
-			fc[0] = fn;
-			fc[1] = L('\0');
-			if (STRCOLL (lo, fc) <= 0)
+# ifdef WIDE_CHAR_VERSION
+			/* Search in the `names' array for the characters.  */
+			fseqidx = fn % size;
+			cnt = 0;
+			while (names[fseqidx] != fn)
 			  {
-			    CHAR hi[2];
-			    hi[0] = FOLD (cend);
-			    hi[1] = L('\0');
-			    if (STRCOLL (fc, hi) <= 0)
+			    if (++cnt == layers)
+			      /* XXX We don't know anything about
+				 the character we are supposed to
+				 match.  This means we are failing.  */
+			      goto range_not_matched;
+
+			    fseqidx += size;
+			  }
+			lseqidx = cold % size;
+			cnt = 0;
+			while (names[lseqidx] != cold)
+			  {
+			    if (++cnt == layers)
+			      {
+				lseqidx = -1;
+				break;
+			      }
+			    lseqidx += size;
+			  }
+# else
+			fseqidx = fn;
+			lseqidx = cold;
+# endif
+
+			/* XXX It is not entirely clear to me how to handle
+			   characters which are not mentioned in the
+			   collation specification.  */
+			if (
+# ifdef WIDE_CHAR_VERSION
+			    lseqidx == -1 ||
+# endif
+			    collseq[lseqidx] <= collseq[fseqidx])
+			  {
+			    /* We have to look at the upper bound.  */
+			    int32_t hseqidx;
+
+			    cend = FOLD (cend);
+# ifdef WIDE_CHAR_VERSION
+			    hseqidx = cend % size;
+			    cnt = 0;
+			    while (names[hseqidx] != cend)
+			      {
+				if (++cnt == layers)
+				  {
+				    /* Hum, no information about the upper
+				       bound.  The matching succeeds if the
+				       lower bound is matched exactly.  */
+				    if (lseqidx == -1 || cold != fn)
+				      goto range_not_matched;
+
+				    goto matched;
+				  }
+			      }
+# else
+			    hseqidx = cend;
+# endif
+
+			    if (
+# ifdef WIDE_CHAR_VERSION
+				(lseqidx == -1
+				 && collseq[fseqidx] == collseq[hseqidx]) ||
+# endif
+				collseq[fseqidx] <= collseq[hseqidx])
 			      goto matched;
 			  }
+# ifdef WIDE_CHAR_VERSION
+		      range_not_matched:
+# endif
+#else
+			/* We use a boring value comparison of the character
+			   values.  This is better than comparing using
+			   `strcoll' since the latter would have surprising
+			   and sometimes fatal consequences.  */
+			UCHAR cend = *p++;
+
+			if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
+			  cend = *p++;
+			if (cend == L('\0'))
+			  return FNM_NOMATCH;
+
+			/* It is a range.  */
+			if (cold <= fc && fc <= c)
+			  goto matched;
+#endif
 
 			c = *p++;
 		      }
@@ -371,3 +471,4 @@ FCT (pattern, string, no_leading_period, flags)
 #undef STRCOLL
 #undef L
 #undef BTOWC
+#undef SUFFIX