about summary refs log tree commit diff
path: root/posix/fnmatch.c
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 2000-01-18 01:46:57 +0000
committer Ulrich Drepper <drepper@redhat.com> 2000-01-18 01:46:57 +0000
commit 1fc82a56bcf83467050101dc06a73f9d7c124693 (patch)
tree b9881b337fcbd274be015d2b5410e102532fd2cb /posix/fnmatch.c
parent 3c8333780571c599b97ac94f359c76e5c51434c7 (diff)
download glibc-1fc82a56bcf83467050101dc06a73f9d7c124693.tar.gz
glibc-1fc82a56bcf83467050101dc06a73f9d7c124693.tar.xz
glibc-1fc82a56bcf83467050101dc06a73f9d7c124693.zip
Update.
	* posix/fnmatch.c: Put internal_fnmatch in separate file.  Include
	twice, to define singlebyte and multibyte version.
	(fnmatch): Differentiate between singlebyte and multibyte locale and
	call appropriate internal function.
	* posix/fnmatch_loop.c: New file.
	* posix/Makefile (distribute): Add fnmatch_loop.c.
Diffstat (limited to 'posix/fnmatch.c')
-rw-r--r-- posix/fnmatch.c 419
1 files changed, 108 insertions, 311 deletions
diff --git a/posix/fnmatch.c b/posix/fnmatch.c
index 1f4ead5f98..2dbebfe31e 100644
--- a/posix/fnmatch.c
+++ b/posix/fnmatch.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991, 92, 93, 96, 97, 98, 99 Free Software Foundation, Inc.
+/* Copyright (C) 1991-1993, 1996-1999, 2000 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    This library is free software; you can redistribute it and/or
@@ -25,6 +25,7 @@
 # define _GNU_SOURCE	1
 #endif
 
+#include <assert.h>
 #include <errno.h>
 #include <fnmatch.h>
 #include <ctype.h>
@@ -104,6 +105,12 @@
 #  else
 #   define IS_CHAR_CLASS(string) wctype (string)
 #  endif
+
+#  if (HAVE_MBSTATE_T && HAVE_MBSRTOWCS) || _LIBC
+/* In this case we are implementing the multibyte character handling.  */
+#   define HANDLE_MULTIBYTE	1
+#  endif
+
 # else
 #  define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
 
@@ -148,333 +155,87 @@ __strchrnul (s, c)
 #  define internal_function
 # endif
 
-/* Match STRING against the filename pattern PATTERN, returning zero if
-   it matches, nonzero if not.  */
-static int internal_fnmatch __P ((const char *pattern, const char *string,
-				  int no_leading_period, int flags))
-     internal_function;
-static int
-internal_function
-internal_fnmatch (pattern, string, no_leading_period, flags)
-     const char *pattern;
-     const char *string;
-     int no_leading_period;
-     int flags;
-{
-  register const char *p = pattern, *n = string;
-  register unsigned char c;
-
 /* Note that this evaluates C many times.  */
 # ifdef _LIBC
 #  define FOLD(c) ((flags & FNM_CASEFOLD) ? tolower (c) : (c))
 # else
 #  define FOLD(c) ((flags & FNM_CASEFOLD) && ISUPPER (c) ? tolower (c) : (c))
 # endif
+# define CHAR	char
+# define UCHAR	unsigned char
+# define FCT	internal_fnmatch
+# define L(CS)	CS
+# define STRCHR(S, C)	strchr (S, C)
+# define STRCHRNUL(S, C) __strchrnul (S, C)
+# include "fnmatch_loop.c"
 
-  while ((c = *p++) != '\0')
-    {
-      c = FOLD (c);
-
-      switch (c)
-	{
-	case '?':
-	  if (*n == '\0')
-	    return FNM_NOMATCH;
-	  else if (*n == '/' && (flags & FNM_FILE_NAME))
-	    return FNM_NOMATCH;
-	  else if (*n == '.' && no_leading_period
-		   && (n == string
-		       || (n[-1] == '/' && (flags & FNM_FILE_NAME))))
-	    return FNM_NOMATCH;
-	  break;
-
-	case '\\':
-	  if (!(flags & FNM_NOESCAPE))
-	    {
-	      c = *p++;
-	      if (c == '\0')
-		/* Trailing \ loses.  */
-		return FNM_NOMATCH;
-	      c = FOLD (c);
-	    }
-	  if (FOLD ((unsigned char) *n) != c)
-	    return FNM_NOMATCH;
-	  break;
-
-	case '*':
-	  if (*n == '.' && no_leading_period
-	      && (n == string
-		  || (n[-1] == '/' && (flags & FNM_FILE_NAME))))
-	    return FNM_NOMATCH;
-
-	  for (c = *p++; c == '?' || c == '*'; c = *p++)
-	    {
-	      if (*n == '/' && (flags & FNM_FILE_NAME))
-		/* A slash does not match a wildcard under FNM_FILE_NAME.  */
-		return FNM_NOMATCH;
-	      else if (c == '?')
-		{
-		  /* A ? needs to match one character.  */
-		  if (*n == '\0')
-		    /* There isn't another character; no match.  */
-		    return FNM_NOMATCH;
-		  else
-		    /* One character of the string is consumed in matching
-		       this ? wildcard, so *??? won't match if there are
-		       less than three characters.  */
-		    ++n;
-		}
-	    }
-
-	  if (c == '\0')
-	    /* The wildcard(s) is/are the last element of the pattern.
-	       If the name is a file name and contains another slash
-	       this does mean it cannot match.  */
-	    return ((flags & FNM_FILE_NAME) && strchr (n, '/') != NULL
-		    ? FNM_NOMATCH : 0);
-	  else
-	    {
-	      const char *endp;
-
-	      endp = __strchrnul (n, (flags & FNM_FILE_NAME) ? '/' : '\0');
-
-	      if (c == '[')
-		{
-		  int flags2 = ((flags & FNM_FILE_NAME)
-				? flags : (flags & ~FNM_PERIOD));
-
-		  for (--p; n < endp; ++n)
-		    if (internal_fnmatch (p, n,
-					  (no_leading_period
-					   && (n == string
-					       || (n[-1] == '/'
-						   && (flags
-						       & FNM_FILE_NAME)))),
-					  flags2)
-			== 0)
-		      return 0;
-		}
-	      else if (c == '/' && (flags & FNM_FILE_NAME))
-		{
-		  while (*n != '\0' && *n != '/')
-		    ++n;
-		  if (*n == '/'
-		      && (internal_fnmatch (p, n + 1, flags & FNM_PERIOD,
-					    flags) == 0))
-		    return 0;
-		}
-	      else
-		{
-		  int flags2 = ((flags & FNM_FILE_NAME)
-				? flags : (flags & ~FNM_PERIOD));
-
-		  if (c == '\\' && !(flags & FNM_NOESCAPE))
-		    c = *p;
-		  c = FOLD (c);
-		  for (--p; n < endp; ++n)
-		    if (FOLD ((unsigned char) *n) == c
-			&& (internal_fnmatch (p, n,
-					      (no_leading_period
-					       && (n == string
-						   || (n[-1] == '/'
-						       && (flags
-							   & FNM_FILE_NAME)))),
-					      flags2) == 0))
-		      return 0;
-		}
-	    }
-
-	  /* If we come here no match is possible with the wildcard.  */
-	  return FNM_NOMATCH;
-
-	case '[':
-	  {
-	    /* Nonzero if the sense of the character class is inverted.  */
-	    static int posixly_correct;
-	    register int not;
-	    char cold;
-
-	    if (posixly_correct == 0)
-	      posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
-
-	    if (*n == '\0')
-	      return FNM_NOMATCH;
-
-	    if (*n == '.' && no_leading_period && (n == string
-						   || (n[-1] == '/'
-						       && (flags
-							   & FNM_FILE_NAME))))
-	      return FNM_NOMATCH;
-
-	    if (*n == '/' && (flags & FNM_FILE_NAME))
-	      /* `/' cannot be matched.  */
-	      return FNM_NOMATCH;
-
-	    not = (*p == '!' || (posixly_correct < 0 && *p == '^'));
-	    if (not)
-	      ++p;
-
-	    c = *p++;
-	    for (;;)
-	      {
-		unsigned char fn = FOLD ((unsigned char) *n);
-
-		if (!(flags & FNM_NOESCAPE) && c == '\\')
-		  {
-		    if (*p == '\0')
-		      return FNM_NOMATCH;
-		    c = FOLD ((unsigned char) *p);
-		    ++p;
-
-		    if (c == fn)
-		      goto matched;
-		  }
-		else if (c == '[' && *p == ':')
-		  {
-		    /* Leave room for the null.  */
-		    char str[CHAR_CLASS_MAX_LENGTH + 1];
-		    size_t c1 = 0;
-# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
-		    wctype_t wt;
-# endif
-		    const char *startp = p;
-
-		    for (;;)
-		      {
-			if (c1 == CHAR_CLASS_MAX_LENGTH)
-			  /* The name is too long and therefore the pattern
-			     is ill-formed.  */
-			  return FNM_NOMATCH;
-
-			c = *++p;
-			if (c == ':' && p[1] == ']')
-			  {
-			    p += 2;
-			    break;
-			  }
-			if (c < 'a' || c >= 'z')
-			  {
-			    /* This cannot possibly be a character class name.
-			       Match it as a normal range.  */
-			    p = startp;
-			    c = '[';
-			    goto normal_bracket;
-			  }
-			str[c1++] = c;
-		      }
-		    str[c1] = '\0';
 
-# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
-		    wt = IS_CHAR_CLASS (str);
-		    if (wt == 0)
-		      /* Invalid character class name.  */
-		      return FNM_NOMATCH;
+# if HANDLE_MULTIBYTE
+/* Note that this evaluates C many times.  */
+#  ifdef _LIBC
+#   define FOLD(c) ((flags & FNM_CASEFOLD) ? towlower (c) : (c))
+#  else
+#   define FOLD(c) ((flags & FNM_CASEFOLD) && ISUPPER (c) ? towlower (c) : (c))
+#  endif
+#  define CHAR	wchar_t
+#  define UCHAR	wint_t
+#  define FCT	internal_fnwmatch
+#  define L(CS)	L##CS
+#  define __btowc(wc)	wc
+#  define STRCHR(S, C)	wcschr (S, C)
+#  define STRCHRNUL(S, C) __wcschrnul (S, C)
+
+#  undef IS_CHAR_CLASS
+#  ifdef _LIBC
+/* We have to convert the wide character string into a multibyte string.  But
+   we know that the character class names are ASCII strings and since the
+   internal wide character encoding is UCS4 we can use a simplified method
+   to convert the string to a multibyte character string.  */
+static wctype_t
+is_char_class (const wchar_t *wcs)
+{
+  char s[CHAR_CLASS_MAX_LENGTH + 1];
+  char *cp = s;
 
-		    if (__iswctype (__btowc ((unsigned char) *n), wt))
-		      goto matched;
-# else
-		    if ((STREQ (str, "alnum") && ISALNUM ((unsigned char) *n))
-			|| (STREQ (str, "alpha") && ISALPHA ((unsigned char) *n))
-			|| (STREQ (str, "blank") && ISBLANK ((unsigned char) *n))
-			|| (STREQ (str, "cntrl") && ISCNTRL ((unsigned char) *n))
-			|| (STREQ (str, "digit") && ISDIGIT ((unsigned char) *n))
-			|| (STREQ (str, "graph") && ISGRAPH ((unsigned char) *n))
-			|| (STREQ (str, "lower") && ISLOWER ((unsigned char) *n))
-			|| (STREQ (str, "print") && ISPRINT ((unsigned char) *n))
-			|| (STREQ (str, "punct") && ISPUNCT ((unsigned char) *n))
-			|| (STREQ (str, "space") && ISSPACE ((unsigned char) *n))
-			|| (STREQ (str, "upper") && ISUPPER ((unsigned char) *n))
-			|| (STREQ (str, "xdigit") && ISXDIGIT ((unsigned char) *n)))
-		      goto matched;
-# endif
-		  }
-		else if (c == '\0')
-		  /* [ (unterminated) loses.  */
-		  return FNM_NOMATCH;
-		else
-		  {
-		  normal_bracket:
-		    if (FOLD (c) == fn)
-		      goto matched;
-
-		    cold = c;
-		    c = *p++;
-
-		    if (c == '-' && *p != ']')
-		      {
-			/* It is a range.  */
-			unsigned char cend = *p++;
-			if (!(flags & FNM_NOESCAPE) && cend == '\\')
-			  cend = *p++;
-			if (cend == '\0')
-			  return FNM_NOMATCH;
-
-			if (cold <= fn && fn <= FOLD (cend))
-			  goto matched;
-
-			c = *p++;
-		      }
-		  }
-
-		if (c == ']')
-		  break;
-	      }
-
-	    if (!not)
-	      return FNM_NOMATCH;
-	    break;
-
-	  matched:
-	    /* Skip the rest of the [...] that already matched.  */
-	    while (c != ']')
-	      {
-		if (c == '\0')
-		  /* [... (unterminated) loses.  */
-		  return FNM_NOMATCH;
-
-		c = *p++;
-		if (!(flags & FNM_NOESCAPE) && c == '\\')
-		  {
-		    if (*p == '\0')
-		      return FNM_NOMATCH;
-		    /* XXX 1003.2d11 is unclear if this is right.  */
-		    ++p;
-		  }
-		else if (c == '[' && *p == ':')
-		  {
-		    do
-		      if (*++p == '\0')
-			return FNM_NOMATCH;
-		    while (*p != ':' || p[1] == ']');
-		    p += 2;
-		    c = *p;
-		  }
-	      }
-	    if (not)
-	      return FNM_NOMATCH;
-	  }
-	  break;
-
-	default:
-	  if (c != FOLD ((unsigned char) *n))
-	    return FNM_NOMATCH;
-	}
-
-      ++n;
+  do
+    {
+      if (*wcs < 0x20 || *wcs >= 0x7f)
+	return 0;
+
+      *cp++ = (char) *wcs;
     }
+  while (*wcs++ != L'\0');
 
-  if (*n == '\0')
-    return 0;
+  return __wctype (s);
+}
+#  else
+/* Since we cannot assume anything about the internal encoding we have to
+   convert the string back to multibyte representation the hard way.  */
+static wctype_t
+is_char_class (const wchar_t *wcs)
+{
+  mstate_t ps;
+  char *s;
+  size_t n;
+
+  memset (&ps, '\0', sizeof (ps));
 
-  if ((flags & FNM_LEADING_DIR) && *n == '/')
-    /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
+  n = wcsrtombs (NULL, wcs, 0, &ps);
+  if (n == (size_t) -1)
+    /* Something went wrong.  */
     return 0;
 
-  return FNM_NOMATCH;
+  s = alloca (n + 1);
+  assert (mbsinit (&ps));
+  (void) wcsrtombs (s, wcs, n + 1, &ps);
 
-# undef FOLD
+  return __wctype (s);
 }
+#  endif
+#  define IS_CHAR_CLASS(string) is_char_class (string)
 
+#  include "fnmatch_loop.c"
+# endif
 
 int
 fnmatch (pattern, string, flags)
@@ -482,7 +243,43 @@ fnmatch (pattern, string, flags)
      const char *string;
      int flags;
 {
+# if HANDLE_MULTIBYTE
+  mbstate_t ps;
+  size_t n;
+  wchar_t *wpattern;
+  wchar_t *wstring;
+
+  if (MB_CUR_MAX == 1)
+    /* This is an optimization for 8-bit character sets.  */
+    return internal_fnmatch (pattern, string, flags & FNM_PERIOD, flags);
+
+  /* Convert the strings into wide characters.  */
+  memset (&ps, '\0', sizeof (ps));
+  n = mbsrtowcs (NULL, &pattern, 0, &ps);
+  if (n == (size_t) -1)
+    /* Something wrong.
+       XXX Do we have to set `errno' to something which mbsrtowcs hasn't
+       already done?  */
+    return -1;
+  wpattern = (wchar_t *) alloca ((n + 1) * sizeof (wchar_t));
+  assert (mbsinit (&ps));
+  (void) mbsrtowcs (wpattern, &pattern, n + 1, &ps);
+
+  assert (mbsinit (&ps));
+  n = mbsrtowcs (NULL, &string, 0, &ps);
+  if (n == (size_t) -1)
+    /* Something wrong.
+       XXX Do we have to set `errno' to something which mbsrtowcs hasn't
+       already done?  */
+    return -1;
+  wstring = (wchar_t *) alloca ((n + 1) * sizeof (wchar_t));
+  assert (mbsinit (&ps));
+  (void) mbsrtowcs (wstring, &string, n + 1, &ps);
+
+  return internal_fnwmatch (wpattern, wstring, flags & FNM_PERIOD, flags);
+# else
   return internal_fnmatch (pattern, string, flags & FNM_PERIOD, flags);
+# endif  /* mbstate_t and mbsrtowcs or _LIBC.  */
 }
 
 #endif	/* _LIBC or not __GNU_LIBRARY__.  */