diff options
Diffstat (limited to 'posix')
-rw-r--r-- | posix/Makefile | 2 | ||||
-rw-r--r-- | posix/fnmatch.c | 419 | ||||
-rw-r--r-- | posix/fnmatch_loop.c | 336 |
3 files changed, 445 insertions, 312 deletions
diff --git a/posix/Makefile b/posix/Makefile index 96c73933e5..674277e48b 100644 --- a/posix/Makefile +++ b/posix/Makefile @@ -31,7 +31,7 @@ headers := sys/utsname.h sys/times.h sys/wait.h sys/types.h unistd.h \ distribute := confstr.h TESTS TESTS2C.sed testcases.h \ PTESTS PTESTS2C.sed ptestcases.h \ - globtest.c globtest.sh wordexp-tst.sh annexc.c + globtest.c globtest.sh wordexp-tst.sh annexc.c fnmatch_loop.c routines := \ uname \ diff --git a/posix/fnmatch.c b/posix/fnmatch.c index 1f4ead5f98..2dbebfe31e 100644 --- a/posix/fnmatch.c +++ b/posix/fnmatch.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1991, 92, 93, 96, 97, 98, 99 Free Software Foundation, Inc. +/* Copyright (C) 1991-1993, 1996-1999, 2000 Free Software Foundation, Inc. This file is part of the GNU C Library. This library is free software; you can redistribute it and/or @@ -25,6 +25,7 @@ # define _GNU_SOURCE 1 #endif +#include <assert.h> #include <errno.h> #include <fnmatch.h> #include <ctype.h> @@ -104,6 +105,12 @@ # else # define IS_CHAR_CLASS(string) wctype (string) # endif + +# if (HAVE_MBSTATE_T && HAVE_MBSRTOWCS) || _LIBC +/* In this case we are implementing the multibyte character handling. */ +# define HANDLE_MULTIBYTE 1 +# endif + # else # define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ @@ -148,333 +155,87 @@ __strchrnul (s, c) # define internal_function # endif -/* Match STRING against the filename pattern PATTERN, returning zero if - it matches, nonzero if not. */ -static int internal_fnmatch __P ((const char *pattern, const char *string, - int no_leading_period, int flags)) - internal_function; -static int -internal_function -internal_fnmatch (pattern, string, no_leading_period, flags) - const char *pattern; - const char *string; - int no_leading_period; - int flags; -{ - register const char *p = pattern, *n = string; - register unsigned char c; - /* Note that this evaluates C many times. */ # ifdef _LIBC # define FOLD(c) ((flags & FNM_CASEFOLD) ? tolower (c) : (c)) # else # define FOLD(c) ((flags & FNM_CASEFOLD) && ISUPPER (c) ? tolower (c) : (c)) # endif +# define CHAR char +# define UCHAR unsigned char +# define FCT internal_fnmatch +# define L(CS) CS +# define STRCHR(S, C) strchr (S, C) +# define STRCHRNUL(S, C) __strchrnul (S, C) +# include "fnmatch_loop.c" - while ((c = *p++) != '\0') - { - c = FOLD (c); - - switch (c) - { - case '?': - if (*n == '\0') - return FNM_NOMATCH; - else if (*n == '/' && (flags & FNM_FILE_NAME)) - return FNM_NOMATCH; - else if (*n == '.' && no_leading_period - && (n == string - || (n[-1] == '/' && (flags & FNM_FILE_NAME)))) - return FNM_NOMATCH; - break; - - case '\\': - if (!(flags & FNM_NOESCAPE)) - { - c = *p++; - if (c == '\0') - /* Trailing \ loses. */ - return FNM_NOMATCH; - c = FOLD (c); - } - if (FOLD ((unsigned char) *n) != c) - return FNM_NOMATCH; - break; - - case '*': - if (*n == '.' && no_leading_period - && (n == string - || (n[-1] == '/' && (flags & FNM_FILE_NAME)))) - return FNM_NOMATCH; - - for (c = *p++; c == '?' || c == '*'; c = *p++) - { - if (*n == '/' && (flags & FNM_FILE_NAME)) - /* A slash does not match a wildcard under FNM_FILE_NAME. */ - return FNM_NOMATCH; - else if (c == '?') - { - /* A ? needs to match one character. */ - if (*n == '\0') - /* There isn't another character; no match. */ - return FNM_NOMATCH; - else - /* One character of the string is consumed in matching - this ? wildcard, so *??? won't match if there are - less than three characters. */ - ++n; - } - } - - if (c == '\0') - /* The wildcard(s) is/are the last element of the pattern. - If the name is a file name and contains another slash - this does mean it cannot match. */ - return ((flags & FNM_FILE_NAME) && strchr (n, '/') != NULL - ? FNM_NOMATCH : 0); - else - { - const char *endp; - - endp = __strchrnul (n, (flags & FNM_FILE_NAME) ? '/' : '\0'); - - if (c == '[') - { - int flags2 = ((flags & FNM_FILE_NAME) - ? flags : (flags & ~FNM_PERIOD)); - - for (--p; n < endp; ++n) - if (internal_fnmatch (p, n, - (no_leading_period - && (n == string - || (n[-1] == '/' - && (flags - & FNM_FILE_NAME)))), - flags2) - == 0) - return 0; - } - else if (c == '/' && (flags & FNM_FILE_NAME)) - { - while (*n != '\0' && *n != '/') - ++n; - if (*n == '/' - && (internal_fnmatch (p, n + 1, flags & FNM_PERIOD, - flags) == 0)) - return 0; - } - else - { - int flags2 = ((flags & FNM_FILE_NAME) - ? flags : (flags & ~FNM_PERIOD)); - - if (c == '\\' && !(flags & FNM_NOESCAPE)) - c = *p; - c = FOLD (c); - for (--p; n < endp; ++n) - if (FOLD ((unsigned char) *n) == c - && (internal_fnmatch (p, n, - (no_leading_period - && (n == string - || (n[-1] == '/' - && (flags - & FNM_FILE_NAME)))), - flags2) == 0)) - return 0; - } - } - - /* If we come here no match is possible with the wildcard. */ - return FNM_NOMATCH; - - case '[': - { - /* Nonzero if the sense of the character class is inverted. */ - static int posixly_correct; - register int not; - char cold; - - if (posixly_correct == 0) - posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; - - if (*n == '\0') - return FNM_NOMATCH; - - if (*n == '.' && no_leading_period && (n == string - || (n[-1] == '/' - && (flags - & FNM_FILE_NAME)))) - return FNM_NOMATCH; - - if (*n == '/' && (flags & FNM_FILE_NAME)) - /* `/' cannot be matched. */ - return FNM_NOMATCH; - - not = (*p == '!' || (posixly_correct < 0 && *p == '^')); - if (not) - ++p; - - c = *p++; - for (;;) - { - unsigned char fn = FOLD ((unsigned char) *n); - - if (!(flags & FNM_NOESCAPE) && c == '\\') - { - if (*p == '\0') - return FNM_NOMATCH; - c = FOLD ((unsigned char) *p); - ++p; - - if (c == fn) - goto matched; - } - else if (c == '[' && *p == ':') - { - /* Leave room for the null. */ - char str[CHAR_CLASS_MAX_LENGTH + 1]; - size_t c1 = 0; -# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) - wctype_t wt; -# endif - const char *startp = p; - - for (;;) - { - if (c1 == CHAR_CLASS_MAX_LENGTH) - /* The name is too long and therefore the pattern - is ill-formed. */ - return FNM_NOMATCH; - - c = *++p; - if (c == ':' && p[1] == ']') - { - p += 2; - break; - } - if (c < 'a' || c >= 'z') - { - /* This cannot possibly be a character class name. - Match it as a normal range. */ - p = startp; - c = '['; - goto normal_bracket; - } - str[c1++] = c; - } - str[c1] = '\0'; -# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) - wt = IS_CHAR_CLASS (str); - if (wt == 0) - /* Invalid character class name. */ - return FNM_NOMATCH; +# if HANDLE_MULTIBYTE +/* Note that this evaluates C many times. */ +# ifdef _LIBC +# define FOLD(c) ((flags & FNM_CASEFOLD) ? towlower (c) : (c)) +# else +# define FOLD(c) ((flags & FNM_CASEFOLD) && ISUPPER (c) ? towlower (c) : (c)) +# endif +# define CHAR wchar_t +# define UCHAR wint_t +# define FCT internal_fnwmatch +# define L(CS) L##CS +# define __btowc(wc) wc +# define STRCHR(S, C) wcschr (S, C) +# define STRCHRNUL(S, C) __wcschrnul (S, C) + +# undef IS_CHAR_CLASS +# ifdef _LIBC +/* We have to convert the wide character string in a multibyte string. But + we know that the character class names are ASCII strings and since the + internal wide character encoding is UCS4 we can use a simplified method + to convert the string to a multibyte character string. */ +static wctype_t +is_char_class (const wchar_t *wcs) +{ + char s[CHAR_CLASS_MAX_LENGTH + 1]; + char *cp = s; - if (__iswctype (__btowc ((unsigned char) *n), wt)) - goto matched; -# else - if ((STREQ (str, "alnum") && ISALNUM ((unsigned char) *n)) - || (STREQ (str, "alpha") && ISALPHA ((unsigned char) *n)) - || (STREQ (str, "blank") && ISBLANK ((unsigned char) *n)) - || (STREQ (str, "cntrl") && ISCNTRL ((unsigned char) *n)) - || (STREQ (str, "digit") && ISDIGIT ((unsigned char) *n)) - || (STREQ (str, "graph") && ISGRAPH ((unsigned char) *n)) - || (STREQ (str, "lower") && ISLOWER ((unsigned char) *n)) - || (STREQ (str, "print") && ISPRINT ((unsigned char) *n)) - || (STREQ (str, "punct") && ISPUNCT ((unsigned char) *n)) - || (STREQ (str, "space") && ISSPACE ((unsigned char) *n)) - || (STREQ (str, "upper") && ISUPPER ((unsigned char) *n)) - || (STREQ (str, "xdigit") && ISXDIGIT ((unsigned char) *n))) - goto matched; -# endif - } - else if (c == '\0') - /* [ (unterminated) loses. */ - return FNM_NOMATCH; - else - { - normal_bracket: - if (FOLD (c) == fn) - goto matched; - - cold = c; - c = *p++; - - if (c == '-' && *p != ']') - { - /* It is a range. */ - unsigned char cend = *p++; - if (!(flags & FNM_NOESCAPE) && cend == '\\') - cend = *p++; - if (cend == '\0') - return FNM_NOMATCH; - - if (cold <= fn && fn <= FOLD (cend)) - goto matched; - - c = *p++; - } - } - - if (c == ']') - break; - } - - if (!not) - return FNM_NOMATCH; - break; - - matched: - /* Skip the rest of the [...] that already matched. */ - while (c != ']') - { - if (c == '\0') - /* [... (unterminated) loses. */ - return FNM_NOMATCH; - - c = *p++; - if (!(flags & FNM_NOESCAPE) && c == '\\') - { - if (*p == '\0') - return FNM_NOMATCH; - /* XXX 1003.2d11 is unclear if this is right. */ - ++p; - } - else if (c == '[' && *p == ':') - { - do - if (*++p == '\0') - return FNM_NOMATCH; - while (*p != ':' || p[1] == ']'); - p += 2; - c = *p; - } - } - if (not) - return FNM_NOMATCH; - } - break; - - default: - if (c != FOLD ((unsigned char) *n)) - return FNM_NOMATCH; - } - - ++n; + do + { + if (*wcs < 0x20 || *wcs >= 0x7f) + return 0; + + *cp++ = (char) *wcs; } + while (*wcs++ != L'\0'); - if (*n == '\0') - return 0; + return __wctype (s); +} +# else +/* Since we cannot assume anything about the internal encoding we have to + convert the string back to multibyte representation the hard way. */ +static wctype_t +is_char_class (const wchar_t *wcs) +{ + mstate_t ps; + char *s; + size_t n; + + memset (&ps, '\0', sizeof (ps)); - if ((flags & FNM_LEADING_DIR) && *n == '/') - /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */ + n = wcsrtombs (NULL, wcs, 0, &ps); + if (n == (size_t) -1) + /* Something went wrong. */ return 0; - return FNM_NOMATCH; + s = alloca (n + 1); + assert (mbsinit (&ps)); + (void) wcsrtombs (s, wcs, n + 1, &ps); -# undef FOLD + return __wctype (s); } +# endif +# define IS_CHAR_CLASS(string) is_char_class (string) +# include "fnmatch_loop.c" +# endif int fnmatch (pattern, string, flags) @@ -482,7 +243,43 @@ fnmatch (pattern, string, flags) const char *string; int flags; { +# if HANDLE_MULTIBYTE + mbstate_t ps; + size_t n; + wchar_t *wpattern; + wchar_t *wstring; + + if (MB_CUR_MAX == 1) + /* This is an optimization for 8-bit character set. */ + return internal_fnmatch (pattern, string, flags & FNM_PERIOD, flags); + + /* Convert the strings into wide characters. */ + memset (&ps, '\0', sizeof (ps)); + n = mbsrtowcs (NULL, &pattern, 0, &ps); + if (n == (size_t) -1) + /* Something wrong. + XXX Do we have to set `errno' to something which mbsrtows hasn't + already done? */ + return -1; + wpattern = (wchar_t *) alloca ((n + 1) * sizeof (wchar_t)); + assert (mbsinit (&ps)); + (void) mbsrtowcs (wpattern, &pattern, n + 1, &ps); + + assert (mbsinit (&ps)); + n = mbsrtowcs (NULL, &string, 0, &ps); + if (n == (size_t) -1) + /* Something wrong. + XXX Do we have to set `errno' to something which mbsrtows hasn't + already done? */ + return -1; + wstring = (wchar_t *) alloca ((n + 1) * sizeof (wchar_t)); + assert (mbsinit (&ps)); + (void) mbsrtowcs (wstring, &string, n + 1, &ps); + + return internal_fnwmatch (wpattern, wstring, flags & FNM_PERIOD, flags); +# else return internal_fnmatch (pattern, string, flags & FNM_PERIOD, flags); +# endif /* mbstate_t and mbsrtowcs or _LIBC. */ } #endif /* _LIBC or not __GNU_LIBRARY__. */ diff --git a/posix/fnmatch_loop.c b/posix/fnmatch_loop.c new file mode 100644 index 0000000000..037a26d623 --- /dev/null +++ b/posix/fnmatch_loop.c @@ -0,0 +1,336 @@ +/* Copyright (C) 1991-1993, 1996-1999, 2000 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* Match STRING against the filename pattern PATTERN, returning zero if + it matches, nonzero if not. */ +static int FCT (const CHAR *pattern, const CHAR *string, + int no_leading_period, int flags) internal_function; + +static int +internal_function +FCT (pattern, string, no_leading_period, flags) + const CHAR *pattern; + const CHAR *string; + int no_leading_period; + int flags; +{ + register const CHAR *p = pattern, *n = string; + register UCHAR c; + + while ((c = *p++) != L('\0')) + { + c = FOLD (c); + + switch (c) + { + case L('?'): + if (*n == L('\0')) + return FNM_NOMATCH; + else if (*n == L('/') && (flags & FNM_FILE_NAME)) + return FNM_NOMATCH; + else if (*n == L('.') && no_leading_period + && (n == string + || (n[-1] == L('/') && (flags & FNM_FILE_NAME)))) + return FNM_NOMATCH; + break; + + case L('\\'): + if (!(flags & FNM_NOESCAPE)) + { + c = *p++; + if (c == L('\0')) + /* Trailing \ loses. */ + return FNM_NOMATCH; + c = FOLD (c); + } + if (FOLD ((UCHAR) *n) != c) + return FNM_NOMATCH; + break; + + case L('*'): + if (*n == L('.') && no_leading_period + && (n == string + || (n[-1] == L('/') && (flags & FNM_FILE_NAME)))) + return FNM_NOMATCH; + + for (c = *p++; c == L('?') || c == L('*'); c = *p++) + { + if (*n == L('/') && (flags & FNM_FILE_NAME)) + /* A slash does not match a wildcard under FNM_FILE_NAME. */ + return FNM_NOMATCH; + else if (c == L('?')) + { + /* A ? needs to match one character. */ + if (*n == L('\0')) + /* There isn't another character; no match. */ + return FNM_NOMATCH; + else + /* One character of the string is consumed in matching + this ? wildcard, so *??? won't match if there are + less than three characters. */ + ++n; + } + } + + if (c == L('\0')) + /* The wildcard(s) is/are the last element of the pattern. + If the name is a file name and contains another slash + this does mean it cannot match. */ + return ((flags & FNM_FILE_NAME) && STRCHR (n, L('/')) != NULL + ? FNM_NOMATCH : 0); + else + { + const CHAR *endp; + + endp = STRCHRNUL (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0')); + + if (c == L('[')) + { + int flags2 = ((flags & FNM_FILE_NAME) + ? flags : (flags & ~FNM_PERIOD)); + + for (--p; n < endp; ++n) + if (FCT (p, n, (no_leading_period + && (n == string + || (n[-1] == L('/') + && (flags & FNM_FILE_NAME)))), + flags2) == 0) + return 0; + } + else if (c == L('/') && (flags & FNM_FILE_NAME)) + { + while (*n != L('\0') && *n != L('/')) + ++n; + if (*n == L('/') + && (FCT (p, n + 1, flags & FNM_PERIOD, flags) == 0)) + return 0; + } + else + { + int flags2 = ((flags & FNM_FILE_NAME) + ? flags : (flags & ~FNM_PERIOD)); + + if (c == L('\\') && !(flags & FNM_NOESCAPE)) + c = *p; + c = FOLD (c); + for (--p; n < endp; ++n) + if (FOLD ((UCHAR) *n) == c + && (FCT (p, n, (no_leading_period + && (n == string + || (n[-1] == L('/') + && (flags & FNM_FILE_NAME)))), + flags2) == 0)) + return 0; + } + } + + /* If we come here no match is possible with the wildcard. */ + return FNM_NOMATCH; + + case L('['): + { + /* Nonzero if the sense of the character class is inverted. */ + static int posixly_correct; + register int not; + CHAR cold; + + if (posixly_correct == 0) + posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; + + if (*n == L('\0')) + return FNM_NOMATCH; + + if (*n == L('.') && no_leading_period + && (n == string + || (n[-1] == L('/') && (flags & FNM_FILE_NAME)))) + return FNM_NOMATCH; + + if (*n == L('/') && (flags & FNM_FILE_NAME)) + /* `/' cannot be matched. */ + return FNM_NOMATCH; + + not = (*p == L('!') || (posixly_correct < 0 && *p == L('^'))); + if (not) + ++p; + + c = *p++; + for (;;) + { + UCHAR fn = FOLD ((UCHAR) *n); + + if (!(flags & FNM_NOESCAPE) && c == L('\\')) + { + if (*p == L('\0')) + return FNM_NOMATCH; + c = FOLD ((UCHAR) *p); + ++p; + + if (c == fn) + goto matched; + } + else if (c == L('[') && *p == L(':')) + { + /* Leave room for the null. */ + CHAR str[CHAR_CLASS_MAX_LENGTH + 1]; + size_t c1 = 0; +# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) + wctype_t wt; +# endif + const CHAR *startp = p; + + for (;;) + { + if (c1 == CHAR_CLASS_MAX_LENGTH) + /* The name is too long and therefore the pattern + is ill-formed. */ + return FNM_NOMATCH; + + c = *++p; + if (c == L(':') && p[1] == L(']')) + { + p += 2; + break; + } + if (c < L('a') || c >= L('z')) + { + /* This cannot possibly be a character class name. + Match it as a normal range. */ + p = startp; + c = L('['); + goto normal_bracket; + } + str[c1++] = c; + } + str[c1] = L('\0'); + +# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) + wt = IS_CHAR_CLASS (str); + if (wt == 0) + /* Invalid character class name. */ + return FNM_NOMATCH; + + if (__iswctype (__btowc ((UCHAR) *n), wt)) + goto matched; +# else + if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n)) + || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n)) + || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n)) + || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n)) + || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n)) + || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n)) + || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n)) + || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n)) + || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n)) + || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n)) + || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n)) + || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n))) + goto matched; +# endif + } + else if (c == L('\0')) + /* [ (unterminated) loses. */ + return FNM_NOMATCH; + else + { + normal_bracket: + if (FOLD (c) == fn) + goto matched; + + cold = c; + c = *p++; + + if (c == L('-') && *p != L(']')) + { + /* It is a range. */ + UCHAR cend = *p++; + if (!(flags & FNM_NOESCAPE) && cend == L('\\')) + cend = *p++; + if (cend == L('\0')) + return FNM_NOMATCH; + + if (cold <= fn && fn <= FOLD (cend)) + goto matched; + + c = *p++; + } + } + + if (c == L(']')) + break; + } + + if (!not) + return FNM_NOMATCH; + break; + + matched: + /* Skip the rest of the [...] that already matched. */ + while (c != L(']')) + { + if (c == L('\0')) + /* [... (unterminated) loses. */ + return FNM_NOMATCH; + + c = *p++; + if (!(flags & FNM_NOESCAPE) && c == L('\\')) + { + if (*p == L('\0')) + return FNM_NOMATCH; + /* XXX 1003.2d11 is unclear if this is right. */ + ++p; + } + else if (c == L('[') && *p == L(':')) + { + do + if (*++p == L('\0')) + return FNM_NOMATCH; + while (*p != L(':') || p[1] == L(']')); + p += 2; + c = *p; + } + } + if (not) + return FNM_NOMATCH; + } + break; + + default: + if (c != FOLD ((UCHAR) *n)) + return FNM_NOMATCH; + } + + ++n; + } + + if (*n == '\0') + return 0; + + if ((flags & FNM_LEADING_DIR) && *n == L('/')) + /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */ + return 0; + + return FNM_NOMATCH; + +#undef FOLD +#undef CHAR +#undef UCHAR +#undef FCT +#undef STRCHR +#undef STRCHRNUL +#undef L +} |