diff options
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | Doc/Zsh/expn.yo | 87 | ||||
-rw-r--r-- | Src/pattern.c | 32 | ||||
-rw-r--r-- | Test/D02glob.ztst | 25 |
4 files changed, 135 insertions, 14 deletions
diff --git a/ChangeLog b/ChangeLog index a254f0a32..4b61f4ee5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2005-05-09 Peter Stephenson <pws@csr.com> + + * 21211: Doc/Zsh/expn.yo, Src/pattern.c, Test/D02glob.ztst: + Add [[:IDENT:]], [[:IFS:]], [[:IFSSPACE:]], [[:WORD:]] tests. + 2005-05-08 Bart Schaefer <schaefer@zsh.org> * 21235, 21236: Completion/Unix/Command/_ssh: fix remote filename diff --git a/Doc/Zsh/expn.yo b/Doc/Zsh/expn.yo index a16e252e6..a6235222f 100644 --- a/Doc/Zsh/expn.yo +++ b/Doc/Zsh/expn.yo @@ -1224,19 +1224,82 @@ A `tt(-)' or `tt(])' may be matched by including it as the first character in the list. cindex(character classes) There are also several named classes of characters, in the form -`tt([:)var(name)tt(:])' with the following meanings: `tt([:alnum:])' -alphanumeric, `tt([:alpha:])' alphabetic, -`tt([:ascii:])' 7-bit, -`tt([:blank:])' space or tab, -`tt([:cntrl:])' control character, `tt([:digit:])' decimal -digit, `tt([:graph:])' printable character except whitespace, -`tt([:lower:])' lowercase letter, `tt([:print:])' printable character, -`tt([:punct:])' printable character neither alphanumeric nor whitespace, -`tt([:space:])' whitespace character, `tt([:upper:])' uppercase letter, -`tt([:xdigit:])' hexadecimal digit. These use the macros provided by +`tt([:)var(name)tt(:])' with the following meanings. +The first set use the macros provided by the operating system to test for the given character combinations, -including any modifications due to local language settings: see -manref(ctype)(3). Note that the square brackets are additional +including any modifications due to local language settings, see +manref(ctype)(3): + +startitem() +item(tt([:alnum:]))( +The character is alphanumeric +) +item(tt([:alpha:])) +( +The character is alphabetic +) +item(tt([:ascii:]))( +The character is 7-bit, i.e. is a single-byte character without +the top bit set. 
+) +item(tt([:blank:]))( +The character is either space or tab +) +item(tt([:cntrl:]))( +The character is a control character +) +item(tt([:digit:]))( +The character is a decimal digit +) +item(tt([:graph:]))( +The character is a printable character other than whitespace +) +item(tt([:lower:]))( +The character is a lowercase letter +) +item(tt([:print:]))( +The character is printable +) +item(tt([:punct:]))( +The character is printable but neither alphanumeric nor whitespace +) +item(tt([:space:]))( +The character is whitespace +) +item(tt([:upper:]))( +The character is an uppercase letter +) +item(tt([:xdigit:]))( +The character is a hexadecimal digit +) +enditem() + +Another set of named classes is handled internally by the shell and +is not sensitive to the locale: + +startitem() +item(tt([:IDENT:]))( +The character is allowed to form part of a shell identifier, such +as a parameter name +) +item(tt([:IFS:]))( +The character is used as an input field separator, i.e. is contained in the +tt(IFS) parameter +) +item(tt([:IFSSPACE:]))( +The character is an IFS white space character; see the documentation +for tt(IFS) in +ifzman(the zmanref(zshparams) manual page)\ +ifnzman(noderef(Parameters Used By The Shell))\ +. +) +item(tt([:WORD:]))( +The character is treated as part of a word; this test is sensitive +to the value of the tt(WORDCHARS) parameter +) +enditem() + +Note that the square brackets are additional to those enclosing the whole set of characters, so to test for a single alphanumeric character you need `tt([[:alnum:]])'. 
Named character sets can be used alongside other types, diff --git a/Src/pattern.c b/Src/pattern.c index ed88bb7ce..393d9bf41 100644 --- a/Src/pattern.c +++ b/Src/pattern.c @@ -193,8 +193,12 @@ typedef union upat *Upat; #define PP_SPACE 11 #define PP_UPPER 12 #define PP_XDIGIT 13 -#define PP_UNKWN 14 -#define PP_RANGE 15 +#define PP_IDENT 14 +#define PP_IFS 15 +#define PP_IFSSPACE 16 +#define PP_WORD 17 +#define PP_UNKWN 18 +#define PP_RANGE 19 #define P_OP(p) ((p)->l & 0xff) #define P_NEXT(p) ((p)->l >> 8) @@ -1118,6 +1122,14 @@ patcomppiece(int *flagp) ch = PP_UPPER; else if (!strncmp(patparse, "xdigit", len)) ch = PP_XDIGIT; + else if (!strncmp(patparse, "IDENT", len)) + ch = PP_IDENT; + else if (!strncmp(patparse, "IFS", len)) + ch = PP_IFS; + else if (!strncmp(patparse, "IFSSPACE", len)) + ch = PP_IFSSPACE; + else if (!strncmp(patparse, "WORD", len)) + ch = PP_WORD; else ch = PP_UNKWN; patparse = nptr + 2; @@ -2724,6 +2736,22 @@ patmatchrange(char *range, int ch) if (isxdigit(ch)) return 1; break; + case PP_IDENT: + if (iident(ch)) + return 1; + break; + case PP_IFS: + if (isep(ch)) + return 1; + break; + case PP_IFSSPACE: + if (iwsep(ch)) + return 1; + break; + case PP_WORD: + if (iword(ch)) + return 1; + break; case PP_RANGE: range++; r1 = STOUC(UNMETA(range)); diff --git a/Test/D02glob.ztst b/Test/D02glob.ztst index e423176a3..d21aaf8c4 100644 --- a/Test/D02glob.ztst +++ b/Test/D02glob.ztst @@ -323,3 +323,28 @@ print glob.tmp/ra=1.0_et=3.5/??? 
0:Bug with intermediate paths with plain strings but tokenized characters >glob.tmp/ra=1.0_et=3.5/foo + + doesmatch() { + setopt localoptions extendedglob + print -n $1 $2\ + if [[ $1 = $~2 ]]; then print yes; else print no; fi; + } + doesmatch MY_IDENTIFIER '[[:IDENT:]]##' + doesmatch YOUR:IDENTIFIER '[[:IDENT:]]##' + IFS=$'\n' doesmatch $'\n' '[[:IFS:]]' + IFS=' ' doesmatch $'\n' '[[:IFS:]]' + IFS=':' doesmatch : '[[:IFSSPACE:]]' + IFS=' ' doesmatch ' ' '[[:IFSSPACE:]]' + WORDCHARS="" doesmatch / '[[:WORD:]]' + WORDCHARS="/" doesmatch / '[[:WORD:]]' +0:Named character sets handled internally +>MY_IDENTIFIER [[:IDENT:]]## yes +>YOUR:IDENTIFIER [[:IDENT:]]## no +> +> [[:IFS:]] yes +> +> [[:IFS:]] no +>: [[:IFSSPACE:]] no +> [[:IFSSPACE:]] yes +>/ [[:WORD:]] no +>/ [[:WORD:]] yes |