From ad16356e1923ec1b4daf97b27b10a835cfe73ba7 Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Tue, 19 Jan 2016 17:24:12 +0000 Subject: 37689: ! and ^ need to be tokenised in character sets --- ChangeLog | 6 ++++++ README | 34 ++++++++++++++++++++++++++++++---- Src/glob.c | 19 ++++--------------- Src/lex.c | 31 ++++++++++++++++++++----------- Src/pattern.c | 13 ++++++++----- Src/zsh.h | 16 +++++++++------- Test/D02glob.ztst | 33 +++++++++++++++++++++++++++++++++ 7 files changed, 110 insertions(+), 42 deletions(-) diff --git a/ChangeLog b/ChangeLog index 71acc1e64..4264932f6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2016-01-19 Peter Stephenson + + * 37689: README, Src/glob.c, Src/lex.c, Src/pattern.c, + Src/zsh.h, Test/D02glob.ztst: also ! and ^ need to be tokenised + in character set. + 2016-01-18 Daniel Shahaf * 37678: Src/glob.c, Src/lex.c, Src/pattern.c, Src/utils.c, diff --git a/README b/README index 2e2ebce2b..6e5b73067 100644 --- a/README +++ b/README @@ -29,17 +29,43 @@ Zsh is a shell with lots of features. For a list of some of these, see the file FEATURES, and for the latest changes see NEWS. For more details, see the documentation. -Incompatibilities between 5.1 and 5.2 +Incompatibilities between 5.2 and 5.3 ------------------------------------- +In character classes delimited by "[" and "]" within patterns, whether +used for filename generation (globbing) or other forms of pattern +matching, it used not to be possible to quote "-" when used for a range, +or "^" and "!" when used for negating a character set. The characters can +now be quoted by any of the standard shell means, but note that +the "[" and "]" must not be quoted. For example, + + [[ $a = ['a-z'] ]] + +matches if the variable a contains just one of the characters "a", "-" +or "z" only. Previously this would have matched any lower case ASCII +letter. Note therefore the useful fact that + + [[ $a = ["$cset"] ]] + +matches any character contained in the variable "cset". 
A consequence +of this change is that variables that should have active ranges need +(with default zsh options) to be indicated explicitly, e.g. + + cset="a-z" + [[ b = [${~cset}] ]] + +The "~" causes the "-" character to be active. In sh emulation the +"~" is unnecessary in this example and double quotes must be used to +suppress the range behaviour of the "-". + +Incompatibilities between 5.0.8 and 5.2 +--------------------------------------- + The behaviour of the parameter flag (P) has changed when it appears in a nested parameter group, in order to make it more useful in such cases. A (P) in the outermost parameter group behaves as before. See NEWS for more. -Incompatibilities between 5.0.8 and 5.1 ---------------------------------------- - The default behaviour when text is pasted into an X Windows terminal has changed significantly (unless you are using a very old terminal emulator that doesn't support this mode). Now, the new "bracketed paste mode" diff --git a/Src/glob.c b/Src/glob.c index e5d8956e6..c7992813e 100644 --- a/Src/glob.c +++ b/Src/glob.c @@ -3476,7 +3476,7 @@ static void zshtokenize(char *s, int flags) { char *t; - int bslash = 0, seen_brct = 0; + int bslash = 0; for (; *s; s++) { cont: @@ -3507,20 +3507,6 @@ zshtokenize(char *s, int flags) *t = Inang; *s = Outang; break; - case '[': - if (bslash) - s[-1] = (flags & ZSHTOK_SUBST) ? Bnullkeep : Bnull; - else { - seen_brct = 1; - *s = Inbrack; - } - break; - case '-': - if (bslash) - s[-1] = (flags & ZSHTOK_SUBST) ? 
Bnullkeep : Bnull; - else if (seen_brct) /* see corresonding code in lex.c */ - *s = Dash; - break; case '(': case '|': case ')': @@ -3531,10 +3517,13 @@ zshtokenize(char *s, int flags) case '^': case '#': case '~': + case '[': case ']': case '*': case '?': case '=': + case '-': + case '!': for (t = ztokens; *t; t++) { if (*t == *s) { if (bslash) diff --git a/Src/lex.c b/Src/lex.c index 9a7e3b8fe..0202d2559 100644 --- a/Src/lex.c +++ b/Src/lex.c @@ -35,7 +35,7 @@ /* tokens */ /**/ -mod_export char ztokens[] = "#$^*(())$=|{}[]`<>>?~`,-'\"\\\\"; +mod_export char ztokens[] = "#$^*(())$=|{}[]`<>>?~`,-!'\"\\\\"; /* parts of the current token */ @@ -395,8 +395,9 @@ ctxtlex(void) #define LX2_BQUOTE 16 #define LX2_COMMA 17 #define LX2_DASH 18 -#define LX2_OTHER 19 -#define LX2_META 20 +#define LX2_BANG 19 +#define LX2_OTHER 20 +#define LX2_META 21 static unsigned char lexact1[256], lexact2[256], lextok2[256]; @@ -406,10 +407,10 @@ initlextabs(void) { int t0; static char *lx1 = "\\q\n;!&|(){}[]<>"; - static char *lx2 = ";)|$[]~({}><=\\\'\"`,-"; + static char *lx2 = ";)|$[]~({}><=\\\'\"`,-!"; for (t0 = 0; t0 != 256; t0++) { - lexact1[t0] = LX1_OTHER; + lexact1[t0] = LX1_OTHER; lexact2[t0] = LX2_OTHER; lextok2[t0] = t0; } @@ -1361,12 +1362,20 @@ gettokstr(int c, int sub) */ if (seen_brct) c = Dash; - else - c = '-'; - break; - } - add(c); - c = hgetc(); + else + c = '-'; + break; + case LX2_BANG: + /* + * Same logic as Dash, for ! to perform negation in range. 
+ */ + if (seen_brct) + c = Bang; + else + c = '!'; + } + add(c); + c = hgetc(); if (intpos) intpos--; if (lexstop) diff --git a/Src/pattern.c b/Src/pattern.c index d2b8c590b..72c7d97d5 100644 --- a/Src/pattern.c +++ b/Src/pattern.c @@ -247,7 +247,7 @@ typedef unsigned long zrange_t; */ static const char zpc_chars[ZPC_COUNT] = { '/', '\0', Bar, Outpar, Tilde, Inpar, Quest, Star, Inbrack, Inang, - Hat, Pound, Bnullkeep, Quest, Star, '+', '!', '@' + Hat, Pound, Bnullkeep, Quest, Star, '+', Bang, '!', '@' }; /* @@ -257,7 +257,7 @@ static const char zpc_chars[ZPC_COUNT] = { /**/ mod_export const char *zpc_strings[ZPC_COUNT] = { NULL, NULL, "|", NULL, "~", "(", "?", "*", "[", "<", - "^", "#", NULL, "?(", "*(", "+(", "!(", "@(" + "^", "#", NULL, "?(", "*(", "+(", "!(", "\\!(", "@(" }; /* @@ -481,7 +481,7 @@ patcompcharsset(void) */ zpc_special[ZPC_KSH_QUEST] = zpc_special[ZPC_KSH_STAR] = zpc_special[ZPC_KSH_PLUS] = zpc_special[ZPC_KSH_BANG] = - zpc_special[ZPC_KSH_AT] = Marker; + zpc_special[ZPC_KSH_BANG2] = zpc_special[ZPC_KSH_AT] = Marker; } /* * Note that if we are using KSHGLOB, then we test for a following @@ -1268,6 +1268,8 @@ patcomppiece(int *flagp, int paren) kshchar = STOUC('+'); else if (*patparse == zpc_special[ZPC_KSH_BANG]) kshchar = STOUC('!'); + else if (*patparse == zpc_special[ZPC_KSH_BANG2]) + kshchar = STOUC('!'); else if (*patparse == zpc_special[ZPC_KSH_AT]) kshchar = STOUC('@'); else if (*patparse == zpc_special[ZPC_KSH_STAR]) @@ -1424,7 +1426,7 @@ patcomppiece(int *flagp, int paren) DPUTS(zpc_special[ZPC_INBRACK] == Marker, "Treating '[' as pattern character although disabled"); flags |= P_SIMPLE; - if (*patparse == Hat || *patparse == '^' || *patparse == '!') { + if (*patparse == Hat || *patparse == Bang) { patparse++; starter = patnode(P_ANYBUT); } else @@ -4245,7 +4247,8 @@ haswilds(char *str) ((str[-1] == Quest && !zpc_disables[ZPC_KSH_QUEST]) || (str[-1] == Star && !zpc_disables[ZPC_KSH_STAR]) || (str[-1] == '+' && 
!zpc_disables[ZPC_KSH_PLUS]) || - (str[-1] == '!' && !zpc_disables[ZPC_KSH_BANG]) || + (str[-1] == Bang && !zpc_disables[ZPC_KSH_BANG]) || + (str[-1] == '!' && !zpc_disables[ZPC_KSH_BANG2]) || (str[-1] == '@' && !zpc_disables[ZPC_KSH_AT])))) return 1; break; diff --git a/Src/zsh.h b/Src/zsh.h index 6ee2a9c8d..b83b8bdbb 100644 --- a/Src/zsh.h +++ b/Src/zsh.h @@ -193,29 +193,30 @@ struct mathfunc { #define Qtick ((char) 0x99) #define Comma ((char) 0x9a) #define Dash ((char) 0x9b) /* Only in patterns */ +#define Bang ((char) 0x9c) /* Only in patterns */ /* * Marks the last of the group above. * Remaining tokens are even more special. */ -#define LAST_NORMAL_TOK Dash +#define LAST_NORMAL_TOK Bang /* * Null arguments: placeholders for single and double quotes * and backslashes. */ -#define Snull ((char) 0x9c) -#define Dnull ((char) 0x9d) -#define Bnull ((char) 0x9e) +#define Snull ((char) 0x9d) +#define Dnull ((char) 0x9e) +#define Bnull ((char) 0x9f) /* * Backslash which will be returned to "\" instead of being stripped * when we turn the string into a printable format. */ -#define Bnullkeep ((char) 0x9f) +#define Bnullkeep ((char) 0xa0) /* * Null argument that does not correspond to any character. * This should be last as it does not appear in ztokens and * is used to initialise the IMETA type in inittyptab(). */ -#define Nularg ((char) 0xa0) +#define Nularg ((char) 0xa1) /* * Take care to update the use of IMETA appropriately when adding @@ -226,7 +227,7 @@ struct mathfunc { * Also used in pattern character arrays as guaranteed not to * mark a character in a string. */ -#define Marker ((char) 0xa1) +#define Marker ((char) 0xa2) /* chars that need to be quoted if meant literally */ @@ -1549,6 +1550,7 @@ enum zpc_chars { ZPC_KSH_STAR, /* * for *(...) in KSH_GLOB */ ZPC_KSH_PLUS, /* + for +(...) in KSH_GLOB */ ZPC_KSH_BANG, /* ! for !(...) in KSH_GLOB */ + ZPC_KSH_BANG2, /* ! for !(...) in KSH_GLOB, untokenised */ ZPC_KSH_AT, /* @ for @(...) 
in KSH_GLOB */ ZPC_COUNT /* Number of special chararacters */ }; diff --git a/Test/D02glob.ztst b/Test/D02glob.ztst index 89256e303..a6b704a8e 100644 --- a/Test/D02glob.ztst +++ b/Test/D02glob.ztst @@ -622,3 +622,36 @@ 0:quoted - works in pattern in parameter >bcdef >cdef + + [[ a != [^a] ]] +0:^ active in character class if not quoted + + [[ a = ['^a'] ]] +0:^ not active in character class if quoted + + [[ a != [!a] ]] +0:! active in character class if not quoted + + [[ a = ['!a'] ]] +0:! not active in character class if quoted + + # Actually, we don't need the quoting here, + # c.f. the next test. This just makes it look + # more standard. + cset="^a-z" + [[ "^" = ["$cset"] ]] || print Fail 1 + [[ "a" = ["$cset"] ]] || print Fail 2 + [[ "-" = ["$cset"] ]] || print Fail 3 + [[ "z" = ["$cset"] ]] || print Fail 4 + [[ "1" != ["$cset"] ]] || print Fail 5 + [[ "b" != ["$cset"] ]] || print Fail 6 +0:character set specified as quoted variable + + cset="^a-z" + [[ "^" = [$~cset] ]] || print Fail 1 + [[ "a" != [$~cset] ]] || print Fail 2 + [[ "-" = [$~cset] ]] || print Fail 3 + [[ "z" != [$~cset] ]] || print Fail 4 + [[ "1" = [$~cset] ]] || print Fail 5 + [[ "b" != [$~cset] ]] || print Fail 6 +0:character set specified as active variable -- cgit 1.4.1