summary refs log tree commit diff
diff options
context:
space:
mode:
authorPeter Stephenson <pws@zsh.org>2016-01-19 17:24:12 +0000
committerPeter Stephenson <pws@zsh.org>2016-01-19 17:24:12 +0000
commitad16356e1923ec1b4daf97b27b10a835cfe73ba7 (patch)
tree714fe0c1d6c89a32ac5194475402fa6dc3f8d218
parent8eb9070d6785f423dd9bdbbb0513aa47c8a08d62 (diff)
downloadzsh-ad16356e1923ec1b4daf97b27b10a835cfe73ba7.tar.gz
zsh-ad16356e1923ec1b4daf97b27b10a835cfe73ba7.tar.xz
zsh-ad16356e1923ec1b4daf97b27b10a835cfe73ba7.zip
37689: ! and ^ need to be tokenised in character sets
-rw-r--r--ChangeLog6
-rw-r--r--README34
-rw-r--r--Src/glob.c19
-rw-r--r--Src/lex.c31
-rw-r--r--Src/pattern.c13
-rw-r--r--Src/zsh.h16
-rw-r--r--Test/D02glob.ztst33
7 files changed, 110 insertions, 42 deletions
diff --git a/ChangeLog b/ChangeLog
index 71acc1e64..4264932f6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2016-01-19  Peter Stephenson  <p.stephenson@samsung.com>
+
+	* 37689: README, Src/glob.c, Src/lex.c, Src/pattern.c,
+	Src/zsh.h, Test/D02glob.ztst: also ! and ^ need to be tokenised
+	in character set.
+
 2016-01-18  Daniel Shahaf  <d.s@daniel.shahaf.name>
 
 	* 37678: Src/glob.c, Src/lex.c, Src/pattern.c, Src/utils.c,
diff --git a/README b/README
index 2e2ebce2b..6e5b73067 100644
--- a/README
+++ b/README
@@ -29,17 +29,43 @@ Zsh is a shell with lots of features.  For a list of some of these, see the
 file FEATURES, and for the latest changes see NEWS.  For more
 details, see the documentation.
 
-Incompatibilities between 5.1 and 5.2
+Incompatibilities between 5.2 and 5.3
 -------------------------------------
 
+In character classes delimited by "[" and "]" within patterns, whether
+used for filename generation (globbing) or other forms of pattern
+matching, it used not to be possible to quote "-" when used for a range,
+or "^" and "!" when used for negating a character set.  The characters can
+now be quoted by any of the standard shell means, but note that
+the "[" and "]" must not be quoted.  For example,
+
+  [[ $a = ['a-z'] ]]
+
+matches if the variable a contains exactly one character that is "a", "-"
+or "z".  Previously this would have matched any lower case ASCII
+letter.  Note therefore the useful fact that
+
+  [[ $a = ["$cset"] ]]
+
+matches any character contained in the variable "cset".  A consequence
+of this change is that variables that should have active ranges need
+(with default zsh options) to be indicated explicitly, e.g.
+
+  cset="a-z"
+  [[ b = [${~cset}] ]]
+
+The "~" causes the "-" character to be active.  In sh emulation the
+"~" is unnecessary in this example and double quotes must be used to
+suppress the range behaviour of the "-".
+
+Incompatibilities between 5.0.8 and 5.2
+---------------------------------------
+
 The behaviour of the parameter flag (P) has changed when it appears
 in a nested parameter group, in order to make it more useful in
 such cases.  A (P) in the outermost parameter group behaves as
 before.  See NEWS for more.
 
-Incompatibilities between 5.0.8 and 5.1
----------------------------------------
-
 The default behaviour when text is pasted into an X Windows terminal has
 changed significantly (unless you are using a very old terminal emulator
 that doesn't support this mode).  Now, the new "bracketed paste mode"
diff --git a/Src/glob.c b/Src/glob.c
index e5d8956e6..c7992813e 100644
--- a/Src/glob.c
+++ b/Src/glob.c
@@ -3476,7 +3476,7 @@ static void
 zshtokenize(char *s, int flags)
 {
     char *t;
-    int bslash = 0, seen_brct = 0;
+    int bslash = 0;
 
     for (; *s; s++) {
       cont:
@@ -3507,20 +3507,6 @@ zshtokenize(char *s, int flags)
 	    *t = Inang;
 	    *s = Outang;
 	    break;
-	case '[':
-	    if (bslash)
-		s[-1] = (flags & ZSHTOK_SUBST) ? Bnullkeep : Bnull;
-	    else {
-		seen_brct = 1;
-		*s = Inbrack;
-	    }
-	    break;
-	case '-':
-	    if (bslash)
-		s[-1] = (flags & ZSHTOK_SUBST) ? Bnullkeep : Bnull;
-	    else if (seen_brct) /* see corresonding code in lex.c */
-		*s = Dash;
-	    break;
 	case '(':
 	case '|':
 	case ')':
@@ -3531,10 +3517,13 @@ zshtokenize(char *s, int flags)
 	case '^':
 	case '#':
 	case '~':
+	case '[':
 	case ']':
 	case '*':
 	case '?':
 	case '=':
+	case '-':
+	case '!':
 	    for (t = ztokens; *t; t++) {
 		if (*t == *s) {
 		    if (bslash)
diff --git a/Src/lex.c b/Src/lex.c
index 9a7e3b8fe..0202d2559 100644
--- a/Src/lex.c
+++ b/Src/lex.c
@@ -35,7 +35,7 @@
 /* tokens */
 
 /**/
-mod_export char ztokens[] = "#$^*(())$=|{}[]`<>>?~`,-'\"\\\\";
+mod_export char ztokens[] = "#$^*(())$=|{}[]`<>>?~`,-!'\"\\\\";
 
 /* parts of the current token */
 
@@ -395,8 +395,9 @@ ctxtlex(void)
 #define LX2_BQUOTE 16
 #define LX2_COMMA 17
 #define LX2_DASH 18
-#define LX2_OTHER 19
-#define LX2_META 20
+#define LX2_BANG 19
+#define LX2_OTHER 20
+#define LX2_META 21
 
 static unsigned char lexact1[256], lexact2[256], lextok2[256];
 
@@ -406,10 +407,10 @@ initlextabs(void)
 {
     int t0;
     static char *lx1 = "\\q\n;!&|(){}[]<>";
-    static char *lx2 = ";)|$[]~({}><=\\\'\"`,-";
+    static char *lx2 = ";)|$[]~({}><=\\\'\"`,-!";
 
     for (t0 = 0; t0 != 256; t0++) {
-	lexact1[t0] = LX1_OTHER;
+       lexact1[t0] = LX1_OTHER;
 	lexact2[t0] = LX2_OTHER;
 	lextok2[t0] = t0;
     }
@@ -1361,12 +1362,20 @@ gettokstr(int c, int sub)
 	     */
 	    if (seen_brct)
 		c = Dash;
-	    else
-		c = '-';
-	    break;
-	}
-	add(c);
-	c = hgetc();
+           else
+               c = '-';
+           break;
+       case LX2_BANG:
+           /*
+            * Same logic as Dash, for ! to perform negation in range.
+            */
+           if (seen_brct)
+               c = Bang;
+           else
+               c = '!';
+       }
+       add(c);
+       c = hgetc();
 	if (intpos)
 	    intpos--;
 	if (lexstop)
diff --git a/Src/pattern.c b/Src/pattern.c
index d2b8c590b..72c7d97d5 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -247,7 +247,7 @@ typedef unsigned long zrange_t;
  */
 static const char zpc_chars[ZPC_COUNT] = {
     '/', '\0', Bar, Outpar, Tilde, Inpar, Quest, Star, Inbrack, Inang,
-    Hat, Pound, Bnullkeep, Quest, Star, '+', '!', '@'
+    Hat, Pound, Bnullkeep, Quest, Star, '+', Bang, '!', '@'
 };
 
 /*
@@ -257,7 +257,7 @@ static const char zpc_chars[ZPC_COUNT] = {
 /**/
 mod_export const char *zpc_strings[ZPC_COUNT] = {
    NULL, NULL, "|", NULL, "~", "(", "?", "*", "[", "<",
-   "^", "#", NULL, "?(", "*(", "+(", "!(", "@("
+   "^", "#", NULL, "?(", "*(", "+(", "!(", "\\!(", "@("
 };
 
 /*
@@ -481,7 +481,7 @@ patcompcharsset(void)
 	 */
 	zpc_special[ZPC_KSH_QUEST] = zpc_special[ZPC_KSH_STAR] =
 	    zpc_special[ZPC_KSH_PLUS] = zpc_special[ZPC_KSH_BANG] =
-	    zpc_special[ZPC_KSH_AT] = Marker;
+	    zpc_special[ZPC_KSH_BANG2] = zpc_special[ZPC_KSH_AT] = Marker;
     }
     /*
      * Note that if we are using KSHGLOB, then we test for a following
@@ -1268,6 +1268,8 @@ patcomppiece(int *flagp, int paren)
 		kshchar = STOUC('+');
 	    else if (*patparse == zpc_special[ZPC_KSH_BANG])
 		kshchar = STOUC('!');
+	    else if (*patparse == zpc_special[ZPC_KSH_BANG2])
+		kshchar = STOUC('!');
 	    else if (*patparse == zpc_special[ZPC_KSH_AT])
 		kshchar = STOUC('@');
 	    else if (*patparse == zpc_special[ZPC_KSH_STAR])
@@ -1424,7 +1426,7 @@ patcomppiece(int *flagp, int paren)
 	    DPUTS(zpc_special[ZPC_INBRACK] == Marker,
 		  "Treating '[' as pattern character although disabled");
 	    flags |= P_SIMPLE;
-	    if (*patparse == Hat || *patparse == '^' || *patparse == '!') {
+	    if (*patparse == Hat || *patparse == Bang) {
 		patparse++;
 		starter = patnode(P_ANYBUT);
 	    } else
@@ -4245,7 +4247,8 @@ haswilds(char *str)
 		     ((str[-1] == Quest && !zpc_disables[ZPC_KSH_QUEST]) ||
 		      (str[-1] == Star && !zpc_disables[ZPC_KSH_STAR]) ||
 		      (str[-1] == '+' && !zpc_disables[ZPC_KSH_PLUS]) ||
-		      (str[-1] == '!' && !zpc_disables[ZPC_KSH_BANG]) ||
+		      (str[-1] == Bang && !zpc_disables[ZPC_KSH_BANG]) ||
+		      (str[-1] == '!' && !zpc_disables[ZPC_KSH_BANG2]) ||
 		      (str[-1] == '@' && !zpc_disables[ZPC_KSH_AT]))))
 		    return 1;
 		break;
diff --git a/Src/zsh.h b/Src/zsh.h
index 6ee2a9c8d..b83b8bdbb 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -193,29 +193,30 @@ struct mathfunc {
 #define Qtick		((char) 0x99)
 #define Comma		((char) 0x9a)
 #define Dash            ((char) 0x9b) /* Only in patterns */
+#define Bang            ((char) 0x9c) /* Only in patterns */
 /*
  * Marks the last of the group above.
  * Remaining tokens are even more special.
  */
-#define LAST_NORMAL_TOK Dash
+#define LAST_NORMAL_TOK Bang
 /*
  * Null arguments: placeholders for single and double quotes
  * and backslashes.
  */
-#define Snull		((char) 0x9c)
-#define Dnull		((char) 0x9d)
-#define Bnull		((char) 0x9e)
+#define Snull		((char) 0x9d)
+#define Dnull		((char) 0x9e)
+#define Bnull		((char) 0x9f)
 /*
  * Backslash which will be returned to "\" instead of being stripped
  * when we turn the string into a printable format.
  */
-#define Bnullkeep       ((char) 0x9f)
+#define Bnullkeep       ((char) 0xa0)
 /*
  * Null argument that does not correspond to any character.
  * This should be last as it does not appear in ztokens and
  * is used to initialise the IMETA type in inittyptab().
  */
-#define Nularg		((char) 0xa0)
+#define Nularg		((char) 0xa1)
 
 /*
  * Take care to update the use of IMETA appropriately when adding
@@ -226,7 +227,7 @@ struct mathfunc {
  * Also used in pattern character arrays as guaranteed not to
  * mark a character in a string.
  */
-#define Marker		((char) 0xa1)
+#define Marker		((char) 0xa2)
 
 /* chars that need to be quoted if meant literally */
 
@@ -1549,6 +1550,7 @@ enum zpc_chars {
     ZPC_KSH_STAR,               /* * for *(...) in KSH_GLOB */
     ZPC_KSH_PLUS,               /* + for +(...) in KSH_GLOB */
     ZPC_KSH_BANG,               /* ! for !(...) in KSH_GLOB */
+    ZPC_KSH_BANG2,              /* ! for !(...) in KSH_GLOB, untokenised */
     ZPC_KSH_AT,                 /* @ for @(...) in KSH_GLOB */
     ZPC_COUNT			/* Number of special chararacters */
 };
diff --git a/Test/D02glob.ztst b/Test/D02glob.ztst
index 89256e303..a6b704a8e 100644
--- a/Test/D02glob.ztst
+++ b/Test/D02glob.ztst
@@ -622,3 +622,36 @@
 0:quoted - works in pattern in parameter
 >bcdef
 >cdef
+
+  [[ a != [^a] ]]
+0:^ active in character class if not quoted
+
+  [[ a = ['^a'] ]]
+0:^ not active in character class if quoted
+
+  [[ a != [!a] ]]
+0:! active in character class if not quoted
+
+  [[ a = ['!a'] ]]
+0:! not active in character class if quoted
+
+  # Actually, we don't need the quoting here,
+  # c.f. the next test.  This just makes it look
+  # more standard.
+  cset="^a-z"
+  [[ "^" = ["$cset"] ]] || print Fail 1
+  [[ "a" = ["$cset"] ]] || print Fail 2
+  [[ "-" = ["$cset"] ]] || print Fail 3
+  [[ "z" = ["$cset"] ]] || print Fail 4
+  [[ "1" != ["$cset"] ]] || print Fail 5
+  [[ "b" != ["$cset"] ]] || print Fail 6
+0:character set specified as quoted variable
+
+  cset="^a-z"
+  [[ "^" = [$~cset] ]] || print Fail 1
+  [[ "a" != [$~cset] ]] || print Fail 2
+  [[ "-" = [$~cset] ]] || print Fail 3
+  [[ "z" != [$~cset] ]] || print Fail 4
+  [[ "1" = [$~cset] ]] || print Fail 5
+  [[ "b" != [$~cset] ]] || print Fail 6
+0:character set specified as active variable