37678: Now possible to quote "-" in pattern range

author: Peter Stephenson <pws@zsh.org> 2016-01-19 09:55:46 +0000
committer: Peter Stephenson <pws@zsh.org> 2016-01-19 09:55:46 +0000
commit: 8eb9070d6785f423dd9bdbbb0513aa47c8a08d62 (patch)
tree: 412d2d74ceabd14860d313e9338de48e7b5d8291
parent: d0cd9032d8b46051b490790a30e87a1e87c86670 (diff)
download: zsh-8eb9070d6785f423dd9bdbbb0513aa47c8a08d62.tar.gz
zsh-8eb9070d6785f423dd9bdbbb0513aa47c8a08d62.tar.xz
zsh-8eb9070d6785f423dd9bdbbb0513aa47c8a08d62.zip
7 files changed, 102 insertions, 18 deletions
diff --git a/ChangeLog b/ChangeLog
index d64e2c121..71acc1e64 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,9 @@
 2016-01-18  Daniel Shahaf  <d.s@daniel.shahaf.name>
 
+	* 37678: Src/glob.c, Src/lex.c, Src/pattern.c, Src/utils.c,
+	Src/zsh.h, Test/D02glob.ztst: Now possible to quote "-" in
+	a pattern range.
+
 	* 37634: Completion/Unix/Command/_man: _man: Support subsection
 	names such as '3p'.
 
diff --git a/Src/glob.c b/Src/glob.c
index 8bd2fc493..e5d8956e6 100644
--- a/Src/glob.c
+++ b/Src/glob.c
@@ -3476,7 +3476,7 @@ static void
 zshtokenize(char *s, int flags)
 {
     char *t;
-    int bslash = 0;
+    int bslash = 0, seen_brct = 0;
 
     for (; *s; s++) {
       cont:
@@ -3507,21 +3507,35 @@ zshtokenize(char *s, int flags)
 	    *t = Inang;
 	    *s = Outang;
 	    break;
+	case '[':
+	    if (bslash)
+		s[-1] = (flags & ZSHTOK_SUBST) ? Bnullkeep : Bnull;
+	    else {
+		seen_brct = 1;
+		*s = Inbrack;
+	    }
+	    break;
+	case '-':
+	    if (bslash)
+		s[-1] = (flags & ZSHTOK_SUBST) ? Bnullkeep : Bnull;
+	    else if (seen_brct) /* see corresponding code in lex.c */
+		*s = Dash;
+	    break;
 	case '(':
 	case '|':
 	case ')':
 	    if (flags & ZSHTOK_SHGLOB)
 		break;
+	    /*FALLTHROUGH*/
 	case '>':
 	case '^':
 	case '#':
 	case '~':
-	case '[':
 	case ']':
 	case '*':
 	case '?':
 	case '=':
-	    for (t = ztokens; *t; t++)
+	    for (t = ztokens; *t; t++) {
 		if (*t == *s) {
 		    if (bslash)
 			s[-1] = (flags & ZSHTOK_SUBST) ? Bnullkeep : Bnull;
@@ -3529,6 +3543,8 @@ zshtokenize(char *s, int flags)
 			*s = (t - ztokens) + Pound;
 		    break;
 		}
+	    }
+	    break;
 	}
 	bslash = 0;
     }
diff --git a/Src/lex.c b/Src/lex.c
index 0f260d08f..9a7e3b8fe 100644
--- a/Src/lex.c
+++ b/Src/lex.c
@@ -35,7 +35,7 @@
 /* tokens */
 
 /**/
-mod_export char ztokens[] = "#$^*(())$=|{}[]`<>>?~`,'\"\\\\";
+mod_export char ztokens[] = "#$^*(())$=|{}[]`<>>?~`,-'\"\\\\";
 
 /* parts of the current token */
 
@@ -394,8 +394,9 @@ ctxtlex(void)
 #define LX2_DQUOTE 15
 #define LX2_BQUOTE 16
 #define LX2_COMMA 17
-#define LX2_OTHER 18
-#define LX2_META 19
+#define LX2_DASH 18
+#define LX2_OTHER 19
+#define LX2_META 20
 
 static unsigned char lexact1[256], lexact2[256], lextok2[256];
 
@@ -405,7 +406,7 @@ initlextabs(void)
 {
     int t0;
     static char *lx1 = "\\q\n;!&|(){}[]<>";
-    static char *lx2 = ";)|$[]~({}><=\\\'\"`,";
+    static char *lx2 = ";)|$[]~({}><=\\\'\"`,-";
 
     for (t0 = 0; t0 != 256; t0++) {
 	lexact1[t0] = LX1_OTHER;
@@ -919,7 +920,7 @@ gettok(void)
 static enum lextok
 gettokstr(int c, int sub)
 {
-    int bct = 0, pct = 0, brct = 0, fdpar = 0;
+    int bct = 0, pct = 0, brct = 0, seen_brct = 0, fdpar = 0;
     int intpos = 1, in_brace_param = 0;
     int inquote, unmatched = 0;
     enum lextok peek;
@@ -1033,8 +1034,10 @@ gettokstr(int c, int sub)
 	    }
 	    break;
 	case LX2_INBRACK:
-	    if (!in_brace_param)
+	    if (!in_brace_param) {
 		brct++;
+		seen_brct = 1;
+	    }
 	    c = Inbrack;
 	    break;
 	case LX2_OUTBRACK:
@@ -1346,6 +1349,21 @@ gettokstr(int c, int sub)
 	    c = Tick;
 	    SETPAREND
 	    break;
+	case LX2_DASH:
+	    /*
+	     * - shouldn't be treated as a special character unless
+	     * we're in a pattern.  However, simply counting "[" doesn't
+	     * work as []a-z] is a valid expression and we don't know
+	     * down here what this "[" is for as $foo[stuff] is valid
+	     * in zsh.  So just detect an opening [, which is enough
+	     * to turn this into a pattern; the Dash will be harmlessly
+	     * untokenised if not wanted.
+	     */
+	    if (seen_brct)
+		c = Dash;
+	    else
+		c = '-';
+	    break;
 	}
 	add(c);
 	c = hgetc();
diff --git a/Src/pattern.c b/Src/pattern.c
index 9e8a80ae1..d2b8c590b 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -1459,7 +1459,7 @@ patcomppiece(int *flagp, int paren)
 		charstart = patparse;
 		METACHARINC(patparse);
 
-		if (*patparse == '-' && patparse[1] &&
+		if (*patparse == Dash && patparse[1] &&
 		    patparse[1] != Outbrack) {
 		    patadd(NULL, STOUC(Meta)+PP_RANGE, 1, PA_NOALIGN);
 		    if (itok(*charstart)) {
@@ -1468,7 +1468,7 @@ patcomppiece(int *flagp, int paren)
 		    } else {
 			patadd(charstart, 0, patparse-charstart, PA_NOALIGN);
 		    }
-		    charstart = ++patparse;	/* skip ASCII '-' */
+		    charstart = ++patparse;	/* skip Dash token */
 		    METACHARINC(patparse);
 		}
 		if (itok(*charstart)) {
diff --git a/Src/utils.c b/Src/utils.c
index 788eba97e..fd0bab320 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -3888,7 +3888,7 @@ inittyptab(void)
     typtab['\0'] |= IMETA;
     typtab[STOUC(Meta)  ] |= IMETA;
     typtab[STOUC(Marker)] |= IMETA;
-    for (t0 = (int)STOUC(Pound); t0 <= (int)STOUC(Comma); t0++)
+    for (t0 = (int)STOUC(Pound); t0 <= (int)STOUC(LAST_NORMAL_TOK); t0++)
 	typtab[t0] |= ITOK | IMETA;
     for (t0 = (int)STOUC(Snull); t0 <= (int)STOUC(Nularg); t0++)
 	typtab[t0] |= ITOK | IMETA | INULL;
diff --git a/Src/zsh.h b/Src/zsh.h
index 0302d6886..6ee2a9c8d 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -192,24 +192,30 @@ struct mathfunc {
 #define Tilde		((char) 0x98)
 #define Qtick		((char) 0x99)
 #define Comma		((char) 0x9a)
+#define Dash            ((char) 0x9b) /* Only in patterns */
+/*
+ * Marks the last of the group above.
+ * Remaining tokens are even more special.
+ */
+#define LAST_NORMAL_TOK Dash
 /*
  * Null arguments: placeholders for single and double quotes
  * and backslashes.
  */
-#define Snull		((char) 0x9b)
-#define Dnull		((char) 0x9c)
-#define Bnull		((char) 0x9d)
+#define Snull		((char) 0x9c)
+#define Dnull		((char) 0x9d)
+#define Bnull		((char) 0x9e)
 /*
  * Backslash which will be returned to "\" instead of being stripped
  * when we turn the string into a printable format.
  */
-#define Bnullkeep       ((char) 0x9e)
+#define Bnullkeep       ((char) 0x9f)
 /*
  * Null argument that does not correspond to any character.
  * This should be last as it does not appear in ztokens and
  * is used to initialise the IMETA type in inittyptab().
  */
-#define Nularg		((char) 0x9f)
+#define Nularg		((char) 0xa0)
 
 /*
  * Take care to update the use of IMETA appropriately when adding
@@ -220,7 +226,7 @@ struct mathfunc {
  * Also used in pattern character arrays as guaranteed not to
  * mark a character in a string.
  */
-#define Marker		((char) 0xa0)
+#define Marker		((char) 0xa1)
 
 /* chars that need to be quoted if meant literally */
 
diff --git a/Test/D02glob.ztst b/Test/D02glob.ztst
index f944a4fbd..89256e303 100644
--- a/Test/D02glob.ztst
+++ b/Test/D02glob.ztst
@@ -582,3 +582,43 @@
 >1 OK
 >2 OK
 >3 OK
+
+  [[ foo = 'f'\o"o" ]]
+0:Stripping of quotes from patterns (1)
+
+  [[ foo = 'f'('o'|'a')('o'|'b') ]]
+0:Stripping of quotes from patterns (2)
+
+  [[ fob = 'f'('o'|'a')('o'|'b') ]]
+0:Stripping of quotes from patterns (3)
+
+  [[ fab = 'f'('o'|'a')('o'|'b') ]]
+0:Stripping of quotes from patterns (4)
+
+  [[ fib != 'f'('o'|'a')('o'|'b') ]]
+0:Stripping of quotes from patterns (5)
+
+  [[ - != [a-z] ]]
+0:- is a special character in ranges
+
+  [[ - = ['a-z'] ]]
+0:- is not a special character in ranges if quoted
+
+  [[ b-1 = [a-z]-[0-9] ]]
+0:- untokenized following a bracketed subexpression
+
+  [[ b-1 = []a-z]-[]0-9] ]]
+0:- "]" after "[" is normal range character and - still works
+
+  headremove="bcdef"
+  print ${headremove#[a-z]}
+0:active - works in pattern in parameter
+>cdef
+
+  headremove="bcdef"
+  print ${headremove#['a-z']}
+  headremove="-cdef"
+  print ${headremove#['a-z']}
+0:quoted - works in pattern in parameter
+>bcdef
+>cdef
author	Peter Stephenson <pws@zsh.org>	2016-01-19 09:55:46 +0000
committer	Peter Stephenson <pws@zsh.org>	2016-01-19 09:55:46 +0000
commit	8eb9070d6785f423dd9bdbbb0513aa47c8a08d62 (patch)
tree	412d2d74ceabd14860d313e9338de48e7b5d8291
parent	d0cd9032d8b46051b490790a30e87a1e87c86670 (diff)
download	zsh-8eb9070d6785f423dd9bdbbb0513aa47c8a08d62.tar.gz zsh-8eb9070d6785f423dd9bdbbb0513aa47c8a08d62.tar.xz zsh-8eb9070d6785f423dd9bdbbb0513aa47c8a08d62.zip