about summary refs log tree commit diff
diff options
context:
space:
mode:
author Peter Stephenson <p.w.stephenson@ntlworld.com> 2013-06-01 20:39:09 +0100
committer Peter Stephenson <p.w.stephenson@ntlworld.com> 2013-06-01 20:39:09 +0100
commit 68d0d76db55c0b8778f0b68d3eda54060b576c41 (patch)
tree 6eacd19beede82e99524550f95e122a8abf85b1e
parent aa0875daa1df60fdb0151ddd585e69076d0f3114 (diff)
download zsh-68d0d76db55c0b8778f0b68d3eda54060b576c41.tar.gz
zsh-68d0d76db55c0b8778f0b68d3eda54060b576c41.tar.xz
zsh-68d0d76db55c0b8778f0b68d3eda54060b576c41.zip
31441: use array to decide which forms of pattern are enabled
-rw-r--r-- ChangeLog 5
-rw-r--r-- Src/glob.c 20
-rw-r--r-- Src/pattern.c 208
-rw-r--r-- Src/zsh.h 40
4 files changed, 182 insertions, 91 deletions
diff --git a/ChangeLog b/ChangeLog
index 3ea29b6b3..87a64b835 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2013-06-01  Peter Stephenson  <p.w.stephenson@ntlworld.com>
+
+	* 31441: Src/glob.c, Src/pattern.c, Src/zsh.h: use an array
+	based on ZPC_* enum to decide which forms of pattern are enabled.
+
 2013-05-30  Peter Stephenson  <p.stephenson@samsung.com>
 
 	* 31436: Doc/Zsh/expn.yo: KSH_GLOB syntax does not
diff --git a/Src/glob.c b/Src/glob.c
index ca2ffaf51..db86d2468 100644
--- a/Src/glob.c
+++ b/Src/glob.c
@@ -708,8 +708,9 @@ parsecomplist(char *instr)
     }
 
     /* Parse repeated directories such as (dir/)# and (dir/)## */
-    if (*(str = instr) == Inpar && !skipparens(Inpar, Outpar, (char **)&str) &&
-        *str == Pound && isset(EXTENDEDGLOB) && str[-2] == '/') {
+    if (*(str = instr) == zpc_special[ZPC_INPAR] &&
+	!skipparens(Inpar, Outpar, (char **)&str) &&
+        *str == zpc_special[ZPC_HASH] && str[-2] == '/') {
 	instr++;
 	if (!(p1 = patcompile(instr, compflags, &instr)))
 	    return NULL;
@@ -761,9 +762,9 @@ parsepat(char *str)
      * Check for initial globbing flags, so that they don't form
      * a bogus path component.
      */
-    if ((*str == Inpar && str[1] == Pound && isset(EXTENDEDGLOB)) ||
-	(isset(KSHGLOB) && *str == '@' && str[1] == Inpar &&
-	 str[2] == Pound)) {
+    if ((*str == zpc_special[ZPC_INPAR] && str[1] == zpc_special[ZPC_HASH]) ||
+	(*str == zpc_special[ZPC_KSH_AT] && str[1] == Inpar &&
+	 str[2] == zpc_special[ZPC_HASH])) {
 	str += (*str == Inpar) ? 2 : 3;
 	if (!patgetglobflags(&str, &assert, &ignore))
 	    return NULL;
@@ -1146,7 +1147,7 @@ zglob(LinkList list, LinkNode np, int nountok)
     gf_pre_words = NULL;
 
     /* Check for qualifiers */
-    while (!nobareglob || isset(EXTENDEDGLOB)) {
+    while (!nobareglob || zpc_special[ZPC_HASH] != Marker) {
 	struct qual *newquals;
 	char *s;
 	int sense, paren;
@@ -1192,10 +1193,11 @@ zglob(LinkList list, LinkNode np, int nountok)
 	    case Outpar:
 		paren++; /*FALLTHROUGH*/
 	    case Bar:
-		nobareglob = 1;
+		if (zpc_special[ZPC_BAR] != Marker)
+		    nobareglob = 1;
 		break;
 	    case Tilde:
-		if (isset(EXTENDEDGLOB))
+		if (zpc_special[ZPC_TILDE] != Marker)
 		    nobareglob = 1;
 		break;
 	    case Inpar:
@@ -1205,7 +1207,7 @@ zglob(LinkList list, LinkNode np, int nountok)
 	}
 	if (*s != Inpar)
 	    break;
-	if (isset(EXTENDEDGLOB) && s[1] == Pound) {
+	if (s[1] == zpc_special[ZPC_HASH]) {
 	    if (s[2] == 'q') {
 		*s = 0;
 		s += 2;
diff --git a/Src/pattern.c b/Src/pattern.c
index 3b6edb850..54d6e7cb3 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -225,34 +225,27 @@ typedef unsigned long zrange_t;
 #endif
 
 /*
- * Characters which terminate a pattern segment.  We actually use
- * a pointer patendseg which skips the first character if we are not
- * parsing a file pattern.
- * Note that the size of this and the next array are hard-wired
- * via the definitions.
+ * Array of characters corresponding to zpc_chars enum, which it must match.
  */
-
-static char endseg[] = {
-    '/',			/* file only */
-    '\0', Bar, Outpar,		/* all patterns */
-    Tilde			/* extended glob only */
+static const char zpc_chars[ZPC_COUNT] = {
+    '/', '\0', Bar, Outpar, Tilde, Inpar, Quest, Star, Inbrack, Inang,
+    Hat, Pound, Bnullkeep, Quest, Star, '+', '!', '@'
 };
 
-#define PATENDSEGLEN_NORM 4
-#define PATENDSEGLEN_EXT  5
-
-/* Characters which terminate a simple string */
-
-static char endstr[] = {
-    '/',			/* file only */
-    '\0', Bar, Outpar, Quest, Star, Inbrack, Inpar, Inang, Bnullkeep,
-				/* all patterns */
-    Tilde, Hat, Pound		/* extended glob only */
-};
-
-#define PATENDSTRLEN_NORM 10
-#define PATENDSTRLEN_EXT  13
+/*
+ * Characters which terminate a simple string (ZPC_COUNT) or
+ * an entire pattern segment (the first ZPC_SEG_COUNT).
+ * Each entry is either the corresponding character in zpc_chars
+ * or Marker which is guaranteed not to match a character in a
+ * pattern we are compiling.
+ *
+ * The complete list indicates characters that are special, so e.g.
+ * (testchar == zpc_special[ZPC_TILDE]) succeeds only if testchar is a Tilde
+ * *and* Tilde is currently special.
+ */
 
+/**/
+char zpc_special[ZPC_COUNT];
 
 /* Default size for pattern buffer */
 #define P_DEF_ALLOC 256
@@ -264,10 +257,6 @@ static char *patcode;		/* point of code emission */
 static long patsize;		/* size of code */
 static char *patout;		/* start of code emission string */
 static long patalloc;		/* size allocated for same */
-static char *patendseg;		/* characters ending segment */
-static int patendseglen;	/* length of same */
-static char *patendstr;		/* characters ending plain string */
-static int patendstrlen;	/* length of sameo */
 
 /* Flags used in both compilation and execution */
 static int patflags;		    /* flags passed down to patcompile */
@@ -417,12 +406,56 @@ static long rn_offs;
 		    (P_OP(p) == P_BACK) ? \
 		    ((p)-rn_offs) : ((p)+rn_offs) : NULL)
 
+/*
+ * Set up zpc_special with characters that end a string segment.
+ * "Marker" cannot occur in the pattern we are compiling so
+ * is used to mark "invalid".
+ */
+static void
+patcompcharsset(void)
+{
+    memcpy(zpc_special, zpc_chars, ZPC_COUNT);
+    if (!isset(EXTENDEDGLOB)) {
+	/* Extended glob characters are not active */
+	zpc_special[ZPC_TILDE] = zpc_special[ZPC_HAT] =
+	    zpc_special[ZPC_HASH] = Marker;
+    }
+    if (!isset(KSHGLOB)) {
+	/*
+	 * Ksh glob characters are not active.
+	 * * and ? are shared with normal globbing, but for their
+	 * use here we are looking for a following Inpar.
+	 */
+	zpc_special[ZPC_KSH_QUEST] = zpc_special[ZPC_KSH_STAR] =
+	    zpc_special[ZPC_KSH_PLUS] = zpc_special[ZPC_KSH_BANG] =
+	    zpc_special[ZPC_KSH_AT] = Marker;
+    }
+    /*
+     * Note that if we are using KSHGLOB, then we test for a following
+     * Inpar, not zpc_special[ZPC_INPAR]:  the latter makes an Inpar on
+     * its own active.  A zpc_special[ZPC_KSH_*] character followed by any
+     * old Inpar is what identifies ksh globbing.
+     */
+    if (isset(SHGLOB)) {
+	/*
+	 * Grouping and numeric ranges are not valid.
+	 * We do allow alternation, however; it's needed for
+	 * "case".  This may not be entirely consistent.
+	 *
+	 * Don't disable Outpar: we may need to match the end of KSHGLOB
+	 * parentheses and it would be difficult to tell them apart.
+	 */
+	zpc_special[ZPC_INPAR] = zpc_special[ZPC_INANG] = Marker;
+    }
+}
+
 /* Called before parsing a set of file matchs to initialize flags */
 
 /**/
 void
 patcompstart(void)
 {
+    patcompcharsset();
     if (isset(CASEGLOB))
 	patglobflags = 0;
     else
@@ -469,16 +502,9 @@ patcompile(char *exp, int inflags, char **endexp)
     patnpar = 1;
     patflags = inflags & ~(PAT_PURES|PAT_HAS_EXCLUDP);
 
-    patendseg = endseg;
-    patendseglen = isset(EXTENDEDGLOB) ? PATENDSEGLEN_EXT : PATENDSEGLEN_NORM;
-    patendstr = endstr;
-    patendstrlen = isset(EXTENDEDGLOB) ? PATENDSTRLEN_EXT : PATENDSTRLEN_NORM;
-
     if (!(patflags & PAT_FILE)) {
-	patendseg++;
-	patendstr++;
-	patendseglen--;
-	patendstrlen--;
+	patcompcharsset();
+	zpc_special[ZPC_SLASH] = Marker;
 	remnulargs(patparse);
 	if (isset(MULTIBYTE))
 	    patglobflags = GF_MULTIBYTE;
@@ -698,11 +724,11 @@ patcompswitch(int paren, int *flagp)
 
     *flagp |= flags & (P_HSTART|P_PURESTR);
 
-    while (*patparse == Bar ||
-	   (isset(EXTENDEDGLOB) && *patparse == Tilde &&
+    while (*patparse == zpc_chars[ZPC_BAR] ||
+	   (*patparse == zpc_special[ZPC_TILDE] &&
 	    (patparse[1] == '/' ||
-	     !memchr(patendseg, patparse[1], patendseglen)))) {
-	int tilde = *patparse++ == Tilde;
+	     !memchr(zpc_special, patparse[1], ZPC_SEG_COUNT)))) {
+	int tilde = *patparse++ == zpc_special[ZPC_TILDE];
 	long gfnode = 0, newbr;
 
 	*flagp &= ~P_PURESTR;
@@ -739,12 +765,9 @@ patcompswitch(int paren, int *flagp)
 	    up.p = NULL;
 	    patadd((char *)&up, 0, sizeof(up), 0);
 	    /* / is not treated as special if we are at top level */
-	    if (!paren && *patendseg == '/') {
+	    if (!paren && zpc_special[ZPC_SLASH] == '/') {
 		tilde++;
-		patendseg++;
-		patendseglen--;
-		patendstr++;
-		patendstrlen--;
+		zpc_special[ZPC_SLASH] = Marker;
 	    }
 	} else {
 	    excsync = 0;
@@ -784,10 +807,7 @@ patcompswitch(int paren, int *flagp)
 	newbr = patcompbranch(&flags);
 	if (tilde == 2) {
 	    /* restore special treatment of / */
-	    patendseg--;
-	    patendseglen++;
-	    patendstr--;
-	    patendstrlen++;
+	    zpc_special[ZPC_SLASH] = '/';
 	}
 	if (!newbr)
 	    return 0;
@@ -855,14 +875,13 @@ patcompbranch(int *flagp)
     *flagp = P_PURESTR;
 
     starter = chain = 0;
-    while (!memchr(patendseg, *patparse, patendseglen) ||
-	   (*patparse == Tilde && patparse[1] != '/' &&
-	    memchr(patendseg, patparse[1], patendseglen))) {
-	if (isset(EXTENDEDGLOB) &&
-	    ((!isset(SHGLOB) &&
-	      (*patparse == Inpar && patparse[1] == Pound)) ||
-	     (isset(KSHGLOB) && *patparse == '@' && patparse[1] == Inpar &&
-	      patparse[2] == Pound))) {
+    while (!memchr(zpc_special, *patparse, ZPC_SEG_COUNT) ||
+	   (*patparse == zpc_special[ZPC_TILDE] && patparse[1] != '/' &&
+	    memchr(zpc_special, patparse[1], ZPC_SEG_COUNT))) {
+	if ((*patparse == zpc_special[ZPC_INPAR] &&
+	     patparse[1] == zpc_special[ZPC_HASH]) ||
+	    (*patparse == zpc_special[ZPC_KSH_AT] && patparse[1] == Inpar &&
+	     patparse[2] == zpc_special[ZPC_HASH])) {
 	    /* Globbing flags. */
 	    char *pp1 = patparse;
 	    int oldglobflags = patglobflags, ignore;
@@ -910,7 +929,7 @@ patcompbranch(int *flagp)
 		break;
 	    else
 		continue;
-	} else if (isset(EXTENDEDGLOB) && *patparse == Hat) {
+	} else if (*patparse == zpc_special[ZPC_HAT]) {
 	    /*
 	     * ^pat:  anything but pat.  For proper backtracking,
 	     * etc., we turn this into (*~pat), except without the
@@ -1171,7 +1190,7 @@ patcomppiece(int *flagp)
 {
     long starter = 0, next, op, opnd;
     int flags, flags2, kshchar, len, ch, patch, nmeta;
-    int pound, count;
+    int hash, count;
     union upat up;
     char *nptr, *str0, *ptr, *patprev;
     zrange_t from, to;
@@ -1185,11 +1204,17 @@ patcomppiece(int *flagp)
 	 * the string doesn't introduce a ksh-like parenthesized expression.
 	 */
 	kshchar = '\0';
-	if (isset(KSHGLOB) && *patparse && patparse[1] == Inpar) {
-	    if (strchr("?*+!@", *patparse))
-		kshchar = STOUC(*patparse);
-	    else if (*patparse == Star || *patparse == Quest)
-		kshchar = STOUC(ztokens[*patparse - Pound]);
+	if (*patparse && patparse[1] == Inpar) {
+	    if (*patparse == zpc_special[ZPC_KSH_PLUS])
+		kshchar = STOUC('+');
+	    else if (*patparse == zpc_special[ZPC_KSH_BANG])
+		kshchar = STOUC('!');
+	    else if (*patparse == zpc_special[ZPC_KSH_AT])
+		kshchar = STOUC('@');
+	    else if (*patparse == zpc_special[ZPC_KSH_STAR])
+		kshchar = STOUC('*');
+	    else if (*patparse == zpc_special[ZPC_KSH_QUEST])
+		kshchar = STOUC('?');
 	}
 
 	/*
@@ -1199,10 +1224,10 @@ patcomppiece(int *flagp)
 	 * tildes are not special if there is nothing following to
 	 * be excluded.
 	 */
-	if (kshchar || (memchr(patendstr, *patparse, patendstrlen) &&
-			(*patparse != Tilde ||
+	if (kshchar || (memchr(zpc_special, *patparse, ZPC_COUNT) &&
+			(*patparse != zpc_special[ZPC_TILDE] ||
 			 patparse[1] == '/' ||
-			 !memchr(patendseg, patparse[1], patendseglen))))
+			 !memchr(zpc_special, patparse[1], ZPC_SEG_COUNT))))
 	    break;
 
 	/* Remember the previous character for backtracking */
@@ -1227,10 +1252,14 @@ patcomppiece(int *flagp)
 	 * If we have more than one character, a following hash
 	 * or (#c...) only applies to the last, so backtrack one character.
 	 */
-	if (isset(EXTENDEDGLOB) &&
-	    (*patparse == Pound ||
-	     (*patparse == Inpar && patparse[1] == Pound &&
-	      patparse[2] == 'c')) && morelen)
+	if ((*patparse == zpc_special[ZPC_HASH] ||
+	     (*patparse == zpc_special[ZPC_INPAR] &&
+	      patparse[1] == zpc_special[ZPC_HASH] &&
+	      patparse[2] == 'c') ||
+	     (*patparse == zpc_special[ZPC_KSH_AT] &&
+	      patparse[1] == Inpar &&
+	      patparse[2] == zpc_special[ZPC_HASH] &&
+	      patparse[3] == 'c')) && morelen)
 	    patparse = patprev;
 	/*
 	 * If len is 1, we can't have an active # following, so doesn't
@@ -1306,15 +1335,21 @@ patcomppiece(int *flagp)
 	METACHARINC(patparse);
 	switch(patch) {
 	case Quest:
+	    DPUTS(zpc_special[ZPC_QUEST] == Marker,
+		  "Treating '?' as pattern character although disabled");
 	    flags |= P_SIMPLE;
 	    starter = patnode(P_ANY);
 	    break;
 	case Star:
+	    DPUTS(zpc_special[ZPC_STAR] == Marker,
+		  "Treating '*' as pattern character although disabled");
 	    /* kshchar is used as a sign that we can't have #'s. */
 	    kshchar = -1;
 	    starter = patnode(P_STAR);
 	    break;
 	case Inbrack:
+	    DPUTS(zpc_special[ZPC_INBRACK] == Marker,
+		  "Treating '[' as pattern character although disabled");
 	    flags |= P_SIMPLE;
 	    if (*patparse == Hat || *patparse == '^' || *patparse == '!') {
 		patparse++;
@@ -1368,9 +1403,10 @@ patcomppiece(int *flagp)
 	    patadd(NULL, 0, 1, 0);
 	    break;
 	case Inpar:
-	    /* is this how to treat parentheses in SHGLOB? */
-	    if (isset(SHGLOB) && !kshchar)
-		return 0;
+	    DPUTS(zpc_special[ZPC_INPAR] == Marker,
+		  "Treating '(' as pattern character although disabled");
+	    DPUTS(isset(SHGLOB) && !kshchar,
+		  "Treating bare '(' as pattern character with SHGLOB");
 	    if (kshchar == '!') {
 		/* This is nasty, we should really either handle all
 		 * kshglobbing below or here.  But most of the
@@ -1393,6 +1429,9 @@ patcomppiece(int *flagp)
 	    break;
 	case Inang:
 	    /* Numeric glob */
+	    DPUTS(zpc_special[ZPC_INANG] == Marker,
+		  "Treating '<' as pattern character although disabled");
+	    DPUTS(isset(SHGLOB), "Treating <..> as numeric range with SHGLOB");
 	    len = 0;		/* beginning present 1, end present 2 */
 	    if (idigit(*patparse)) {
 		from = (zrange_t) zstrtol((char *)patparse,
@@ -1435,6 +1474,8 @@ patcomppiece(int *flagp)
 	     */
 	    break;
 	case Pound:
+	    DPUTS(zpc_special[ZPC_HASH] == Marker,
+		  "Treating '#' as pattern character although disabled");
 	    DPUTS(!isset(EXTENDEDGLOB), "BUG: # not treated as string");
 	    /*
 	     * A hash here is an error; it should follow something
@@ -1465,16 +1506,21 @@ patcomppiece(int *flagp)
     }
 
     count = 0;
-    if (!(pound = (*patparse == Pound && isset(EXTENDEDGLOB))) &&
-	!(count = (isset(EXTENDEDGLOB) && *patparse == Inpar &&
-		   patparse[1] == Pound && patparse[2] == 'c')) &&
+    if (!(hash = (*patparse == zpc_special[ZPC_HASH])) &&
+	!(count = ((*patparse == zpc_special[ZPC_INPAR] &&
+		    patparse[1] == zpc_special[ZPC_HASH] &&
+		    patparse[2] == 'c') ||
+		   (*patparse == zpc_special[ZPC_KSH_AT] &&
+		    patparse[1] == Inpar &&
+		    patparse[2] == zpc_special[ZPC_HASH] &&
+		    patparse[3] == 'c'))) &&
 	(kshchar <= 0 || kshchar == '@' || kshchar == '!')) {
 	*flagp = flags;
 	return starter;
     }
 
     /* too much at once doesn't currently work */
-    if (kshchar && (pound || count))
+    if (kshchar && (hash || count))
 	return 0;
 
     if (kshchar == '*') {
@@ -1490,7 +1536,7 @@ patcomppiece(int *flagp)
 	op = P_COUNT;
 	patparse += 3;
 	*flagp = P_HSTART;
-    } else if (*++patparse == Pound) {
+    } else if (*++patparse == zpc_special[ZPC_HASH]) {
 	op = P_TWOHASH;
 	patparse++;
 	*flagp = P_HSTART;
@@ -1600,7 +1646,7 @@ patcomppiece(int *flagp)
 	pattail(starter, next);
 	patoptail(starter, next);
     }
-    if (*patparse == Pound)
+    if (*patparse == zpc_special[ZPC_HASH])
 	return 0;
 
     return starter;
diff --git a/Src/zsh.h b/Src/zsh.h
index f247563d4..639c2b746 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -179,7 +179,11 @@ struct mathfunc {
  * Take care to update the use of IMETA appropriately when adding
  * tokens here.
  */
-/* Marker used in paramsubst for rc_expand_param */
+/*
+ * Marker used in paramsubst for rc_expand_param.
+ * Also used in pattern character arrays as guaranteed not to
+ * mark a character in a string.
+ */
 #define Marker		((char) 0xa0)
 
 /* chars that need to be quoted if meant literally */
@@ -1375,6 +1379,40 @@ struct patprog {
 #define PAT_HAS_EXCLUDP	0x0800	/* (internal): top-level path1~path2. */
 #define PAT_LCMATCHUC   0x1000  /* equivalent to setting (#l) */
 
+/**
+ * Indexes into the array of active pattern characters.
+ * This must match the array zpc_chars in pattern.c.
+ */
+enum zpc_chars {
+    /*
+     * These characters both terminate a pattern segment and
+     * a pure string segment.
+     */
+    ZPC_SLASH,			/* / active as file separator */
+    ZPC_NULL,			/* \0 as string terminator */
+    ZPC_BAR,			/* | for "or" */
+    ZPC_OUTPAR,			/* ) for grouping */
+    ZPC_TILDE,			/* ~ for exclusion (extended glob) */
+    ZPC_SEG_COUNT,              /* No. of the above characters */
+    /*
+     * These characters terminate a pure string segment.
+     */
+    ZPC_INPAR = ZPC_SEG_COUNT,  /* ( for grouping */
+    ZPC_QUEST,			/* ? as wildcard */
+    ZPC_STAR,			/* * as wildcard */
+    ZPC_INBRACK,		/* [ for character class */
+    ZPC_INANG,			/* < for numeric glob */
+    ZPC_HAT,			/* ^ for exclusion (extended glob) */
+    ZPC_HASH,			/* # for repetition (extended glob) */
+    ZPC_BNULLKEEP,		/* Special backslashed null not removed */
+    ZPC_KSH_QUEST,              /* ? for ?(...) in KSH_GLOB */
+    ZPC_KSH_STAR,               /* * for *(...) in KSH_GLOB */
+    ZPC_KSH_PLUS,               /* + for +(...) in KSH_GLOB */
+    ZPC_KSH_BANG,               /* ! for !(...) in KSH_GLOB */
+    ZPC_KSH_AT,                 /* @ for @(...) in KSH_GLOB */
+    ZPC_COUNT			/* Number of special characters */
+};
+
 /*
  * Special match types used in character classes.  These
  * are represented as tokens, with Meta added.  The character