about summary refs log tree commit diff
path: root/Src/pattern.c
diff options
context:
space:
mode:
Diffstat (limited to 'Src/pattern.c')
-rw-r--r-- Src/pattern.c 208
1 files changed, 127 insertions, 81 deletions
diff --git a/Src/pattern.c b/Src/pattern.c
index 3b6edb850..54d6e7cb3 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -225,34 +225,27 @@ typedef unsigned long zrange_t;
 #endif
 
 /*
- * Characters which terminate a pattern segment.  We actually use
- * a pointer patendseg which skips the first character if we are not
- * parsing a file pattern.
- * Note that the size of this and the next array are hard-wired
- * via the definitions.
+ * Array of characters corresponding to zpc_chars enum, which it must match.
  */
-
-static char endseg[] = {
-    '/',			/* file only */
-    '\0', Bar, Outpar,		/* all patterns */
-    Tilde			/* extended glob only */
+static const char zpc_chars[ZPC_COUNT] = {
+    '/', '\0', Bar, Outpar, Tilde, Inpar, Quest, Star, Inbrack, Inang,
+    Hat, Pound, Bnullkeep, Quest, Star, '+', '!', '@'
 };
 
-#define PATENDSEGLEN_NORM 4
-#define PATENDSEGLEN_EXT  5
-
-/* Characters which terminate a simple string */
-
-static char endstr[] = {
-    '/',			/* file only */
-    '\0', Bar, Outpar, Quest, Star, Inbrack, Inpar, Inang, Bnullkeep,
-				/* all patterns */
-    Tilde, Hat, Pound		/* extended glob only */
-};
-
-#define PATENDSTRLEN_NORM 10
-#define PATENDSTRLEN_EXT  13
+/*
+ * Characters which terminate a simple string (ZPC_COUNT) or
+ * an entire pattern segment (the first ZPC_SEG_COUNT).
+ * Each entry is either the corresponding character in zpc_chars
+ * or Marker which is guaranteed not to match a character in a
+ * pattern we are compiling.
+ *
+ * The complete list indicates characters that are special, so e.g.
+ * (testchar == zpc_special[ZPC_TILDE]) succeeds only if testchar is a Tilde
+ * *and* Tilde is currently special.
+ */
 
+/**/
+char zpc_special[ZPC_COUNT];
 
 /* Default size for pattern buffer */
 #define P_DEF_ALLOC 256
@@ -264,10 +257,6 @@ static char *patcode;		/* point of code emission */
 static long patsize;		/* size of code */
 static char *patout;		/* start of code emission string */
 static long patalloc;		/* size allocated for same */
-static char *patendseg;		/* characters ending segment */
-static int patendseglen;	/* length of same */
-static char *patendstr;		/* characters ending plain string */
-static int patendstrlen;	/* length of sameo */
 
 /* Flags used in both compilation and execution */
 static int patflags;		    /* flags passed down to patcompile */
@@ -417,12 +406,56 @@ static long rn_offs;
 		    (P_OP(p) == P_BACK) ? \
 		    ((p)-rn_offs) : ((p)+rn_offs) : NULL)
 
+/*
+ * Set up zpc_special with characters that end a string segment.
+ * "Marker" cannot occur in the pattern we are compiling so
+ * is used to mark "invalid".
+ */
+static void
+patcompcharsset(void)
+{
+    memcpy(zpc_special, zpc_chars, ZPC_COUNT);
+    if (!isset(EXTENDEDGLOB)) {
+	/* Extended glob characters are not active */
+	zpc_special[ZPC_TILDE] = zpc_special[ZPC_HAT] =
+	    zpc_special[ZPC_HASH] = Marker;
+    }
+    if (!isset(KSHGLOB)) {
+	/*
+	 * Ksh glob characters are not active.
+	 * * and ? are shared with normal globbing, but for their
+	 * use here we are looking for a following Inpar.
+	 */
+	zpc_special[ZPC_KSH_QUEST] = zpc_special[ZPC_KSH_STAR] =
+	    zpc_special[ZPC_KSH_PLUS] = zpc_special[ZPC_KSH_BANG] =
+	    zpc_special[ZPC_KSH_AT] = Marker;
+    }
+    /*
+     * Note that if we are using KSHGLOB, then we test for a following
+     * Inpar, not zpc_special[ZPC_INPAR]:  the latter makes an Inpar on
+     * its own active.  A zpc_special[ZPC_KSH_*] character followed by any
+     * Inpar is what identifies ksh globbing.
+     */
+    if (isset(SHGLOB)) {
+	/*
+	 * Grouping and numeric ranges are not valid.
+	 * We do allow alternation, however; it's needed for
+	 * "case".  This may not be entirely consistent.
+	 *
+	 * Don't disable Outpar: we may need to match the end of KSHGLOB
+	 * parentheses and it would be difficult to tell them apart.
+	 */
+	zpc_special[ZPC_INPAR] = zpc_special[ZPC_INANG] = Marker;
+    }
+}
+
 /* Called before parsing a set of file matchs to initialize flags */
 
 /**/
 void
 patcompstart(void)
 {
+    patcompcharsset();
     if (isset(CASEGLOB))
 	patglobflags = 0;
     else
@@ -469,16 +502,9 @@ patcompile(char *exp, int inflags, char **endexp)
     patnpar = 1;
     patflags = inflags & ~(PAT_PURES|PAT_HAS_EXCLUDP);
 
-    patendseg = endseg;
-    patendseglen = isset(EXTENDEDGLOB) ? PATENDSEGLEN_EXT : PATENDSEGLEN_NORM;
-    patendstr = endstr;
-    patendstrlen = isset(EXTENDEDGLOB) ? PATENDSTRLEN_EXT : PATENDSTRLEN_NORM;
-
     if (!(patflags & PAT_FILE)) {
-	patendseg++;
-	patendstr++;
-	patendseglen--;
-	patendstrlen--;
+	patcompcharsset();
+	zpc_special[ZPC_SLASH] = Marker;
 	remnulargs(patparse);
 	if (isset(MULTIBYTE))
 	    patglobflags = GF_MULTIBYTE;
@@ -698,11 +724,11 @@ patcompswitch(int paren, int *flagp)
 
     *flagp |= flags & (P_HSTART|P_PURESTR);
 
-    while (*patparse == Bar ||
-	   (isset(EXTENDEDGLOB) && *patparse == Tilde &&
+    while (*patparse == zpc_chars[ZPC_BAR] ||
+	   (*patparse == zpc_special[ZPC_TILDE] &&
 	    (patparse[1] == '/' ||
-	     !memchr(patendseg, patparse[1], patendseglen)))) {
-	int tilde = *patparse++ == Tilde;
+	     !memchr(zpc_special, patparse[1], ZPC_SEG_COUNT)))) {
+	int tilde = *patparse++ == zpc_special[ZPC_TILDE];
 	long gfnode = 0, newbr;
 
 	*flagp &= ~P_PURESTR;
@@ -739,12 +765,9 @@ patcompswitch(int paren, int *flagp)
 	    up.p = NULL;
 	    patadd((char *)&up, 0, sizeof(up), 0);
 	    /* / is not treated as special if we are at top level */
-	    if (!paren && *patendseg == '/') {
+	    if (!paren && zpc_special[ZPC_SLASH] == '/') {
 		tilde++;
-		patendseg++;
-		patendseglen--;
-		patendstr++;
-		patendstrlen--;
+		zpc_special[ZPC_SLASH] = Marker;
 	    }
 	} else {
 	    excsync = 0;
@@ -784,10 +807,7 @@ patcompswitch(int paren, int *flagp)
 	newbr = patcompbranch(&flags);
 	if (tilde == 2) {
 	    /* restore special treatment of / */
-	    patendseg--;
-	    patendseglen++;
-	    patendstr--;
-	    patendstrlen++;
+	    zpc_special[ZPC_SLASH] = '/';
 	}
 	if (!newbr)
 	    return 0;
@@ -855,14 +875,13 @@ patcompbranch(int *flagp)
     *flagp = P_PURESTR;
 
     starter = chain = 0;
-    while (!memchr(patendseg, *patparse, patendseglen) ||
-	   (*patparse == Tilde && patparse[1] != '/' &&
-	    memchr(patendseg, patparse[1], patendseglen))) {
-	if (isset(EXTENDEDGLOB) &&
-	    ((!isset(SHGLOB) &&
-	      (*patparse == Inpar && patparse[1] == Pound)) ||
-	     (isset(KSHGLOB) && *patparse == '@' && patparse[1] == Inpar &&
-	      patparse[2] == Pound))) {
+    while (!memchr(zpc_special, *patparse, ZPC_SEG_COUNT) ||
+	   (*patparse == zpc_special[ZPC_TILDE] && patparse[1] != '/' &&
+	    memchr(zpc_special, patparse[1], ZPC_SEG_COUNT))) {
+	if ((*patparse == zpc_special[ZPC_INPAR] &&
+	     patparse[1] == zpc_special[ZPC_HASH]) ||
+	    (*patparse == zpc_special[ZPC_KSH_AT] && patparse[1] == Inpar &&
+	     patparse[2] == zpc_special[ZPC_HASH])) {
 	    /* Globbing flags. */
 	    char *pp1 = patparse;
 	    int oldglobflags = patglobflags, ignore;
@@ -910,7 +929,7 @@ patcompbranch(int *flagp)
 		break;
 	    else
 		continue;
-	} else if (isset(EXTENDEDGLOB) && *patparse == Hat) {
+	} else if (*patparse == zpc_special[ZPC_HAT]) {
 	    /*
 	     * ^pat:  anything but pat.  For proper backtracking,
 	     * etc., we turn this into (*~pat), except without the
@@ -1171,7 +1190,7 @@ patcomppiece(int *flagp)
 {
     long starter = 0, next, op, opnd;
     int flags, flags2, kshchar, len, ch, patch, nmeta;
-    int pound, count;
+    int hash, count;
     union upat up;
     char *nptr, *str0, *ptr, *patprev;
     zrange_t from, to;
@@ -1185,11 +1204,17 @@ patcomppiece(int *flagp)
 	 * the string doesn't introduce a ksh-like parenthesized expression.
 	 */
 	kshchar = '\0';
-	if (isset(KSHGLOB) && *patparse && patparse[1] == Inpar) {
-	    if (strchr("?*+!@", *patparse))
-		kshchar = STOUC(*patparse);
-	    else if (*patparse == Star || *patparse == Quest)
-		kshchar = STOUC(ztokens[*patparse - Pound]);
+	if (*patparse && patparse[1] == Inpar) {
+	    if (*patparse == zpc_special[ZPC_KSH_PLUS])
+		kshchar = STOUC('+');
+	    else if (*patparse == zpc_special[ZPC_KSH_BANG])
+		kshchar = STOUC('!');
+	    else if (*patparse == zpc_special[ZPC_KSH_AT])
+		kshchar = STOUC('@');
+	    else if (*patparse == zpc_special[ZPC_KSH_STAR])
+		kshchar = STOUC('*');
+	    else if (*patparse == zpc_special[ZPC_KSH_QUEST])
+		kshchar = STOUC('?');
 	}
 
 	/*
@@ -1199,10 +1224,10 @@ patcomppiece(int *flagp)
 	 * tildes are not special if there is nothing following to
 	 * be excluded.
 	 */
-	if (kshchar || (memchr(patendstr, *patparse, patendstrlen) &&
-			(*patparse != Tilde ||
+	if (kshchar || (memchr(zpc_special, *patparse, ZPC_COUNT) &&
+			(*patparse != zpc_special[ZPC_TILDE] ||
 			 patparse[1] == '/' ||
-			 !memchr(patendseg, patparse[1], patendseglen))))
+			 !memchr(zpc_special, patparse[1], ZPC_SEG_COUNT))))
 	    break;
 
 	/* Remember the previous character for backtracking */
@@ -1227,10 +1252,14 @@ patcomppiece(int *flagp)
 	 * If we have more than one character, a following hash
 	 * or (#c...) only applies to the last, so backtrack one character.
 	 */
-	if (isset(EXTENDEDGLOB) &&
-	    (*patparse == Pound ||
-	     (*patparse == Inpar && patparse[1] == Pound &&
-	      patparse[2] == 'c')) && morelen)
+	if ((*patparse == zpc_special[ZPC_HASH] ||
+	     (*patparse == zpc_special[ZPC_INPAR] &&
+	      patparse[1] == zpc_special[ZPC_HASH] &&
+	      patparse[2] == 'c') ||
+	     (*patparse == zpc_special[ZPC_KSH_AT] &&
+	      patparse[1] == Inpar &&
+	      patparse[2] == zpc_special[ZPC_HASH] &&
+	      patparse[3] == 'c')) && morelen)
 	    patparse = patprev;
 	/*
 	 * If len is 1, we can't have an active # following, so doesn't
@@ -1306,15 +1335,21 @@ patcomppiece(int *flagp)
 	METACHARINC(patparse);
 	switch(patch) {
 	case Quest:
+	    DPUTS(zpc_special[ZPC_QUEST] == Marker,
+		  "Treating '?' as pattern character although disabled");
 	    flags |= P_SIMPLE;
 	    starter = patnode(P_ANY);
 	    break;
 	case Star:
+	    DPUTS(zpc_special[ZPC_STAR] == Marker,
+		  "Treating '*' as pattern character although disabled");
 	    /* kshchar is used as a sign that we can't have #'s. */
 	    kshchar = -1;
 	    starter = patnode(P_STAR);
 	    break;
 	case Inbrack:
+	    DPUTS(zpc_special[ZPC_INBRACK] == Marker,
+		  "Treating '[' as pattern character although disabled");
 	    flags |= P_SIMPLE;
 	    if (*patparse == Hat || *patparse == '^' || *patparse == '!') {
 		patparse++;
@@ -1368,9 +1403,10 @@ patcomppiece(int *flagp)
 	    patadd(NULL, 0, 1, 0);
 	    break;
 	case Inpar:
-	    /* is this how to treat parentheses in SHGLOB? */
-	    if (isset(SHGLOB) && !kshchar)
-		return 0;
+	    DPUTS(zpc_special[ZPC_INPAR] == Marker,
+		  "Treating '(' as pattern character although disabled");
+	    DPUTS(isset(SHGLOB) && !kshchar,
+		  "Treating bare '(' as pattern character with SHGLOB");
 	    if (kshchar == '!') {
 		/* This is nasty, we should really either handle all
 		 * kshglobbing below or here.  But most of the
@@ -1393,6 +1429,9 @@ patcomppiece(int *flagp)
 	    break;
 	case Inang:
 	    /* Numeric glob */
+	    DPUTS(zpc_special[ZPC_INANG] == Marker,
+		  "Treating '<' as pattern character although disabled");
+	    DPUTS(isset(SHGLOB), "Treating <..> as numeric range with SHGLOB");
 	    len = 0;		/* beginning present 1, end present 2 */
 	    if (idigit(*patparse)) {
 		from = (zrange_t) zstrtol((char *)patparse,
@@ -1435,6 +1474,8 @@ patcomppiece(int *flagp)
 	     */
 	    break;
 	case Pound:
+	    DPUTS(zpc_special[ZPC_HASH] == Marker,
+		  "Treating '#' as pattern character although disabled");
 	    DPUTS(!isset(EXTENDEDGLOB), "BUG: # not treated as string");
 	    /*
 	     * A hash here is an error; it should follow something
@@ -1465,16 +1506,21 @@ patcomppiece(int *flagp)
     }
 
     count = 0;
-    if (!(pound = (*patparse == Pound && isset(EXTENDEDGLOB))) &&
-	!(count = (isset(EXTENDEDGLOB) && *patparse == Inpar &&
-		   patparse[1] == Pound && patparse[2] == 'c')) &&
+    if (!(hash = (*patparse == zpc_special[ZPC_HASH])) &&
+	!(count = ((*patparse == zpc_special[ZPC_INPAR] &&
+		    patparse[1] == zpc_special[ZPC_HASH] &&
+		    patparse[2] == 'c') ||
+		   (*patparse == zpc_special[ZPC_KSH_AT] &&
+		    patparse[1] == Inpar &&
+		    patparse[2] == zpc_special[ZPC_HASH] &&
+		    patparse[3] == 'c'))) &&
 	(kshchar <= 0 || kshchar == '@' || kshchar == '!')) {
 	*flagp = flags;
 	return starter;
     }
 
     /* too much at once doesn't currently work */
-    if (kshchar && (pound || count))
+    if (kshchar && (hash || count))
 	return 0;
 
     if (kshchar == '*') {
@@ -1490,7 +1536,7 @@ patcomppiece(int *flagp)
 	op = P_COUNT;
 	patparse += 3;
 	*flagp = P_HSTART;
-    } else if (*++patparse == Pound) {
+    } else if (*++patparse == zpc_special[ZPC_HASH]) {
 	op = P_TWOHASH;
 	patparse++;
 	*flagp = P_HSTART;
@@ -1600,7 +1646,7 @@ patcomppiece(int *flagp)
 	pattail(starter, next);
 	patoptail(starter, next);
     }
-    if (*patparse == Pound)
+    if (*patparse == zpc_special[ZPC_HASH])
 	return 0;
 
     return starter;