From 68d0d76db55c0b8778f0b68d3eda54060b576c41 Mon Sep 17 00:00:00 2001
From: Peter Stephenson
Date: Sat, 1 Jun 2013 20:39:09 +0100
Subject: 31441: use array to decide which forms of pattern are enabled
---
Src/glob.c | 20 +++---
Src/pattern.c | 208 +++++++++++++++++++++++++++++++++++-----------------------
Src/zsh.h | 40 ++++++++++-
3 files changed, 177 insertions(+), 91 deletions(-)
(limited to 'Src')
diff --git a/Src/glob.c b/Src/glob.c
index ca2ffaf51..db86d2468 100644
--- a/Src/glob.c
+++ b/Src/glob.c
@@ -708,8 +708,9 @@ parsecomplist(char *instr)
}
/* Parse repeated directories such as (dir/)# and (dir/)## */
- if (*(str = instr) == Inpar && !skipparens(Inpar, Outpar, (char **)&str) &&
- *str == Pound && isset(EXTENDEDGLOB) && str[-2] == '/') {
+ if (*(str = instr) == zpc_special[ZPC_INPAR] &&
+ !skipparens(Inpar, Outpar, (char **)&str) &&
+ *str == zpc_special[ZPC_HASH] && str[-2] == '/') {
instr++;
if (!(p1 = patcompile(instr, compflags, &instr)))
return NULL;
@@ -761,9 +762,9 @@ parsepat(char *str)
* Check for initial globbing flags, so that they don't form
* a bogus path component.
*/
- if ((*str == Inpar && str[1] == Pound && isset(EXTENDEDGLOB)) ||
- (isset(KSHGLOB) && *str == '@' && str[1] == Inpar &&
- str[2] == Pound)) {
+ if ((*str == zpc_special[ZPC_INPAR] && str[1] == zpc_special[ZPC_HASH]) ||
+ (*str == zpc_special[ZPC_KSH_AT] && str[1] == Inpar &&
+ str[2] == zpc_special[ZPC_HASH])) {
str += (*str == Inpar) ? 2 : 3;
if (!patgetglobflags(&str, &assert, &ignore))
return NULL;
@@ -1146,7 +1147,7 @@ zglob(LinkList list, LinkNode np, int nountok)
gf_pre_words = NULL;
/* Check for qualifiers */
- while (!nobareglob || isset(EXTENDEDGLOB)) {
+ while (!nobareglob || zpc_special[ZPC_HASH] != Marker) {
struct qual *newquals;
char *s;
int sense, paren;
@@ -1192,10 +1193,11 @@ zglob(LinkList list, LinkNode np, int nountok)
case Outpar:
paren++; /*FALLTHROUGH*/
case Bar:
- nobareglob = 1;
+ if (zpc_special[ZPC_BAR] != Marker)
+ nobareglob = 1;
break;
case Tilde:
- if (isset(EXTENDEDGLOB))
+ if (zpc_special[ZPC_TILDE] != Marker)
nobareglob = 1;
break;
case Inpar:
@@ -1205,7 +1207,7 @@ zglob(LinkList list, LinkNode np, int nountok)
}
if (*s != Inpar)
break;
- if (isset(EXTENDEDGLOB) && s[1] == Pound) {
+ if (s[1] == zpc_special[ZPC_HASH]) {
if (s[2] == 'q') {
*s = 0;
s += 2;
diff --git a/Src/pattern.c b/Src/pattern.c
index 3b6edb850..54d6e7cb3 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -225,34 +225,27 @@ typedef unsigned long zrange_t;
#endif
/*
- * Characters which terminate a pattern segment. We actually use
- * a pointer patendseg which skips the first character if we are not
- * parsing a file pattern.
- * Note that the size of this and the next array are hard-wired
- * via the definitions.
+ * Array of characters corresponding to zpc_chars enum, which it must match.
*/
-
-static char endseg[] = {
- '/', /* file only */
- '\0', Bar, Outpar, /* all patterns */
- Tilde /* extended glob only */
+static const char zpc_chars[ZPC_COUNT] = { /* in enum zpc_chars order */
+ '/', '\0', Bar, Outpar, Tilde, Inpar, Quest, Star, Inbrack, Inang,
+ Hat, Pound, Bnullkeep, Quest, Star, '+', '!', '@' /* last five: ZPC_KSH_* */
+};
-#define PATENDSEGLEN_NORM 4
-#define PATENDSEGLEN_EXT 5
-
-/* Characters which terminate a simple string */
-
-static char endstr[] = {
- '/', /* file only */
- '\0', Bar, Outpar, Quest, Star, Inbrack, Inpar, Inang, Bnullkeep,
- /* all patterns */
- Tilde, Hat, Pound /* extended glob only */
-};
-
-#define PATENDSTRLEN_NORM 10
-#define PATENDSTRLEN_EXT 13
+/*
+ * Characters which terminate a simple string (all ZPC_COUNT entries)
+ * or an entire pattern segment (the first ZPC_SEG_COUNT entries).
+ * Each entry is either the corresponding character in zpc_chars
+ * or Marker, which is guaranteed not to match a character in a
+ * pattern we are compiling.
+ *
+ * The complete list indicates characters that are special, so e.g.
+ * (testchar == zpc_special[ZPC_TILDE]) succeeds only if testchar is
+ * a Tilde *and* Tilde is currently special.
+ */
+/**/
+char zpc_special[ZPC_COUNT];
/* Default size for pattern buffer */
#define P_DEF_ALLOC 256
@@ -264,10 +257,6 @@ static char *patcode; /* point of code emission */
static long patsize; /* size of code */
static char *patout; /* start of code emission string */
static long patalloc; /* size allocated for same */
-static char *patendseg; /* characters ending segment */
-static int patendseglen; /* length of same */
-static char *patendstr; /* characters ending plain string */
-static int patendstrlen; /* length of sameo */
/* Flags used in both compilation and execution */
static int patflags; /* flags passed down to patcompile */
@@ -417,12 +406,56 @@ static long rn_offs;
(P_OP(p) == P_BACK) ? \
((p)-rn_offs) : ((p)+rn_offs) : NULL)
+/*
+ * Reset zpc_special from zpc_chars, then overwrite with Marker every
+ * character left inactive by EXTENDEDGLOB, KSHGLOB and SHGLOB.
+ * "Marker" cannot occur in the pattern we are compiling so never matches.
+ */
+static void
+patcompcharsset(void)
+{
+ memcpy(zpc_special, zpc_chars, ZPC_COUNT);
+ if (!isset(EXTENDEDGLOB)) {
+ /* Extended glob characters (~, ^ and #) are not active */
+ zpc_special[ZPC_TILDE] = zpc_special[ZPC_HAT] =
+ zpc_special[ZPC_HASH] = Marker;
+ }
+ if (!isset(KSHGLOB)) {
+ /*
+ * Ksh glob characters are not active.
+ * * and ? are shared with normal globbing, but for their
+ * use here we are looking for a following Inpar.
+ */
+ zpc_special[ZPC_KSH_QUEST] = zpc_special[ZPC_KSH_STAR] =
+ zpc_special[ZPC_KSH_PLUS] = zpc_special[ZPC_KSH_BANG] =
+ zpc_special[ZPC_KSH_AT] = Marker;
+ }
+ /*
+ * Note that if we are using KSHGLOB, then we test for a following
+ * Inpar, not zpc_special[ZPC_INPAR]: the latter says whether an Inpar
+ * on its own is active. A zpc_special[ZPC_KSH_*] character followed
+ * by any old Inpar is what identifies ksh globbing.
+ */
+ if (isset(SHGLOB)) {
+ /*
+ * Grouping and numeric ranges are not valid.
+ * We do allow alternation, however; it's needed for
+ * "case". This may not be entirely consistent.
+ *
+ * Don't disable Outpar: we may need to match the end of KSHGLOB
+ * parentheses and it would be difficult to tell them apart.
+ */
+ zpc_special[ZPC_INPAR] = zpc_special[ZPC_INANG] = Marker;
+ }
+}
+
/* Called before parsing a set of file matchs to initialize flags */
/**/
void
patcompstart(void)
{
+ patcompcharsset();
if (isset(CASEGLOB))
patglobflags = 0;
else
@@ -469,16 +502,9 @@ patcompile(char *exp, int inflags, char **endexp)
patnpar = 1;
patflags = inflags & ~(PAT_PURES|PAT_HAS_EXCLUDP);
- patendseg = endseg;
- patendseglen = isset(EXTENDEDGLOB) ? PATENDSEGLEN_EXT : PATENDSEGLEN_NORM;
- patendstr = endstr;
- patendstrlen = isset(EXTENDEDGLOB) ? PATENDSTRLEN_EXT : PATENDSTRLEN_NORM;
-
if (!(patflags & PAT_FILE)) {
- patendseg++;
- patendstr++;
- patendseglen--;
- patendstrlen--;
+ patcompcharsset();
+ zpc_special[ZPC_SLASH] = Marker;
remnulargs(patparse);
if (isset(MULTIBYTE))
patglobflags = GF_MULTIBYTE;
@@ -698,11 +724,11 @@ patcompswitch(int paren, int *flagp)
*flagp |= flags & (P_HSTART|P_PURESTR);
- while (*patparse == Bar ||
- (isset(EXTENDEDGLOB) && *patparse == Tilde &&
+ while (*patparse == zpc_chars[ZPC_BAR] ||
+ (*patparse == zpc_special[ZPC_TILDE] &&
(patparse[1] == '/' ||
- !memchr(patendseg, patparse[1], patendseglen)))) {
- int tilde = *patparse++ == Tilde;
+ !memchr(zpc_special, patparse[1], ZPC_SEG_COUNT)))) {
+ int tilde = *patparse++ == zpc_special[ZPC_TILDE];
long gfnode = 0, newbr;
*flagp &= ~P_PURESTR;
@@ -739,12 +765,9 @@ patcompswitch(int paren, int *flagp)
up.p = NULL;
patadd((char *)&up, 0, sizeof(up), 0);
/* / is not treated as special if we are at top level */
- if (!paren && *patendseg == '/') {
+ if (!paren && zpc_special[ZPC_SLASH] == '/') {
tilde++;
- patendseg++;
- patendseglen--;
- patendstr++;
- patendstrlen--;
+ zpc_special[ZPC_SLASH] = Marker;
}
} else {
excsync = 0;
@@ -784,10 +807,7 @@ patcompswitch(int paren, int *flagp)
newbr = patcompbranch(&flags);
if (tilde == 2) {
/* restore special treatment of / */
- patendseg--;
- patendseglen++;
- patendstr--;
- patendstrlen++;
+ zpc_special[ZPC_SLASH] = '/';
}
if (!newbr)
return 0;
@@ -855,14 +875,13 @@ patcompbranch(int *flagp)
*flagp = P_PURESTR;
starter = chain = 0;
- while (!memchr(patendseg, *patparse, patendseglen) ||
- (*patparse == Tilde && patparse[1] != '/' &&
- memchr(patendseg, patparse[1], patendseglen))) {
- if (isset(EXTENDEDGLOB) &&
- ((!isset(SHGLOB) &&
- (*patparse == Inpar && patparse[1] == Pound)) ||
- (isset(KSHGLOB) && *patparse == '@' && patparse[1] == Inpar &&
- patparse[2] == Pound))) {
+ while (!memchr(zpc_special, *patparse, ZPC_SEG_COUNT) ||
+ (*patparse == zpc_special[ZPC_TILDE] && patparse[1] != '/' &&
+ memchr(zpc_special, patparse[1], ZPC_SEG_COUNT))) {
+ if ((*patparse == zpc_special[ZPC_INPAR] &&
+ patparse[1] == zpc_special[ZPC_HASH]) ||
+ (*patparse == zpc_special[ZPC_KSH_AT] && patparse[1] == Inpar &&
+ patparse[2] == zpc_special[ZPC_HASH])) {
/* Globbing flags. */
char *pp1 = patparse;
int oldglobflags = patglobflags, ignore;
@@ -910,7 +929,7 @@ patcompbranch(int *flagp)
break;
else
continue;
- } else if (isset(EXTENDEDGLOB) && *patparse == Hat) {
+ } else if (*patparse == zpc_special[ZPC_HAT]) {
/*
* ^pat: anything but pat. For proper backtracking,
* etc., we turn this into (*~pat), except without the
@@ -1171,7 +1190,7 @@ patcomppiece(int *flagp)
{
long starter = 0, next, op, opnd;
int flags, flags2, kshchar, len, ch, patch, nmeta;
- int pound, count;
+ int hash, count;
union upat up;
char *nptr, *str0, *ptr, *patprev;
zrange_t from, to;
@@ -1185,11 +1204,17 @@ patcomppiece(int *flagp)
* the string doesn't introduce a ksh-like parenthesized expression.
*/
kshchar = '\0';
- if (isset(KSHGLOB) && *patparse && patparse[1] == Inpar) {
- if (strchr("?*+!@", *patparse))
- kshchar = STOUC(*patparse);
- else if (*patparse == Star || *patparse == Quest)
- kshchar = STOUC(ztokens[*patparse - Pound]);
+ if (*patparse && patparse[1] == Inpar) {
+ if (*patparse == zpc_special[ZPC_KSH_PLUS])
+ kshchar = STOUC('+');
+ else if (*patparse == zpc_special[ZPC_KSH_BANG])
+ kshchar = STOUC('!');
+ else if (*patparse == zpc_special[ZPC_KSH_AT])
+ kshchar = STOUC('@');
+ else if (*patparse == zpc_special[ZPC_KSH_STAR])
+ kshchar = STOUC('*');
+ else if (*patparse == zpc_special[ZPC_KSH_QUEST])
+ kshchar = STOUC('?');
}
/*
@@ -1199,10 +1224,10 @@ patcomppiece(int *flagp)
* tildes are not special if there is nothing following to
* be excluded.
*/
- if (kshchar || (memchr(patendstr, *patparse, patendstrlen) &&
- (*patparse != Tilde ||
+ if (kshchar || (memchr(zpc_special, *patparse, ZPC_COUNT) &&
+ (*patparse != zpc_special[ZPC_TILDE] ||
patparse[1] == '/' ||
- !memchr(patendseg, patparse[1], patendseglen))))
+ !memchr(zpc_special, patparse[1], ZPC_SEG_COUNT))))
break;
/* Remember the previous character for backtracking */
@@ -1227,10 +1252,14 @@ patcomppiece(int *flagp)
* If we have more than one character, a following hash
* or (#c...) only applies to the last, so backtrack one character.
*/
- if (isset(EXTENDEDGLOB) &&
- (*patparse == Pound ||
- (*patparse == Inpar && patparse[1] == Pound &&
- patparse[2] == 'c')) && morelen)
+ if ((*patparse == zpc_special[ZPC_HASH] ||
+ (*patparse == zpc_special[ZPC_INPAR] &&
+ patparse[1] == zpc_special[ZPC_HASH] &&
+ patparse[2] == 'c') ||
+ (*patparse == zpc_special[ZPC_KSH_AT] &&
+ patparse[1] == Inpar &&
+ patparse[2] == zpc_special[ZPC_HASH] &&
+ patparse[3] == 'c')) && morelen)
patparse = patprev;
/*
* If len is 1, we can't have an active # following, so doesn't
@@ -1306,15 +1335,21 @@ patcomppiece(int *flagp)
METACHARINC(patparse);
switch(patch) {
case Quest:
+ DPUTS(zpc_special[ZPC_QUEST] == Marker,
+ "Treating '?' as pattern character although disabled");
flags |= P_SIMPLE;
starter = patnode(P_ANY);
break;
case Star:
+ DPUTS(zpc_special[ZPC_STAR] == Marker,
+ "Treating '*' as pattern character although disabled");
/* kshchar is used as a sign that we can't have #'s. */
kshchar = -1;
starter = patnode(P_STAR);
break;
case Inbrack:
+ DPUTS(zpc_special[ZPC_INBRACK] == Marker,
+ "Treating '[' as pattern character although disabled");
flags |= P_SIMPLE;
if (*patparse == Hat || *patparse == '^' || *patparse == '!') {
patparse++;
@@ -1368,9 +1403,10 @@ patcomppiece(int *flagp)
patadd(NULL, 0, 1, 0);
break;
case Inpar:
- /* is this how to treat parentheses in SHGLOB? */
- if (isset(SHGLOB) && !kshchar)
- return 0;
+ DPUTS(zpc_special[ZPC_INPAR] == Marker,
+ "Treating '(' as pattern character although disabled");
+ DPUTS(isset(SHGLOB) && !kshchar,
+ "Treating bare '(' as pattern character with SHGLOB");
if (kshchar == '!') {
/* This is nasty, we should really either handle all
* kshglobbing below or here. But most of the
@@ -1393,6 +1429,9 @@ patcomppiece(int *flagp)
break;
case Inang:
/* Numeric glob */
+ DPUTS(zpc_special[ZPC_INANG] == Marker,
+ "Treating '<' as pattern character although disabled");
+ DPUTS(isset(SHGLOB), "Treating <..> as numeric range with SHGLOB");
len = 0; /* beginning present 1, end present 2 */
if (idigit(*patparse)) {
from = (zrange_t) zstrtol((char *)patparse,
@@ -1435,6 +1474,8 @@ patcomppiece(int *flagp)
*/
break;
case Pound:
+ DPUTS(zpc_special[ZPC_HASH] == Marker,
+ "Treating '#' as pattern character although disabled");
DPUTS(!isset(EXTENDEDGLOB), "BUG: # not treated as string");
/*
* A hash here is an error; it should follow something
@@ -1465,16 +1506,21 @@ patcomppiece(int *flagp)
}
count = 0;
- if (!(pound = (*patparse == Pound && isset(EXTENDEDGLOB))) &&
- !(count = (isset(EXTENDEDGLOB) && *patparse == Inpar &&
- patparse[1] == Pound && patparse[2] == 'c')) &&
+ if (!(hash = (*patparse == zpc_special[ZPC_HASH])) &&
+ !(count = ((*patparse == zpc_special[ZPC_INPAR] &&
+ patparse[1] == zpc_special[ZPC_HASH] &&
+ patparse[2] == 'c') ||
+ (*patparse == zpc_special[ZPC_KSH_AT] &&
+ patparse[1] == Inpar &&
+ patparse[2] == zpc_special[ZPC_HASH] &&
+ patparse[3] == 'c'))) &&
(kshchar <= 0 || kshchar == '@' || kshchar == '!')) {
*flagp = flags;
return starter;
}
/* too much at once doesn't currently work */
- if (kshchar && (pound || count))
+ if (kshchar && (hash || count))
return 0;
if (kshchar == '*') {
@@ -1490,7 +1536,7 @@ patcomppiece(int *flagp)
op = P_COUNT;
patparse += 3;
*flagp = P_HSTART;
- } else if (*++patparse == Pound) {
+ } else if (*++patparse == zpc_special[ZPC_HASH]) {
op = P_TWOHASH;
patparse++;
*flagp = P_HSTART;
@@ -1600,7 +1646,7 @@ patcomppiece(int *flagp)
pattail(starter, next);
patoptail(starter, next);
}
- if (*patparse == Pound)
+ if (*patparse == zpc_special[ZPC_HASH])
return 0;
return starter;
diff --git a/Src/zsh.h b/Src/zsh.h
index f247563d4..639c2b746 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -179,7 +179,11 @@ struct mathfunc {
* Take care to update the use of IMETA appropriately when adding
* tokens here.
*/
-/* Marker used in paramsubst for rc_expand_param */
+/*
+ * Marker used in paramsubst for rc_expand_param.
+ * Also used in pattern character arrays, where it is guaranteed
+ * not to match any character in a string.
+ */
#define Marker ((char) 0xa0)
/* chars that need to be quoted if meant literally */
@@ -1375,6 +1379,40 @@ struct patprog {
#define PAT_HAS_EXCLUDP 0x0800 /* (internal): top-level path1~path2. */
#define PAT_LCMATCHUC 0x1000 /* equivalent to setting (#l) */
+/**
+ * Indexes into zpc_special, the array of active pattern characters.
+ * This must match the array zpc_chars in pattern.c.
+ */
+enum zpc_chars {
+ /*
+ * These characters both terminate a pattern segment and
+ * a pure string segment.
+ */
+ ZPC_SLASH, /* / active as file separator */
+ ZPC_NULL, /* \0 as string terminator */
+ ZPC_BAR, /* | for "or" */
+ ZPC_OUTPAR, /* ) for grouping */
+ ZPC_TILDE, /* ~ for exclusion (extended glob) */
+ ZPC_SEG_COUNT, /* No. of the above characters */
+ /*
+ * These characters terminate a pure string segment.
+ */
+ ZPC_INPAR = ZPC_SEG_COUNT, /* ( for grouping */
+ ZPC_QUEST, /* ? as wildcard */
+ ZPC_STAR, /* * as wildcard */
+ ZPC_INBRACK, /* [ for character class */
+ ZPC_INANG, /* < for numeric glob */
+ ZPC_HAT, /* ^ for exclusion (extended glob) */
+ ZPC_HASH, /* # for repetition (extended glob) */
+ ZPC_BNULLKEEP, /* Special backslashed null not removed */
+ ZPC_KSH_QUEST, /* ? for ?(...) in KSH_GLOB */
+ ZPC_KSH_STAR, /* * for *(...) in KSH_GLOB */
+ ZPC_KSH_PLUS, /* + for +(...) in KSH_GLOB */
+ ZPC_KSH_BANG, /* ! for !(...) in KSH_GLOB */
+ ZPC_KSH_AT, /* @ for @(...) in KSH_GLOB */
+ ZPC_COUNT /* Number of special characters */
+};
+
/*
* Special match types used in character classes. These
* are represented as tokens, with Meta added. The character
--
cgit 1.4.1