From 68d0d76db55c0b8778f0b68d3eda54060b576c41 Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Sat, 1 Jun 2013 20:39:09 +0100 Subject: 31441: use array to decide which forms of pattern are enabled --- Src/glob.c | 20 +++--- Src/pattern.c | 208 +++++++++++++++++++++++++++++++++++----------------------- Src/zsh.h | 40 ++++++++++- 3 files changed, 177 insertions(+), 91 deletions(-) (limited to 'Src') diff --git a/Src/glob.c b/Src/glob.c index ca2ffaf51..db86d2468 100644 --- a/Src/glob.c +++ b/Src/glob.c @@ -708,8 +708,9 @@ parsecomplist(char *instr) } /* Parse repeated directories such as (dir/)# and (dir/)## */ - if (*(str = instr) == Inpar && !skipparens(Inpar, Outpar, (char **)&str) && - *str == Pound && isset(EXTENDEDGLOB) && str[-2] == '/') { + if (*(str = instr) == zpc_special[ZPC_INPAR] && + !skipparens(Inpar, Outpar, (char **)&str) && + *str == zpc_special[ZPC_HASH] && str[-2] == '/') { instr++; if (!(p1 = patcompile(instr, compflags, &instr))) return NULL; @@ -761,9 +762,9 @@ parsepat(char *str) * Check for initial globbing flags, so that they don't form * a bogus path component. */ - if ((*str == Inpar && str[1] == Pound && isset(EXTENDEDGLOB)) || - (isset(KSHGLOB) && *str == '@' && str[1] == Inpar && - str[2] == Pound)) { + if ((*str == zpc_special[ZPC_INPAR] && str[1] == zpc_special[ZPC_HASH]) || + (*str == zpc_special[ZPC_KSH_AT] && str[1] == Inpar && + str[2] == zpc_special[ZPC_HASH])) { str += (*str == Inpar) ? 
2 : 3; if (!patgetglobflags(&str, &assert, &ignore)) return NULL; @@ -1146,7 +1147,7 @@ zglob(LinkList list, LinkNode np, int nountok) gf_pre_words = NULL; /* Check for qualifiers */ - while (!nobareglob || isset(EXTENDEDGLOB)) { + while (!nobareglob || zpc_special[ZPC_HASH] != Marker) { struct qual *newquals; char *s; int sense, paren; @@ -1192,10 +1193,11 @@ zglob(LinkList list, LinkNode np, int nountok) case Outpar: paren++; /*FALLTHROUGH*/ case Bar: - nobareglob = 1; + if (zpc_special[ZPC_BAR] != Marker) + nobareglob = 1; break; case Tilde: - if (isset(EXTENDEDGLOB)) + if (zpc_special[ZPC_TILDE] != Marker) nobareglob = 1; break; case Inpar: @@ -1205,7 +1207,7 @@ zglob(LinkList list, LinkNode np, int nountok) } if (*s != Inpar) break; - if (isset(EXTENDEDGLOB) && s[1] == Pound) { + if (s[1] == zpc_special[ZPC_HASH]) { if (s[2] == 'q') { *s = 0; s += 2; diff --git a/Src/pattern.c b/Src/pattern.c index 3b6edb850..54d6e7cb3 100644 --- a/Src/pattern.c +++ b/Src/pattern.c @@ -225,34 +225,27 @@ typedef unsigned long zrange_t; #endif /* - * Characters which terminate a pattern segment. We actually use - * a pointer patendseg which skips the first character if we are not - * parsing a file pattern. - * Note that the size of this and the next array are hard-wired - * via the definitions. + * Array of characters corresponding to zpc_chars enum, which it must match. 
*/ - -static char endseg[] = { - '/', /* file only */ - '\0', Bar, Outpar, /* all patterns */ - Tilde /* extended glob only */ +static const char zpc_chars[ZPC_COUNT] = { + '/', '\0', Bar, Outpar, Tilde, Inpar, Quest, Star, Inbrack, Inang, + Hat, Pound, Bnullkeep, Quest, Star, '+', '!', '@' }; -#define PATENDSEGLEN_NORM 4 -#define PATENDSEGLEN_EXT 5 - -/* Characters which terminate a simple string */ - -static char endstr[] = { - '/', /* file only */ - '\0', Bar, Outpar, Quest, Star, Inbrack, Inpar, Inang, Bnullkeep, - /* all patterns */ - Tilde, Hat, Pound /* extended glob only */ -}; - -#define PATENDSTRLEN_NORM 10 -#define PATENDSTRLEN_EXT 13 +/* + * Characters which terminate a simple string (ZPC_COUNT) or + * an entire pattern segment (the first ZPC_SEG_COUNT). + * Each entry is either the corresponding character in zpc_chars + * or Marker which is guaranteed not to match a character in a + * pattern we are compiling. + * + * The complete list indicates characters that are special, so e.g. + * (testchar == special[ZPC_TILDE]) succeeds only if testchar is a Tilde + * *and* Tilde is currently special. + */ +/**/ +char zpc_special[ZPC_COUNT]; /* Default size for pattern buffer */ #define P_DEF_ALLOC 256 @@ -264,10 +257,6 @@ static char *patcode; /* point of code emission */ static long patsize; /* size of code */ static char *patout; /* start of code emission string */ static long patalloc; /* size allocated for same */ -static char *patendseg; /* characters ending segment */ -static int patendseglen; /* length of same */ -static char *patendstr; /* characters ending plain string */ -static int patendstrlen; /* length of sameo */ /* Flags used in both compilation and execution */ static int patflags; /* flags passed down to patcompile */ @@ -417,12 +406,56 @@ static long rn_offs; (P_OP(p) == P_BACK) ? \ ((p)-rn_offs) : ((p)+rn_offs) : NULL) +/* + * Set up zpc_special with characters that end a string segment. 
+ * "Marker" cannot occur in the pattern we are compiling so + * is used to mark "invalid". + */ +static void +patcompcharsset(void) +{ + memcpy(zpc_special, zpc_chars, ZPC_COUNT); + if (!isset(EXTENDEDGLOB)) { + /* Extended glob characters are not active */ + zpc_special[ZPC_TILDE] = zpc_special[ZPC_HAT] = + zpc_special[ZPC_HASH] = Marker; + } + if (!isset(KSHGLOB)) { + /* + * Ksh glob characters are not active. + * * and ? are shared with normal globbing, but for their + * use here we are looking for a following Inpar. + */ + zpc_special[ZPC_KSH_QUEST] = zpc_special[ZPC_KSH_STAR] = + zpc_special[ZPC_KSH_PLUS] = zpc_special[ZPC_KSH_BANG] = + zpc_special[ZPC_KSH_AT] = Marker; + } + /* + * Note that if we are using KSHGLOB, then we test for a following + * Inpar, not zpc_special[ZPC_INPAR]: the latter makes an Inpar on + * its own active. The zpc_special[ZPC_KSH_*] followed by any old Inpar + * discriminate ksh globbing. + */ + if (isset(SHGLOB)) { + /* + * Grouping and numeric ranges are not valid. + * We do allow alternation, however; it's needed for + * "case". This may not be entirely consistent. + * + * Don't disable Outpar: we may need to match the end of KSHGLOB + * parentheses and it would be difficult to tell them apart. + */ + zpc_special[ZPC_INPAR] = zpc_special[ZPC_INANG] = Marker; + } +} + /* Called before parsing a set of file matchs to initialize flags */ /**/ void patcompstart(void) { + patcompcharsset(); if (isset(CASEGLOB)) patglobflags = 0; else @@ -469,16 +502,9 @@ patcompile(char *exp, int inflags, char **endexp) patnpar = 1; patflags = inflags & ~(PAT_PURES|PAT_HAS_EXCLUDP); - patendseg = endseg; - patendseglen = isset(EXTENDEDGLOB) ? PATENDSEGLEN_EXT : PATENDSEGLEN_NORM; - patendstr = endstr; - patendstrlen = isset(EXTENDEDGLOB) ? 
PATENDSTRLEN_EXT : PATENDSTRLEN_NORM; - if (!(patflags & PAT_FILE)) { - patendseg++; - patendstr++; - patendseglen--; - patendstrlen--; + patcompcharsset(); + zpc_special[ZPC_SLASH] = Marker; remnulargs(patparse); if (isset(MULTIBYTE)) patglobflags = GF_MULTIBYTE; @@ -698,11 +724,11 @@ patcompswitch(int paren, int *flagp) *flagp |= flags & (P_HSTART|P_PURESTR); - while (*patparse == Bar || - (isset(EXTENDEDGLOB) && *patparse == Tilde && + while (*patparse == zpc_chars[ZPC_BAR] || + (*patparse == zpc_special[ZPC_TILDE] && (patparse[1] == '/' || - !memchr(patendseg, patparse[1], patendseglen)))) { - int tilde = *patparse++ == Tilde; + !memchr(zpc_special, patparse[1], ZPC_SEG_COUNT)))) { + int tilde = *patparse++ == zpc_special[ZPC_TILDE]; long gfnode = 0, newbr; *flagp &= ~P_PURESTR; @@ -739,12 +765,9 @@ patcompswitch(int paren, int *flagp) up.p = NULL; patadd((char *)&up, 0, sizeof(up), 0); /* / is not treated as special if we are at top level */ - if (!paren && *patendseg == '/') { + if (!paren && zpc_special[ZPC_SLASH] == '/') { tilde++; - patendseg++; - patendseglen--; - patendstr++; - patendstrlen--; + zpc_special[ZPC_SLASH] = Marker; } } else { excsync = 0; @@ -784,10 +807,7 @@ patcompswitch(int paren, int *flagp) newbr = patcompbranch(&flags); if (tilde == 2) { /* restore special treatment of / */ - patendseg--; - patendseglen++; - patendstr--; - patendstrlen++; + zpc_special[ZPC_SLASH] = '/'; } if (!newbr) return 0; @@ -855,14 +875,13 @@ patcompbranch(int *flagp) *flagp = P_PURESTR; starter = chain = 0; - while (!memchr(patendseg, *patparse, patendseglen) || - (*patparse == Tilde && patparse[1] != '/' && - memchr(patendseg, patparse[1], patendseglen))) { - if (isset(EXTENDEDGLOB) && - ((!isset(SHGLOB) && - (*patparse == Inpar && patparse[1] == Pound)) || - (isset(KSHGLOB) && *patparse == '@' && patparse[1] == Inpar && - patparse[2] == Pound))) { + while (!memchr(zpc_special, *patparse, ZPC_SEG_COUNT) || + (*patparse == zpc_special[ZPC_TILDE] && patparse[1] 
!= '/' && + memchr(zpc_special, patparse[1], ZPC_SEG_COUNT))) { + if ((*patparse == zpc_special[ZPC_INPAR] && + patparse[1] == zpc_special[ZPC_HASH]) || + (*patparse == zpc_special[ZPC_KSH_AT] && patparse[1] == Inpar && + patparse[2] == zpc_special[ZPC_HASH])) { /* Globbing flags. */ char *pp1 = patparse; int oldglobflags = patglobflags, ignore; @@ -910,7 +929,7 @@ patcompbranch(int *flagp) break; else continue; - } else if (isset(EXTENDEDGLOB) && *patparse == Hat) { + } else if (*patparse == zpc_special[ZPC_HAT]) { /* * ^pat: anything but pat. For proper backtracking, * etc., we turn this into (*~pat), except without the @@ -1171,7 +1190,7 @@ patcomppiece(int *flagp) { long starter = 0, next, op, opnd; int flags, flags2, kshchar, len, ch, patch, nmeta; - int pound, count; + int hash, count; union upat up; char *nptr, *str0, *ptr, *patprev; zrange_t from, to; @@ -1185,11 +1204,17 @@ patcomppiece(int *flagp) * the string doesn't introduce a ksh-like parenthesized expression. */ kshchar = '\0'; - if (isset(KSHGLOB) && *patparse && patparse[1] == Inpar) { - if (strchr("?*+!@", *patparse)) - kshchar = STOUC(*patparse); - else if (*patparse == Star || *patparse == Quest) - kshchar = STOUC(ztokens[*patparse - Pound]); + if (*patparse && patparse[1] == Inpar) { + if (*patparse == zpc_special[ZPC_KSH_PLUS]) + kshchar = STOUC('+'); + else if (*patparse == zpc_special[ZPC_KSH_BANG]) + kshchar = STOUC('!'); + else if (*patparse == zpc_special[ZPC_KSH_AT]) + kshchar = STOUC('@'); + else if (*patparse == zpc_special[ZPC_KSH_STAR]) + kshchar = STOUC('*'); + else if (*patparse == zpc_special[ZPC_KSH_QUEST]) + kshchar = STOUC('?'); } /* @@ -1199,10 +1224,10 @@ patcomppiece(int *flagp) * tildes are not special if there is nothing following to * be excluded. 
*/ - if (kshchar || (memchr(patendstr, *patparse, patendstrlen) && - (*patparse != Tilde || + if (kshchar || (memchr(zpc_special, *patparse, ZPC_COUNT) && + (*patparse != zpc_special[ZPC_TILDE] || patparse[1] == '/' || - !memchr(patendseg, patparse[1], patendseglen)))) + !memchr(zpc_special, patparse[1], ZPC_SEG_COUNT)))) break; /* Remember the previous character for backtracking */ @@ -1227,10 +1252,14 @@ patcomppiece(int *flagp) * If we have more than one character, a following hash * or (#c...) only applies to the last, so backtrack one character. */ - if (isset(EXTENDEDGLOB) && - (*patparse == Pound || - (*patparse == Inpar && patparse[1] == Pound && - patparse[2] == 'c')) && morelen) + if ((*patparse == zpc_special[ZPC_HASH] || + (*patparse == zpc_special[ZPC_INPAR] && + patparse[1] == zpc_special[ZPC_HASH] && + patparse[2] == 'c') || + (*patparse == zpc_special[ZPC_KSH_AT] && + patparse[1] == Inpar && + patparse[2] == zpc_special[ZPC_HASH] && + patparse[3] == 'c')) && morelen) patparse = patprev; /* * If len is 1, we can't have an active # following, so doesn't @@ -1306,15 +1335,21 @@ patcomppiece(int *flagp) METACHARINC(patparse); switch(patch) { case Quest: + DPUTS(zpc_special[ZPC_QUEST] == Marker, + "Treating '?' as pattern character although disabled"); flags |= P_SIMPLE; starter = patnode(P_ANY); break; case Star: + DPUTS(zpc_special[ZPC_STAR] == Marker, + "Treating '*' as pattern character although disabled"); /* kshchar is used as a sign that we can't have #'s. */ kshchar = -1; starter = patnode(P_STAR); break; case Inbrack: + DPUTS(zpc_special[ZPC_INBRACK] == Marker, + "Treating '[' as pattern character although disabled"); flags |= P_SIMPLE; if (*patparse == Hat || *patparse == '^' || *patparse == '!') { patparse++; @@ -1368,9 +1403,10 @@ patcomppiece(int *flagp) patadd(NULL, 0, 1, 0); break; case Inpar: - /* is this how to treat parentheses in SHGLOB? 
*/ - if (isset(SHGLOB) && !kshchar) - return 0; + DPUTS(zpc_special[ZPC_INPAR] == Marker, + "Treating '(' as pattern character although disabled"); + DPUTS(isset(SHGLOB) && !kshchar, + "Treating bare '(' as pattern character with SHGLOB"); if (kshchar == '!') { /* This is nasty, we should really either handle all * kshglobbing below or here. But most of the @@ -1393,6 +1429,9 @@ patcomppiece(int *flagp) break; case Inang: /* Numeric glob */ + DPUTS(zpc_special[ZPC_INANG] == Marker, + "Treating '<' as pattern character although disabled"); + DPUTS(isset(SHGLOB), "Treating <..> as numeric range with SHGLOB"); len = 0; /* beginning present 1, end present 2 */ if (idigit(*patparse)) { from = (zrange_t) zstrtol((char *)patparse, @@ -1435,6 +1474,8 @@ patcomppiece(int *flagp) */ break; case Pound: + DPUTS(zpc_special[ZPC_HASH] == Marker, + "Treating '#' as pattern character although disabled"); DPUTS(!isset(EXTENDEDGLOB), "BUG: # not treated as string"); /* * A hash here is an error; it should follow something @@ -1465,16 +1506,21 @@ patcomppiece(int *flagp) } count = 0; - if (!(pound = (*patparse == Pound && isset(EXTENDEDGLOB))) && - !(count = (isset(EXTENDEDGLOB) && *patparse == Inpar && - patparse[1] == Pound && patparse[2] == 'c')) && + if (!(hash = (*patparse == zpc_special[ZPC_HASH])) && + !(count = ((*patparse == zpc_special[ZPC_INPAR] && + patparse[1] == zpc_special[ZPC_HASH] && + patparse[2] == 'c') || + (*patparse == zpc_special[ZPC_KSH_AT] && + patparse[1] == Inpar && + patparse[2] == zpc_special[ZPC_HASH] && + patparse[3] == 'c'))) && (kshchar <= 0 || kshchar == '@' || kshchar == '!')) { *flagp = flags; return starter; } /* too much at once doesn't currently work */ - if (kshchar && (pound || count)) + if (kshchar && (hash || count)) return 0; if (kshchar == '*') { @@ -1490,7 +1536,7 @@ patcomppiece(int *flagp) op = P_COUNT; patparse += 3; *flagp = P_HSTART; - } else if (*++patparse == Pound) { + } else if (*++patparse == zpc_special[ZPC_HASH]) { op = 
P_TWOHASH; patparse++; *flagp = P_HSTART; @@ -1600,7 +1646,7 @@ patcomppiece(int *flagp) pattail(starter, next); patoptail(starter, next); } - if (*patparse == Pound) + if (*patparse == zpc_special[ZPC_HASH]) return 0; return starter; diff --git a/Src/zsh.h b/Src/zsh.h index f247563d4..639c2b746 100644 --- a/Src/zsh.h +++ b/Src/zsh.h @@ -179,7 +179,11 @@ struct mathfunc { * Take care to update the use of IMETA appropriately when adding * tokens here. */ -/* Marker used in paramsubst for rc_expand_param */ +/* + * Marker used in paramsubst for rc_expand_param. + * Also used in pattern character arrays as guaranteed not to + * mark a character in a string. + */ #define Marker ((char) 0xa0) /* chars that need to be quoted if meant literally */ @@ -1375,6 +1379,40 @@ struct patprog { #define PAT_HAS_EXCLUDP 0x0800 /* (internal): top-level path1~path2. */ #define PAT_LCMATCHUC 0x1000 /* equivalent to setting (#l) */ +/** + * Indexes into the array of active pattern characters. + * This must match the array zpc_chars in pattern.c. + */ +enum zpc_chars { + /* + * These characters both terminate a pattern segment and + * a pure string segment. + */ + ZPC_SLASH, /* / active as file separator */ + ZPC_NULL, /* \0 as string terminator */ + ZPC_BAR, /* | for "or" */ + ZPC_OUTPAR, /* ) for grouping */ + ZPC_TILDE, /* ~ for exclusion (extended glob) */ + ZPC_SEG_COUNT, /* No. of the above characters */ + /* + * These characters terminate a pure string segment. + */ + ZPC_INPAR = ZPC_SEG_COUNT, /* ( for grouping */ + ZPC_QUEST, /* ? as wildcard */ + ZPC_STAR, /* * as wildcard */ + ZPC_INBRACK, /* [ for character class */ + ZPC_INANG, /* < for numeric glob */ + ZPC_HAT, /* ^ for exclusion (extended glob) */ + ZPC_HASH, /* # for repetition (extended glob) */ + ZPC_BNULLKEEP, /* Special backslashed null not removed */ + ZPC_KSH_QUEST, /* ? for ?(...) in KSH_GLOB */ + ZPC_KSH_STAR, /* * for *(...) in KSH_GLOB */ + ZPC_KSH_PLUS, /* + for +(...) in KSH_GLOB */ + ZPC_KSH_BANG, /* ! 
for !(...) in KSH_GLOB */ ZPC_KSH_AT, /* @ for @(...) in KSH_GLOB */ ZPC_COUNT /* Number of special characters */ +}; + /* * Special match types used in character classes. These * are represented as tokens, with Meta added. The character -- cgit 1.4.1