diff options
author | Peter Stephenson <pws@zsh.org> | 2017-03-07 10:43:58 +0000 |
---|---|---|
committer | Peter Stephenson <pws@zsh.org> | 2017-03-07 10:43:58 +0000 |
commit | f3f8537cfa05414ad14494e809d9ebfeef86ebbc (patch) | |
tree | cfb02314dd129609ef3d6fc85ce75fc63b8c9582 /Src | |
parent | a8345a40b1a79bb3a5c524ccf5fedf78040ae40e (diff) | |
download | zsh-f3f8537cfa05414ad14494e809d9ebfeef86ebbc.tar.gz zsh-f3f8537cfa05414ad14494e809d9ebfeef86ebbc.tar.xz zsh-f3f8537cfa05414ad14494e809d9ebfeef86ebbc.zip |
40760: Always tokenize unquoted - to Dash.
This fixes the use of pattern-match character ranges in unusual contexts. Attempt to detect a tokenized - (Dash) as well as a literal one in places where the distinction between the two doesn't matter.
Diffstat (limited to 'Src')
-rw-r--r-- | Src/cond.c | 8 | ||||
-rw-r--r-- | Src/exec.c | 11 | ||||
-rw-r--r-- | Src/glob.c | 23 | ||||
-rw-r--r-- | Src/lex.c | 16 | ||||
-rw-r--r-- | Src/math.c | 7 | ||||
-rw-r--r-- | Src/parse.c | 41 | ||||
-rw-r--r-- | Src/pattern.c | 2 | ||||
-rw-r--r-- | Src/subst.c | 41 | ||||
-rw-r--r-- | Src/utils.c | 10 | ||||
-rw-r--r-- | Src/zsh.h | 10 |
10 files changed, 102 insertions, 67 deletions
diff --git a/Src/cond.c b/Src/cond.c index 8ab019307..9b739f6c1 100644 --- a/Src/cond.c +++ b/Src/cond.c @@ -138,13 +138,13 @@ evalcond(Estate state, char *fromtest) strs = arrdup(sbuf); l = 2; } - if (name && name[0] == '-') + if (name && IS_DASH(name[0])) errname = name; - else if (strs[0] && *strs[0] == '-') + else if (strs[0] && IS_DASH(*strs[0])) errname = strs[0]; else errname = "<null>"; - if (name && name[0] == '-' && + if (name && IS_DASH(name[0]) && (cd = getconddef((ctype == COND_MODI), name + 1, 1))) { if (ctype == COND_MOD && (l < cd->min || (cd->max >= 0 && l > cd->max))) { @@ -171,7 +171,7 @@ evalcond(Estate state, char *fromtest) strs[0] = dupstring(name); name = s; - if (name && name[0] == '-' && + if (name && IS_DASH(name[0]) && (cd = getconddef(0, name + 1, 1))) { if (l < cd->min || (cd->max >= 0 && l > cd->max)) { zwarnnam(fromtest, "unknown condition: %s", diff --git a/Src/exec.c b/Src/exec.c index 6af4ddbf3..8b3224652 100644 --- a/Src/exec.c +++ b/Src/exec.c @@ -2779,9 +2779,10 @@ execcmd_exec(Estate state, Execcmd_params eparams, char *argdata = (char *) getdata(argnode); char *cmdopt; int has_p = 0, has_vV = 0, has_other = 0; - while (*argdata == '-') { + while (IS_DASH(*argdata)) { /* Just to be definite, stop on single "-", too, */ - if (!argdata[1] || (argdata[1] == '-' && !argdata[2])) + if (!argdata[1] || + (IS_DASH(argdata[1]) && !argdata[2])) break; for (cmdopt = argdata+1; *cmdopt; cmdopt++) { switch (*cmdopt) { @@ -2835,7 +2836,7 @@ execcmd_exec(Estate state, Execcmd_params eparams, * as if this is command [non-option-stuff]. This * isn't a good place for standard option handling. */ - if (!strcmp(argdata, "--")) + if (IS_DASH(argdata[0]) && IS_DASH(argdata[1]) && !argdata[2]) uremnode(args, firstnode(args)); } if ((cflags & BINF_EXEC) && nextnode(firstnode(args))) { @@ -2855,7 +2856,7 @@ execcmd_exec(Estate state, Execcmd_params eparams, * people aren't likely to mix the option style * with the zsh style. 
*/ - while (next && *next == '-' && strlen(next) >= 2) { + while (next && IS_DASH(*next) && strlen(next) >= 2) { if (!firstnode(args)) { zerr("exec requires a command to execute"); lastval = 1; @@ -2863,7 +2864,7 @@ execcmd_exec(Estate state, Execcmd_params eparams, goto done; } uremnode(args, firstnode(args)); - if (!strcmp(next, "--")) + if (IS_DASH(next[0]) && IS_DASH(next[1]) && !next[2]) break; for (cmdopt = &next[1]; *cmdopt; ++cmdopt) { switch (*cmdopt) { diff --git a/Src/glob.c b/Src/glob.c index ff6b2583b..87127e15f 100644 --- a/Src/glob.c +++ b/Src/glob.c @@ -1314,6 +1314,7 @@ zglob(LinkList list, LinkNode np, int nountok) sense ^= 1; break; case '-': + case Dash: /* Toggle matching of symbolic links */ sense ^= 2; break; @@ -1608,7 +1609,7 @@ zglob(LinkList list, LinkNode np, int nountok) ++s; } /* See if it's greater than, equal to, or less than */ - if ((g_range = *s == '+' ? 1 : *s == '-' ? -1 : 0)) + if ((g_range = *s == '+' ? 1 : IS_DASH(*s) ? -1 : 0)) ++s; data = qgetnum(&s); break; @@ -2025,13 +2026,13 @@ hasbraces(char *str) if (bracechardots(str-1, NULL, NULL)) return 1; lbr = str - 1; - if (*str == '-') + if (IS_DASH(*str)) str++; while (idigit(*str)) str++; if (*str == '.' && str[1] == '.') { str++; str++; - if (*str == '-') + if (IS_DASH(*str)) str++; while (idigit(*str)) str++; @@ -2040,7 +2041,7 @@ hasbraces(char *str) return 1; else if (*str == '.' && str[1] == '.') { str++; str++; - if (*str == '-') + if (IS_DASH(*str)) str++; while (idigit(*str)) str++; @@ -2123,7 +2124,7 @@ xpandredir(struct redir *fn, LinkList redirtab) fn->name = s; untokenize(s); if (fn->type == REDIR_MERGEIN || fn->type == REDIR_MERGEOUT) { - if (s[0] == '-' && !s[1]) + if (IS_DASH(s[0]) && !s[1]) fn->type = REDIR_CLOSE; else if (s[0] == 'p' && !s[1]) fn->fd2 = -2; @@ -2329,12 +2330,14 @@ xpandbraces(LinkList list, LinkNode *np) * str+1 is the first number in the range, dots+2 the last, * and dots2+2 is the increment if that's given. 
*/ /* TODO: sorry about this */ - int minw = (str[1] == '0' || (str[1] == '-' && str[2] == '0')) + int minw = (str[1] == '0' || + (IS_DASH(str[1]) && str[2] == '0')) ? wid1 - : (dots[2] == '0' || (dots[2] == '-' && dots[3] == '0')) + : (dots[2] == '0' || + (IS_DASH(dots[2]) && dots[3] == '0')) ? wid2 : (dots2 && (dots2[2] == '0' || - (dots2[2] == '-' && dots2[3] == '0'))) + (IS_DASH(dots2[2]) && dots2[3] == '0'))) ? wid3 : 0; if (rincr < 0) { @@ -2392,7 +2395,7 @@ xpandbraces(LinkList list, LinkNode *np) c2 = ztokens[c2 - STOUC(Pound)]; if ((char) c2 == Meta) c2 = 32 ^ p[1]; - if (c1 == '-' && lastch >= 0 && p < str2 && lastch <= (int)c2) { + if (IS_DASH(c1) && lastch >= 0 && p < str2 && lastch <= (int)c2) { while (lastch < (int)c2) ccl[lastch++] = 1; lastch = -1; @@ -3528,7 +3531,7 @@ zshtokenize(char *s, int flags) } t = s; while (idigit(*++s)); - if (*s != '-') + if (!IS_DASH(*s)) goto cont; while (idigit(*++s)); if (*s != '>') diff --git a/Src/lex.c b/Src/lex.c index 889612825..59e9d1472 100644 --- a/Src/lex.c +++ b/Src/lex.c @@ -1359,17 +1359,13 @@ gettokstr(int c, int sub) case LX2_DASH: /* * - shouldn't be treated as a special character unless - * we're in a pattern. Howeve,simply counting "[" doesn't - * work as []a-z] is a valid expression and we don't know - * down here what this "[" is for as $foo[stuff] is valid - * in zsh. So just detect an opening [, which is enough - * to turn this into a pattern; the Dash will be harmlessly - * untokenised if not wanted. + * we're in a pattern. Unfortunately, working out for + * sure in complicated expressions whether we're in a + * pattern is tricky. So we'll make it special and + * turn it back any time we don't need it special. + * This is not ideal as it's a lot of work. 
*/ - if (seen_brct) - c = Dash; - else - c = '-'; + c = Dash; break; case LX2_BANG: /* diff --git a/Src/math.c b/Src/math.c index f19c0ed61..f9613001a 100644 --- a/Src/math.c +++ b/Src/math.c @@ -463,7 +463,7 @@ lexconstant(void) char *nptr; nptr = ptr; - if (*nptr == '-') + if (IS_DASH(*nptr)) nptr++; if (*nptr == '0') { @@ -527,7 +527,7 @@ lexconstant(void) } if (*nptr == 'e' || *nptr == 'E') { nptr++; - if (*nptr == '+' || *nptr == '-') + if (*nptr == '+' || IS_DASH(*nptr)) nptr++; while (idigit(*nptr) || *nptr == '_') nptr++; @@ -599,7 +599,8 @@ zzlex(void) } return (unary) ? UPLUS : PLUS; case '-': - if (*ptr == '-') { + case Dash: + if (IS_DASH(*ptr)) { ptr++; return (unary) ? PREMINUS : POSTMINUS; } diff --git a/Src/parse.c b/Src/parse.c index 699ea49a2..6fe283dcb 100644 --- a/Src/parse.c +++ b/Src/parse.c @@ -2317,6 +2317,19 @@ par_cond_1(void) } /* + * Return 1 if condition matches. This also works for non-elided options. + * + * input is test string, may begin - or Dash. + * cond is condition following the -. + */ +static int check_cond(const char *input, const char *cond) +{ + if (!IS_DASH(input[0])) + return 0; + return !strcmp(input + 1, cond); +} + +/* * cond_2 : BANG cond_2 | INPAR { SEPER } cond_2 { SEPER } OUTPAR | STRING STRING STRING @@ -2342,7 +2355,7 @@ par_cond_2(void) s1 = tokstr; condlex(); /* ksh behavior: [ -t ] means [ -t 1 ]; bash disagrees */ - if (unset(POSIXBUILTINS) && !strcmp(s1, "-t")) + if (unset(POSIXBUILTINS) && check_cond(s1, "t")) return par_cond_double(s1, dupstring("1")); return par_cond_double(dupstring("-n"), s1); } @@ -2352,7 +2365,7 @@ par_cond_2(void) if (!strcmp(*testargs, "=") || !strcmp(*testargs, "==") || !strcmp(*testargs, "!=") || - (**testargs == '-' && get_cond_num(*testargs + 1) >= 0)) { + (IS_DASH(**testargs) && get_cond_num(*testargs + 1) >= 0)) { s1 = tokstr; condlex(); s2 = tokstr; @@ -2374,8 +2387,8 @@ par_cond_2(void) * In "test" compatibility mode, "! -a ..." and "! -o ..." 
* are treated as "[string] [and] ..." and "[string] [or] ...". */ - if (!(n_testargs > 1 && - (!strcmp(*testargs, "-a") || !strcmp(*testargs, "-o")))) + if (!(n_testargs > 1 && (check_cond(*testargs, "a") || + check_cond(*testargs, "o")))) { condlex(); ecadd(WCB_COND(COND_NOT, 0)); @@ -2397,7 +2410,7 @@ par_cond_2(void) return r; } s1 = tokstr; - dble = (s1 && *s1 == '-' + dble = (s1 && IS_DASH(*s1) && (!n_testargs || strspn(s1+1, "abcdefghknoprstuvwxzLONGS") == 1) && !s1[2]); @@ -2411,7 +2424,7 @@ par_cond_2(void) YYERROR(ecused); } condlex(); - if (n_testargs == 2 && tok != STRING && tokstr && s1[0] == '-') { + if (n_testargs == 2 && tok != STRING && tokstr && IS_DASH(s1[0])) { /* * Something like "test -z" followed by a token. * We'll turn the token into a string (we've also @@ -2446,9 +2459,9 @@ par_cond_2(void) } else YYERROR(ecused); } - s2 = tokstr; + s2 = tokstr; if (!n_testargs) - dble = (s2 && *s2 == '-' && !s2[2]); + dble = (s2 && IS_DASH(*s2) && !s2[2]); incond++; /* parentheses do globbing */ do condlex(); while (COND_SEP()); incond--; /* parentheses do grouping */ @@ -2476,7 +2489,7 @@ par_cond_2(void) static int par_cond_double(char *a, char *b) { - if (a[0] != '-' || !a[1]) + if (!IS_DASH(a[0]) || !a[1]) COND_ERROR("parse error: condition expected: %s", a); else if (!a[2] && strspn(a+1, "abcdefgknoprstuvwxzhLONGS") == 1) { ecadd(WCB_COND(a[1], 0)); @@ -2534,7 +2547,7 @@ par_cond_triple(char *a, char *b, char *c) ecadd(WCB_COND(COND_REGEX, 0)); ecstr(a); ecstr(c); - } else if (b[0] == '-') { + } else if (IS_DASH(b[0])) { if ((t0 = get_cond_num(b + 1)) > -1) { ecadd(WCB_COND(t0 + COND_NT, 0)); ecstr(a); @@ -2545,7 +2558,7 @@ par_cond_triple(char *a, char *b, char *c) ecstr(a); ecstr(c); } - } else if (a[0] == '-' && a[1]) { + } else if (IS_DASH(a[0]) && a[1]) { ecadd(WCB_COND(COND_MOD, 2)); ecstr(a); ecstr(b); @@ -2560,7 +2573,7 @@ par_cond_triple(char *a, char *b, char *c) static int par_cond_multi(char *a, LinkList l) { - if (a[0] != '-' || !a[1]) + 
if (!IS_DASH(a[0]) || !a[1]) COND_ERROR("condition expected: %s", a); else { LinkNode n; @@ -3256,10 +3269,10 @@ build_dump(char *nam, char *dump, char **files, int ali, int map, int flags) for (hlen = FD_PRELEN, tlen = 0; *files; files++) { struct stat st; - if (!strcmp(*files, "-k")) { + if (check_cond(*files, "k")) { flags = (flags & ~(FDHF_KSHLOAD | FDHF_ZSHLOAD)) | FDHF_KSHLOAD; continue; - } else if (!strcmp(*files, "-z")) { + } else if (check_cond(*files, "z")) { flags = (flags & ~(FDHF_KSHLOAD | FDHF_ZSHLOAD)) | FDHF_ZSHLOAD; continue; } diff --git a/Src/pattern.c b/Src/pattern.c index 928790f45..75db01634 100644 --- a/Src/pattern.c +++ b/Src/pattern.c @@ -1521,7 +1521,7 @@ patcomppiece(int *flagp, int paren) patparse = nptr; len |= 1; } - DPUTS(*patparse != '-', "BUG: - missing from numeric glob"); + DPUTS(!IS_DASH(*patparse), "BUG: - missing from numeric glob"); patparse++; if (idigit(*patparse)) { to = (zrange_t) zstrtol((char *)patparse, diff --git a/Src/subst.c b/Src/subst.c index 02dbe2864..2214b3d4f 100644 --- a/Src/subst.c +++ b/Src/subst.c @@ -481,6 +481,8 @@ multsub(char **s, int pf_flags, char ***a, int *isarr, char *sep, for ( ; *x; x += l) { int rawc = -1; convchar_t c; + if (*x == Dash) + *x = '-'; if (itok(STOUC(*x))) { /* token, can't be separator, must be single byte */ rawc = *x; @@ -1766,7 +1768,8 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags, */ c = *s; if (itype_end(s, IIDENT, 1) == s && *s != '#' && c != Pound && - c != '-' && c != '!' && c != '$' && c != String && c != Qstring && + !IS_DASH(c) && + c != '!' && c != '$' && c != String && c != Qstring && c != '?' 
&& c != Quest && c != '*' && c != Star && c != '@' && c != '{' && c != Inbrace && c != '=' && c != Equals && c != Hat && @@ -1895,13 +1898,13 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags, if (quotetype == QT_DOLLARS || quotetype == QT_BACKSLASH_PATTERN) goto flagerr; - if (s[1] == '-' || s[1] == '+') { + if (IS_DASH(s[1]) || s[1] == '+') { if (quotemod) goto flagerr; s++; quotemod = 1; - quotetype = (*s == '-') ? QT_SINGLE_OPTIONAL : - QT_QUOTEDZPUTS; + quotetype = (*s == '+') ? QT_QUOTEDZPUTS : + QT_SINGLE_OPTIONAL; } else { if (quotetype == QT_SINGLE_OPTIONAL) { /* extra q's after '-' not allowed */ @@ -2208,9 +2211,9 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags, * properly in the first place we wouldn't * have this nonsense. */ - || ((cc == '#' || cc == Pound) && - s[2] == Outbrace) - || cc == '-' || (cc == ':' && s[2] == '-') + || ((cc == '#' || cc == Pound) && s[2] == Outbrace) + || IS_DASH(cc) + || (cc == ':' && IS_DASH(s[2])) || (isstring(cc) && (s[2] == Inbrace || s[2] == Inpar)))) { getlen = 1 + whichlen, s++; /* @@ -2605,14 +2608,17 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags, * Again, this duplicates tests for characters we're about to * examine properly later on. */ - if (inbrace && - (c = *s) != '-' && c != '+' && c != ':' && c != '%' && c != '/' && - c != '=' && c != Equals && - c != '#' && c != Pound && - c != '?' && c != Quest && - c != '}' && c != Outbrace) { - zerr("bad substitution"); - return NULL; + if (inbrace) { + c = *s; + if (!IS_DASH(c) && + c != '+' && c != ':' && c != '%' && c != '/' && + c != '=' && c != Equals && + c != '#' && c != Pound && + c != '?' && c != Quest && + c != '}' && c != Outbrace) { + zerr("bad substitution"); + return NULL; + } } /* * Join arrays up if we're in quotes and there isn't some @@ -2690,8 +2696,8 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags, /* Check for ${..?..} or ${..=..} or one of those. 
* * Only works if the name is in braces. */ - if (inbrace && ((c = *s) == '-' || - c == '+' || + if (inbrace && ((c = *s) == '+' || + IS_DASH(c) || c == ':' || /* i.e. a doubled colon */ c == '=' || c == Equals || c == '%' || @@ -2802,6 +2808,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags, vunset = 1; /* Fall Through! */ case '-': + case Dash: if (vunset) { int split_flags; val = dupstring(s); diff --git a/Src/utils.c b/Src/utils.c index 7f3ddad40..9669944f6 100644 --- a/Src/utils.c +++ b/Src/utils.c @@ -2376,7 +2376,7 @@ zstrtol_underscore(const char *s, char **t, int base, int underscore) while (inblank(*s)) s++; - if ((neg = (*s == '-'))) + if ((neg = IS_DASH(*s))) s++; else if (*s == '+') s++; @@ -6118,7 +6118,9 @@ quotedzputs(char const *s, FILE *stream) } else *ptr++ = '\''; while(*s) { - if (*s == Meta) + if (*s == Dash) + c = '-'; + else if (*s == Meta) c = *++s ^ 32; else c = *s; @@ -6155,7 +6157,9 @@ quotedzputs(char const *s, FILE *stream) } else { /* use Bourne-style quoting, avoiding empty quoted strings */ while (*s) { - if (*s == Meta) + if (*s == Dash) + c = '-'; + else if (*s == Meta) c = *++s ^ 32; else c = *s; diff --git a/Src/zsh.h b/Src/zsh.h index f2c279002..10931512d 100644 --- a/Src/zsh.h +++ b/Src/zsh.h @@ -238,6 +238,16 @@ struct mathfunc { #define PATCHARS "#^*()|[]<>?~\\" /* + * Check for a possibly tokenized dash. + * + * A dash only needs to be a token in a character range, [a-z], but + * it's difficult in general to ensure that. So it's turned into + * a token at the usual point in the lexer. However, we need + * to check for a literal dash at many points. + */ +#define IS_DASH(x) ((x) == '-' || (x) == Dash) + +/* * Types of quote. This is used in various places, so care needs * to be taken when changing them. (Oooh, don't you look surprised.) * - Passed to quotestring() to indicate style. This is the ultimate |