From f2f3b86a5f15500dfee707d27eee9784a1626e4d Mon Sep 17 00:00:00 2001 From: Paul Ackersviller Date: Mon, 25 Jun 2007 03:11:44 +0000 Subject: Merge of 21862/21863/21870: GLOB_SUBST shouldn't swallow up backslashes in parameter substitutions that don't match anything. --- Src/glob.c | 40 +- Src/lex.c | 304 +++++++++----- Src/pattern.c | 13 +- Src/subst.c | 1054 +++++++++++++++++++++++++++++++++++++++++++----- Src/zsh.h | 29 +- Test/D04parameter.ztst | 14 + Test/ztst.zsh | 184 +++++++-- 7 files changed, 1395 insertions(+), 243 deletions(-) diff --git a/Src/glob.c b/Src/glob.c index 93d5e3312..be2dcd5ec 100644 --- a/Src/glob.c +++ b/Src/glob.c @@ -2487,19 +2487,29 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr) mod_export void tokenize(char *s) { - zshtokenize(s, 0); + zshtokenize(s, 0, 0); } +/* + * shtokenize is used when we tokenize a string with GLOB_SUBST set. + * In that case we need to retain backslashes when we turn the + * pattern back into a string, so that the string is not + * modified if it failed to match a pattern. + * + * It may be modified by the effect of SH_GLOB which turns off + * various zsh-specific options. + */ + /**/ mod_export void shtokenize(char *s) { - zshtokenize(s, isset(SHGLOB)); + zshtokenize(s, 1, isset(SHGLOB)); } /**/ static void -zshtokenize(char *s, int shglob) +zshtokenize(char *s, int glbsbst, int shglob) { char *t; int bslash = 0; @@ -2508,9 +2518,10 @@ zshtokenize(char *s, int shglob) cont: switch (*s) { case Bnull: + case Bnullkeep: case '\\': if (bslash) { - s[-1] = Bnull; + s[-1] = glbsbst ? Bnullkeep : Bnull; break; } bslash = 1; @@ -2519,7 +2530,7 @@ zshtokenize(char *s, int shglob) if (shglob) break; if (bslash) { - s[-1] = Bnull; + s[-1] = glbsbst ? Bnullkeep : Bnull; break; } t = s; @@ -2549,7 +2560,7 @@ zshtokenize(char *s, int shglob) for (t = ztokens; *t; t++) if (*t == *s) { if (bslash) - s[-1] = Bnull; + s[-1] = glbsbst ? 
Bnullkeep : Bnull; else *s = (t - ztokens) + Pound; break; @@ -2569,12 +2580,23 @@ remnulargs(char *s) char *o = s, c; while ((c = *s++)) - if (INULL(c)) { + if (c == Bnullkeep) { + /* + * An active backslash that needs to be turned back into + * a real backslash for output. However, we don't + * do that yet since we need to ignore it during + * pattern matching. + */ + continue; + } else if (INULL(c)) { char *t = s - 1; - while ((c = *s++)) - if (!INULL(c)) + while ((c = *s++)) { + if (c == Bnullkeep) + *t++ = '\\'; + else if (!INULL(c)) *t++ = c; + } *t = '\0'; if (!*o) { o[0] = Nularg; diff --git a/Src/lex.c b/Src/lex.c index 147bea598..52b6885af 100644 --- a/Src/lex.c +++ b/Src/lex.c @@ -33,7 +33,7 @@ /* tokens */ /**/ -mod_export char ztokens[] = "#$^*()$=|{}[]`<>?~`,'\"\\"; +mod_export char ztokens[] = "#$^*()$=|{}[]`<>?~`,'\"\\\\"; /* parts of the current token */ @@ -44,7 +44,7 @@ mod_export char *tokstr; /**/ mod_export int tok; /**/ -int tokfd; +mod_export int tokfd; /* lexical analyzer error flag */ @@ -93,6 +93,11 @@ mod_export int inwhat; /**/ mod_export int addedx; +/* wb and we hold the beginning/end position of the word we are completing. 
*/ + +/**/ +mod_export int wb, we; + /* 1 if aliases should not be expanded */ /**/ @@ -111,8 +116,13 @@ mod_export int parbegin; /**/ mod_export int parend; + +/* don't recognize comments */ -/* text of puctuation tokens */ +/**/ +mod_export int nocomments; + +/* text of punctuation tokens */ /**/ mod_export char *tokstrings[WHILE + 1] = { @@ -167,7 +177,6 @@ struct lexstack { int isfirstch; int histactive; int histdone; - int spaceflag; int stophist; int hlinesz; char *hline; @@ -178,6 +187,7 @@ struct lexstack { char *yytext; char *bptr; int bsiz; + int len; short *chwords; int chwordlen; int chwordpos; @@ -222,7 +232,6 @@ lexsave(void) ls->isfirstch = isfirstch; ls->histactive = histactive; ls->histdone = histdone; - ls->spaceflag = spaceflag; ls->stophist = stophist; ls->hline = chline; ls->hptr = hptr; @@ -236,6 +245,7 @@ lexsave(void) ls->yytext = yytext; ls->bptr = bptr; ls->bsiz = bsiz; + ls->len = len; ls->chwords = chwords; ls->chwordlen = chwordlen; ls->chwordpos = chwordpos; @@ -260,6 +270,7 @@ lexsave(void) inredir = 0; hdocs = NULL; histactive = 0; + ecbuf = NULL; ls->next = lstack; lstack = ls; @@ -282,7 +293,6 @@ lexrestore(void) isfirstch = lstack->isfirstch; histactive = lstack->histactive; histdone = lstack->histdone; - spaceflag = lstack->spaceflag; stophist = lstack->stophist; chline = lstack->hline; hptr = lstack->hptr; @@ -296,6 +306,7 @@ lexrestore(void) yytext = lstack->yytext; bptr = lstack->bptr; bsiz = lstack->bsiz; + len = lstack->len; chwords = lstack->chwords; chwordlen = lstack->chwordlen; chwordpos = lstack->chwordpos; @@ -308,6 +319,8 @@ lexrestore(void) hwbegin = lstack->hwbegin; hwend = lstack->hwend; addtoline = lstack->addtoline; + if (ecbuf) + zfree(ecbuf, eclen); eclen = lstack->eclen; ecused = lstack->ecused; ecnpats = lstack->ecnpats; @@ -339,13 +352,13 @@ yylex(void) char *name; hwbegin(0); - cmdpush(hdocs->type == HEREDOC ? CS_HEREDOC : CS_HEREDOCD); + cmdpush(hdocs->type == REDIR_HEREDOC ? 
CS_HEREDOC : CS_HEREDOCD); STOPHIST name = gethere(hdocs->str, hdocs->type); ALLOWHIST cmdpop(); hwend(); - setheredoc(hdocs->pc, HERESTR, name); + setheredoc(hdocs->pc, REDIR_HERESTR, name); zfree(hdocs, sizeof(struct heredocs)); hdocs = next; } @@ -380,7 +393,7 @@ ctxtlex(void) case BAR: case BARAMP: case INOUTPAR: - case DO: + case DOLOOP: case THEN: case ELIF: case ELSE: @@ -569,6 +582,43 @@ cmd_or_math_sub(void) return skipcomm(); } +/* Check whether we're looking at valid numeric globbing syntax * + * (/\<[0-9]*-[0-9]*\>/). Call pointing just after the opening "<". * + * Leaves the input in the same place, returning 0 or 1. */ + +/**/ +static int +isnumglob(void) +{ + int c, ec = '-', ret = 0; + int tbs = 256, n = 0; + char *tbuf = (char *)zalloc(tbs); + + while(1) { + c = hgetc(); + if(lexstop) { + lexstop = 0; + break; + } + tbuf[n++] = c; + if(!idigit(c)) { + if(c != ec) + break; + if(ec == '>') { + ret = 1; + break; + } + ec = '>'; + } + if(n == tbs) + tbuf = (char *)realloc(tbuf, tbs *= 2); + } + while(n--) + hungetc(tbuf[n]); + zfree(tbuf, tbs); + return ret; +} + /**/ int gettok(void) @@ -605,7 +655,18 @@ gettok(void) return DOUTPAR; } else if (idigit(c)) { /* handle 1< foo */ d = hgetc(); - if (d == '>' || d == '<') { + if(d == '&') { + d = hgetc(); + if(d == '>') { + peekfd = c - '0'; + hungetc('>'); + c = '&'; + } else { + hungetc(d); + lexstop = 0; + hungetc('&'); + } + } else if (d == '>' || d == '<') { peekfd = c - '0'; c = d; } else { @@ -616,7 +677,7 @@ gettok(void) /* chars in initial position in word */ - if (c == hashchar && + if (c == hashchar && !nocomments && (isset(INTERACTIVECOMMENTS) || (!zleparse && !expanding && (!interact || unset(SHINSTDIN) || strin)))) { @@ -665,6 +726,7 @@ gettok(void) else if (d == '!' || d == '|') return AMPERBANG; else if (d == '>') { + tokfd = peekfd; d = hgetc(); if (d == '!' 
|| d == '|') return OUTANGAMPBANG; @@ -678,7 +740,6 @@ gettok(void) } hungetc(d); lexstop = 0; - tokfd = -1; return AMPOUTANG; } hungetc(d); @@ -719,41 +780,15 @@ gettok(void) if (!incmdpos && d == '(') { hungetc(d); lexstop = 0; + unpeekfd: + if(peekfd != -1) { + hungetc(c); + c = '0' + peekfd; + } break; } - if (d == '>') + if (d == '>') { peek = INOUTANG; - else if (idigit(d) || d == '-') { - int tbs = 256, n = 0, nc; - char *tbuf, *tbp, *ntb; - - tbuf = tbp = (char *)zalloc(tbs); - hungetc(d); - - while ((nc = hgetc()) && !lexstop) { - if (!idigit(nc) && nc != '-') - break; - *tbp++ = (char)nc; - if (++n == tbs) { - ntb = (char *)realloc(tbuf, tbs *= 2); - tbp += ntb - tbuf; - tbuf = ntb; - } - } - if (nc == '>' && !lexstop) { - hungetc(nc); - while (n--) - hungetc(*--tbp); - zfree(tbuf, tbs); - break; - } - if (nc && !lexstop) - hungetc(nc); - lexstop = 0; - while (n--) - hungetc(*--tbp); - zfree(tbuf, tbs); - peek = INANG; } else if (d == '<') { int e = hgetc(); @@ -770,12 +805,13 @@ gettok(void) lexstop = 0; peek = DINANG; } - } else if (d == '&') + } else if (d == '&') { peek = INANGAMP; - else { - peek = INANG; + } else { hungetc(d); - lexstop = 0; + if(isnumglob()) + goto unpeekfd; + peek = INANG; } tokfd = peekfd; return peek; @@ -783,7 +819,7 @@ gettok(void) d = hgetc(); if (d == '(') { hungetc(d); - break; + goto unpeekfd; } else if (d == '&') { d = hgetc(); if (d == '!' 
|| d == '|') @@ -957,8 +993,12 @@ gettokstr(int c, int sub) c = Outbrack; break; case LX2_INPAR: - if ((sub || in_brace_param) && isset(SHGLOB)) - break; + if (isset(SHGLOB)) { + if (sub || in_brace_param) + break; + if (incasepat && !len) + return INPAR; + } if (!in_brace_param) { if (!sub) { e = hgetc(); @@ -1056,29 +1096,27 @@ gettokstr(int c, int sub) if (isset(SHGLOB) && sub) break; e = hgetc(); - if (!(idigit(e) || e == '-' || (e == '(' && intpos))) { - hungetc(e); - lexstop = 0; - if (in_brace_param || sub) - break; - goto brk; - } - c = Inang; - if (e == '(') { - add(c); + if(e == '(' && intpos) { + add(Inang); if (skipcomm()) { peek = LEXERR; goto brk; } c = Outpar; - } else { - add(c); - c = e; - while (c != '>' && !lexstop) - add(c), c = hgetc(); + break; + } + hungetc(e); + if(isnumglob()) { + add(Inang); + while ((c = hgetc()) != '>') + add(c); c = Outang; + break; } - break; + lexstop = 0; + if (in_brace_param || sub) + break; + goto brk; case LX2_EQUALS: if (intpos) { e = hgetc(); @@ -1106,6 +1144,8 @@ gettokstr(int c, int sub) skipparens(Inbrack, Outbrack, &t); } } + if (*t == '+') + t++; if (t == bptr) { e = hgetc(); if (e == '(' && incmdpos) { @@ -1161,7 +1201,7 @@ gettokstr(int c, int sub) goto brk; } e = hgetc(); - if (e != '\'' || unset(RCQUOTES)) + if (e != '\'' || unset(RCQUOTES) || strquote) break; add(c); } @@ -1268,10 +1308,14 @@ dquote_parse(char endchar, int sub) c = hgetc(); if (c != '\n') { if (c == '$' || c == '\\' || (c == '}' && !intick && bct) || - c == endchar || c == '`') + c == endchar || c == '`' || + (endchar == ']' && (c == '[' || c == ']' || + c == '(' || c == ')' || + c == '{' || c == '}' || + (c == '"' && sub)))) add(Bnull); else { - /* lexstop is implicitely handled here */ + /* lexstop is implicitly handled here */ add('\\'); goto cont; } @@ -1353,11 +1397,13 @@ dquote_parse(char endchar, int sub) err = (!brct-- && math); break; case '"': - if (intick || (!endchar && !bct)) + if (intick || ((endchar == ']' || !endchar) 
&& !bct)) break; if (bct) { add(Dnull); + cmdpush(CS_DQUOTE); err = dquote_parse('"', sub); + cmdpop(); c = Dnull; } else err = 1; @@ -1389,6 +1435,22 @@ dquote_parse(char endchar, int sub) /**/ mod_export int parsestr(char *s) +{ + int err; + + if ((err = parsestrnoerr(s))) { + untokenize(s); + if (err > 32 && err < 127) + zerr("parse error near `%c'", NULL, err); + else + zerr("parse error", NULL, 0); + } + return err; +} + +/**/ +mod_export int +parsestrnoerr(char *s) { int l = strlen(s), err; @@ -1405,14 +1467,39 @@ parsestr(char *s) inpop(); DPUTS(cmdsp, "BUG: parsestr: cmdstack not empty."); lexrestore(); + return err; +} + +/**/ +mod_export char * +parse_subscript(char *s, int sub) +{ + int l = strlen(s), err; + char *t; + + if (!*s || *s == ']') + return 0; + lexsave(); + untokenize(t = dupstring(s)); + inpush(t, 0, NULL); + strinbeg(0); + len = 0; + bptr = tokstr = s; + bsiz = l + 1; + err = dquote_parse(']', sub); if (err) { + err = *bptr; + *bptr = 0; untokenize(s); - if (err > 32 && err < 127) - zerr("parse error near `%c'", NULL, err); - else - zerr("parse error", NULL, 0); - } - return err; + *bptr = err; + s = 0; + } else + s = bptr; + strinend(); + inpop(); + DPUTS(cmdsp, "BUG: parse_subscript: cmdstack not empty."); + lexrestore(); + return s; } /* Tokenize a string given in s. Parsing is done as if s were a normal * @@ -1420,12 +1507,12 @@ parsestr(char *s) * to parse the right-hand side of ${...%...} substitutions. */ /**/ -int +mod_export int parse_subst_string(char *s) { - int c, l = strlen(s), err; + int c, l = strlen(s), err, olen, lexstop_ret; - if (! 
*s) + if (!*s || !strcmp(s, nulstring)) return 0; lexsave(); untokenize(s); @@ -1435,11 +1522,13 @@ parse_subst_string(char *s) bptr = tokstr = s; bsiz = l + 1; c = hgetc(); + lexstop_ret = lexstop; c = gettokstr(c, 1); err = errflag; strinend(); inpop(); DPUTS(cmdsp, "BUG: parse_subst_string: cmdstack not empty."); + olen = len; lexrestore(); errflag = err; if (c == LEXERR) { @@ -1447,9 +1536,9 @@ parse_subst_string(char *s) return 1; } #ifdef DEBUG - if (c != STRING || len != l || errflag) { + if (c != STRING || olen != l || errflag) { fprintf(stderr, "Oops. Bug in parse_subst_string: %s\n", - len < l ? "len < l" : errflag ? "errflag" : "c != STRING"); + olen < l ? "len < l" : errflag ? "errflag" : "c != STRING"); fflush(stderr); untokenize(s); return 1; @@ -1458,6 +1547,19 @@ parse_subst_string(char *s) return 0; } +/* Called below to report word positions. */ + +/**/ +mod_export void +gotword(void) +{ + we = ll + 1 - inbufct + (addedx == 2 ? 1 : 0); + if (cs <= we) { + wb = ll - wordbeg + addedx; + zleparse = 0; + } +} + /* expand aliases and reserved words */ /**/ @@ -1502,18 +1604,32 @@ exalias(void) if (tok == STRING) { /* Check for an alias */ - an = noaliases ? 
NULL : - (Alias) aliastab->getnode(aliastab, yytext); - if (an && !an->inuse && ((an->flags & ALIAS_GLOBAL) || incmdpos || - inalmore)) { - inpush(an->text, INP_ALIAS, an); - /* remove from history if it begins with space */ - if (isset(HISTIGNORESPACE) && an->text[0] == ' ') - remhist(); - lexstop = 0; - if (yytext == copy) - yytext = tokstr; - return 1; + if (!noaliases && isset(ALIASESOPT)) { + char *suf; + + an = (Alias) aliastab->getnode(aliastab, yytext); + if (an && !an->inuse && + ((an->flags & ALIAS_GLOBAL) || incmdpos || inalmore)) { + inpush(an->text, INP_ALIAS, an); + if (an->text[0] == ' ') + aliasspaceflag = 1; + lexstop = 0; + if (yytext == copy) + yytext = tokstr; + return 1; + } + if ((suf = strrchr(yytext, '.')) && suf[1] && + suf > yytext && suf[-1] != Meta && + (an = (Alias)sufaliastab->getnode(sufaliastab, suf+1)) && + !an->inuse && incmdpos) { + inpush(dupstring(yytext), INP_ALIAS, NULL); + inpush(" ", INP_ALIAS, NULL); + inpush(an->text, INP_ALIAS, an); + lexstop = 0; + if (yytext == copy) + yytext = tokstr; + return 1; + } } /* Then check for a reserved word */ diff --git a/Src/pattern.c b/Src/pattern.c index 393d9bf41..d8bd9ef98 100644 --- a/Src/pattern.c +++ b/Src/pattern.c @@ -260,13 +260,13 @@ static char endseg[] = { static char endstr[] = { '/', /* file only */ - '\0', Bar, Outpar, Quest, Star, Inbrack, Inpar, Inang, + '\0', Bar, Outpar, Quest, Star, Inbrack, Inpar, Inang, Bnullkeep, /* all patterns */ Tilde, Hat, Pound /* extended glob only */ }; -#define PATENDSTRLEN_NORM 9 -#define PATENDSTRLEN_EXT 12 +#define PATENDSTRLEN_NORM 10 +#define PATENDSTRLEN_EXT 13 /* Default size for pattern buffer */ @@ -1240,6 +1240,13 @@ patcomppiece(int *flagp) */ return 0; break; + case Bnullkeep: + /* + * Marker for restoring a backslash in output: + * does not match a character. 
+ */ + return patcomppiece(flagp); + break; #ifdef DEBUG default: dputs("BUG: character not handled in patcomppiece"); diff --git a/Src/subst.c b/Src/subst.c index 408a9d406..67de61418 100644 --- a/Src/subst.c +++ b/Src/subst.c @@ -49,8 +49,10 @@ char nulstring[] = {Nularg, '\0'}; mod_export void prefork(LinkList list, int flags) { - LinkNode node; + LinkNode node, stop = 0; + int keep = 0, asssub = (flags & PF_TYPESET) && isset(KSHTYPESET); + queue_signals(); for (node = firstnode(list); node; incnode(node)) { char *str, c; @@ -61,35 +63,49 @@ prefork(LinkList list, int flags) setdata(node, (void *) getproc(str)); /* <(...) or >(...) */ else setdata(node, (void *) getoutputfile(str)); /* =(...) */ - if (!getdata(node)) + if (!getdata(node)) { + unqueue_signals(); return; + } } else { if (isset(SHFILEEXPANSION)) filesub((char **)getaddrdata(node), flags & (PF_TYPESET|PF_ASSIGN)); - if (!(node = stringsubst(list, node, flags & PF_SINGLE))) + if (!(node = stringsubst(list, node, flags & PF_SINGLE, asssub))) { + unqueue_signals(); return; + } } } for (node = firstnode(list); node; incnode(node)) { + if (node == stop) + keep = 0; if (*(char *)getdata(node)) { remnulargs(getdata(node)); - if (unset(IGNOREBRACES) && !(flags & PF_SINGLE)) - while (hasbraces(getdata(node))) + if (unset(IGNOREBRACES) && !(flags & PF_SINGLE)) { + if (!keep) + stop = nextnode(node); + while (hasbraces(getdata(node))) { + keep = 1; xpandbraces(list, &node); + } + } if (unset(SHFILEEXPANSION)) filesub((char **)getaddrdata(node), flags & (PF_TYPESET|PF_ASSIGN)); - } else if (!(flags & PF_SINGLE)) + } else if (!(flags & PF_SINGLE) && !keep) uremnode(list, node); - if (errflag) + if (errflag) { + unqueue_signals(); return; + } } + unqueue_signals(); } /**/ static LinkNode -stringsubst(LinkList list, LinkNode node, int ssub) +stringsubst(LinkList list, LinkNode node, int ssub, int asssub) { int qt; char *str3 = (char *)getdata(node); @@ -124,7 +140,7 @@ stringsubst(LinkList list, LinkNode node, int 
ssub) str3 = (char *)getdata(node); continue; } - } else if ((qt = c == Qtick) || c == Tick) + } else if ((qt = c == Qtick) || (c == Tick ? (mult_isarr = 1) : 0)) comsub: { LinkList pl; char *s, *str2 = str; @@ -181,7 +197,7 @@ stringsubst(LinkList list, LinkNode node, int ssub) continue; } if (!qt && ssub && isset(GLOBSUBST)) - tokenize(s); + shtokenize(s); l1 = str2 - str3; l2 = strlen(s); if (nonempty(pl)) { @@ -203,12 +219,48 @@ stringsubst(LinkList list, LinkNode node, int ssub) str3 = str2; setdata(node, str3); continue; + } else if (asssub && ((c == '=') || c == Equals) && str != str3) { + /* + * We are in a normal argument which looks like an assignment + * and is to be treated like one, with no word splitting. + */ + ssub = 1; } str++; } return errflag ? NULL : node; } +/* + * Simplified version of the prefork/singsub processing where + * we only do substitutions appropriate to quoting. Currently + * this means only the expansions in $'....'. This is used + * for the end tag for here documents. As we are not doing + * `...` expansions, we just use those for quoting. However, + * they stay in the text. This is weird, but that's not + * my fault. + * + * The remnulargs() makes this consistent with the other forms + * of substitution, indicating that quotes have been fully + * processed. 
+ */ + +/**/ +void +quotesubst(char *str) +{ + char *s = str; + + while (*s) { + if (*s == String && s[1] == Snull) { + s = getkeystring(s, NULL, 4, NULL); + } else { + s++; + } + } + remnulargs(str); +} + /**/ mod_export void globlist(LinkList list, int nountok) @@ -218,7 +270,7 @@ globlist(LinkList list, int nountok) badcshglob = 0; for (node = firstnode(list); !errflag && node; node = next) { next = nextnode(node); - glob(list, node, nountok); + zglob(list, node, nountok); } if (badcshglob == 1) zerr("no match", NULL, 0); @@ -230,11 +282,13 @@ globlist(LinkList list, int nountok) mod_export void singsub(char **s) { + int omi = mult_isarr; local_list1(foo); init_list1(foo, *s); prefork(&foo, PF_SINGLE); + mult_isarr = omi; if (errflag) return; *s = (char *) ugetnode(&foo); @@ -256,7 +310,7 @@ static int mult_isarr; /**/ static int -multsub(char **s, char ***a, int *isarr, char *sep) +multsub(char **s, char ***a, int *isarr, UNUSED(char *sep)) { int l, omi = mult_isarr; char **r, **p; @@ -276,6 +330,15 @@ multsub(char **s, char ***a, int *isarr, char *sep) while (nonempty(&foo)) *p++ = (char *)ugetnode(&foo); *p = NULL; + /* + * This is the most obscure way of deciding whether a value is + * an array it would be possible to imagine. It seems to result + * partly because we don't pass down the qt and ssub flags from + * paramsubst() through prefork() properly, partly because we + * don't tidy up to get back the return type from multsub we + * need properly. The crux of neatening this up is to get rid + * of the following test. 
+ */ if (a && mult_isarr) { *a = r; *isarr = SCANPM_MATCHMANY; @@ -307,7 +370,7 @@ multsub(char **s, char ***a, int *isarr, char *sep) mod_export void filesub(char **namptr, int assign) { - char *sub = NULL, *str, *ptr; + char *eql = NULL, *sub = NULL, *str, *ptr; int len; filesubstr(namptr, assign); @@ -316,7 +379,7 @@ filesub(char **namptr, int assign) return; if (assign & PF_TYPESET) { - if ((*namptr)[1] && (sub = strchr(*namptr + 1, Equals))) { + if ((*namptr)[1] && (eql = sub = strchr(*namptr + 1, Equals))) { str = sub + 1; if ((sub[1] == Tilde || sub[1] == Equals) && filesubstr(&str, assign)) { sub[1] = '\0'; @@ -330,7 +393,9 @@ filesub(char **namptr, int assign) while ((sub = strchr(ptr, ':'))) { str = sub + 1; len = sub - *namptr; - if ((sub[1] == Tilde || sub[1] == Equals) && filesubstr(&str, assign)) { + if (sub > eql && + (sub[1] == Tilde || sub[1] == Equals) && + filesubstr(&str, assign)) { sub[1] = '\0'; *namptr = dyncat(*namptr, str); } @@ -397,15 +462,9 @@ filesubstr(char **namptr, int assign) sav = *pp; *pp = 0; if (!(cnam = findcmd(str + 1, 1))) { - Alias a = (Alias) aliastab->getnode(aliastab, str + 1); - - if (a) - cnam = a->text; - else { - if (isset(NOMATCH)) - zerr("%s not found", str + 1, 0); - return 0; - } + if (isset(NOMATCH)) + zerr("%s not found", str + 1, 0); + return 0; } *namptr = dupstring(cnam); if (sav) { @@ -430,14 +489,14 @@ strcatsub(char **d, char *pb, char *pe, char *src, int l, char *s, int glbsub, if (!pl && (!s || !*s)) { *d = dest = (copied ? src : dupstring(src)); if (glbsub) - tokenize(dest); + shtokenize(dest); } else { *d = dest = hcalloc(pl + l + (s ? 
strlen(s) : 0) + 1); strncpy(dest, pb, pl); dest += pl; strcpy(dest, src); if (glbsub) - tokenize(dest); + shtokenize(dest); dest += l; if (s) strcpy(dest, s); @@ -519,6 +578,72 @@ invcstrpcmp(const void *a, const void *b) #endif } +/**/ +int +nstrpcmp(const void *a, const void *b) +{ + char *c = *(char **)a, *d = *(char **)b; + int cmp; + +#ifdef HAVE_STRCOLL + cmp = strcoll(c, d); +#endif + for (; *c == *d && *c; c++, d++); +#ifndef HAVE_STRCOLL + cmp = (int)STOUC(*c) - (int)STOUC(*d); +#endif + if (idigit(*c) || idigit(*d)) { + for (; c > *(char **)a && idigit(c[-1]); c--, d--); + if (idigit(*c) && idigit(*d)) { + while (*c == '0') + c++; + while (*d == '0') + d++; + for (; idigit(*c) && *c == *d; c++, d++); + if (idigit(*c) || idigit(*d)) { + cmp = (int)STOUC(*c) - (int)STOUC(*d); + while (idigit(*c) && idigit(*d)) + c++, d++; + if (idigit(*c) && !idigit(*d)) + return 1; + if (idigit(*d) && !idigit(*c)) + return -1; + } + } + } + return cmp; +} + +/**/ +int +invnstrpcmp(const void *a, const void *b) +{ + return -nstrpcmp(a, b); +} + +/**/ +int +instrpcmp(const void *a, const void *b) +{ + VARARR(char, c, strlen(*(char **) a) + 1); + VARARR(char, d, strlen(*(char **) b) + 1); + char **e = (char **)&c; + char **f = (char **)&d; + char *s, *t; + + for (s = *(char **) a, t = c; (*t++ = tulower(*s++));); + for (s = *(char **) b, t = d; (*t++ = tulower(*s++));); + + return nstrpcmp(&e, &f); +} + +/**/ +int +invinstrpcmp(const void *a, const void *b) +{ + return -instrpcmp(a, b); +} + /**/ static char * dopadding(char *str, int prenum, int postnum, char *preone, char *postone, char *premul, char *postmul) @@ -702,17 +827,24 @@ get_intarg(char **s) /* Parsing for the (e) flag. */ static int -subst_parse_str(char **sp, int single) +subst_parse_str(char **sp, int single, int err) { char *s; *sp = s = dupstring(*sp); - if (!parsestr(s)) { + if (!(err ? 
parsestr(s) : parsestrnoerr(s))) { if (!single) { + int qt = 0; + for (; *s; s++) - if (*s == Qstring) - *s = String; + if (!qt) { + if (*s == Qstring) + *s = String; + else if (*s == Qtick) + *s = Tick; + } else if (*s == Dnull) + qt = !qt; } return 0; } @@ -724,6 +856,23 @@ subst_parse_str(char **sp, int single) #define isstring(c) ((c) == '$' || (char)(c) == String || (char)(c) == Qstring) #define isbrack(c) ((c) == '[' || (char)(c) == Inbrack) +/* + * Given a linked list l with node n, perform parameter substitution + * starting from *str. Return the node with the substitution performed + * or NULL if it failed. + * + * If qt is true, the `$' was quoted. TODO: why can't we just look + * to see if the first character was String or Qstring? + * + * If ssub is true, we are being called via singsub(), which means + * the result will be a single word. TODO: can we generate the + * single word at the end? TODO: if not, or maybe in any case, + * can we pass down the ssub flag from prefork with the other flags + * instead of pushing it into different arguments? (How exactly + * do qt and ssub differ? Are both necessary, if so is there some + * better way of separating the two?) + */ + /**/ LinkNode paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) @@ -731,41 +880,207 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) char *aptr = *str, c, cc; char *s = aptr, *fstr, *idbeg, *idend, *ostr = (char *) getdata(n); int colf; /* != 0 means we found a colon after the name */ + /* + * There are far too many flags. They need to be grouped + * together into some structure which ties them to where they + * came from. + * + * Some flags have an obscure relationship to their effect which + * depends on incrementing them to particular values in particular + * ways. + */ + /* + * Whether the value is an array (in aval) or not (in val). 
There's + * a movement from storing the value in the stuff read from the + * parameter (the value v) to storing them in val and aval. + * However, sometimes you find v reappearing temporarily. + * + * The values -1 and 2 are special to isarr. It looks like 2 is + * some kind of an internal flag to do with whether the array's been + * copied, in which case I don't know why we don't use the copied + * flag, but they do both occur close together so they presumably + * have different effects. The value -1 is used to force us to + * keep an empty array. It's tested in the YUK chunk (I mean the + * one explicitly marked as such). + */ int isarr = 0; + /* + * This is just the setting of the option except we need to + * take account of ^ and ^^. + */ int plan9 = isset(RCEXPANDPARAM); + /* + * Likewise, but with ~ and ~~. Also, we turn it off later + * on if qt is passed down. + */ int globsubst = isset(GLOBSUBST); + /* + * Indicates ${#pm}, massaged by whichlen which is set by + * the (c), (w), and (W) flags to indicate how we take the length. + */ int getlen = 0; int whichlen = 0; + /* + * Indicates ${+pm}: a simple boolean for once. + */ int chkset = 0; + /* + * Indicates we have tried to get a value in v but that was + * unset. I don't quite understand why (v == NULL) isn't + * good enough, but there are places where we seem to need + * to second guess whether a value is a real value or not. + */ int vunset = 0; + /* + * Indicates (t) flag, i.e. print out types. The code for + * this actually isn't too horrifically inbred compared with + * that for (P). + */ int wantt = 0; + /* + * Indicates splitting a string into an array. There aren't + * actually that many special cases for this --- which may + * be why it doesn't work properly; we split in some cases + * where we shouldn't, in particular on the multsubs for + * handling embedded values for ${...=...} and the like. 
+ */ int spbreak = isset(SHWORDSPLIT) && !ssub && !qt; + /* Scalar and array value, see isarr above */ char *val = NULL, **aval = NULL; + /* + * Padding based on setting in parameter rather than substitution + * flags. This is only used locally. + */ unsigned int fwidth = 0; + /* + * vbuf and v are both used to retrieve parameter values; this + * is a kludge, we pass down vbuf and it may or may not return v. + */ struct value vbuf; Value v = NULL; + /* + * This expressive name refers to the set of flags which + * is applied to matching for #, %, / and their doubled variants: + * (M), (R), (B), (E), (N), (S). + */ int flags = 0; + /* Value from (I) flag, used for ditto. */ int flnum = 0; - int sortit = 0, casind = 0; + /* + * sortit is an obscure combination of the settings for (o), (O), + * (i) and (n). casind is (i) and numord is (n); these are + * separate so we can have fun doing the obscure combinatorics later. + * indord is the (a) flag, which for consistency doesn't get + * combined into sortit. + */ + int sortit = 0, casind = 0, numord = 0, indord = 0; + /* (u): straightforward. */ + int unique = 0; + /* combination of (L), (U) and (C) flags. */ int casmod = 0; + /* + * quotemod says we are doing either (q) (positive), (Q) (negative) + * or not (0). quotetype counts the q's for the first case. + * quoteerr is simply (X) but gets passed around a lot because the + * combination (eX) needs it. + */ int quotemod = 0, quotetype = 0, quoteerr = 0; + /* + * (V) flag: fairly straightforward, except that as with so + * many flags it's not easy to decide where to put it in the order. + */ int visiblemod = 0; + /* + * The (z) flag, nothing to do with SH_WORD_SPLIT which is tied + * to spbreak, see above; fairly straightforward in use but c.f. + * the comment for visiblemod. + */ int shsplit = 0; + /* + * The separator from (j) and (s) respectively, or (F) and (f) + * respectively (hardwired to "\n" in that case). 
Slightly + * confusingly also used for ${#pm}, though that's at least + * documented in the manual. + */ char *sep = NULL, *spsep = NULL; + /* + * Padding strings. The left and right padding strings which + * are repeated, then the ones which only occur once, for + * the (l) and (r) flags. + */ char *premul = NULL, *postmul = NULL, *preone = NULL, *postone = NULL; - char *replstr = NULL; /* replacement string for /orig/repl */ + /* Replacement string for /orig/repl and //orig/repl */ + char *replstr = NULL; + /* The numbers for (l) and (r) */ zlong prenum = 0, postnum = 0; + /* + * Whether the value has been copied. Optimisation: if we + * are modifying an expression, we only need to copy it the + * first time, and if we don't modify it we can just use the + * value from the parameter or input. + */ int copied = 0; + /* + * The (A) flag for array assignment, with consequences for + * splitting and joining; (AA) gives arrasg == 2 for associative + * arrays. + */ int arrasg = 0; + /* + * The (e) flag. As we need to do extra work not quite + * at the end, the effect of this is kludged in in several places. + */ int eval = 0; + /* + * The (P) flag. This interacts a bit obscurely with whether + * or not we are dealing with a sub expression (subexp). + */ int aspar = 0; + /* + * The (%) flag, c.f. visiblemod again. + */ int presc = 0; + /* + * The (@) flag; interacts obscurely with qt and isarr. + * This is one of the things that decides whether multsub + * will produce an array, but in an extremely indirect fashion. + */ int nojoin = 0; - char inbrace = 0; /* != 0 means ${...}, otherwise $... */ + /* + * != 0 means ${...}, otherwise $... What works without braces + * is largely a historical artefact (everything works with braces, + * I sincerely hope). + */ + char inbrace = 0; + /* + * Used for the (k) flag. Goes down into the parameter code, + * sometimes. + */ char hkeys = 0; + /* + * Used for the (v) flag, ditto. 
Not quite sure why they're + * separate, but the tradition seems to be that things only + * get combined when that makes the result more obscure rather + * than less. + */ char hvals = 0; + /* + * Whether we had to evaluate a subexpression, i.e. an + * internal ${...} or $(...) or plain $pm. We almost don't + * need to remember this (which would be neater), but the (P) + * flag means the subexp and !subexp code is obscurely combined, + * and the argument passing to fetchvalue has another kludge. + */ int subexp; *s++ = '\0'; + /* + * Nothing to do unless the character following the $ is + * something we recognise. + * + * Shouldn't this be a table or something? We test for all + * these later on, too. + */ if (!ialnum(c = *s) && c != '#' && c != Pound && c != '-' && c != '!' && c != '$' && c != String && c != Qstring && c != '?' && c != Quest && c != '_' && @@ -777,9 +1092,21 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) return n; } DPUTS(c == '{', "BUG: inbrace == '{' in paramsubst()"); + /* + * Extra processing if there is an opening brace: mostly + * flags in parentheses, but also one ksh hack. + */ if (c == Inbrace) { inbrace = 1; s++; + /* + * In ksh emulation a leading `!' is a special flag working + * sort of like our (k). + * TODO: this is one of very few cases tied directly to + * the emulation mode rather than an option. Since ksh + * doesn't have parameter flags it might be neater to + * handle this with the ^, =, ~ stuff, below. + */ if ((c = *s) == '!' && s[1] != Outbrace && emulation == EMULATE_KSH) { hkeys = SCANPM_WANTKEYS; s++; @@ -787,6 +1114,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) char *t, sav; int tt = 0; zlong num; + /* + * The (p) flag is (uniquely) only remembered within + * this block. It says we do print-style handling + * on the values for flags, but only on those. + * This explains the ghastly macro, but why can't it + * be a function? 
UNTOK_AND_ESCAPE is defined + * so that the argument must be an lvalue. + */ int escapes = 0; int klen; #define UNTOK(C) (itok(C) ? ztokens[(C) - Pound] : (C)) @@ -852,6 +1187,12 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) case 'i': casind = 1; break; + case 'n': + numord = 1; + break; + case 'a': + indord = 1; + break; case 'V': visiblemod++; @@ -971,6 +1312,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) presc++; break; + case 'z': + shsplit = 1; + break; + + case 'u': + unique = 1; + break; + default: flagerr: zerr("error in flags", NULL, 0); @@ -980,44 +1329,76 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) s++; } } + /* Sort is done by indexing on sortit-1: + * bit 1: ascending (o)/descending (O) + * bit 2: case sensitive/independent (i) + * bit 3: strict order/numeric (n) + * unless indord (a) is set set, in which case only test for + * descending by assuming only (O) is possible (not verified). + */ if (sortit) - sortit += (casind << 1); + sortit += (casind << 1) + (numord << 2); + /* + * premul, postmul specify the padding character to be used + * multiple times with the (l) and (r) flags respectively. + */ if (!premul) premul = " "; if (!postmul) postmul = " "; + /* + * Look for special unparenthesised flags. + * TODO: could make these able to appear inside parentheses, too, + * i.e. ${(^)...} etc. + */ for (;;) { if ((c = *s) == '^' || c == Hat) { + /* RC_EXPAND_PARAM on or off (doubled )*/ if ((c = *++s) == '^' || c == Hat) { plan9 = 0; s++; } else plan9 = 1; } else if ((c = *s) == '=' || c == Equals) { + /* SH_WORD_SPLIT on or off (doubled). 
spbreak = 2 means force */ if ((c = *++s) == '=' || c == Equals) { spbreak = 0; s++; } else - spbreak = 1; + spbreak = 2; } else if ((c == '#' || c == Pound) && (iident(cc = s[1]) || cc == '*' || cc == Star || cc == '@' || cc == '-' || (cc == ':' && s[2] == '-') - || (isstring(cc) && (s[2] == Inbrace || s[2] == Inpar)))) + || (isstring(cc) && (s[2] == Inbrace || s[2] == Inpar)))) { getlen = 1 + whichlen, s++; - else if (c == '~' || c == Tilde) { + /* + * Return the length of the parameter. + * getlen can be more than 1 to indicate characters (2), + * words ignoring multiple delimiters (3), words taking + * account of multiple delimiters. delimiter is in + * spsep, NULL means $IFS. + */ + } else if (c == '~' || c == Tilde) { + /* GLOB_SUBST on or off (doubled) */ if ((c = *++s) == '~' || c == Tilde) { globsubst = 0; s++; } else globsubst = 1; } else if (c == '+') { + /* + * Return whether indicated parameter is set. + * Try to handle this when parameter is named + * by (P) (second part of test). + */ if (iident(s[1]) || (aspar && isstring(s[1]) && (s[2] == Inbrace || s[2] == Inpar))) chkset = 1, s++; else if (!inbrace) { + /* Special case for `$+' on its own --- leave unmodified */ *aptr = '$'; *str = aptr + 1; return n; @@ -1025,13 +1406,31 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) zerr("bad substitution", NULL, 0); return NULL; } - } else if (inbrace && INULL(*s)) + } else if (inbrace && INULL(*s)) { + /* + * Handles things like ${(f)"$(pm && (v->pm->flags & PM_UNSET))) vunset = 1; if (wantt) { + /* + * Handle the (t) flag: value now becomes the type + * information for the parameter. 
+ */ if (v && v->pm && !(v->pm->flags & PM_UNSET)) { int f = v->pm->flags; @@ -1104,6 +1561,8 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) val = dyncat(val, "-unique"); if (f & PM_HIDE) val = dyncat(val, "-hide"); + if (f & PM_HIDE) + val = dyncat(val, "-hideval"); if (f & PM_SPECIAL) val = dyncat(val, "-special"); vunset = 0; @@ -1114,8 +1573,24 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) isarr = 0; } } + /* + * We get in here two ways; either we need to convert v into + * the local value system, or we need to get rid of brackets + * even if there isn't a v. + */ while (v || ((inbrace || (unset(KSHARRAYS) && vunset)) && isbrack(*s))) { if (!v) { + /* + * Index applied to non-existent parameter; we may or may + * not have a value to index, however. Create a temporary + * empty parameter as a trick, and index on that. This + * usually happens the second time around the loop when + * we've used up the original parameter value and want to + * apply a subscript to what's left. However, it's also + * possible it's got something to do with some of that murky + * passing of -1's as the third argument to fetchvalue() to + * inhibit bracket parsing at that stage. + */ Param pm; char *os = s; @@ -1126,6 +1601,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) isarr = 0; } pm = createparam(nulstring, isarr ? PM_ARRAY : PM_SCALAR); + DPUTS(!pm, "BUG: parameter not created"); if (isarr) pm->u.arr = aval; else @@ -1133,10 +1609,25 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) v = (Value) hcalloc(sizeof *v); v->isarr = isarr; v->pm = pm; - v->b = -1; - if (getindex(&s, v) || s == os) + v->end = -1; + if (getindex(&s, v, qt) || s == os) break; } + /* + * This is where we extract a value (we know now we have + * one) into the local parameters for a scalar (val) or + * array (aval) value. TODO: move val and aval into + * a structure with a discriminator. 
Hope we can make + * more things array values at this point and dearrayify later. + * v->isarr tells us whether the stuff from down below looks + * like an array. Unlike multsub() this is probably clean + * enough to keep, although possibly the parameter passing + * needs reorganising. + * + * I think we get to discard the existing value of isarr + * here because it's already been taken account of, either + * in the subexp stuff or immediately above. + */ if ((isarr = v->isarr)) { /* No way to get here with v->inv != 0, so getvaluearr() * * is called by getarrvalue(); needn't test PM_HASHED. */ @@ -1146,17 +1637,37 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) } else aval = getarrvalue(v); } else { + /* Value retrieved from parameter/subexpression is scalar */ if (v->pm->flags & PM_ARRAY) { - int tmplen = arrlen(v->pm->gets.afn(v->pm)); - - if (v->a < 0) - v->a += tmplen + v->inv; - if (!v->inv && (v->a >= tmplen || v->a < 0)) + /* + * Although the value is a scalar, the parameter + * itself is an array. Presumably this is due to + * being quoted, or doing single substitution or something, + * TODO: we're about to do some definitely stringy + * stuff, so something like this bit is probably + * necessary. However, I'd like to leave any + * necessary joining of arrays until this point + * to avoid the multsub() horror. + */ + int tmplen = arrlen(v->pm->gsu.a->getfn(v->pm)); + + if (v->start < 0) + v->start += tmplen + v->inv; + if (!v->inv && (v->start >= tmplen || v->start < 0)) vunset = 1; } if (!vunset) { + /* + * There really is a value. Apply any necessary + * padding or case transformation. Note these + * are the per-parameter transformations specified + * with typeset, not the per-substitution ones set + * by flags. TODO: maybe therefore this would + * be more consistent if moved into getstrvalue()? + * Bet that's easier said than done. + */ val = getstrvalue(v); - fwidth = v->pm->ct ? v->pm->ct : strlen(val); + fwidth = v->pm->width ? 
v->pm->width : (int)strlen(val); switch (v->pm->flags & (PM_LEFT | PM_RIGHT_B | PM_RIGHT_Z)) { char *t; unsigned int t0; @@ -1184,17 +1695,67 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) int zero = 1; if (strlen(val) < fwidth) { + char *valprefend = val; if (v->pm->flags & PM_RIGHT_Z) { - for (t = val; iblank(*t); t++); - if (!*t || !idigit(*t)) + /* + * This is a documented feature: when deciding + * whether to pad with zeroes, ignore + * leading blanks already in the value; + * only look for numbers after that. + * Not sure how useful this really is. + * It's certainly confusing to code around. + */ + for (t = val; iblank(*t); t++) + ; + /* + * Allow padding after initial minus + * for numeric variables. + */ + if ((v->pm->flags & + (PM_INTEGER|PM_EFLOAT|PM_FFLOAT)) && + *t == '-') + t++; + /* + * Allow padding after initial 0x or + * base# for integer variables. + */ + if (v->pm->flags & PM_INTEGER) { + if (isset(CBASES) && + t[0] == '0' && t[1] == 'x') + t += 2; + else if ((valprefend = strchr(t, '#'))) + t = valprefend + 1; + } + valprefend = t; + if (!*t) + zero = 0; + else if (v->pm->flags & + (PM_INTEGER|PM_EFLOAT|PM_FFLOAT)) { + /* zero always OK */ + } else if (!idigit(*t)) zero = 0; } t = (char *) hcalloc(fwidth + 1); memset(t, (((v->pm->flags & PM_RIGHT_B) || !zero) ? ' ' : '0'), fwidth); + /* + * How can the following trigger? We + * haven't altered val or fwidth since + * the last time we tested this. + */ if ((t0 = strlen(val)) > fwidth) t0 = fwidth; - strcpy(t + (fwidth - t0), val); + /* + * Copy - or 0x or base# before any padding + * zeroes. 
+ */ + if (zero && val != valprefend) { + int preflen = valprefend - val; + memcpy(t, val, preflen); + strcpy(t + (fwidth - t0) + preflen, + valprefend); + } else + strcpy(t + (fwidth - t0), val); val = t; } else { t = (char *) hcalloc(fwidth + 1); @@ -1221,10 +1782,58 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) } } } + /* + * Finished with the original parameter and its indices; + * carry on looping to see if we need to do more indexing. + * This means we finally get rid of v in favour of val and + * aval. We could do with somehow encapsulating the bit + * where we need v. + */ v = NULL; if (!inbrace) break; } + /* + * We're now past the name or subexpression; the only things + * which can happen now are a closing brace, one of the standard + * parameter postmodifiers, or a history-style colon-modifier. + * + * Again, this duplicates tests for characters we're about to + * examine properly later on. + */ + if (inbrace && + (c = *s) != '-' && c != '+' && c != ':' && c != '%' && c != '/' && + c != '=' && c != Equals && + c != '#' && c != Pound && + c != '?' && c != Quest && + c != '}' && c != Outbrace) { + zerr("bad substitution", NULL, 0); + return NULL; + } + /* + * Join arrays up if we're in quotes and there isn't some + * override such as (@). + * TODO: hmm, if we're called as part of some recursive + * substitution do we want to delay this until we get back to + * the top level? Or is if there's a qt (i.e. this parameter + * substitution is in quotes) always good enough? Potentially + * we may be OK by now --- all potential `@'s and subexpressions + * have been handled, including any [@] index which comes up + * by virtue of v->isarr being set to SCANPM_ISVAR_AT which + * is now in isarr. + * + * However, if we are replacing multsub() with something that + * doesn't mangle arrays, we may need to delay this step until after + * the foo:- or foo:= or whatever that causes that.
Note the value + * (string or array) at this point is irrelevant if we are going to + * be doing that. This would mean // and stuff get applied + * arraywise even if quoted. That's probably wrong, so maybe + * this just stays. + * + * We do a separate stage of dearrayification in the YUK chunk, + * I think mostly because of the way we make array or scalar + * values appear to the caller. + */ if (isarr) { if (nojoin) isarr = -1; @@ -1235,9 +1844,20 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) } idend = s; - if (inbrace) + if (inbrace) { + /* + * This is to match a closing double quote in case + * we didn't have a subexpression, e.g. ${"foo"}. + * This form is pointless, but logically it ought to work. + */ while (INULL(*s)) s++; + } + /* + * We don't yet know whether a `:' introduces a history-style + * colon modifier or qualifies something like ${...:=...}. + * But if we remember the colon here it's easy to check later. + */ if ((colf = *s == ':')) s++; @@ -1268,13 +1888,18 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) if (inbrace && ((c = *s) == '-' || c == '+' || - c == ':' || + c == ':' || /* i.e. a doubled colon */ c == '=' || c == Equals || c == '%' || c == '#' || c == Pound || c == '?' || c == Quest || c == '/')) { + /* + * Default index is 1 if no (I) or (I) gave zero. But + * why don't we set the default explicitly at the start + * and massage any passed index where we set flnum anyway? + */ if (!flnum) flnum++; if (c == '%') @@ -1297,7 +1922,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) if ((c = *s) == '/') { /* doubled, so replace all occurrences */ flags |= SUB_GLOBAL; - s++; + c = *++s; } /* Check for anchored substitution */ if (c == '%') { @@ -1314,19 +1939,25 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) * If there isn't one, we're just going to delete that, * i.e. replace it with an empty string. * - * This allows quotation of the slash with '\\/'. 
Why - * two? Well, for a non-quoted string we can check for - * Bnull+/, which is what you get from `\/', but inside - * double quotes the Bnull isn't there, so it's not - * consistent. + * We used to use double backslashes to quote slashes, + * but actually that was buggy and using a single backslash + * is easier and more obvious. */ for (ptr = s; (c = *ptr) && c != '/'; ptr++) - if (c == '\\' && ptr[1] == '/') - chuck(ptr); + { + if ((c == Bnull || c == Bnullkeep || c == '\\') && ptr[1]) + { + if (ptr[1] == '/') + chuck(ptr); + else + ptr++; + } + } replstr = (*ptr && ptr[1]) ? ptr+1 : ""; *ptr = '\0'; } + /* See if this was ${...:-...}, ${...:=...}, etc. */ if (colf) flags |= SUB_ALL; /* @@ -1353,11 +1984,29 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) case '-': if (vunset) { val = dupstring(s); - multsub(&val, NULL, &isarr, NULL); + /* + * This is not good enough for sh emulation! Sh would + * split unquoted substrings, yet not split quoted ones + * (except according to $@ rules); but this leaves the + * unquoted substrings unsplit, and other code below + * for spbreak splits even within the quoted substrings. + * + * TODO: I think multsub needs to be told enough to + * decide about splitting with spbreak at this point + * (and equally in the `=' handler below). Then + * we can turn off spbreak to avoid the join & split + * nastiness later. + * + * What we really want to do is make this look as + * if it were the result of an assignment from + * the same value, taking account of quoting. + */ + multsub(&val, (aspar ? NULL : &aval), &isarr, NULL); copied = 1; } break; case ':': + /* this must be `::=', unconditional assignment */ if (*s != '=' && *s != Equals) goto noclosebrace; vunset = 1; @@ -1372,17 +2021,26 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) *idend = '\0'; val = dupstring(s); isarr = 0; + /* + * TODO: this is one of those places where I don't + * think we want to do the joining until later on. 
+ * We also need to handle spbreak and spsep at this + * point and unset them. + */ if (spsep || spbreak || !arrasg) multsub(&val, NULL, NULL, sep); else multsub(&val, &aval, &isarr, NULL); if (arrasg) { + /* + * This is an array assignment in a context + * where we have no syntactic way of finding + * out what an array element is. So we just guess. + */ char *arr[2], **t, **a, **p; if (spsep || spbreak) { aval = sepsplit(val, spsep, 0, 1); isarr = 2; - sep = spsep = NULL; - spbreak = 0; l = arrlen(aval); if (l && !*(aval[l-1])) l--; @@ -1411,7 +2069,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) if (arrasg > 1) { Param pm = sethparam(idbeg, a); if (pm) - aval = paramvalarr(pm->gets.hfn(pm), hkeys|hvals); + aval = paramvalarr(pm->gsu.h->getfn(pm), hkeys|hvals); } else setaparam(idbeg, a); } else { @@ -1420,6 +2078,16 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) } *idend = sav; copied = 1; + if (isarr) { + if (nojoin) + isarr = -1; + if (qt && !getlen && isarr > 0 && !spsep && spbreak < 2) { + val = sepjoin(aval, sep, 1); + isarr = 0; + } + sep = spsep = NULL; + spbreak = 0; + } } break; case '?': @@ -1440,7 +2108,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) case '#': case Pound: case '/': - if (qt) { + /* This once was executed only `if (qt) ...'. But with that + * patterns in a expansion resulting from a ${(e)...} aren't + * tokenized even though this function thinks they are (it thinks + * they are because subst_parse_str() turns Qstring tokens + * into String tokens and for unquoted parameter expansions the + * lexer normally does tokenize patterns inside parameter + * expansions). 
*/ + { int one = noerrs, oef = errflag, haserr; if (!quoteerr) @@ -1450,7 +2125,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) if (!quoteerr) { errflag = oef; if (haserr) - tokenize(s); + shtokenize(s); } else if (haserr || errflag) { zerr("parse error in ${...%c...} substitution", NULL, s[-1]); @@ -1458,9 +2133,18 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) } } { +#if 0 + /* + * This allows # and % to be at the start of + * a parameter in the substitution, which is + * a bit nasty, and can be done (although + * less efficiently) with anchors. + */ + char t = s[-1]; singsub(&s); + if (t == '/' && (flags & SUB_SUBSTR)) { if ((c = *s) == '#' || c == '%') { flags &= ~SUB_SUBSTR; @@ -1471,8 +2155,15 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) s++; } } +#else + singsub(&s); +#endif } + /* + * Either loop over an array doing replacements or + * do the replacement on a string. + */ if (!vunset && isarr) { getmatcharr(&aval, s, flags, flnum, replstr); copied = 1; @@ -1485,6 +2176,11 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) break; } } else { /* no ${...=...} or anything, but possible modifiers. */ + /* + * Handle ${+...}. TODO: strange, why do we handle this only + * if there isn't a trailing modifier? Why don't we do this + * e.g. when we handle the ${(t)...} flag? + */ if (chkset) { val = dupstring(vunset ? "0" : "1"); isarr = 0; @@ -1497,6 +2193,10 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) val = dupstring(""); } if (colf) { + /* + * History style colon modifiers. May need to apply + * on multiple elements of an array. + */ s--; if (unset(KSHARRAYS) || inbrace) { if (!isarr) @@ -1533,6 +2233,13 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) } if (errflag) return NULL; + /* + * This handles taking a length with ${#foo} and variations. + * TODO: again. 
one might naively have thought this had the + * same sort of effect as the ${(t)...} flag and the ${+...} + * test, although in this case we do need the value rather + * than the parameter, so maybe it's a bit different. + */ if (getlen) { long len = 0; char buf[14]; @@ -1563,6 +2270,23 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) val = dupstring(buf); isarr = 0; } + /* + * I think this mult_isarr stuff here is used to pass back + * the setting of whether we are an array to multsub, and + * thence to the top-level paramsubst(). The way the + * setting is passed back is completely obscure, however. + * It's presumably at this point because we try to remember + * whether the value was `really' an array before massaging + * some special cases. + * + * TODO: YUK. This is not the right place to turn arrays into + * scalars; we should pass back as an array, and let the calling + * code decide how to deal with it. This is almost certainly + * a lot harder than it sounds. Do we really need to handle + * one-element arrays as scalars at this point? Couldn't + * we just test for it later rather than having a multiple-valued + * wave-function for isarr? + */ mult_isarr = isarr; if (isarr > 0 && !plan9 && (!aval || !aval[0])) { val = dupstring(""); @@ -1577,6 +2301,12 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) } /* ssub is true when we are called from singsub (via prefork). * It means that we must join arrays and should not split words. */ + /* + * TODO: this is what is screwing up the use of SH_WORD_SPLIT + * after `:-' etc. If we fix multsub(), we might get away + * with simply unsetting the appropriate flags when they + * get handled. + */ if (ssub || spbreak || spsep || sep) { if (isarr) val = sepjoin(aval, sep, 1), isarr = 0; @@ -1591,6 +2321,9 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) } mult_isarr = isarr; } + /* + * Perform case modifications. 
+ */ if (casmod) { if (isarr) { char **ap; @@ -1620,6 +2353,9 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) makecapitals(&val); } } + /* + * Perform prompt-style modifications. + */ if (presc) { int ops = opts[PROMPTSUBST], opb = opts[PROMPTBANG]; int opp = opts[PROMPTPERCENT], len; @@ -1628,6 +2364,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) opts[PROMPTPERCENT] = 1; opts[PROMPTSUBST] = opts[PROMPTBANG] = 0; } + /* + * TODO: It would be really quite nice to abstract the + * isarr and !isarr code into a function which gets + * passed a pointer to a function with the effect of + * the promptexpand bit. Then we could use this for + * a lot of stuff and bury val/aval/isarr inside a structure + * which gets passed to it. + */ if (isarr) { char **ap; @@ -1635,23 +2379,33 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) aval = arrdup(aval), copied = 1; ap = aval; for (; *ap; ap++) { + char *tmps; unmetafy(*ap, &len); untokenize(*ap); - *ap = unmetafy(promptexpand(metafy(*ap, len, META_NOALLOC), - 0, NULL, NULL), &len); + tmps = unmetafy(promptexpand(metafy(*ap, len, META_NOALLOC), + 0, NULL, NULL), &len); + *ap = dupstring(tmps); + free(tmps); } } else { + char *tmps; if (!copied) val = dupstring(val), copied = 1; unmetafy(val, &len); untokenize(val); - val = unmetafy(promptexpand(metafy(val, len, META_NOALLOC), + tmps = unmetafy(promptexpand(metafy(val, len, META_NOALLOC), 0, NULL, NULL), &len); + val = dupstring(tmps); + free(tmps); } opts[PROMPTSUBST] = ops; opts[PROMPTBANG] = opb; opts[PROMPTPERCENT] = opp; } + /* + * One of the possible set of quotes to apply, depending on + * the repetitions of the (q) flag. + */ if (quotemod) { if (--quotetype > 3) quotetype = 3; @@ -1735,6 +2489,10 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) } } } + /* + * Transform special characters in the string to make them + * printable. 
+ */ if (visiblemod) { if (isarr) { char **ap; @@ -1748,6 +2506,54 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) val = nicedupstring(val); } } + /* + * Nothing particularly to do with SH_WORD_SPLIT --- this + * performs lexical splitting on a string as specified by + * the (z) flag. + */ + if (shsplit) { + LinkList list = NULL; + + if (isarr) { + char **ap; + for (ap = aval; *ap; ap++) + list = bufferwords(list, *ap, NULL); + isarr = 0; + } else + list = bufferwords(NULL, val, NULL); + + if (!list || !firstnode(list)) + val = dupstring(""); + else if (!nextnode(firstnode(list))) + val = getdata(firstnode(list)); + else { + char **ap; + LinkNode node; + + aval = ap = (char **) zhalloc((countlinknodes(list) + 1) * + sizeof(char *)); + for (node = firstnode(list); node; incnode(node)) + *ap++ = (char *) getdata(node); + *ap = NULL; + mult_isarr = isarr = 2; + } + copied = 1; + } + /* + * TODO: hmm. At this point we have to be on our toes about + * whether we're putting stuff into a line or not, i.e. + * we don't want to do this from a recursive call; this is + * probably part of the point of the mult_isarr monkey business. + * Rather than passing back flags in a non-trivial way, maybe + * we could decide on the basis of flags passed down to us. + * + * This is the ideal place to do any last-minute conversion from + * array to strings. However, given all the transformations we've + * already done, probably if it's going to be done it will already + * have been. (I'd really like to keep everything in aval or + * equivalent and only locally decide if we need to treat it + * as a scalar.) 
+ */ if (isarr) { char *x; char *y; @@ -1755,36 +2561,78 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) int i; LinkNode on = n; - if (!aval[0] && !plan9) { + /* Handle the (u) flag; we need this before the next test */ + if (unique) { + if(!copied) + aval = arrdup(aval); + + i = arrlen(aval); + if (i > 1) + zhuniqarray(aval); + } + if ((!aval[0] || !aval[1]) && !plan9) { + /* + * Empty array or single element. Currently you only + * get a single element array at this point from the + * unique expansion above. but we can potentially + * have other reasons. + * + * The following test removes the markers + * from surrounding double quotes, but I don't know why + * that's necessary. + */ + int vallen; if (aptr > (char *) getdata(n) && aptr[-1] == Dnull && *fstr == Dnull) *--aptr = '\0', fstr++; - y = (char *) hcalloc((aptr - ostr) + strlen(fstr) + 1); + vallen = aval[0] ? strlen(aval[0]) : 0; + y = (char *) hcalloc((aptr - ostr) + vallen + strlen(fstr) + 1); strcpy(y, ostr); *str = y + (aptr - ostr); + if (vallen) + { + strcpy(*str, aval[0]); + *str += vallen; + } strcpy(*str, fstr); setdata(n, y); return n; } + /* Handle (o) and (O) and their variants */ if (sortit) { - static CompareFn sortfn[] = { - strpcmp, invstrpcmp, cstrpcmp, invcstrpcmp - }; - if (!copied) aval = arrdup(aval); - - i = arrlen(aval); - if (i && (*aval[i-1] || --i)) - qsort(aval, i, sizeof(char *), sortfn[sortit-1]); + if (indord) { + if (sortit & 2) { + char *copy; + char **end = aval + arrlen(aval) - 1, **start = aval; + + /* reverse the array */ + while (start < end) { + copy = *end; + *end-- = *start; + *start++ = copy; + } + } + } else { + static CompareFn sortfn[] = { + strpcmp, invstrpcmp, cstrpcmp, invcstrpcmp, + nstrpcmp, invnstrpcmp, instrpcmp, invinstrpcmp + }; + + i = arrlen(aval); + if (i && (*aval[i-1] || --i)) + qsort(aval, i, sizeof(char *), sortfn[sortit-1]); + } } if (plan9) { + /* Handle RC_EXPAND_PARAM */ LinkNode tn; local_list1(tl); *--fstr = Marker; 
init_list1(tl, fstr); - if (!eval && !stringsubst(&tl, firstnode(&tl), ssub)) + if (!eval && !stringsubst(&tl, firstnode(&tl), ssub, 0)) return NULL; *str = aptr; tn = firstnode(&tl); @@ -1792,7 +2640,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) if (prenum || postnum) x = dopadding(x, prenum, postnum, preone, postone, premul, postmul); - if (eval && subst_parse_str(&x, (qt && !nojoin))) + if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr)) return NULL; xlen = strlen(x); for (tn = firstnode(&tl); @@ -1824,11 +2672,19 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) return n; } } else { + /* + * Not RC_EXPAND_PARAM: simply join the first and + * last values. + * TODO: how about removing the restriction that + * aval[1] is non-NULL to promote consistency?, or + * simply changing the test so that we drop into + * the scalar branch, instead of tricking isarr? + */ x = aval[0]; if (prenum || postnum) x = dopadding(x, prenum, postnum, preone, postone, premul, postmul); - if (eval && subst_parse_str(&x, (qt && !nojoin))) + if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr)) return NULL; xlen = strlen(x); strcatsub(&y, ostr, aptr, x, xlen, NULL, globsubst, copied); @@ -1843,14 +2699,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) if (prenum || postnum) x = dopadding(x, prenum, postnum, preone, postone, premul, postmul); - if (eval && subst_parse_str(&x, (qt && !nojoin))) + if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr)) return NULL; if (qt && !*x && isarr != 2) y = dupstring(nulstring); else { y = dupstring(x); if (globsubst) - tokenize(y); + shtokenize(y); } insertlinknode(l, n, (void *) y), incnode(n); } @@ -1859,7 +2715,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) if (prenum || postnum) x = dopadding(x, prenum, postnum, preone, postone, premul, postmul); - if (eval && subst_parse_str(&x, (qt && !nojoin))) + if (eval && subst_parse_str(&x, (qt && 
!nojoin), quoteerr)) return NULL; xlen = strlen(x); *str = strcatsub(&y, aptr, aptr, x, xlen, fstr, globsubst, copied); @@ -1870,6 +2726,11 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) if (eval) n = on; } else { + /* + * Scalar value. Handle last minute transformations + * such as left- or right-padding and the (e) flag to + * revaluate the result. + */ int xlen; char *x; char *y; @@ -1878,7 +2739,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) if (prenum || postnum) x = dopadding(x, prenum, postnum, preone, postone, premul, postmul); - if (eval && subst_parse_str(&x, (qt && !nojoin))) + if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr)) return NULL; xlen = strlen(x); *str = strcatsub(&y, ostr, aptr, x, xlen, fstr, globsubst, copied); @@ -1905,15 +2766,18 @@ static char * arithsubst(char *a, char **bptr, char *rest) { char *s = *bptr, *t; - char buf[DIGBUFSIZE], *b = buf; + char buf[BDIGBUFSIZE], *b = buf; mnumber v; singsub(&a); v = matheval(a); - if (v.type & MN_FLOAT) + if ((v.type & MN_FLOAT) && !outputradix) b = convfloat(v.u.d, 0, 0, NULL); - else - convbase(buf, v.u.l, 0); + else { + if (v.type & MN_FLOAT) + v.u.l = (zlong) v.u.d; + convbase(buf, v.u.l, outputradix); + } t = *bptr = (char *) hcalloc(strlen(*bptr) + strlen(b) + strlen(rest) + 1); t--; @@ -1975,18 +2839,18 @@ modify(char **str, char **ptr) if (*ptr1) { zsfree(hsubl); hsubl = ztrdup(ptr1); - } + } if (!hsubl) { zerr("no previous substitution", NULL, 0); return; } zsfree(hsubr); for (tt = hsubl; *tt; tt++) - if (INULL(*tt)) + if (INULL(*tt) && *tt != Bnullkeep) chuck(tt--); untokenize(hsubl); for (tt = hsubr = ztrdup(ptr2); *tt; tt++) - if (INULL(*tt)) + if (INULL(*tt) && *tt != Bnullkeep) chuck(tt--); ptr2[-1] = del; if (sav) diff --git a/Src/zsh.h b/Src/zsh.h index 4500cb21e..a0959456a 100644 --- a/Src/zsh.h +++ b/Src/zsh.h @@ -128,7 +128,10 @@ struct mathfunc { #define DEFAULT_IFS " \t\n\203 " -/* Character tokens */ +/* + * Character 
tokens. + * These should match the characters in ztokens, defined in lex.c + */ #define Pound ((char) 0x84) #define String ((char) 0x85) #define Hat ((char) 0x86) @@ -149,15 +152,33 @@ struct mathfunc { #define Tilde ((char) 0x95) #define Qtick ((char) 0x96) #define Comma ((char) 0x97) +/* + * Null arguments: placeholders for single and double quotes + * and backslashes. + */ #define Snull ((char) 0x98) #define Dnull ((char) 0x99) #define Bnull ((char) 0x9a) -#define Nularg ((char) 0x9b) +/* + * Backslash which will be returned to "\" instead of being stripped + * when we turn the string into a printable format. + */ +#define Bnullkeep ((char) 0x9b) +/* + * Null argument that does not correspond to any character. + * This should be last as it does not appear in ztokens and + * is used to initialise the IMETA type in inittyptab(). + */ +#define Nularg ((char) 0x9c) -#define INULL(x) (((x) & 0xfc) == 0x98) +#define INULL(x) ((x) >= Snull && (x) <= Nularg) +/* + * Take care to update the use of IMETA appropriately when adding + * tokens here. + */ /* Marker used in paramsubst for rc_expand_param */ -#define Marker ((char) 0x9c) +#define Marker ((char) 0xa0) /* chars that need to be quoted if meant literally */ diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst index 3ad19368d..6abba0ab3 100644 --- a/Test/D04parameter.ztst +++ b/Test/D04parameter.ztst @@ -196,6 +196,20 @@ >* boringfile evenmoreboringfile boringfile evenmoreboringfile >boringfile evenmoreboringfile +# The following tests a bug where globsubst didn't preserve +# backslashes when printing out the original string. 
+ str1='\\*\\' + ( + setopt globsubst nonomatch + [[ \\\\ = $str1 ]] && print -r '\\ matched by' $str1 + [[ \\foo\\ = $str1 ]] && print -r '\\foo matched by' $str1 + [[ a\\b\\ = $str1 ]] || print -r 'a\\b not matched by' $str1 + ) +0:globsubst with backslashes +>\\ matched by \\*\\ +>\\foo matched by \\*\\ +>a\\b not matched by \\*\\ + print -l "${$(print one word)}" "${=$(print two words)}" 0:splitting of $(...) inside ${...} >one word diff --git a/Test/ztst.zsh b/Test/ztst.zsh index fe996832c..c0fbc179f 100755 --- a/Test/ztst.zsh +++ b/Test/ztst.zsh @@ -22,9 +22,17 @@ # still not be good enough. Maybe we should trick it somehow. emulate -R zsh +# Ensure the locale does not screw up sorting. Don't supply a locale +# unless there's one set, to minimise problems. +[[ -n $LC_ALL ]] && LC_ALL=C +[[ -n $LC_COLLATE ]] && LC_COLLATE=C +[[ -n $LANG ]] && LANG=C + # Set the module load path to correspond to this build of zsh. # This Modules directory should have been created by "make check". [[ -d Modules/zsh ]] && module_path=( $PWD/Modules ) +# Allow this to be passed down. +export MODULE_PATH # We need to be able to save and restore the options used in the test. # We use the $options variable of the parameter module for this. @@ -47,12 +55,48 @@ ZTST_mainopts=(${(kv)options}) ZTST_testdir=$PWD ZTST_testname=$1 -# The source directory is not necessarily the current directory -ZTST_srcdir=${0%/*} +integer ZTST_testfailed + +# This is POSIX nonsense. Because of the vague feeling someone, somewhere +# may one day need to examine the arguments of "tail" using a standard +# option parser, every Unix user in the world is expected to switch +# to using "tail -n NUM" instead of "tail -NUM". Older versions of +# tail don't support this. 
+tail() { + emulate -L zsh + + if [[ -z $TAIL_SUPPORTS_MINUS_N ]]; then + local test + test=$(echo "foo\nbar" | command tail -n 1 2>/dev/null) + if [[ $test = bar ]]; then + TAIL_SUPPORTS_MINUS_N=1 + else + TAIL_SUPPORTS_MINUS_N=0 + fi + fi + + integer argi=${argv[(i)-<->]} + + if [[ $argi -le $# && $TAIL_SUPPORTS_MINUS_N = 1 ]]; then + argv[$argi]=(-n ${argv[$argi][2,-1]}) + fi + + command tail "$argv[@]" +} + +# The source directory is not necessarily the current directory, +# but if $0 doesn't contain a `/' assume it is. +if [[ $0 = */* ]]; then + ZTST_srcdir=${0%/*} +else + ZTST_srcdir=$PWD +fi [[ $ZTST_srcdir = /* ]] || ZTST_srcdir="$ZTST_testdir/$ZTST_srcdir" # Set the function autoload paths to correspond to this build of zsh. -fpath=( $ZTST_srcdir/../(Completion|Functions)/*~*/CVS(/) ) +fpath=( $ZTST_srcdir/../Functions/*~*/CVS(/) + $ZTST_srcdir/../Completion + $ZTST_srcdir/../Completion/*/*~*/CVS(/) ) : ${TMPPREFIX:=/tmp/zsh} # Temporary files for redirection inside tests. @@ -66,14 +110,15 @@ ZTST_terr=${TMPPREFIX}.ztst.terr.$$ ZTST_cleanup() { cd $ZTST_testdir - rm -rf $ZTST_testdir/dummy.tmp $ZTST_testdir/*.tmp ${TMPPREFIX}.ztst*$$ + rm -rf $ZTST_testdir/dummy.tmp $ZTST_testdir/*.tmp(N) \ + ${TMPPREFIX}.ztst*$$(N) } # This cleanup always gets performed, even if we abort. Later, # we should try and arrange that any test-specific cleanup # always gets called as well. -trap - 'print cleaning up... -ZTST_cleanup' INT QUIT TERM +##trap 'print cleaning up... +##ZTST_cleanup' INT QUIT TERM # Make sure it's clean now. rm -rf dummy.tmp *.tmp @@ -85,20 +130,31 @@ ZTST_testfailed() { print -r "Was testing: $ZTST_message" fi print -r "$ZTST_testname: test failed." 
- ZTST_cleanup - exit 1 + if [[ -n $ZTST_failmsg ]]; then + print -r "The following may (or may not) help identifying the cause: +$ZTST_failmsg" + fi + ZTST_testfailed=1 + return 1 } # Print messages if $ZTST_verbose is non-empty ZTST_verbose() { local lev=$1 shift - [[ -n $ZTST_verbose && $ZTST_verbose -ge $lev ]] && print -- $* >&8 + [[ -n $ZTST_verbose && $ZTST_verbose -ge $lev ]] && print -r -- $* >&8 +} +ZTST_hashmark() { + [[ ZTST_verbose -le 0 && -t 8 ]] && print -nu8 ${(pl:SECONDS::\#::\#\r:)} + (( SECONDS > COLUMNS+1 && (SECONDS -= COLUMNS) )) } -[[ ! -r $ZTST_testname ]] && ZTST_testfailed "can't read test file." +if [[ ! -r $ZTST_testname ]]; then + ZTST_testfailed "can't read test file." + exit 1 +fi -[[ -n $ZTST_verbose && $ZTST_verbose -ge 0 ]] && exec 8>&1 +exec 8>&1 exec 9<$ZTST_testname # The current line read from the test file. @@ -118,15 +174,18 @@ ZTST_getline() { # Get the name of the section. It may already have been read into # $curline, or we may have to skip some initial comments to find it. +# If argument present, it's OK to skip the reset of the current section, +# so no error if we find garbage. 
ZTST_getsect() { local match mbegin mend while [[ $ZTST_curline != '%'(#b)([[:alnum:]]##)* ]]; do ZTST_getline || return 1 [[ $ZTST_curline = [[:blank:]]# ]] && continue - if [[ $ZTST_curline != '%'[[:alnum:]]##* ]]; then + if [[ $# -eq 0 && $ZTST_curline != '%'[[:alnum:]]##* ]]; then ZTST_testfailed "bad line found before or after section: $ZTST_curline" + exit 1 fi done # have the next line ready waiting @@ -169,13 +228,14 @@ ${ZTST_curline[2,-1]}" $ZTST_redir" case $char in - '<') fn=$ZTST_in + ('<') fn=$ZTST_in ;; - '>') fn=$ZTST_out + ('>') fn=$ZTST_out ;; - '?') fn=$ZTST_err + ('?') fn=$ZTST_err ;; - *) ZTST_testfailed "bad redir operator: $char" + (*) ZTST_testfailed "bad redir operator: $char" + return 1 ;; esac if [[ $ZTST_flags = *q* ]]; then @@ -183,6 +243,8 @@ if [[ $ZTST_flags = *q* ]]; then else print -r -- "$ZTST_redir" >>$fn fi + +return 0 } # Execute an indented chunk. Redirections will already have @@ -191,9 +253,10 @@ ZTST_execchunk() { options=($ZTST_testopts) eval "$ZTST_code" ZTST_status=$? + # careful... ksh_arrays may be in effect. + ZTST_testopts=(${(kv)options[*]}) + options=(${ZTST_mainopts[*]}) ZTST_verbose 2 "ZTST_execchunk: status $ZTST_status" - ZTST_testopts=(${(kv)options}) - options=($ZTST_mainopts) return $ZTST_status } @@ -202,12 +265,27 @@ ZTST_execchunk() { ZTST_prepclean() { # Execute indented code chunks. while ZTST_getchunk; do - ZTST_execchunk >/dev/null || [[ -n $1 ]] || - ZTST_testfailed "non-zero status from preparation code: -$ZTST_code" + ZTST_execchunk >/dev/null || [[ -n $1 ]] || { + [[ -n "$ZTST_unimplemented" ]] || + ZTST_testfailed "non-zero status from preparation code: +$ZTST_code" && return 0 + } done } +# diff wrapper +ZTST_diff() { + local diff_out diff_ret + + diff_out=$(diff "$@") + diff_ret="$?" 
+ if [[ "$diff_ret" != "0" ]]; then + print -r "$diff_out" + fi + + return "$diff_ret" +} + ZTST_test() { local last match mbegin mend found @@ -215,6 +293,7 @@ ZTST_test() { rm -f $ZTST_in $ZTST_out $ZTST_err touch $ZTST_in $ZTST_out $ZTST_err ZTST_message='' + ZTST_failmsg='' found=0 ZTST_verbose 2 "ZTST_test: looking for new test" @@ -223,14 +302,14 @@ ZTST_test() { ZTST_verbose 2 "ZTST_test: examining line: $ZTST_curline" case $ZTST_curline in - %*) if [[ $found = 0 ]]; then + (%*) if [[ $found = 0 ]]; then break 2 else last=1 break fi ;; - [[:space:]]#) + ([[:space:]]#) if [[ $found = 0 ]]; then ZTST_getline || break 2 continue @@ -238,7 +317,7 @@ $ZTST_curline" break fi ;; - [[:space:]]##[^[:space:]]*) ZTST_getchunk + ([[:space:]]##[^[:space:]]*) ZTST_getchunk if [[ $ZTST_curline == (#b)([-0-9]##)([[:alpha:]]#)(:*)# ]]; then ZTST_xstatus=$match[1] ZTST_flags=$match[2] @@ -246,29 +325,38 @@ $ZTST_curline" else ZTST_testfailed "expecting test status at: $ZTST_curline" + return 1 fi ZTST_getline found=1 ;; - '<'*) ZTST_getredir + ('<'*) ZTST_getredir || return 1 found=1 ;; - '>'*) ZTST_getredir + ('>'*) ZTST_getredir || return 1 found=1 ;; - '?'*) ZTST_getredir + ('?'*) ZTST_getredir || return 1 found=1 ;; - *) ZTST_testfailed "bad line in test block: + ('F:'*) ZTST_failmsg="${ZTST_failmsg:+${ZTST_failmsg} +} ${ZTST_curline[3,-1]}" + ZTST_getline + found=1 + ;; + (*) ZTST_testfailed "bad line in test block: $ZTST_curline" + return 1 ;; esac done # If we found some code to execute... 
if [[ -n $ZTST_code ]]; then + ZTST_hashmark ZTST_verbose 1 "Running test: $ZTST_message" ZTST_verbose 2 "ZTST_test: expecting status: $ZTST_xstatus" + ZTST_verbose 2 "Input: $ZTST_in, output: $ZTST_out, error: $ZTST_terr" ZTST_execchunk <$ZTST_in >$ZTST_tout 2>$ZTST_terr @@ -278,6 +366,7 @@ $ZTST_curline" $ZTST_code${$(<$ZTST_terr):+ Error output: $(<$ZTST_terr)}" + return 1 fi ZTST_verbose 2 "ZTST_test: test produced standard output: @@ -286,15 +375,17 @@ ZTST_test: and standard error: $(<$ZTST_terr)" # Now check output and error. - if [[ $ZTST_flags != *d* ]] && ! diff -c $ZTST_out $ZTST_tout; then + if [[ $ZTST_flags != *d* ]] && ! ZTST_diff -c $ZTST_out $ZTST_tout; then ZTST_testfailed "output differs from expected as shown above for: $ZTST_code${$(<$ZTST_terr):+ Error output: $(<$ZTST_terr)}" + return 1 fi - if [[ $ZTST_flags != *D* ]] && ! diff -c $ZTST_err $ZTST_terr; then + if [[ $ZTST_flags != *D* ]] && ! ZTST_diff -c $ZTST_err $ZTST_terr; then ZTST_testfailed "error output differs from expected as shown above for: $ZTST_code" + return 1 fi fi ZTST_verbose 1 "Test successful." @@ -312,35 +403,52 @@ $ZTST_code" typeset -A ZTST_sects ZTST_sects=(prep 0 test 0 clean 0) +print "$ZTST_testname: starting." + # Now go through all the different sections until the end. 
-while ZTST_getsect; do +# prep section may set ZTST_unimplemented, in this case the actual +# tests will be skipped +ZTST_skipok= +ZTST_unimplemented= +while [[ -z "$ZTST_unimplemented" ]] && ZTST_getsect $ZTST_skipok; do case $ZTST_cursect in - prep) if (( ${ZTST_sects[prep]} + ${ZTST_sects[test]} + \ + (prep) if (( ${ZTST_sects[prep]} + ${ZTST_sects[test]} + \ ${ZTST_sects[clean]} )); then ZTST_testfailed "\`prep' section must come first" + exit 1 fi ZTST_prepclean ZTST_sects[prep]=1 ;; - test) + (test) if (( ${ZTST_sects[test]} + ${ZTST_sects[clean]} )); then ZTST_testfailed "bad placement of \`test' section" + exit 1 fi + # careful here: we can't execute ZTST_test before || or && + # because that affects the behaviour of traps in the tests. ZTST_test + (( $? )) && ZTST_skipok=1 ZTST_sects[test]=1 ;; - clean) + (clean) if (( ${ZTST_sects[test]} == 0 || ${ZTST_sects[clean]} )); then ZTST_testfailed "bad use of \`clean' section" + else + ZTST_prepclean 1 + ZTST_sects[clean]=1 fi - ZTST_prepclean 1 - ZTST_sects[clean]=1 + ZTST_skipok= ;; *) ZTST_testfailed "bad section name: $ZTST_cursect" ;; esac done -print "$ZTST_testname: all tests successful." +if [[ -n "$ZTST_unimplemented" ]]; then + print "$ZTST_testname: skipped ($ZTST_unimplemented)" +elif (( ! $ZTST_testfailed )); then + print "$ZTST_testname: all tests successful." +fi ZTST_cleanup -exit 0 +exit $(( ZTST_testfailed )) -- cgit 1.4.1