From d2330ba0554b09a3f942a921acfbbabcf6466bef Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Thu, 6 Apr 2000 18:44:01 +0000 Subject: 10547: (#s) and (#e) pattern assertions --- ChangeLog | 6 + Doc/Zsh/expn.yo | 11 + Misc/globtests | 76 +++++ Src/pattern.c | 91 ++++-- Src/subst.c | 852 +++++++++++++++++++++++++++++++++++++++++-------------- Test/11glob.ztst | 12 + Test/ztst.zsh | 93 ++++-- 7 files changed, 870 insertions(+), 271 deletions(-) diff --git a/ChangeLog b/ChangeLog index 58ed4c8df..678ee52e4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2000-04-06 Peter Stephenson + + * 10547: Doc/Zsh/expn.yo, Misc/globtests, Src/pattern.c, + Src/subst.c, Test/11glob.ztst, Test/ztst.zsh: add + (#s) and (#e) to match at start and end of string. + 2000-04-06 Andrew Main * zefram2: Src/lex.c: Support "3&> foo" etc. diff --git a/Doc/Zsh/expn.yo b/Doc/Zsh/expn.yo index 9d2f035c8..172949176 100644 --- a/Doc/Zsh/expn.yo +++ b/Doc/Zsh/expn.yo @@ -1299,6 +1299,17 @@ item(tt(a)var(num))( Approximate matching: var(num) errors are allowed in the string matched by the pattern. The rules for this are described in the next subsection. ) +item(tt(s), tt(e))( +Unlike the other flags, these have only a local effect, and each must +appear on its own: `tt((#s))' and `tt((#e))' are the only valid forms. +The `tt((#s))' flag succeeds only at the start of the test string, and the +`tt((#e))' flag succeeds only at the end of the test string; they +correspond to `tt(^)' and `tt($)' in standard regular expressions. They +are useful for matching path segments in patterns. For example, +`tt(*((#s)|/)test((#e)|/)*)' matches a path segment `tt(test)' in any of +the following strings: tt(test), tt(test/at/start), tt(at/end/test), +tt(in/test/middle). 
+) enditem() For example, the test string tt(fooxx) can be matched by the pattern diff --git a/Misc/globtests b/Misc/globtests index 728aee5ae..9fbab98fa 100755 --- a/Misc/globtests +++ b/Misc/globtests @@ -14,6 +14,13 @@ while read res str pat; do (( failed++ )) fi done <33 +t 633 <-1000>33 +t 633 <1->33 +t 633 <->33 +# Approximate matching +t READ.ME (#ia1)readme +f READ..ME (#ia1)readme +t README (#ia1)readm +t READM (#ia1)readme +t README (#ia1)eadme +t EADME (#ia1)readme +t READEM (#ia1)readme +f ADME (#ia1)readme +f README (#ia1)read +t bob (#a1)[b][b] +f bob (#a1)[b][b]a +t bob (#a1)[b]o[b]a +f bob (#a1)[c]o[b] +t abcd (#a2)XbcX +t abcd (#a2)ad +t ad (#a2)abcd +t abcd (#a2)bd +t bd (#a2)abcd +t badc (#a2)abcd +# This next one is a little tricky: a[d]bc[] = a[]bc[d] +t adbc (#a2)abcd +f dcba (#a2)abcd +# the next one is [d][cb][a] = [a][bc][d] with a transposition +t dcba (#a3)abcd +t aabaXaaabY (#a1)(a#b)#Y +t aabaXaaabY (#a1)(a#b)(a#b)Y +t aaXaaaaabY (#a1)(a#b)(a#b)Y +t aaaXaaabY (#a1)(a##b)##Y +t aaaXbaabY (#a1)(a##b)##Y +f read.me (#ia1)README~READ.ME +t read.me (#ia1)README~READ_ME +f read.me (#ia1)README~(#a1)READ_ME +t test *((#s)|/)test((#e)|/)* +t test/path *((#s)|/)test((#e)|/)* +t path/test *((#s)|/)test((#e)|/)* +t path/test/ohyes *((#s)|/)test((#e)|/)* +f atest *((#s)|/)test((#e)|/)* +f testy *((#s)|/)test((#e)|/)* +f testy/path *((#s)|/)test((#e)|/)* +f path/atest *((#s)|/)test((#e)|/)* +f atest/path *((#s)|/)test((#e)|/)* +f path/testy *((#s)|/)test((#e)|/)* +f path/testy/ohyes *((#s)|/)test((#e)|/)* +f path/atest/ohyes *((#s)|/)test((#e)|/)* EOT print "$failed tests failed." diff --git a/Src/pattern.c b/Src/pattern.c index d70c5c1d9..1c90f72a1 100644 --- a/Src/pattern.c +++ b/Src/pattern.c @@ -83,6 +83,8 @@ typedef union upat *Upat; #define P_ONEHASH 0x06 /* node Match this (simple) thing 0 or more times. */ #define P_TWOHASH 0x07 /* node Match this (simple) thing 1 or more times. 
*/ #define P_GFLAGS 0x08 /* long Match nothing and set globbing flags */ +#define P_ISSTART 0x09 /* no Match start of string. */ +#define P_ISEND 0x0a /* no Match end of string. */ /* numbered so we can test bit 5 for a branch */ #define P_BRANCH 0x20 /* node Match this alternative, or the next... */ #define P_WBRANCH 0x21 /* uc* node P_BRANCH, but match at least 1 char */ @@ -645,34 +647,44 @@ patcompbranch(int *flagp) /* Globbing flags. */ char *pp1 = patparse; int oldglobflags = patglobflags; + long assert; patparse += (*patparse == '@') ? 3 : 2; - if (!patgetglobflags(&patparse)) - return 0; - if (pp1 == patstart) { - /* Right at start of pattern, the simplest case. - * Put them into the flags and don't emit anything. - */ - ((Patprog)patout)->globflags = patglobflags; - continue; - } else if (!*patparse) { - /* Right at the end, so just leave the flags for - * the next Patprog in the chain to pick up. + if (!patgetglobflags(&patparse, &assert)) + return 0; + if (assert) { + /* + * Start/end assertion looking like flags, but + * actually handled as a normal node */ - break; - } - /* - * Otherwise, we have to stick them in as a pattern - * matching nothing. - */ - if (oldglobflags != patglobflags) { - /* Flags changed */ - union upat up; - latest = patnode(P_GFLAGS); - up.l = patglobflags; - patadd((char *)&up, 0, sizeof(union upat), 0); + latest = patnode(assert); + flags = 0; } else { - /* No effect. */ - continue; + if (pp1 == patstart) { + /* Right at start of pattern, the simplest case. + * Put them into the flags and don't emit anything. + */ + ((Patprog)patout)->globflags = patglobflags; + continue; + } else if (!*patparse) { + /* Right at the end, so just leave the flags for + * the next Patprog in the chain to pick up. + */ + break; + } + /* + * Otherwise, we have to stick them in as a pattern + * matching nothing. 
+ */ + if (oldglobflags != patglobflags) { + /* Flags changed */ + union upat up; + latest = patnode(P_GFLAGS); + up.l = patglobflags; + patadd((char *)&up, 0, sizeof(union upat), 0); + } else { + /* No effect. */ + continue; + } } } else if (isset(EXTENDEDGLOB) && *patparse == Hat) { /* @@ -707,10 +719,12 @@ patcompbranch(int *flagp) /**/ int -patgetglobflags(char **strp) +patgetglobflags(char **strp, long *assertp) { char *nptr, *ptr = *strp; zlong ret; + + *assertp = 0; /* (#X): assumes we are still positioned on the first X */ for (; *ptr && *ptr != Outpar; ptr++) { switch (*ptr) { @@ -763,12 +777,23 @@ patgetglobflags(char **strp) patglobflags &= ~GF_MATCHREF; break; + case 's': + *assertp = P_ISSTART; + break; + + case 'e': + *assertp = P_ISEND; + break; + default: return 0; } } if (*ptr != Outpar) return 0; + /* Start/end assertions must appear on their own. */ + if (*assertp && (*strp)[1] != Outpar) + return 0; *strp = ptr + 1; return 1; } @@ -1989,6 +2014,14 @@ patmatch(Upat prog) * anything here. */ return 0; + case P_ISSTART: + if (patinput != patinstart) + fail = 1; + break; + case P_ISEND: + if (*patinput) + fail = 1; + break; case P_END: if (!(fail = (*patinput && !(patflags & PAT_NOANCH)))) return 1; @@ -2387,6 +2420,12 @@ patprop(Upat op) case P_GFLAGS: p = "GFLAGS"; break; + case P_ISSTART: + p = "ISSTART"; + break; + case P_ISEND: + p = "ISEND"; + break; case P_NOTHING: p = "NOTHING"; break; diff --git a/Src/subst.c b/Src/subst.c index 8f840d266..668d5eda3 100644 --- a/Src/subst.c +++ b/Src/subst.c @@ -42,27 +42,22 @@ char nulstring[] = {Nularg, '\0'}; * - Brace expansion * - Tilde and equals substitution * - * Bits 0 and 1 of flags are used in filesub. - * bit 0 is set when we are doing MAGIC_EQUALSUBST or normal - * assignment but not a typeset. - * bit 1 is set on a real assignment (both typeset and normal). 
- * bit 2 is a flag to paramsubst (single word sub) + * PF_* flags are defined in zsh.h */ /**/ -void +mod_export void prefork(LinkList list, int flags) { LinkNode node; - MUSTUSEHEAP("prefork"); for (node = firstnode(list); node; incnode(node)) { - char *str, *str3; + char *str, c; - str = str3 = (char *)getdata(node); - if ((*str == Inang || *str == Outang || *str == Equals) && + str = (char *)getdata(node); + if (((c = *str) == Inang || c == Outang || c == Equals) && str[1] == Inpar) { - if (*str == Inang || *str == Outang) + if (c == Inang || c == Outang) setdata(node, (void *) getproc(str)); /* <(...) or >(...) */ else setdata(node, (void *) getoutputfile(str)); /* =(...) */ @@ -70,20 +65,22 @@ prefork(LinkList list, int flags) return; } else { if (isset(SHFILEEXPANSION)) - filesub((char **)getaddrdata(node), flags & 3); - if (!(node = stringsubst(list, node, flags & 4))) + filesub((char **)getaddrdata(node), + flags & (PF_TYPESET|PF_ASSIGN)); + if (!(node = stringsubst(list, node, flags & PF_SINGLE))) return; } } for (node = firstnode(list); node; incnode(node)) { if (*(char *)getdata(node)) { remnulargs(getdata(node)); - if (unset(IGNOREBRACES) && !(flags & 4)) + if (unset(IGNOREBRACES) && !(flags & PF_SINGLE)) while (hasbraces(getdata(node))) xpandbraces(list, &node); if (unset(SHFILEEXPANSION)) - filesub((char **)getaddrdata(node), flags & 3); - } else if (!(flags & 4)) + filesub((char **)getaddrdata(node), + flags & (PF_TYPESET|PF_ASSIGN)); + } else if (!(flags & PF_SINGLE)) uremnode(list, node); if (errflag) return; @@ -96,14 +93,16 @@ stringsubst(LinkList list, LinkNode node, int ssub) { int qt; char *str3 = (char *)getdata(node); - char *str = str3; + char *str = str3, c; - while (!errflag && *str) { - if ((qt = *str == Qstring) || *str == String) - if (str[1] == Inpar) { + while (!errflag && (c = *str)) { + if ((qt = c == Qstring) || c == String) { + if ((c = str[1]) == Inpar) { + if (!qt) + mult_isarr = 1; str++; goto comsub; - } else if (str[1] == 
Inbrack) { + } else if (c == Inbrack) { /* $[...] */ char *str2 = str; str2++; @@ -115,7 +114,7 @@ stringsubst(LinkList list, LinkNode node, int ssub) str = arithsubst(str + 2, &str3, str2); setdata(node, (void *) str3); continue; - } else if (str[1] == Snull) { + } else if (c == Snull) { str = getkeystring(str, NULL, 4, NULL); continue; } else { @@ -125,21 +124,25 @@ stringsubst(LinkList list, LinkNode node, int ssub) str3 = (char *)getdata(node); continue; } - else if ((qt = *str == Qtick) || *str == Tick) + } else if ((qt = c == Qtick) || c == Tick) comsub: { LinkList pl; char *s, *str2 = str; char endchar; int l1, l2; - if (*str == Inpar) { + if (c == Inpar) { endchar = Outpar; str[-1] = '\0'; +#ifdef DEBUG if (skipparens(Inpar, Outpar, &str)) - DPUTS(1, "BUG: parse error in command substitution"); + dputs("BUG: parse error in command substitution"); +#else + skipparens(Inpar, Outpar, &str); +#endif str--; } else { - endchar = *str; + endchar = c; *str = '\0'; while (*++str != endchar) @@ -148,7 +151,8 @@ stringsubst(LinkList list, LinkNode node, int ssub) *str++ = '\0'; if (endchar == Outpar && str2[1] == '(' && str[-2] == ')') { /* Math substitution of the form $((...)) */ - str = arithsubst(str2 + 1, &str3, str); + str[-2] = '\0'; + str = arithsubst(str2 + 2, &str3, str); setdata(node, (void *) str3); continue; } @@ -159,12 +163,12 @@ stringsubst(LinkList list, LinkNode node, int ssub) * be left unchanged. Note that the lexer doesn't tokenize * * the body of a command substitution so if there are some * * tokens here they are from a ${(e)~...} substitution. 
*/ - for (str = str2; *++str; ) - if (itok(*str) && *str != Nularg && - !(endchar != Outpar && *str == Bnull && + for (str = str2; (c = *++str); ) + if (itok(c) && c != Nularg && + !(endchar != Outpar && c == Bnull && (str[1] == '$' || str[1] == '\\' || str[1] == '`' || (qt && str[1] == '"')))) - *str = ztokens[*str - Pound]; + *str = ztokens[c - Pound]; str++; if (!(pl = getoutput(str2 + 1, qt || ssub))) { zerr("parse error in command substitution", NULL, 0); @@ -182,7 +186,7 @@ stringsubst(LinkList list, LinkNode node, int ssub) l2 = strlen(s); if (nonempty(pl)) { LinkNode n = lastnode(pl); - str2 = (char *) ncalloc(l1 + l2 + 1); + str2 = (char *) hcalloc(l1 + l2 + 1); strcpy(str2, str3); strcpy(str2 + l1, s); setdata(node, str2); @@ -191,7 +195,7 @@ stringsubst(LinkList list, LinkNode node, int ssub) l1 = 0; l2 = strlen(s); } - str2 = (char *) ncalloc(l1 + l2 + strlen(str) + 1); + str2 = (char *) hcalloc(l1 + l2 + strlen(str) + 1); if (l1) strcpy(str2, str3); strcpy(str2 + l1, s); @@ -206,15 +210,15 @@ stringsubst(LinkList list, LinkNode node, int ssub) } /**/ -void -globlist(LinkList list) +mod_export void +globlist(LinkList list, int nountok) { LinkNode node, next; badcshglob = 0; for (node = firstnode(list); !errflag && node; node = next) { next = nextnode(node); - glob(list, node); + glob(list, node, nountok); } if (badcshglob == 1) zerr("no match", NULL, 0); @@ -223,71 +227,84 @@ globlist(LinkList list) /* perform substitution on a single word */ /**/ -void +mod_export void singsub(char **s) { - LinkList foo; + local_list1(foo); - foo = newlinklist(); - addlinknode(foo, *s); - prefork(foo, 4); + init_list1(foo, *s); + + prefork(&foo, PF_SINGLE); if (errflag) return; - *s = (char *) ugetnode(foo); - DPUTS(nonempty(foo), "BUG: singsub() produced more than one word!"); + *s = (char *) ugetnode(&foo); + DPUTS(nonempty(&foo), "BUG: singsub() produced more than one word!"); } /* Perform substitution on a single word. 
Unlike with singsub, the * - * result can have more than one words. A single word result is sroted * + * result can have more than one word. A single word result is stored * * in *s and *isarr is set to zero; otherwise *isarr is set to 1 and * * the result is stored in *a. If `a' is zero a multiple word result is * * joined using sep or the IFS parameter if sep is zero and the result * * is returned in *s. The return value is true iff the expansion * - * resulted in an empty list */ + * resulted in an empty list. * + * The mult_isarr variable is used by paramsubst() to tell if it yields * + * an array. */ + +/**/ +static int mult_isarr; /**/ static int multsub(char **s, char ***a, int *isarr, char *sep) { - LinkList foo; - int l; + int l, omi = mult_isarr; char **r, **p; + local_list1(foo); - foo = newlinklist(); - addlinknode(foo, *s); - prefork(foo, 0); + mult_isarr = 0; + init_list1(foo, *s); + prefork(&foo, 0); if (errflag) { if (isarr) *isarr = 0; + mult_isarr = omi; return 0; } - if ((l = countlinknodes(foo)) > 1) { - p = r = ncalloc((l + 1) * sizeof(char*)); - while (nonempty(foo)) - *p++ = (char *)ugetnode(foo); + if ((l = countlinknodes(&foo))) { + p = r = hcalloc((l + 1) * sizeof(char*)); + while (nonempty(&foo)) + *p++ = (char *)ugetnode(&foo); *p = NULL; - if (a) { + if (a && mult_isarr) { *a = r; - *isarr = 1; + *isarr = SCANPM_MATCHMANY; + mult_isarr = omi; return 0; } - *s = sepjoin(r, NULL); + *s = sepjoin(r, NULL, 1); + mult_isarr = omi; + if (isarr) + *isarr = 0; return 0; } if (l) - *s = (char *) ugetnode(foo); + *s = (char *) ugetnode(&foo); else *s = dupstring(""); if (isarr) *isarr = 0; + mult_isarr = omi; return !l; } -/* ~, = subs: assign = 2 => typeset; assign = 1 => something that looks - like an assignment but may not be; assign = 3 => normal assignment */ +/* + * ~, = subs: assign & PF_TYPESET => typeset or magic equals + * assign & PF_ASSIGN => normal assignment + */ /**/ -void +mod_export void filesub(char **namptr, int assign) { char 
*sub = NULL, *str, *ptr; @@ -298,12 +315,8 @@ filesub(char **namptr, int assign) if (!assign) return; - if (assign < 3) + if (assign & PF_TYPESET) { if ((*namptr)[1] && (sub = strchr(*namptr + 1, Equals))) { - if (assign == 1) - for (ptr = *namptr; ptr != sub; ptr++) - if (!iident(*ptr) && !INULL(*ptr)) - return; str = sub + 1; if ((sub[1] == Tilde || sub[1] == Equals) && filesubstr(&str, assign)) { sub[1] = '\0'; @@ -311,6 +324,7 @@ filesub(char **namptr, int assign) } } else return; + } ptr = *namptr; while ((sub = strchr(ptr, ':'))) { @@ -325,7 +339,7 @@ filesub(char **namptr, int assign) } /**/ -int +mod_export int filesubstr(char **namptr, int assign) { #define isend(c) ( !(c) || (c)=='/' || (c)==Inpar || (assign && (c)==':') ) @@ -382,11 +396,11 @@ filesubstr(char **namptr, int assign) for (pp = str + 1; !isend2(*pp); pp++); sav = *pp; *pp = 0; - if (!(cnam = findcmd(str + 1))) { + if (!(cnam = findcmd(str + 1, 1))) { Alias a = (Alias) aliastab->getnode(aliastab, str + 1); if (a) - cnam = ztrdup(a->text); + cnam = a->text; else { if (isset(NOMATCH)) zerr("%s not found", str + 1, 0); @@ -394,7 +408,6 @@ filesubstr(char **namptr, int assign) } } *namptr = dupstring(cnam); - zsfree(cnam); if (sav) { *pp = sav; *namptr = dyncat(*namptr, pp); @@ -408,20 +421,27 @@ filesubstr(char **namptr, int assign) /**/ static char * -strcatsub(char **d, char *pb, char *pe, char *src, int l, char *s, int glbsub) +strcatsub(char **d, char *pb, char *pe, char *src, int l, char *s, int glbsub, + int copied) { + char *dest; int pl = pe - pb; - char *dest = ncalloc(pl + l + (s ? strlen(s) : 0) + 1); - - *d = dest; - strncpy(dest, pb, pl); - dest += pl; - strcpy(dest, src); - if (glbsub) - tokenize(dest); - dest += l; - if (s) - strcpy(dest, s); + + if (!pl && (!s || !*s)) { + *d = dest = (copied ? src : dupstring(src)); + if (glbsub) + tokenize(dest); + } else { + *d = dest = hcalloc(pl + l + (s ? 
strlen(s) : 0) + 1); + strncpy(dest, pb, pl); + dest += pl; + strcpy(dest, src); + if (glbsub) + tokenize(dest); + dest += l; + if (s) + strcpy(dest, s); + } return dest; } @@ -529,7 +549,7 @@ dopadding(char *str, int prenum, int postnum, char *preone, char *postone, char if (lr == ls) return str; - r = ret = (char *)halloc(lr + 1); + r = ret = (char *)zhalloc(lr + 1); if (prenum) { if (postnum) { @@ -657,7 +677,7 @@ get_intarg(char **s) { char *t = get_strarg(*s + 1); char *p, sav; - long ret; + zlong ret; if (!*t) return -1; @@ -671,7 +691,7 @@ get_intarg(char **s) singsub(&p); if (errflag) return -1; - ret = matheval(p); + ret = mathevali(p); if (errflag) return -1; if (ret < 0) @@ -679,6 +699,26 @@ get_intarg(char **s) return ret < 0 ? -ret : ret; } +/* Parsing for the (e) flag. */ + +static int +subst_parse_str(char **sp, int single) +{ + char *s; + + *sp = s = dupstring(*sp); + + if (!parsestr(s)) { + if (!single) { + for (; *s; s++) + if (*s == Qstring) + *s = String; + } + return 0; + } + return 1; +} + /* parameter substitution */ #define isstring(c) ((c) == '$' || (char)(c) == String || (char)(c) == Qstring) @@ -688,10 +728,9 @@ get_intarg(char **s) LinkNode paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) { - char *aptr = *str; + char *aptr = *str, c, cc; char *s = aptr, *fstr, *idbeg, *idend, *ostr = (char *) getdata(n); int colf; /* != 0 means we found a colon after the name */ - int doub = 0; /* != 0 means we have %%, not %, or ##, not # */ int isarr = 0; int plan9 = isset(RCEXPANDPARAM); int globsubst = isset(GLOBSUBST); @@ -699,45 +738,58 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) int whichlen = 0; int chkset = 0; int vunset = 0; + int wantt = 0; int spbreak = isset(SHWORDSPLIT) && !ssub && !qt; char *val = NULL, **aval = NULL; unsigned int fwidth = 0; - Value v; + struct value vbuf; + Value v = NULL; int flags = 0; int flnum = 0; - int substr = 0; int sortit = 0, casind = 0; int casmod = 0; + int quotemod = 
0, quotetype = 0, quoteerr = 0; + int visiblemod = 0; char *sep = NULL, *spsep = NULL; char *premul = NULL, *postmul = NULL, *preone = NULL, *postone = NULL; - long prenum = 0, postnum = 0; + char *replstr = NULL; /* replacement string for /orig/repl */ + zlong prenum = 0, postnum = 0; int copied = 0; int arrasg = 0; int eval = 0; + int aspar = 0; + int presc = 0; int nojoin = 0; char inbrace = 0; /* != 0 means ${...}, otherwise $... */ + char hkeys = 0; + char hvals = 0; + int subexp; *s++ = '\0'; - if (!ialnum(*s) && *s != '#' && *s != Pound && *s != '-' && - *s != '!' && *s != '$' && *s != String && *s != Qstring && - *s != '?' && *s != Quest && *s != '_' && - *s != '*' && *s != Star && *s != '@' && *s != '{' && - *s != Inbrace && *s != '=' && *s != Equals && *s != Hat && - *s != '^' && *s != '~' && *s != Tilde && *s != '+') { + if (!ialnum(c = *s) && c != '#' && c != Pound && c != '-' && + c != '!' && c != '$' && c != String && c != Qstring && + c != '?' && c != Quest && c != '_' && + c != '*' && c != Star && c != '@' && c != '{' && + c != Inbrace && c != '=' && c != Equals && c != Hat && + c != '^' && c != '~' && c != Tilde && c != '+') { s[-1] = '$'; *str = s; return n; } - DPUTS(*s == '{', "BUG: inbrace == '{' in paramsubst()"); - if (*s == Inbrace) { + DPUTS(c == '{', "BUG: inbrace == '{' in paramsubst()"); + if (c == Inbrace) { inbrace = 1; s++; - if (*s == '(' || *s == Inpar) { + if ((c = *s) == '!' && s[1] != Outbrace && emulation == EMULATE_KSH) { + hkeys = SCANPM_WANTKEYS; + s++; + } else if (c == '(' || c == Inpar) { char *t, sav; int tt = 0; - long num; + zlong num; int escapes = 0; int klen; +#define UNTOK(C) (itok(C) ? 
ztokens[(C) - Pound] : (C)) #define UNTOK_AND_ESCAPE(X) {\ untokenize(X = dupstring(s + 1));\ if (escapes) {\ @@ -746,34 +798,34 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) }\ } - for (s++; *s != ')' && *s != Outpar; s++, tt = 0) { - switch (*s) { + for (s++; (c = *s) != ')' && c != Outpar; s++, tt = 0) { + switch (c) { case ')': case Outpar: break; case 'A': - arrasg = 1; + ++arrasg; break; case '@': nojoin = 1; break; case 'M': - flags |= 8; + flags |= SUB_MATCH; break; case 'R': - flags |= 16; + flags |= SUB_REST; break; case 'B': - flags |= 32; + flags |= SUB_BIND; break; case 'E': - flags |= 64; + flags |= SUB_EIND; break; case 'N': - flags |= 128; + flags |= SUB_LEN; break; case 'S': - substr = 1; + flags |= SUB_SUBSTR; break; case 'I': flnum = get_intarg(&s); @@ -800,9 +852,27 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) case 'i': casind = 1; break; + + case 'V': + visiblemod++; + break; + + case 'q': + quotemod++, quotetype++; + break; + case 'Q': + quotemod--; + break; + case 'X': + quoteerr = 1; + break; + case 'e': eval = 1; break; + case 'P': + aspar = 1; + break; case 'c': whichlen = 1; @@ -851,7 +921,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) prenum = num; else postnum = num; - if (s[1] != sav) + if (UNTOK(s[1]) != UNTOK(sav)) break; t = get_strarg(++s); if (!*t) @@ -865,7 +935,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) *t = sav; sav = *s; s = t + 1; - if (*s != sav) { + if (UNTOK(*s) != UNTOK(sav)) { s--; break; } @@ -886,6 +956,21 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) escapes = 1; break; + case 'k': + hkeys = SCANPM_WANTKEYS; + break; + case 'v': + hvals = SCANPM_WANTVALS; + break; + + case 't': + wantt = 1; + break; + + case '%': + presc++; + break; + default: flagerr: zerr("error in flags", NULL, 0); @@ -904,31 +989,33 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) postmul = " "; for (;;) { - if 
(*s == '^' || *s == Hat) { - if (*++s == '^' || *s == Hat) { + if ((c = *s) == '^' || c == Hat) { + if ((c = *++s) == '^' || c == Hat) { plan9 = 0; s++; } else plan9 = 1; - } else if (*s == '=' || *s == Equals) { - if (*++s == '=' || *s == Equals) { + } else if ((c = *s) == '=' || c == Equals) { + if ((c = *++s) == '=' || c == Equals) { spbreak = 0; s++; } else spbreak = 1; - } else if ((*s == '#' || *s == Pound) && - (iident(s[1]) - || s[1] == '*' || s[1] == Star || s[1] == '@' - || (isstring(s[1]) && (s[2] == Inbrace || s[2] == Inpar)))) + } else if ((c == '#' || c == Pound) && + (iident(cc = s[1]) + || cc == '*' || cc == Star || cc == '@' + || cc == '-' || (cc == ':' && s[2] == '-') + || (isstring(cc) && (s[2] == Inbrace || s[2] == Inpar)))) getlen = 1 + whichlen, s++; - else if (*s == '~' || *s == Tilde) { - if (*++s == '~' || *s == Tilde) { + else if (c == '~' || c == Tilde) { + if ((c = *++s) == '~' || c == Tilde) { globsubst = 0; s++; } else globsubst = 1; - } else if (*s == '+') - if (iident(s[1])) + } else if (c == '+') { + if (iident(s[1]) || (aspar && isstring(s[1]) && + (s[2] == Inbrace || s[2] == Inpar))) chkset = 1, s++; else if (!inbrace) { *aptr = '$'; @@ -938,13 +1025,16 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) zerr("bad substitution", NULL, 0); return NULL; } + } else if (inbrace && INULL(*s)) + s++; else break; } globsubst = globsubst && !qt; idbeg = s; - if (s[-1] && isstring(*s) && (s[1] == Inbrace || s[1] == Inpar)) { + if ((subexp = (inbrace && s[-1] && isstring(*s) && + (s[1] == Inbrace || s[1] == Inpar)))) { int sav; int quoted = *s == Qstring; @@ -952,17 +1042,78 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) skipparens(*s, *s == Inpar ? Outpar : Outbrace, &s); sav = *s; *s = 0; - if (multsub(&val, &aval, &isarr, NULL) && quoted) { - isarr = -1; - aval = alloc(sizeof(char *)); - } - if (isarr) + if (multsub(&val, (aspar ? 
NULL : &aval), &isarr, NULL) && quoted) { isarr = -1; - copied = 1; + aval = (char **) hcalloc(sizeof(char *)); + aspar = 0; + } else if (aspar) + idbeg = val; *s = sav; + while (INULL(*s)) + s++; v = (Value) NULL; - } else if (!(v = getvalue(&s, (unset(KSHARRAYS) || inbrace) ? 1 : -1))) - vunset = 1; + } else if (aspar) { + if ((v = getvalue(&vbuf, &s, 1))) { + val = idbeg = getstrvalue(v); + subexp = 1; + } else + vunset = 1; + } + if (!subexp || aspar) { + char *ov = val; + + if (!(v = fetchvalue(&vbuf, (subexp ? &ov : &s), + (wantt ? -1 : + ((unset(KSHARRAYS) || inbrace) ? 1 : -1)), + hkeys|hvals|(arrasg ? SCANPM_ASSIGNING : 0))) || + (v->pm && (v->pm->flags & PM_UNSET))) + vunset = 1; + + if (wantt) { + if (v && v->pm && !(v->pm->flags & PM_UNSET)) { + int f = v->pm->flags; + + switch (PM_TYPE(f)) { + case PM_SCALAR: val = "scalar"; break; + case PM_ARRAY: val = "array"; break; + case PM_INTEGER: val = "integer"; break; + case PM_EFLOAT: + case PM_FFLOAT: val = "float"; break; + case PM_HASHED: val = "association"; break; + } + val = dupstring(val); + if (v->pm->level) + val = dyncat(val, "-local"); + if (f & PM_LEFT) + val = dyncat(val, "-left"); + if (f & PM_RIGHT_B) + val = dyncat(val, "-right_blanks"); + if (f & PM_RIGHT_Z) + val = dyncat(val, "-right_zeros"); + if (f & PM_LOWER) + val = dyncat(val, "-lower"); + if (f & PM_UPPER) + val = dyncat(val, "-upper"); + if (f & PM_READONLY) + val = dyncat(val, "-readonly"); + if (f & PM_TAGGED) + val = dyncat(val, "-tag"); + if (f & PM_EXPORTED) + val = dyncat(val, "-export"); + if (f & PM_UNIQUE) + val = dyncat(val, "-unique"); + if (f & PM_HIDE) + val = dyncat(val, "-hide"); + if (f & PM_SPECIAL) + val = dyncat(val, "-special"); + vunset = 0; + } else + val = dupstring(""); + + v = NULL; + isarr = 0; + } + } while (v || ((inbrace || (unset(KSHARRAYS) && vunset)) && isbrack(*s))) { if (!v) { Param pm; @@ -986,9 +1137,15 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) if (getindex(&s, v) || s 
== os) break; } - if ((isarr = v->isarr)) - aval = getarrvalue(v); - else { + if ((isarr = v->isarr)) { + /* No way to get here with v->inv != 0, so getvaluearr() * + * is called by getarrvalue(); needn't test PM_HASHED. */ + if (v->isarr == SCANPM_WANTINDEX) { + isarr = v->isarr = 0; + val = dupstring(v->pm->nam); + } else + aval = getarrvalue(v); + } else { if (v->pm->flags & PM_ARRAY) { int tmplen = arrlen(v->pm->gets.afn(v->pm)); @@ -1013,7 +1170,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) else while (iblank(*t)) t++; - val = (char *)ncalloc(fwidth + 1); + val = (char *) hcalloc(fwidth + 1); val[fwidth] = '\0'; if ((t0 = strlen(t)) > fwidth) t0 = fwidth; @@ -1023,18 +1180,28 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) case PM_RIGHT_B: case PM_RIGHT_Z: case PM_RIGHT_Z | PM_RIGHT_B: - if (strlen(val) < fwidth) { - t = (char *)ncalloc(fwidth + 1); - memset(t, (v->pm->flags & PM_RIGHT_B) ? ' ' : '0', fwidth); - if ((t0 = strlen(val)) > fwidth) - t0 = fwidth; - strcpy(t + (fwidth - t0), val); - val = t; - } else { - t = (char *)ncalloc(fwidth + 1); - t[fwidth] = '\0'; - strncpy(t, val + strlen(val) - fwidth, fwidth); - val = t; + { + int zero = 1; + + if (strlen(val) < fwidth) { + if (v->pm->flags & PM_RIGHT_Z) { + for (t = val; iblank(*t); t++); + if (!*t || !idigit(*t)) + zero = 0; + } + t = (char *) hcalloc(fwidth + 1); + memset(t, (((v->pm->flags & PM_RIGHT_B) || !zero) ? 
+ ' ' : '0'), fwidth); + if ((t0 = strlen(val)) > fwidth) + t0 = fwidth; + strcpy(t + (fwidth - t0), val); + val = t; + } else { + t = (char *) hcalloc(fwidth + 1); + t[fwidth] = '\0'; + strncpy(t, val + strlen(val) - fwidth, fwidth); + val = t; + } } break; } @@ -1043,13 +1210,13 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) case PM_LOWER: t = val; - for (; *t; t++) - *t = tulower(*t); + for (; (c = *t); t++) + *t = tulower(c); break; case PM_UPPER: t = val; - for (; *t; t++) - *t = tuupper(*t); + for (; (c = *t); t++) + *t = tuupper(c); break; } } @@ -1062,12 +1229,15 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) if (nojoin) isarr = -1; if (qt && !getlen && isarr > 0) { - val = sepjoin(aval, sep); + val = sepjoin(aval, sep, 1); isarr = 0; } } idend = s; + if (inbrace) + while (INULL(*s)) + s++; if ((colf = *s == ':')) s++; @@ -1077,12 +1247,10 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) fstr = s; if (inbrace) { int bct; - for (bct = 1;; fstr++) { - if (!*fstr) - break; - else if (*fstr == Inbrace) + for (bct = 1; (c = *fstr); fstr++) { + if (c == Inbrace) bct++; - else if (*fstr == Outbrace && !--bct) + else if (c == Outbrace && !--bct) break; } @@ -1091,36 +1259,83 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) zerr("closing brace expected", NULL, 0); return NULL; } - if (*fstr) + if (c) *fstr++ = '\0'; } /* Check for ${..?..} or ${..=..} or one of those. * * Only works if the name is in braces. */ - if (inbrace && (*s == '-' || - *s == '+' || - *s == ':' || - *s == '=' || *s == Equals || - *s == '%' || - *s == '#' || *s == Pound || - *s == '?' || *s == Quest)) { + if (inbrace && ((c = *s) == '-' || + c == '+' || + c == ':' || + c == '=' || c == Equals || + c == '%' || + c == '#' || c == Pound || + c == '?' 
|| c == Quest || + c == '/')) { if (!flnum) flnum++; - if (*s == '%') - flags |= 1; + if (c == '%') + flags |= SUB_END; /* Check for ${..%%..} or ${..##..} */ - if ((*s == '%' || *s == '#' || *s == Pound) && *s == s[1]) { + if ((c == '%' || c == '#' || c == Pound) && c == s[1]) { s++; - doub = 1; + /* we have %%, not %, or ##, not # */ + flags |= SUB_LONG; } s++; + if (s[-1] == '/') { + char *ptr; + /* + * previous flags are irrelevant, except for (S) which + * indicates shortest substring; else look for longest. + */ + flags = (flags & SUB_SUBSTR) ? 0 : SUB_LONG; + if ((c = *s) == '/') { + /* doubled, so replace all occurrences */ + flags |= SUB_GLOBAL; + s++; + } + /* Check for anchored substitution */ + if (c == '%') { + /* anchor at tail */ + flags |= SUB_END; + s++; + } else if (c == '#' || c == Pound) { + /* anchor at head: this is the `normal' case in getmatch */ + s++; + } else + flags |= SUB_SUBSTR; + /* + * Find the / marking the end of the search pattern. + * If there isn't one, we're just going to delete that, + * i.e. replace it with an empty string. + * + * This allows quotation of the slash with '\\/'. Why + * two? Well, for a non-quoted string we can check for + * Bnull+/, which is what you get from `\/', but inside + * double quotes the Bnull isn't there, so it's not + * consistent. + */ + for (ptr = s; (c = *ptr) && c != '/'; ptr++) + if (c == '\\' && ptr[1] == '/') + chuck(ptr); + replstr = (*ptr && ptr[1]) ? ptr+1 : ""; + *ptr = '\0'; + } - flags |= (doub << 1) | (substr << 2) | (colf << 8); - if (!(flags & 0xf8)) - flags |= 16; + if (colf) + flags |= SUB_ALL; + /* + * With no special flags, i.e. just a # or % or whatever, + * the matched portion is removed and we keep the rest. + * We also want the rest when we're doing a substitution. + */ + if (!(flags & (SUB_MATCH|SUB_REST|SUB_BIND|SUB_EIND|SUB_LEN))) + flags |= SUB_REST; if (colf && !vunset) vunset = (isarr) ? 
!*aval : !*val || (*val == Nularg && !val[1]); @@ -1138,7 +1353,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) case '-': if (vunset) { val = dupstring(s); - multsub(&val, &aval, &isarr, NULL); + multsub(&val, NULL, &isarr, NULL); copied = 1; } break; @@ -1164,7 +1379,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) if (arrasg) { char *arr[2], **t, **a, **p; if (spsep || spbreak) { - aval = sepsplit(val, spsep, 0); + aval = sepsplit(val, spsep, 0, 1); isarr = 2; sep = spsep = NULL; spbreak = 0; @@ -1176,10 +1391,15 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) else t = aval; } else if (!isarr) { - arr[0] = val; - arr[1] = NULL; + if (!*val && arrasg > 1) { + arr[0] = NULL; + l = 0; + } else { + arr[0] = val; + arr[1] = NULL; + l = 1; + } t = aval = arr; - l = 1; } else l = arrlen(aval), t = aval; p = a = zalloc(sizeof(char *) * (l + 1)); @@ -1188,7 +1408,12 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) *p++ = ztrdup(*t++); } *p++ = NULL; - setaparam(idbeg, a); + if (arrasg > 1) { + Param pm = sethparam(idbeg, a); + if (pm) + aval = paramvalarr(pm->gets.hfn(pm), hkeys|hvals); + } else + setaparam(idbeg, a); } else { untokenize(val); setsparam(idbeg, ztrdup(val)); @@ -1214,27 +1439,47 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) case '%': case '#': case Pound: - if (qt) - if (parse_subst_string(s)) { + case '/': + if (qt) { + int one = noerrs, oef = errflag, haserr; + + if (!quoteerr) + noerrs = 1; + haserr = parse_subst_string(s); + noerrs = one; + if (!quoteerr) { + errflag = oef; + if (haserr) + tokenize(s); + } else if (haserr || errflag) { zerr("parse error in ${...%c...} substitution", NULL, s[-1]); return NULL; } - singsub(&s); + } + { + char t = s[-1]; + + singsub(&s); + if (t == '/' && (flags & SUB_SUBSTR)) { + if ((c = *s) == '#' || c == '%') { + flags &= ~SUB_SUBSTR; + if (c == '%') + flags |= SUB_END; + s++; + } else if (c == '\\') { + s++; + } + 
} + } if (!vunset && isarr) { - char **ap = aval; - char **pp = aval = (char **)ncalloc(sizeof(char *) * (arrlen(aval) + 1)); - - while ((*pp = *ap++)) { - if (getmatch(pp, s, flags, flnum)) - pp++; - } + getmatcharr(&aval, s, flags, flnum, replstr); copied = 1; } else { if (vunset) val = dupstring(""); - getmatch(&val, s, flags, flnum); + getmatch(&val, s, flags, flnum, replstr); copied = 1; } break; @@ -1259,7 +1504,8 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) else { char *ss; char **ap = aval; - char **pp = aval = (char **)ncalloc(sizeof(char *) * (arrlen(aval) + 1)); + char **pp = aval = (char **) hcalloc(sizeof(char *) * + (arrlen(aval) + 1)); while ((*pp = *ap++)) { ss = s; @@ -1272,6 +1518,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) } s = ss; } + copied = 1; if (inbrace && *s) { if (*s == ':' && !imeta(s[1])) zerr("unrecognized modifier `%c'", NULL, s[1]); @@ -1316,10 +1563,15 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) val = dupstring(buf); isarr = 0; } + mult_isarr = isarr; if (isarr > 0 && !plan9 && (!aval || !aval[0])) { val = dupstring(""); isarr = 0; } else if (isarr && aval && aval[0] && !aval[1]) { + /* treat a one-element array as a scalar for purposes of * + * concatenation with surrounding text (some${param}thing) * + * and rc_expand_param handling. Note: mult_isarr (above) * + * propagates the true array type from nested expansions. */ val = aval[0]; isarr = 0; } @@ -1327,9 +1579,9 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) * It means that we must join arrays and should not split words. 
*/ if (ssub || spbreak || spsep || sep) { if (isarr) - val = sepjoin(aval, sep), isarr = 0; + val = sepjoin(aval, sep, 1), isarr = 0; if (!ssub && (spbreak || spsep)) { - aval = sepsplit(val, spsep, 0); + aval = sepsplit(val, spsep, 0, 1); if (!aval || !aval[0]) val = dupstring(""); else if (!aval[1]) @@ -1337,6 +1589,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) else isarr = 2; } + mult_isarr = isarr; } if (casmod) { if (isarr) { @@ -1367,6 +1620,134 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) makecapitals(&val); } } + if (presc) { + int ops = opts[PROMPTSUBST], opb = opts[PROMPTBANG]; + int opp = opts[PROMPTPERCENT], len; + + if (presc < 2) { + opts[PROMPTPERCENT] = 1; + opts[PROMPTSUBST] = opts[PROMPTBANG] = 0; + } + if (isarr) { + char **ap; + + if (!copied) + aval = arrdup(aval), copied = 1; + ap = aval; + for (; *ap; ap++) { + unmetafy(*ap, &len); + untokenize(*ap); + *ap = unmetafy(promptexpand(metafy(*ap, len, META_NOALLOC), + 0, NULL, NULL), &len); + } + } else { + if (!copied) + val = dupstring(val), copied = 1; + unmetafy(val, &len); + untokenize(val); + val = unmetafy(promptexpand(metafy(val, len, META_NOALLOC), + 0, NULL, NULL), &len); + } + opts[PROMPTSUBST] = ops; + opts[PROMPTBANG] = opb; + opts[PROMPTPERCENT] = opp; + } + if (quotemod) { + if (--quotetype > 3) + quotetype = 3; + if (isarr) { + char **ap; + + if (!copied) + aval = arrdup(aval), copied = 1; + ap = aval; + + if (quotemod > 0) { + if (quotetype) { + int sl; + char *tmp; + + for (; *ap; ap++) { + int pre = quotetype != 3 ? 1 : 2; + tmp = bslashquote(*ap, NULL, quotetype); + sl = strlen(tmp); + *ap = (char *) zhalloc(pre + sl + 2); + strcpy((*ap) + pre, tmp); + ap[0][pre - 1] = ap[0][pre + sl] = (quotetype != 2 ? 
'\'' : '"'); + ap[0][pre + sl + 1] = '\0'; + if (quotetype == 3) + ap[0][0] = '$'; + } + } else + for (; *ap; ap++) + *ap = bslashquote(*ap, NULL, 0); + } else { + int one = noerrs, oef = errflag, haserr = 0; + + if (!quoteerr) + noerrs = 1; + for (; *ap; ap++) { + haserr |= parse_subst_string(*ap); + remnulargs(*ap); + untokenize(*ap); + } + noerrs = one; + if (!quoteerr) + errflag = oef; + else if (haserr || errflag) { + zerr("parse error in parameter value", NULL, 0); + return NULL; + } + } + } else { + if (!copied) + val = dupstring(val), copied = 1; + if (quotemod > 0) { + if (quotetype) { + int pre = quotetype != 3 ? 1 : 2; + int sl; + char *tmp; + tmp = bslashquote(val, NULL, quotetype); + sl = strlen(tmp); + val = (char *) zhalloc(pre + sl + 2); + strcpy(val + pre, tmp); + val[pre - 1] = val[pre + sl] = (quotetype != 2 ? '\'' : '"'); + val[pre + sl + 1] = '\0'; + if (quotetype == 3) + val[0] = '$'; + } else + val = bslashquote(val, NULL, 0); + } else { + int one = noerrs, oef = errflag, haserr; + + if (!quoteerr) + noerrs = 1; + haserr = parse_subst_string(val); + noerrs = one; + if (!quoteerr) + errflag = oef; + else if (haserr || errflag) { + zerr("parse error in parameter value", NULL, 0); + return NULL; + } + remnulargs(val); + untokenize(val); + } + } + } + if (visiblemod) { + if (isarr) { + char **ap; + if (!copied) + aval = arrdup(aval), copied = 1; + for (ap = aval; *ap; ap++) + *ap = nicedupstring(*ap); + } else { + if (!copied) + val = dupstring(val), copied = 1; + val = nicedupstring(val); + } + } if (isarr) { char *x; char *y; @@ -1378,7 +1759,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) if (aptr > (char *) getdata(n) && aptr[-1] == Dnull && *fstr == Dnull) *--aptr = '\0', fstr++; - y = (char *)ncalloc((aptr - ostr) + strlen(fstr) + 1); + y = (char *) hcalloc((aptr - ostr) + strlen(fstr) + 1); strcpy(y, ostr); *str = y + (aptr - ostr); strcpy(*str, fstr); @@ -1398,26 +1779,27 @@ paramsubst(LinkList l, LinkNode n, char 
**str, int qt, int ssub) qsort(aval, i, sizeof(char *), sortfn[sortit-1]); } if (plan9) { - LinkList tl = newlinklist(); LinkNode tn; + local_list1(tl); *--fstr = Marker; - addlinknode(tl, fstr); - if (!eval && !stringsubst(tl, firstnode(tl), ssub)) + init_list1(tl, fstr); + if (!eval && !stringsubst(&tl, firstnode(&tl), ssub)) return NULL; *str = aptr; - tn = firstnode(tl); + tn = firstnode(&tl); while ((x = *aval++)) { if (prenum || postnum) x = dopadding(x, prenum, postnum, preone, postone, premul, postmul); - if (eval && parsestr(x)) + if (eval && subst_parse_str(&x, (qt && !nojoin))) return NULL; xlen = strlen(x); - for (tn = firstnode(tl); + for (tn = firstnode(&tl); tn && *(y = (char *) getdata(tn)) == Marker; incnode(tn)) { - strcatsub(&y, ostr, aptr, x, xlen, y + 1, globsubst); + strcatsub(&y, ostr, aptr, x, xlen, y + 1, globsubst, + copied); if (qt && !*y && isarr != 2) y = dupstring(nulstring); if (plan9) @@ -1446,10 +1828,10 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) if (prenum || postnum) x = dopadding(x, prenum, postnum, preone, postone, premul, postmul); - if (eval && parsestr(x)) + if (eval && subst_parse_str(&x, (qt && !nojoin))) return NULL; xlen = strlen(x); - strcatsub(&y, ostr, aptr, x, xlen, NULL, globsubst); + strcatsub(&y, ostr, aptr, x, xlen, NULL, globsubst, copied); if (qt && !*y && isarr != 2) y = dupstring(nulstring); setdata(n, (void *) y); @@ -1461,7 +1843,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) if (prenum || postnum) x = dopadding(x, prenum, postnum, preone, postone, premul, postmul); - if (eval && parsestr(x)) + if (eval && subst_parse_str(&x, (qt && !nojoin))) return NULL; if (qt && !*x && isarr != 2) y = dupstring(nulstring); @@ -1477,10 +1859,10 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) if (prenum || postnum) x = dopadding(x, prenum, postnum, preone, postone, premul, postmul); - if (eval && parsestr(x)) + if (eval && subst_parse_str(&x, (qt && 
!nojoin))) return NULL; xlen = strlen(x); - *str = strcatsub(&y, aptr, aptr, x, xlen, fstr, globsubst); + *str = strcatsub(&y, aptr, aptr, x, xlen, fstr, globsubst, copied); if (qt && !*y && isarr != 2) y = dupstring(nulstring); insertlinknode(l, n, (void *) y), incnode(n); @@ -1496,11 +1878,11 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) if (prenum || postnum) x = dopadding(x, prenum, postnum, preone, postone, premul, postmul); - if (eval && parsestr(x)) + if (eval && subst_parse_str(&x, (qt && !nojoin))) return NULL; xlen = strlen(x); - *str = strcatsub(&y, ostr, aptr, x, xlen, fstr, globsubst); - if (qt && !*y && isarr != 2) + *str = strcatsub(&y, ostr, aptr, x, xlen, fstr, globsubst, copied); + if (qt && !*y) y = dupstring(nulstring); setdata(n, (void *) y); } @@ -1522,14 +1904,18 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) static char * arithsubst(char *a, char **bptr, char *rest) { - char *s = *bptr, *t, buf[DIGBUFSIZE]; - char *b = buf; - long v; + char *s = *bptr, *t; + char buf[DIGBUFSIZE], *b = buf; + mnumber v; singsub(&a); v = matheval(a); - sprintf(buf, "%ld", v); - t = *bptr = (char *)ncalloc(strlen(*bptr) + strlen(buf) + strlen(rest) + 1); + if (v.type & MN_FLOAT) + b = convfloat(v.u.d, 0, 0, NULL); + else + convbase(buf, v.u.l, 0); + t = *bptr = (char *) hcalloc(strlen(*bptr) + strlen(b) + + strlen(rest) + 1); t--; while ((*++t = *s++)); t--; @@ -1567,6 +1953,8 @@ modify(char **str, char **ptr) case 't': case 'l': case 'u': + case 'q': + case 'Q': c = **ptr; break; @@ -1587,7 +1975,7 @@ modify(char **str, char **ptr) if (*ptr1) { zsfree(hsubl); hsubl = ztrdup(ptr1); - } + } if (!hsubl) { zerr("no previous substitution", NULL, 0); return; @@ -1685,11 +2073,26 @@ modify(char **str, char **ptr) if (hsubl && hsubr) subst(&copy, hsubl, hsubr, gbal); break; + case 'q': + copy = bslashquote(copy, NULL, 0); + break; + case 'Q': + { + int one = noerrs, oef = errflag; + + noerrs = 1; + parse_subst_string(copy); +
noerrs = one; + errflag = oef; + remnulargs(copy); + untokenize(copy); + } + break; } tc = *tt; *tt = '\0'; nl = al + strlen(t) + strlen(copy); - ptr1 = tmp = (char *)halloc(nl + 1); + ptr1 = tmp = (char *)zhalloc(nl + 1); if (all) for (ptr2 = all; *ptr2;) *ptr1++ = *ptr2++; @@ -1736,6 +2139,21 @@ modify(char **str, char **ptr) } } break; + case 'q': + *str = bslashquote(*str, NULL, 0); + break; + case 'Q': + { + int one = noerrs, oef = errflag; + + noerrs = 1; + parse_subst_string(*str); + noerrs = one; + errflag = oef; + remnulargs(*str); + untokenize(*str); + } + break; } } if (rec < 0) { @@ -1765,6 +2183,8 @@ dstackent(char ch, int val) else for (end=NULL, n=firstnode(dirstack); n && val; val--, n=nextnode(n)); if (n == end) { + if (backwards && !val) + return pwd; if (isset(NOMATCH)) zerr("not enough directory stack entries.", NULL, 0); return NULL; diff --git a/Test/11glob.ztst b/Test/11glob.ztst index f5819595b..3a64b8c22 100644 --- a/Test/11glob.ztst +++ b/Test/11glob.ztst @@ -162,6 +162,18 @@ >1: [[ read.me = (#ia1)README~READ.ME ]] >0: [[ read.me = (#ia1)README~READ_ME ]] >1: [[ read.me = (#ia1)README~(#a1)READ_ME ]] +>0: [[ test = *((#s)|/)test((#e)|/)* ]] +>0: [[ test/path = *((#s)|/)test((#e)|/)* ]] +>0: [[ path/test = *((#s)|/)test((#e)|/)* ]] +>0: [[ path/test/ohyes = *((#s)|/)test((#e)|/)* ]] +>1: [[ atest = *((#s)|/)test((#e)|/)* ]] +>1: [[ testy = *((#s)|/)test((#e)|/)* ]] +>1: [[ testy/path = *((#s)|/)test((#e)|/)* ]] +>1: [[ path/atest = *((#s)|/)test((#e)|/)* ]] +>1: [[ atest/path = *((#s)|/)test((#e)|/)* ]] +>1: [[ path/testy = *((#s)|/)test((#e)|/)* ]] +>1: [[ path/testy/ohyes = *((#s)|/)test((#e)|/)* ]] +>1: [[ path/atest/ohyes = *((#s)|/)test((#e)|/)* ]] >0 tests failed. globtest globtests.ksh diff --git a/Test/ztst.zsh b/Test/ztst.zsh index d3d03e883..c9e5a0294 100755 --- a/Test/ztst.zsh +++ b/Test/ztst.zsh @@ -14,16 +14,21 @@ # Produce verbose messages if non-zero. # If 1, produce reports of tests executed; if 2, also report on progress. 
-ZTST_verbose=0 +# Defined in such a way that any value from the environment is used. +: ${ZTST_verbose:=0} # We require all options to be reset, not just emulation options. # Unfortunately, due to the crud which may be in /etc/zshenv this might # still not be good enough. Maybe we should trick it somehow. emulate -R zsh +# Set the module load path to correspond to this build of zsh. +# This Modules directory should have been created by "make check". +[[ -d Modules/zsh ]] && module_path=( $PWD/Modules ) + # We need to be able to save and restore the options used in the test. # We use the $options variable of the parameter module for this. -zmodload -i parameter +zmodload -i zsh/parameter # Note that both the following are regular arrays, since we only use them # in whole array assignments to/from $options. @@ -42,18 +47,31 @@ ZTST_mainopts=(${(kv)options}) ZTST_testdir=$PWD ZTST_testname=$1 +# The source directory is not necessarily the current directory, +# but if $0 doesn't contain a `/' assume it is. +if [[ $0 = */* ]]; then + ZTST_srcdir=${0%/*} +else + ZTST_srcdir=$PWD +fi +[[ $ZTST_srcdir = /* ]] || ZTST_srcdir="$ZTST_testdir/$ZTST_srcdir" + +# Set the function autoload paths to correspond to this build of zsh. +fpath=( $ZTST_srcdir/../(Completion|Functions)/*~*/CVS(/) ) + +: ${TMPPREFIX:=/tmp/zsh} # Temporary files for redirection inside tests. 
-ZTST_in=${TMPPREFIX-:/tmp/zsh}.ztst.in.$$ +ZTST_in=${TMPPREFIX}.ztst.in.$$ # hold the expected output -ZTST_out=${TMPPREFIX-:/tmp/zsh}.ztst.out.$$ -ZTST_err=${TMPPREFIX-:/tmp/zsh}.ztst.err.$$ +ZTST_out=${TMPPREFIX}.ztst.out.$$ +ZTST_err=${TMPPREFIX}.ztst.err.$$ # hold the actual output from the test -ZTST_tout=${TMPPREFIX-:/tmp/zsh}.ztst.tout.$$ -ZTST_terr=${TMPPREFIX-:/tmp/zsh}.ztst.terr.$$ +ZTST_tout=${TMPPREFIX}.ztst.tout.$$ +ZTST_terr=${TMPPREFIX}.ztst.terr.$$ ZTST_cleanup() { - rm -rf $ZTST_testdir/dummy.tmp $ZTST_testdir/*.tmp \ - $ZTST_in $ZTST_out $ZTST_err $ZTST_tout $ZTST_terr + cd $ZTST_testdir + rm -rf $ZTST_testdir/dummy.tmp $ZTST_testdir/*.tmp ${TMPPREFIX}.ztst*$$ } # This cleanup always gets performed, even if we abort. Later, @@ -67,10 +85,11 @@ rm -rf dummy.tmp *.tmp # Report failure. Note that all output regarding the tests goes to stdout. # That saves an unpleasant mixture of stdout and stderr to sort out. ZTST_testfailed() { - print "Test $ZTST_testname failed: $1" + print -r "Test $ZTST_testname failed: $1" if [[ -n $ZTST_message ]]; then - print "Was testing: $ZTST_message" + print -r "Was testing: $ZTST_message" fi + print -r "$ZTST_testname: test failed." ZTST_cleanup exit 1 } @@ -79,7 +98,7 @@ ZTST_testfailed() { ZTST_verbose() { local lev=$1 shift - [[ -n $ZTST_verbose && $ZTST_verbose -ge $lev ]] && print $* >&8 + [[ -n $ZTST_verbose && $ZTST_verbose -ge $lev ]] && print -- $* >&8 } [[ ! -r $ZTST_testname ]] && ZTST_testfailed "can't read test file." @@ -97,7 +116,7 @@ ZTST_cursect='' ZTST_getline() { local IFS= while true; do - read ZTST_curline <&9 || return 1 + read -r ZTST_curline <&9 || return 1 [[ $ZTST_curline == \#* ]] || return 0 done } @@ -144,7 +163,7 @@ $ZTST_code" # Read in a piece for redirection. 
ZTST_getredir() { - local char=${ZTST_curline[1]} + local char=${ZTST_curline[1]} fn ZTST_redir=${ZTST_curline[2,-1]} while ZTST_getline; do [[ $ZTST_curline[1] = $char ]] || break @@ -153,6 +172,22 @@ ${ZTST_curline[2,-1]}" done ZTST_verbose 2 "ZTST_getredir: read redir for '$char': $ZTST_redir" + +case $char in + '<') fn=$ZTST_in + ;; + '>') fn=$ZTST_out + ;; + '?') fn=$ZTST_err + ;; + *) ZTST_testfailed "bad redir operator: $char" + ;; +esac +if [[ $ZTST_flags = *q* ]]; then + print -r -- "${(e)ZTST_redir}" >>$fn +else + print -r -- "$ZTST_redir" >>$fn +fi } # Execute an indented chunk. Redirections will already have @@ -209,27 +244,24 @@ $ZTST_curline" fi ;; [[:space:]]##[^[:space:]]*) ZTST_getchunk - [[ $ZTST_curline != [-0-9]* ]] && - ZTST_testfailed "expecting test status at: -$ZTST_curline" - ZTST_xstatus=$ZTST_curline - if [[ $ZTST_curline == (#b)([^:]##):(*) ]]; then + if [[ $ZTST_curline == (#b)([-0-9]##)([[:alpha:]]#)(:*)# ]]; then ZTST_xstatus=$match[1] - ZTST_message=$match[2] + ZTST_flags=$match[2] + ZTST_message=${match[3]:+${match[3][2,-1]}} + else + ZTST_testfailed "expecting test status at: +$ZTST_curline" fi ZTST_getline found=1 ;; '<'*) ZTST_getredir - print -r "${(e)ZTST_redir}" >>$ZTST_in found=1 ;; '>'*) ZTST_getredir - print -r "${(e)ZTST_redir}" >>$ZTST_out found=1 ;; '?'*) ZTST_getredir - print -r "${(e)ZTST_redir}" >>$ZTST_err found=1 ;; *) ZTST_testfailed "bad line in test block: @@ -240,8 +272,7 @@ $ZTST_curline" # If we found some code to execute... if [[ -n $ZTST_code ]]; then - ZTST_verbose 1 "Running test: -$ZTST_message" + ZTST_verbose 1 "Running test: $ZTST_message" ZTST_verbose 2 "ZTST_test: expecting status: $ZTST_xstatus" ZTST_execchunk <$ZTST_in >$ZTST_tout 2>$ZTST_terr @@ -249,7 +280,9 @@ $ZTST_message" # First check we got the right status, if specified. 
if [[ $ZTST_xstatus != - && $ZTST_xstatus != $ZTST_status ]]; then ZTST_testfailed "bad status $ZTST_status, expected $ZTST_xstatus from: -$ZTST_code" +$ZTST_code${$(<$ZTST_terr):+ +Error output: +$(<$ZTST_terr)}" fi ZTST_verbose 2 "ZTST_test: test produced standard output: @@ -258,11 +291,13 @@ ZTST_test: and standard error: $(<$ZTST_terr)" # Now check output and error. - if ! diff -c $ZTST_out $ZTST_tout; then + if [[ $ZTST_flags != *d* ]] && ! diff -c $ZTST_out $ZTST_tout; then ZTST_testfailed "output differs from expected as shown above for: -$ZTST_code" +$ZTST_code${$(<$ZTST_terr):+ +Error output: +$(<$ZTST_terr)}" fi - if ! diff -c $ZTST_err $ZTST_terr; then + if [[ $ZTST_flags != *D* ]] && ! diff -c $ZTST_err $ZTST_terr; then ZTST_testfailed "error output differs from expected as shown above for: $ZTST_code" fi -- cgit 1.4.1