diff options
Diffstat (limited to 'Src/subst.c')
-rw-r--r-- | Src/subst.c | 358 |
1 files changed, 316 insertions, 42 deletions
diff --git a/Src/subst.c b/Src/subst.c index 0f98e6ea3..a079672df 100644 --- a/Src/subst.c +++ b/Src/subst.c @@ -556,7 +556,7 @@ multsub(char **s, int pf_flags, char ***a, int *isarr, char *sep, for ( ; *x; x += l) { char c = (l = *x == Meta) ? x[1] ^ 32 : *x; l++; - if (!iwsep(STOUC(c))) + if (!iwsep((unsigned char) c)) break; *ms_flags |= MULTSUB_WS_AT_START; } @@ -573,7 +573,7 @@ multsub(char **s, int pf_flags, char ***a, int *isarr, char *sep, convchar_t c; if (*x == Dash) *x = '-'; - if (itok(STOUC(*x))) { + if (itok((unsigned char) *x)) { /* token, can't be separator, must be single byte */ rawc = *x; l = 1; @@ -582,7 +582,7 @@ multsub(char **s, int pf_flags, char ***a, int *isarr, char *sep, if (!inq && !inp && WC_ZISTYPE(c, ISEP)) { *x = '\0'; for (x += l; *x; x += l) { - if (itok(STOUC(*x))) { + if (itok((unsigned char) *x)) { /* as above */ rawc = *x; l = 1; @@ -1489,21 +1489,27 @@ subst_parse_str(char **sp, int single, int err) static char * substevalchar(char *ptr) { - zlong ires = mathevali(ptr); + zlong ires; int len = 0; + int saved_errflag = errflag; - if (errflag) - return NULL; -#ifdef MULTIBYTE_SUPPORT - if (isset(MULTIBYTE) && ires > 127) { - /* '\\' + 'U' + 8 bytes of character + '\0' */ - char buf[11]; + errflag = 0; + ires = mathevali(ptr); - /* inefficient: should separate out \U handling from getkeystring */ - sprintf(buf, "\\U%.8x", (unsigned int)ires & 0xFFFFFFFFu); - ptr = getkeystring(buf, &len, GETKEYS_BINDKEY, NULL); + if (errflag) { /* not a valid numerical expression */ + errflag |= saved_errflag; + return noerrs ? 
dupstring(""): NULL; + } + errflag |= saved_errflag; + if (ires < 0) { + zerr("character not in range"); + } +#ifdef MULTIBYTE_SUPPORT + else if (isset(MULTIBYTE) && ires > 127) { + ptr = zhalloc(MB_CUR_MAX+1); + len = ucs4tomb((unsigned int)ires & 0xffffffff, ptr); } - if (len == 0) + if (len <= 0) #endif { ptr = zhalloc(2); @@ -1818,14 +1824,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags, * Use for the (k) flag. Goes down into the parameter code, * sometimes. */ - char hkeys = 0; + int hkeys = 0; /* * Used for the (v) flag, ditto. Not quite sure why they're * separate, but the tradition seems to be that things only * get combined when that makes the result more obscure rather * than less. */ - char hvals = 0; + int hvals = 0; /* * Whether we had to evaluate a subexpression, i.e. an * internal ${...} or $(...) or plain $pm. We almost don't @@ -1860,6 +1866,11 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags, * joining the array into a string (for compatibility with ksh/bash). */ int quoted_array_with_offset = 0; + /* + * Nofork substitution controls + */ + char *rplyvar = NULL; /* Indicates ${|...;} or ${{var} ...;} */ + char *rplytmp = NULL; /* Indicates ${ ... ;} */ *s++ = '\0'; /* @@ -1870,8 +1881,8 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags, * these later on, too. */ c = *s; - if (itype_end(s, IIDENT, 1) == s && *s != '#' && c != Pound && - !IS_DASH(c) && + if (itype_end(s, (c == Inbrace ? INAMESPC : IIDENT), 1) == s && + *s != '#' && c != Pound && !IS_DASH(c) && c != '!' && c != '$' && c != String && c != Qstring && c != '?' && c != Quest && c != '*' && c != Star && c != '@' && c != '{' && @@ -1887,19 +1898,235 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags, * flags in parentheses, but also one ksh hack. 
*/ if (c == Inbrace) { - inbrace = 1; - s++; + /* For processing nofork command substitution string */ + char *cmdarg = NULL, *endvar = NULL, inchar = *++s; + char *outbracep = s, sav = *s; + Param rplypm = NULL; + size_t slen = 0; + int trim = (!EMULATION(EMULATE_ZSH)) ? 2 : !qt; + + inbrace = 1; /* Outer scope boolean, see above */ + + /* Handling for nofork command substitution e.g. ${|cmd;} + * See other comments about kludges for why this is here. + * + * The command string is extracted and executed, and the + * substitution assigned. There's no (...)-flags processing, + * i.e. no ${|(U)cmd;}, because it looks quite awful and + * should not be part of command substitution in any case. + * Use ${(U)${|cmd;}} as you would for ${(U)$(cmd;)}. + */ + if (inchar == '|' || inchar == Bar || inblank(inchar)) { + *s = Inbrace; + if (skipparens(Inbrace, Outbrace, &outbracep) == 0) + slen = outbracep - s - 1; + *s = sav; + if (inchar == '|') + inchar = Bar; /* Simplify later compares */ + } else if (inchar == '{' || inchar == Inbrace) { + *s = Inbrace; + if ((outbracep = itype_end(s+1, INAMESPC, 0))) { + if (*outbracep == Inbrack && + (outbracep = parse_subscript(++outbracep, 1, ']'))) + ++outbracep; + } + + /* If we reached the first close brace, find the last */ + if (outbracep && *outbracep == Outbrace) { + char outchar = inchar == Inbrace ? Outbrace : '}'; + endvar = outbracep++; + + /* Require space to avoid ${{var}} typo for ${${var}} */ + if (!inblank(*outbracep)) { + zerr("bad substitution"); + return NULL; + } + + *endvar = '|'; /* Almost anything but braces/brackets */ + outbracep = s; + if (skipparens(Inbrace, outchar, &outbracep) == 0) + *endvar = Outbrace; + else { /* Never happens? 
*/ + *endvar = outchar; + outbracep = endvar + 1; + } + slen = outbracep - s - 1; + if (inchar != Inbrace) + outbracep[-1] = Outbrace; + *s = sav; + inchar = Inbrace; /* Simplify later compares */ + } else { + zerr("bad substitution"); + return NULL; + } + } + if (slen > 1) { + char *outbracep = s + slen; + if (!itok(*s) || inblank(inchar)) { + /* This tokenize() is important */ + char sav = *outbracep; + *outbracep = '\0'; + tokenize(s); + *outbracep = sav; + } + if (*outbracep == Outbrace) { + if (endvar == s+1) { + /* For consistency with ${} we allow ${{}...} */ + rplyvar = NULL; + } + if (endvar && *endvar == Outbrace) { + cmdarg = dupstrpfx(endvar+1, outbracep-endvar-1); + rplyvar = dupstrpfx(s+1,endvar-s-1); + } else { + cmdarg = dupstrpfx(s+1, outbracep-s-1); + rplyvar = "REPLY"; + } + if (inblank(inchar)) { + /* + * Admittedly a hack. Take advantage of the added + * parameter scope and the semantics of $(<file) to + * construct a command to write/read a temporary file. + * Then fall through to the regular parameter handling + * to manage word splitting, expansion flags, etc. + */ + char *outfmt = ">| %s {\n%s\n;}"; /* 13 */ + if ((rplytmp = gettempname(NULL, 1))) { + /* Prevent shenanigans with $TMPPREFIX */ + char *tmpfile = quotestring(rplytmp, QT_BACKSLASH); + char *dummy = zhalloc(strlen(cmdarg) + + strlen(tmpfile) + + 13); + sprintf(dummy, outfmt, tmpfile, cmdarg); + cmdarg = dummy; + } else { + /* TMPPREFIX not writable? */ + cmdoutval = lastval; + cmdarg = NULL; + } + } + s = outbracep; + } + } + + if (rplyvar) { + /* char *rplyval = getsparam("REPLY"); cf. Future? below */ + startparamscope(); /* "local" behaves as if in a function */ + if (inchar == Bar) { + /* rplyvar should be REPLY at this point, but create + * hardwired name anyway to expose any bugs elsewhere + */ + rplypm = createparam("REPLY", PM_LOCAL|PM_UNSET|PM_HIDE); + if (rplypm) /* Shouldn't createparam() do this? */ + rplypm->level = locallevel; + /* Future? 
Expose global value of $REPLY if any? */ + /* if (rplyval) setsparam("REPLY", ztrdup(rplyval)); */ + } else if (inblank(inchar)) { + rplypm = createparam(".zsh.cmdsubst", + PM_LOCAL|PM_UNSET|PM_HIDE| + PM_READONLY_SPECIAL); + if (rplypm) + rplypm->level = locallevel; + } + if (inchar != Inbrace && !rplypm) { + zerr("failed to create scope for command substitution"); + return NULL; + } + } + + if (rplyvar && cmdarg && *cmdarg) { + int obreaks = breaks; + Eprog cmdprog; + /* Execute the shell command */ + queue_signals(); + untokenize(cmdarg); + cmdprog = parse_string(cmdarg, 0); + if (cmdprog) { + /* exec.c handles dont_queue_signals() */ + execode(cmdprog, 1, 0, "cmdsubst"); + cmdoutval = lastval; + /* "return" behaves as if in a function */ + if (retflag) { + retflag = 0; + breaks = obreaks; /* Is this ever not zero? */ + } + } else /* parse error */ + errflag |= ERRFLAG_ERROR; + if (rplypm) + rplypm->node.flags &= ~PM_READONLY_SPECIAL; + if (rplytmp && !errflag) { + int onoerrs = noerrs, rplylen; + noerrs = 2; + rplylen = zstuff(&cmdarg, rplytmp); + if (trim) { + /* bash and ksh strip trailing newlines here */ + while (rplylen > 0 && cmdarg[rplylen-1] == '\n') { + rplylen--; + if (trim == 1) + break; + } + cmdarg[rplylen] = 0; + } + noerrs = onoerrs; + if (rplylen >= 0) + setsparam(rplyvar, metafy(cmdarg, rplylen, META_REALLOC)); + } + unqueue_signals(); + } + + if (rplytmp) + unlink(rplytmp); + if (rplyvar) { + if (inchar != Inbrace) { + if ((val = dupstring(getsparam(rplyvar)))) + vunset = 0; + else { + vunset = 1; + val = dupstring(""); + } + } else { + s = dyncat(rplyvar, s); + rplyvar = NULL; + } + endparamscope(); + if (exit_pending) { + if (mypid == getpid()) { + /* + * paranoia: don't check for jobs, but there + * shouldn't be any if not interactive. + */ + stopmsg = 1; + zexit(exit_val, ZEXIT_NORMAL); + } else + _exit(exit_val); + } + } + /* * In ksh emulation a leading `!' is a special flag working - * sort of like our (k). + * sort of like our (k). 
This is true only for arrays or + * associative arrays and only with subscripts [*] or [@], + * so zsh's implementation is approximate. For namerefs + * in ksh, ${!ref} substitutes the parameter name at the + * end of any chain of references, rather than the value. + * * TODO: this is one of very few cases tied directly to * the emulation mode rather than an option. Since ksh * doesn't have parameter flags it might be neater to * handle this with the ^, =, ~ stuff, below. */ if ((c = *s) == '!' && s[1] != Outbrace && EMULATION(EMULATE_KSH)) { - hkeys = SCANPM_WANTKEYS; + hkeys = SCANPM_WANTKEYS|SCANPM_NONAMEREF; s++; + /* There's a slew of other special bash meanings of parameter + * references that start with "!": + * ${!name} == ${(P)name} (when name is not a nameref) + * ${!name*} == ${(k)parameters[(I)name*]} + * ${!name@} == ${(@k)parameters[(I)name*]} + * ${!name[*]} == ${(k)name} (but indexes of ordinary arrays, too) + * ${!name[@]} == ${(@k)name} (ditto, as noted above for ksh) + * + * See also workers/34390, workers/34397, workers/34408. + */ } else if (c == '(' || c == Inpar) { char *t, sav; int tt = 0; @@ -2154,10 +2381,19 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags, escapes = 1; break; + case '!': + if ((hkeys|hvals) & ~SCANPM_NONAMEREF) + goto flagerr; + hkeys = SCANPM_NONAMEREF; + break; case 'k': + if (hkeys & ~SCANPM_WANTKEYS) + goto flagerr; hkeys = SCANPM_WANTKEYS; break; case 'v': + if (hvals & ~SCANPM_WANTVALS) + goto flagerr; hvals = SCANPM_WANTVALS; break; @@ -2308,7 +2544,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags, /* * Look for special unparenthesised flags. * TODO: could make these able to appear inside parentheses, too, - * i.e. ${(^)...} etc. + * i.e. ${(^)...} etc., but ${(~)...} already has another meaning. 
*/ for (;;) { if ((c = *s) == '^' || c == Hat) { @@ -2332,7 +2568,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags, } } else if ((c == '#' || c == Pound) && (inbrace || !isset(POSIXIDENTIFIERS)) && - (itype_end(s+1, IIDENT, 0) != s + 1 + (itype_end(s+1, INAMESPC, 0) != s + 1 || (cc = s[1]) == '*' || cc == Star || cc == '@' || cc == '?' || cc == Quest || cc == '$' || cc == String || cc == Qstring @@ -2369,8 +2605,9 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags, * Try to handle this when parameter is named * by (P) (second part of test). */ - if (itype_end(s+1, IIDENT, 0) != s+1 || (aspar && isstring(s[1]) && - (s[2] == Inbrace || s[2] == Inpar))) + if (itype_end(s+1, INAMESPC, 0) != s+1 || + (aspar && isstring(s[1]) && + (s[2] == Inbrace || s[2] == Inpar))) chkset = 1, s++; else if (!inbrace) { /* Special case for `$+' on its own --- leave unmodified */ @@ -2531,6 +2768,8 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags, scanflags |= SCANPM_DQUOTED; if (chkset) scanflags |= SCANPM_CHECKING; + if (!inbrace) + scanflags |= SCANPM_NONAMESPC; /* * Second argument: decide whether to use the subexpression or * the string next on the line as the parameter name. @@ -2556,14 +2795,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags, * we let fetchvalue set the main string pointer s to * the end of the bit it's fetched. */ - if (!(v = fetchvalue(&vbuf, (subexp ? &ov : &s), - (wantt ? -1 : - ((unset(KSHARRAYS) || inbrace) ? 1 : -1)), - scanflags)) || - (v->pm && (v->pm->node.flags & PM_UNSET)) || - (v->flags & VALFLAG_EMPTY)) + if (!rplyvar && + (!(v = fetchvalue(&vbuf, (subexp ? &ov : &s), + (wantt ? -1 : + ((unset(KSHARRAYS) || inbrace) ? 
1 : -1)), + scanflags)) || + (v->pm && (v->pm->node.flags & PM_UNSET)) || + (v->flags & VALFLAG_EMPTY))) vunset = 1; - if (wantt) { /* * Handle the (t) flag: value now becomes the type @@ -2573,13 +2812,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags, !(v->pm->node.flags & PM_UNSET))) { int f = v->pm->node.flags; - switch (PM_TYPE(f)) { + switch (PM_TYPE(f)|(f & PM_NAMEREF)) { case PM_SCALAR: val = "scalar"; break; case PM_ARRAY: val = "array"; break; case PM_INTEGER: val = "integer"; break; case PM_EFLOAT: case PM_FFLOAT: val = "float"; break; case PM_HASHED: val = "association"; break; + case PM_NAMEREF: val = "nameref"; break; } val = dupstring(val); if (v->pm->level) @@ -2910,6 +3150,13 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags, chuck(ptr); else ptr++; + } else if (c == Dnull) { + chuck(ptr); + while (*ptr && *ptr != c) + ptr++; + if (*ptr == Dnull) + chuck(ptr); + ptr--; /* Outer loop is about to increment */ } } replstr = (*ptr && ptr[1]) ? ptr+1 : ""; @@ -2926,6 +3173,9 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags, */ if (!(flags & (SUB_MATCH|SUB_REST|SUB_BIND|SUB_EIND|SUB_LEN))) flags |= SUB_REST; + /* If matching at start and end, don't stop early */ + if ((flags & (SUB_START|SUB_END)) == (SUB_START|SUB_END)) + flags |= SUB_LONG; /* * With ":" treat a value as unset if the variable is set but @@ -3076,7 +3326,13 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags, if (vunset) { if (isset(EXECOPT)) { *idend = '\0'; - zerr("%s: %s", idbeg, *s ? 
s : "parameter not set"); + if (*s){ + int l; + singsub(&s); + s = unmetafy(s, &l); + zerr("%s: %l", idbeg, s, l); + } else + zerr("%s: %s", idbeg, "parameter not set"); /* * In interactive shell we need to return to * top-level prompt --- don't clear this error @@ -3203,7 +3459,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags, shortest = 0; ++s; } - if (*itype_end(s, IIDENT, 0)) { + if (*itype_end(s, INAMESPC, 0)) { untokenize(s); zerr("not an identifier: %s", s); return NULL; @@ -3219,6 +3475,9 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags, char *sval; zip = getaparam(s); if (!zip) { + zip = gethparam(s); + } + if (!zip) { sval = getsparam(s); if (sval) zip = hmkarray(sval); @@ -3263,7 +3522,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags, int intersect = (*s == '*' || *s == Star); char **compare, **ap, **apsrc; ++s; - if (*itype_end(s, IIDENT, 0)) { + if (*itype_end(s, INAMESPC, 0)) { untokenize(s); zerr("not an identifier: %s", s); return NULL; @@ -3716,6 +3975,8 @@ colonsubscript: if (presc) { int ops = opts[PROMPTSUBST], opb = opts[PROMPTBANG]; int opp = opts[PROMPTPERCENT]; + zattr savecurrent = txtcurrentattrs; + zattr saveunknown = txtunknownattrs; if (presc < 2) { opts[PROMPTPERCENT] = 1; @@ -3738,7 +3999,8 @@ colonsubscript: for (; *ap; ap++) { char *tmps; untokenize(*ap); - tmps = promptexpand(*ap, 0, NULL, NULL, NULL); + txtunknownattrs = TXT_ATTR_ALL; + tmps = promptexpand(*ap, 0, NULL, NULL); *ap = dupstring(tmps); free(tmps); } @@ -3747,10 +4009,14 @@ colonsubscript: if (!copied) val = dupstring(val), copied = 1; untokenize(val); - tmps = promptexpand(val, 0, NULL, NULL, NULL); + txtunknownattrs = TXT_ATTR_ALL; + tmps = promptexpand(val, 0, NULL, NULL); val = dupstring(tmps); free(tmps); } + + txtpendingattrs = txtcurrentattrs = savecurrent; + txtunknownattrs = saveunknown; opts[PROMPTSUBST] = ops; opts[PROMPTBANG] = opb; opts[PROMPTPERCENT] = opp; @@ -4309,6 +4575,8 @@ 
modify(char **str, char **ptr, int inbrace) break; case 's': + case 'S': + hsubpatopt = (**ptr == 'S'); c = **ptr; (*ptr)++; ptr1 = *ptr; @@ -4403,7 +4671,7 @@ modify(char **str, char **ptr, int inbrace) break; case '&': - c = 's'; + c = hsubpatopt ? 'S' : 's'; break; case 'g': @@ -4492,8 +4760,11 @@ modify(char **str, char **ptr, int inbrace) copy = casemodify(tt, CASMOD_UPPER); break; case 's': + case 'S': + hsubpatopt = (c == 'S'); if (hsubl && hsubr) - subst(&copy, hsubl, hsubr, gbal); + subst(&copy, dupstring(hsubl), dupstring(hsubr), + gbal, hsubpatopt); break; case 'q': copy = quotestring(copy, QT_BACKSLASH_SHOWNULL); @@ -4578,8 +4849,11 @@ modify(char **str, char **ptr, int inbrace) *str = casemodify(*str, CASMOD_UPPER); break; case 's': + case 'S': + hsubpatopt = (c == 'S'); if (hsubl && hsubr) - subst(str, hsubl, hsubr, gbal); + subst(str, dupstring(hsubl), dupstring(hsubr), + gbal, hsubpatopt); break; case 'q': *str = quotestring(*str, QT_BACKSLASH); |