diff options
Diffstat (limited to 'Src')
-rw-r--r-- | Src/subst.c | 572 |
1 files changed, 564 insertions, 8 deletions
diff --git a/Src/subst.c b/Src/subst.c index bc7015eef..d2d75ce79 100644 --- a/Src/subst.c +++ b/Src/subst.c @@ -300,6 +300,15 @@ multsub(char **s, char ***a, int *isarr, char *sep) while (nonempty(&foo)) *p++ = (char *)ugetnode(&foo); *p = NULL; + /* + * This is the most obscure way of deciding whether a value is + * an array it would be possible to imagine. It seems to result + * partly because we don't pass down the qt and ssub flags from + * paramsubst() through prefork() properly, partly because we + * don't tidy up to get back the return type from multsub we + * need properly. The crux of neatening this up is to get rid + * of the following test. + */ if (a && mult_isarr) { *a = r; *isarr = SCANPM_MATCHMANY; @@ -817,6 +826,23 @@ subst_parse_str(char **sp, int single, int err) #define isstring(c) ((c) == '$' || (char)(c) == String || (char)(c) == Qstring) #define isbrack(c) ((c) == '[' || (char)(c) == Inbrack) +/* + * Given a linked list l with node n, perform parameter substitution + * starting from *str. Return the node with the substitutuion performed + * or NULL if it failed. + * + * If qt is true, the `$' was quoted. TODO: why can't we just look + * to see if the first character was String or Qstring? + * + * If ssub is true, we are being called via singsubst(), which means + * the result will be a single word. TODO: can we generate the + * single word at the end? TODO: if not, or maybe in any case, + * can we pass down the ssub flag from prefork with the other flags + * instead of pushing it into different arguments? (How exactly + * to qt and ssub differ? Are both necessary, if so is there some + * better way of separating the two?) + */ + /**/ LinkNode paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) @@ -824,43 +850,207 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) char *aptr = *str, c, cc; char *s = aptr, *fstr, *idbeg, *idend, *ostr = (char *) getdata(n); int colf; /* != 0 means we found a colon after the name */ + /* + * There are far too many flags. They need to be grouped + * together into some structure which ties them to where they + * came from. + * + * Some flags have a an obscure relationship to their effect which + * depends on incrementing them to particular values in particular + * ways. + */ + /* + * Whether the value is an array (in aval) or not (in val). There's + * a movement from storing the value in the stuff read from the + * parameter (the value v) to storing them in val and aval. + * However, sometimes you find v reappearing temporarily. + * + * The values -1 and 2 are special to isarr. It looks like 2 is + * some kind of an internal flag to do with whether the array's been + * copied, in which case I don't know why we don't use the copied + * flag, but they do both occur close together so they presumably + * have different effects. The value -1 is isued to force us to + * keep an empty array. It's tested in the YUK chunk (I mean the + * one explicitly marked as such). + */ int isarr = 0; + /* + * This is just the setting of the option except we need to + * take account of ^ and ^^. + */ int plan9 = isset(RCEXPANDPARAM); + /* + * Likwise, but with ~ and ~~. Also, we turn it off later + * on if qt is passed down. + */ int globsubst = isset(GLOBSUBST); + /* + * Indicates ${#pm}, massaged by whichlen which is set by + * the (c), (w), and (W) flags to indicate how we take the length. + */ int getlen = 0; int whichlen = 0; + /* + * Indicates ${+pm}: a simple boolean for once. + */ int chkset = 0; + /* + * Indicates we have tried to get a value in v but that was + * unset. I don't quite understand why (v == NULL) isn't + * good enough, but there are places where we seem to need + * to second guess whether a value is a real value or not. + */ int vunset = 0; + /* + * Indicates (t) flag, i.e. print out types. The code for + * this actually isn't too horrifically inbred compared with + * that for (P). + */ int wantt = 0; + /* + * Indicates spliting a string into an array. There aren't + * actually that many special cases for this --- which may + * be why it doesn't work properly; we split in some cases + * where we shouldn't, in particular on the multsubs for + * handling embedded values for ${...=...} and the like. + */ int spbreak = isset(SHWORDSPLIT) && !ssub && !qt; + /* Scalar and array value, see isarr above */ char *val = NULL, **aval = NULL; + /* + * Padding based on setting in parameter rather than substitution + * flags. This is only used locally. + */ unsigned int fwidth = 0; + /* + * vbuf and v are both used to retrieve parameter values; this + * is a kludge, we pass down vbuf and it may or may not return v. + */ struct value vbuf; Value v = NULL; + /* + * This expressive name refers to the set of flags which + * is applied to matching for #, %, / and their doubled variants: + * (M), (R), (B), (E), (N), (S). + */ int flags = 0; + /* Value from (I) flag, used for ditto. */ int flnum = 0; + /* + * sortit is an obscure combination of the settings for (o), (O), + * (i) and (n). casind is (i) and numord is (n); these are + * separate so we can have fun doing the obscure combinatorics later. + * indord is the (a) flag, which for consistency doesn't get + * combined into sortit. + */ int sortit = 0, casind = 0, numord = 0, indord = 0; + /* (u): straightforward. */ int unique = 0; + /* combination of (L), (U) and (C) flags. */ int casmod = 0; + /* + * quotemod says we are doing either (q) (positive), (Q) (negative) + * or not (0). quotetype counts the q's for the first case. + * quoterr is simply (X) but gets passed around a lot because the + * combination (eX) needs it. + */ int quotemod = 0, quotetype = 0, quoteerr = 0; + /* + * (V) flag: fairly straightforward, except that as with so + * many flags it's not easy to decide where to put it in the order. + */ int visiblemod = 0; + /* + * The (z) flag, nothing to do with SH_WORD_SPLIT which is tied + * spbreak, see above; fairly straighforward in use but c.f. + * the comment for visiblemod. + */ int shsplit = 0; + /* + * The separator from (j) and (s) respectively, or (F) and (f) + * respectively (hardwired to "\n" in that case). Slightly + * confusingly also used for ${#pm}, thought that's at least + * documented in the manual + */ char *sep = NULL, *spsep = NULL; + /* + * Padding strings. The left and right padding strings which + * are repeated, then the ones which only occur once, for + * the (l) and (r) flags. + */ char *premul = NULL, *postmul = NULL, *preone = NULL, *postone = NULL; - char *replstr = NULL; /* replacement string for /orig/repl */ + /* Replacement string for /orig/repl and //orig/repl */ + char *replstr = NULL; + /* The numbers for (l) and (r) */ zlong prenum = 0, postnum = 0; + /* + * Whether the value has been copied. Optimisation: if we + * are modifying an expression, we only need to copy it the + * first time, and if we don't modify it we can just use the + * value from the parameter or input. + */ int copied = 0; + /* + * The (A) flag for array assignment, with consequences for + * splitting and joining; (AA) gives arrasg == 2 for associative + * arrays. + */ int arrasg = 0; + /* + * The (e) flag. As we need to do extra work not quite + * at the end, the effect of this is kludged in in several places. + */ int eval = 0; + /* + * The (P) flag. This interacts a bit obscurely with whether + * or not we are dealing with a sub expression (subexp). + */ int aspar = 0; + /* + * The (%) flag, c.f. visiblemod again. + */ int presc = 0; + /* + * The (@) flag; interacts obscurely with qt and isarr. + * This is one of the things that decides whether multsub + * will produce an array, but in an extremely indirect fashion. + */ int nojoin = 0; - char inbrace = 0; /* != 0 means ${...}, otherwise $... */ + /* + * != 0 means ${...}, otherwise $... What works without braces + * is largely a historical artefact (everything works with braces, + * I sincerely hope). + */ + char inbrace = 0; + /* + * Use for the (k) flag. Goes down into the parameter code, + * sometimes. + */ char hkeys = 0; + /* + * Used for the (v) flag, ditto. Not quite sure why they're + * separate, but the tradition seems to be that things only + * get combined when that makes the result more obscure rather + * than less. + */ char hvals = 0; + /* + * Whether we had to evaluate a subexpression, i.e. an + * internal ${...} or $(...) or plain $pm. We almost don't + * need to remember this (which would be neater), but the (P) + * flag means the subexp and !subexp code is obscurely combined, + * and the argument passing to fetchvalue has another kludge. + */ int subexp; *s++ = '\0'; + /* + * Nothing to do unless the character following the $ is + * something we recognise. + * + * Shouldn't this be a table or something? We test for all + * these later on, too. + */ if (!ialnum(c = *s) && c != '#' && c != Pound && c != '-' && c != '!' && c != '$' && c != String && c != Qstring && c != '?' && c != Quest && c != '_' && @@ -872,9 +1062,21 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) return n; } DPUTS(c == '{', "BUG: inbrace == '{' in paramsubst()"); + /* + * Extra processing if there is an opening brace: mostly + * flags in parentheses, but also one ksh hack. + */ if (c == Inbrace) { inbrace = 1; s++; + /* + * In ksh emulation a leading `!' is a special flag working + * sort of like our (k). + * TODO: this is one of very few cases tied directly to + * the emulation mode rather than an option. Since ksh + * doesn't have parameter flags it might be neater to + * handle this with the ^, =, ~ stuff, below. + */ if ((c = *s) == '!' && s[1] != Outbrace && emulation == EMULATE_KSH) { hkeys = SCANPM_WANTKEYS; s++; @@ -882,6 +1084,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) char *t, sav; int tt = 0; zlong num; + /* + * The (p) flag is (uniquely) only remembered within + * this block. It says we do print-style handling + * on the values for flags, but only on those. + * This explains the ghastly macro, but why can't it + * be a function? UNTOK_AND_ESCAPE is defined + * so that the argument must be an lvalue. + */ int escapes = 0; int klen; #define UNTOK(C) (itok(C) ? ztokens[(C) - Pound] : (C)) @@ -1089,22 +1299,40 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) s++; } } + /* Sort is done by indexing on sortit-1: + * bit 1: ascending (o)/descending (O) + * bit 2: case sensitive/independent (i) + * bit 3: strict order/numeric (n) + * unless indord (a) is set set, in which case only test for + * descending by assuming only (O) is possible (not verified). + */ if (sortit) sortit += (casind << 1) + (numord << 2); + /* + * premul, postmul specify the padding character to be used + * multiple times with the (l) and (r) flags respectively. + */ if (!premul) premul = " "; if (!postmul) postmul = " "; + /* + * Look for special unparenthesised flags. + * TODO: could make these able to appear inside parentheses, too, + * i.e. ${(^)...} etc. + */ for (;;) { if ((c = *s) == '^' || c == Hat) { + /* RC_EXPAND_PARAM on or off (doubled )*/ if ((c = *++s) == '^' || c == Hat) { plan9 = 0; s++; } else plan9 = 1; } else if ((c = *s) == '=' || c == Equals) { + /* SH_WORD_SPLIT on or off (doubled). spbreak = 2 means force */ if ((c = *++s) == '=' || c == Equals) { spbreak = 0; s++; @@ -1114,19 +1342,33 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) (iident(cc = s[1]) || cc == '*' || cc == Star || cc == '@' || cc == '-' || (cc == ':' && s[2] == '-') - || (isstring(cc) && (s[2] == Inbrace || s[2] == Inpar)))) + || (isstring(cc) && (s[2] == Inbrace || s[2] == Inpar)))) { getlen = 1 + whichlen, s++; - else if (c == '~' || c == Tilde) { + /* + * Return the length of the parameter. + * getlen can be more than 1 to indicate characters (2), + * words ignoring multiple delimiters (3), words taking + * account of multiple delimiters. delimiter is in + * spsep, NULL means $IFS. + */ + } else if (c == '~' || c == Tilde) { + /* GLOB_SUBST on or off (doubled) */ if ((c = *++s) == '~' || c == Tilde) { globsubst = 0; s++; } else globsubst = 1; } else if (c == '+') { + /* + * Return whether indicated parameter is set. + * Try to handle this when parameter is named + * by (P) (second part of test). + */ if (iident(s[1]) || (aspar && isstring(s[1]) && (s[2] == Inbrace || s[2] == Inpar))) chkset = 1, s++; else if (!inbrace) { + /* Special case for `$+' on its own --- leave unmodified */ *aptr = '$'; *str = aptr + 1; return n; @@ -1134,13 +1376,31 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) zerr("bad substitution", NULL, 0); return NULL; } - } else if (inbrace && INULL(*s)) + } else if (inbrace && INULL(*s)) { + /* + * Handles things like ${(f)"$(<file)"} by skipping + * the double quotes. We don't need to know what was + * actually there; the presence of a String or Qstring + * is good enough. + */ s++; - else + } else break; } + /* Don't activate special pattern characters if inside quotes */ globsubst = globsubst && !qt; + /* + * At this point, we usually expect a parameter name. + * However, there may be a nested ${...} or $(...). + * These say that the parameter itself is somewhere inside, + * or that there isn't a parameter and we will get the values + * from a command substitution itself. In either case, + * the current instance of paramsubst() doesn't fetch a value, + * it just operates on what gets passed up. + * (The first ought to have been {...}, reserving ${...} + * for substituting a value at that point, but it's too late now.) + */ idbeg = s; if ((subexp = (inbrace && s[-1] && isstring(*s) && (s[1] == Inbrace || s[1] == Inpar)))) { @@ -1151,26 +1411,78 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) skipparens(*s, *s == Inpar ? Outpar : Outbrace, &s); sav = *s; *s = 0; + /* + * This handles arrays. TODO: this is not the most obscure call to + * multsub() (see below) but even so it would be nicer to pass down + * and back the arrayness more rationally. In that case, we should + * remove the aspar test and extract a value from an array, if + * necessary, when we handle (P) lower down. + */ if (multsub(&val, (aspar ? NULL : &aval), &isarr, NULL) && quoted) { + /* Empty quoted string --- treat as null string, not elided */ isarr = -1; aval = (char **) hcalloc(sizeof(char *)); aspar = 0; } else if (aspar) idbeg = val; *s = sav; + /* + * This tests for the second double quote in an expression + * like ${(f)"$(<file)"}, compare above. + */ while (INULL(*s)) s++; v = (Value) NULL; } else if (aspar) { + /* + * No subexpression, but in any case the value is going + * to give us the name of a parameter on which we do + * our remaining processing. In other words, this + * makes ${(P)param} work like ${(P)${param}}. (Probably + * better looked at, this is the basic code for ${(P)param} + * and it's been kludged into the subexp code because no + * opportunity for a kludge has been neglected.) + */ if ((v = fetchvalue(&vbuf, &s, 1, (qt ? SCANPM_DQUOTED : 0)))) { val = idbeg = getstrvalue(v); subexp = 1; } else vunset = 1; } + /* + * We need to retrieve a value either if we haven't already + * got it from a subexpression, or if the processing so + * far has just yielded us a parameter name to be processed + * with (P). + */ if (!subexp || aspar) { char *ov = val; + /* + * Second argument: decide whether to use the subexpression or + * the string next on the line as the parameter name. + * Third argument: decide how processing for brackets + * 1 means full processing + * -1 appears to mean something along the lines of + * only handle single digits and don't handle brackets. + * I *think* (but it's really only a guess) that this + * is used by the test below the wantt handling, so + * that in certain cases we handle brackets there. + * 0 would apparently mean something like we know we + * should have the name of a scalar and we get cross + * if there's anything present which disagrees with that + * but you will search fetchvalue() in vain for comments on this. + * Fourth argument gives flags to do with keys, values, quoting, + * assigning depending on context and parameter flags. + * + * This is the last mention of subexp, so presumably this + * is what the code which makes sure subexp is set if aspar (the + * (P) flag) is set. I *think* what's going on here is the + * second argument is for both input and output: with + * subexp, we only want the input effect, whereas normally + * we let fetchvalue set the main string pointer s to + * the end of the bit it's fetched. + */ if (!(v = fetchvalue(&vbuf, (subexp ? &ov : &s), (wantt ? -1 : ((unset(KSHARRAYS) || inbrace) ? 1 : -1)), @@ -1181,6 +1493,10 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) vunset = 1; if (wantt) { + /* + * Handle the (t) flag: value now becomes the type + * information for the parameter. + */ if (v && v->pm && !(v->pm->flags & PM_UNSET)) { int f = v->pm->flags; @@ -1227,8 +1543,24 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) isarr = 0; } } + /* + * We get in here two ways; either we need to convert v into + * the local value system, or we need to get rid of brackets + * even if there isn't a v. + */ while (v || ((inbrace || (unset(KSHARRAYS) && vunset)) && isbrack(*s))) { if (!v) { + /* + * Index applied to non-existent parameter; we may or may + * not have a value to index, however. Create a temporary + * empty parameter as a trick, and index on that. This + * usually happens the second time around the loop when + * we've used up the original parameter value and want to + * apply a subscript to what's left. However, it's also + * possible it's got something to do with some of that murky + * passing of -1's as the third argument to fetchvalue() to + * inhibit bracket parsing at that stage. + */ Param pm; char *os = s; @@ -1251,6 +1583,21 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) if (getindex(&s, v, qt) || s == os) break; } + /* + * This is where we extract a value (we know now we have + * one) into the local parameters for a scalar (val) or + * array (aval) value. TODO: move val and aval into + * a structure with a discriminator. Hope we can make + * more things array values at this point and dearrayify later. + * v->isarr tells us whether the stuff form down below looks + * like an array. Unlike multsub() this is probably clean + * enough to keep, although possibly the parameter passing + * needs reorganising. + * + * I think we get to discard the existing value of isarr + * here because it's already been taken account of, either + * in the subexp stuff or immediately above. + */ if ((isarr = v->isarr)) { /* No way to get here with v->inv != 0, so getvaluearr() * * is called by getarrvalue(); needn't test PM_HASHED. */ @@ -1260,7 +1607,18 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) } else aval = getarrvalue(v); } else { + /* Value retrieved from parameter/subexpression is scalar */ if (v->pm->flags & PM_ARRAY) { + /* + * Although the value is a scalar, the parameter + * itself is an array. Presumably this is due to + * being quoted, or doing single substitution or something, + * TODO: we're about to do some definitely stringy + * stuff, so something like this bit is probably + * necessary. However, I'd like to leave any + * necessary joining of arrays until this point + * to avoid the multsub() horror. + */ int tmplen = arrlen(v->pm->gets.afn(v->pm)); if (v->start < 0) @@ -1269,6 +1627,15 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) vunset = 1; } if (!vunset) { + /* + * There really is a value. Apply any necessary + * padding or case transformation. Note these + * are the per-parameter transformations specified + * with typeset, not the per-substitution ones set + * by flags. TODO: maybe therefore this would + * be more consistent if moved into getstrvalue()? + * Bet that's easier said than done. + */ val = getstrvalue(v); fwidth = v->pm->ct ? v->pm->ct : strlen(val); switch (v->pm->flags & (PM_LEFT | PM_RIGHT_B | PM_RIGHT_Z)) { @@ -1335,10 +1702,25 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) } } } + /* + * Finished with the original parameter and its indices; + * carry on looping to see if we need to do more indexing. + * This means we final get rid of v in favour of val and + * aval. We could do with somehow encapsulating the bit + * where we need v. + */ v = NULL; if (!inbrace) break; } + /* + * We're now past the name or subexpression; the only things + * which can happen now are a closing brace, one of the standard + * parameter postmodifiers, or a history-style colon-modifier. + * + * Again, this duplicates tests for characters we're about to + * examine properly later on. + */ if (inbrace && (c = *s) != '-' && c != '+' && c != ':' && c != '%' && c != '/' && c != '=' && c != Equals && @@ -1348,6 +1730,30 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) zerr("bad substitution", NULL, 0); return NULL; } + /* + * Join arrays up if we're in quotes and there isn't some + * override such as (@). + * TODO: hmm, if we're called as part of some recursive + * substitution do we want to delay this until we get back to + * the top level? Or is if there's a qt (i.e. this parameter + * substitution is in quotes) always good enough? Potentially + * we may be OK by now --- all potential `@'s and subexpressions + * have been handled, including any [@] index which comes up + * by virture of v->isarr being set to SCANPM_ISVAR_AT which + * is now in isarr. + * + * However, if we are replacing multsub() with something that + * doesn't mangle arrays, we may need to delay this step until after + * the foo:- or foo:= or whatever that causes that. Note the value + * (string or array) at this point is irrelevant if we are going to + * be doing that. This would mean // and stuff get applied + * arraywise even if quoted. That's probably wrong, so maybe + * this just stays. + * + * We do a separate stage of dearrayification in the YUK chunk, + * I think mostly because of the way we make array or scalar + * values appear to the caller. + */ if (isarr) { if (nojoin) isarr = -1; @@ -1358,9 +1764,20 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) } idend = s; - if (inbrace) + if (inbrace) { + /* + * This is to match a closing double quote in case + * we didn't have a subexpression, e.g. ${"foo"}. + * This form is pointless, but logically it ought to work. + */ while (INULL(*s)) s++; + } + /* + * We don't yet know whether a `:' introduces a history-style + * colon modifier or qualifies something like ${...:=...}. + * But if we remember the colon here it's easy to check later. + */ if ((colf = *s == ':')) s++; @@ -1391,13 +1808,18 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) if (inbrace && ((c = *s) == '-' || c == '+' || - c == ':' || + c == ':' || /* i.e. a doubled colon */ c == '=' || c == Equals || c == '%' || c == '#' || c == Pound || c == '?' || c == Quest || c == '/')) { + /* + * Default index is 1 if no (I) or (I) gave zero. But + * why don't we set the default explicitly at the start + * and massage any passed index where we set flnum anyway? + */ if (!flnum) flnum++; if (c == '%') @@ -1455,6 +1877,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) *ptr = '\0'; } + /* See if this was ${...:-...}, ${...:=...}, etc. */ if (colf) flags |= SUB_ALL; /* @@ -1487,12 +1910,23 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) * (except according to $@ rules); but this leaves the * unquoted substrings unsplit, and other code below * for spbreak splits even within the quoted substrings. + * + * TODO: I think multsub needs to be told enough to + * decide about splitting with spbreak at this point + * (and equally in the `=' handler below). Then + * we can turn off spbreak to avoid the join & split + * nastiness later. + * + * What we really want to do is make this look as + * if it were the result of an assignment from + * the same value, taking account of quoting. */ multsub(&val, (aspar ? NULL : &aval), &isarr, NULL); copied = 1; } break; case ':': + /* this must be `::=', unconditional assignment */ if (*s != '=' && *s != Equals) goto noclosebrace; vunset = 1; @@ -1507,11 +1941,22 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) *idend = '\0'; val = dupstring(s); isarr = 0; + /* + * TODO: this is one of those places where I don't + * think we want to do the joining until later on. + * We also need to handle spbreak and spsep at this + * point and unset them. + */ if (spsep || spbreak || !arrasg) multsub(&val, NULL, NULL, sep); else multsub(&val, &aval, &isarr, NULL); if (arrasg) { + /* + * This is an array assignment in a context + * where we have no syntactic way of finding + * out what an array element is. So we just guess. + */ char *arr[2], **t, **a, **p; if (spsep || spbreak) { aval = sepsplit(val, spsep, 0, 1); @@ -1635,6 +2080,10 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) #endif } + /* + * Either loop over an array doing replacements or + * do the replacment on a string. + */ if (!vunset && isarr) { getmatcharr(&aval, s, flags, flnum, replstr); copied = 1; @@ -1647,6 +2096,11 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) break; } } else { /* no ${...=...} or anything, but possible modifiers. */ + /* + * Handler ${+...}. TODO: strange, why do we handle this only + * if there isn't a trailing modifier? Why don't we do this + * e.g. when we hanlder the ${(t)...} flag? + */ if (chkset) { val = dupstring(vunset ? "0" : "1"); isarr = 0; @@ -1659,6 +2113,10 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) val = dupstring(""); } if (colf) { + /* + * History style colon modifiers. May need to apply + * on multiple elements of an array. + */ s--; if (unset(KSHARRAYS) || inbrace) { if (!isarr) @@ -1695,6 +2153,13 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) } if (errflag) return NULL; + /* + * This handles taking a length with ${#foo} and variations. + * TODO: again. one might naively have thought this had the + * same sort of effect as the ${(t)...} flag and the ${+...} + * test, although in this case we do need the value rather + * the the parameter, so maybe it's a bit different. + */ if (getlen) { long len = 0; char buf[14]; @@ -1725,6 +2190,23 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) val = dupstring(buf); isarr = 0; } + /* + * I think this mult_isarr stuff here is used to pass back + * the setting of whether we are an array to multsub, and + * thence to the top-level paramsubst(). The way the + * setting is passed back is completely obscure, however. + * It's presumably at this point because we try to remember + * whether the value was `really' an array before massaging + * some special cases. + * + * TODO: YUK. This is not the right place to turn arrays into + * scalars; we should pass back as an array, and let the calling + * code decide how to deal with it. This is almost certainly + * a lot harder than it sounds. Do we really need to handle + * one-element arrays as scalars at this point? Couldn't + * we just test for it later rather than having a multiple-valued + * wave-function for isarr? + */ mult_isarr = isarr; if (isarr > 0 && !plan9 && (!aval || !aval[0])) { val = dupstring(""); @@ -1739,6 +2221,12 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) } /* ssub is true when we are called from singsub (via prefork). * It means that we must join arrays and should not split words. */ + /* + * TODO: this is what is screwing up the use of SH_WORD_SPLIT + * after `:-' etc. If we fix multsub(), we might get away + * with simply unsetting the appropriate flags when they + * get handled. + */ if (ssub || spbreak || spsep || sep) { if (isarr) val = sepjoin(aval, sep, 1), isarr = 0; @@ -1753,6 +2241,9 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) } mult_isarr = isarr; } + /* + * Perform case modififications. + */ if (casmod) { if (isarr) { char **ap; @@ -1782,6 +2273,9 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) makecapitals(&val); } } + /* + * Perform prompt-style modifications. + */ if (presc) { int ops = opts[PROMPTSUBST], opb = opts[PROMPTBANG]; int opp = opts[PROMPTPERCENT], len; @@ -1790,6 +2284,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) opts[PROMPTPERCENT] = 1; opts[PROMPTSUBST] = opts[PROMPTBANG] = 0; } + /* + * TODO: It would be really quite nice to abstract the + * isarr and !issarr code into a function which gets + * passed a pointer to a function with the effect of + * the promptexpand bit. Then we could use this for + * a lot of stuff and bury val/aval/isarr inside a structure + * which gets passed to it. + */ if (isarr) { char **ap; @@ -1820,6 +2322,10 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) opts[PROMPTBANG] = opb; opts[PROMPTPERCENT] = opp; } + /* + * One of the possible set of quotes to apply, depending on + * the repetitions of the (q) flag. + */ if (quotemod) { if (--quotetype > 3) quotetype = 3; @@ -1903,6 +2409,10 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) } } } + /* + * Transform special characters in the string to make them + * printable. + */ if (visiblemod) { if (isarr) { char **ap; @@ -1916,6 +2426,11 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) val = nicedupstring(val); } } + /* + * Nothing particularly to do with SH_WORD_SPLIT --- this + * performs lexical splitting on a string as specified by + * the (z) flag. + */ if (shsplit) { LinkList list = NULL; @@ -1944,6 +2459,21 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) } copied = 1; } + /* + * TODO: hmm. At this point we have to be on our toes about + * whether we're putting stuff into a line or not, i.e. + * we don't want to do this from a recursive call; this is + * probably part of the point of the mult_isarr monkey business. + * Rather than passing back flags in a non-trivial way, maybe + * we could decide on the basis of flags passed down to us. + * + * This is the ideal place to do any last-minute conversion from + * array to strings. However, given all the transformations we've + * already done, probably if it's going to be done it will already + * have been. (I'd really like to keep everying in aval or + * equivalent and only locally decide if we need to treat it + * as a scalar.) + */ if (isarr) { char *x; char *y; @@ -1951,6 +2481,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) int i; LinkNode on = n; + /* Handle the (u) flag; we need this before the next test */ if (unique) { if(!copied) aval = arrdup(aval); @@ -1960,6 +2491,16 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) zhuniqarray(aval); } if ((!aval[0] || !aval[1]) && !plan9) { + /* + * Empty array or single element. Currently you only + * get a single element array at this point from the + * unique expansion above. but we can potentially + * have other reasons. + * + * The following test removes the markers + * from surrounding double quotes, but I don't know why + * that's necessary. + */ int vallen; if (aptr > (char *) getdata(n) && aptr[-1] == Dnull && *fstr == Dnull) @@ -1977,6 +2518,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) setdata(n, y); return n; } + /* Handle (o) and (O) and their variants */ if (sortit) { if (!copied) aval = arrdup(aval); @@ -2004,6 +2546,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) } } if (plan9) { + /* Handle RC_EXPAND_PARAM */ LinkNode tn; local_list1(tl); @@ -2049,6 +2592,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) return n; } } else { + /* + * Not RC_EXPAND_PARAM: simply join the first and + * last values. + * TODO: how about removing the restriction that + * aval[1] is non-NULL to promote consistency?, or + * simply changing the test so that we drop into + * the scalar branch, instead of tricking isarr? + */ x = aval[0]; if (prenum || postnum) x = dopadding(x, prenum, postnum, preone, postone, @@ -2095,6 +2646,11 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) if (eval) n = on; } else { + /* + * Scalar value. Handle last minute transformations + * such as left- or right-padding and the (e) flag to + * revaluate the result. + */ int xlen; char *x; char *y; |