diff options
-rw-r--r-- | ChangeLog | 8 | ||||
-rw-r--r-- | Doc/Zsh/expn.yo | 39 | ||||
-rw-r--r-- | Src/lex.c | 26 | ||||
-rw-r--r-- | Src/params.c | 4 | ||||
-rw-r--r-- | Src/subst.c | 128 | ||||
-rw-r--r-- | Test/D04parameter.ztst | 46 |
6 files changed, 241 insertions, 10 deletions
diff --git a/ChangeLog b/ChangeLog index dd3873860..91b8d964d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2010-11-18 Peter Stephenson <pws@csr.com> + + * 28418: Doc/Zsh/expn.yo, Src/lex.c, Src/params.c, Src/subst.c, + Test/D04parameter.ztst: add ${NAME:OFFSET} and + ${NAME:OFFSET:LENGTH} substitution syntax. + 2010-11-17 Peter Stephenson <pws@csr.com> * 28377: Doc/Zsh/grammar.yo: document more alias problems. @@ -13819,5 +13825,5 @@ ***************************************************** * This is used by the shell to define $ZSH_PATCHLEVEL -* $Revision: 1.5121 $ +* $Revision: 1.5122 $ ***************************************************** diff --git a/Doc/Zsh/expn.yo b/Doc/Zsh/expn.yo index 64fcd74e3..6f29a8778 100644 --- a/Doc/Zsh/expn.yo +++ b/Doc/Zsh/expn.yo @@ -585,6 +585,45 @@ If var(name) is an array the matching array elements are removed (use the `tt((M))' flag to remove the non-matched elements). ) +xitem(tt(${)var(name)tt(:)var(offset)tt(})) +item(tt(${)var(name)tt(:)var(offset)tt(:)var(length)tt(}))( +This syntax gives effects similar to parameter subscripting +in the form tt($)var(name)tt([)var(offset)tt(,)var(end)tt(]) but in +a form compatible with other shells. + +If the variable var(name) is a scalar, substitute the contents +starting from offset var(offset); if var(name) is an array, +substitute elements from element var(offset). If var(length) is +given, substitute that many characters or elements, otherwise the +entire rest of the scalar or array. + +var(offset) is treated similarly to a parameter subscript: +the offset of the first character or element in var(name) +is 0 if the option tt(KSH_ARRAYS) is set, else 1; a negative +subscript counts backwards so that -1 corresponds to the last +character or element. + +var(length) is always treated directly as a length and hence may not be +negative. 
+ +var(offset) and var(length) undergo the same set of shell substitutions +as for scalar assignment; in addition, they are then subject to arithmetic +evaluation. Hence, for example + +example(print ${foo:3} +print ${foo: 1 + 2} +print ${foo:$(( 1 + 2))} +print ${foo:$(echo 1 + 2)}) + +all have the same effect. + +Note that if var(offset) is negative, the tt(-) may not appear immediately +after the tt(:) as this indicates the +tt(${)var(name)tt(:-)var(word)tt(}) form of substitution; a space +may be inserted before the tt(-). Furthermore, neither var(offset) nor +var(length) may begin with an alphabetic character or tt(&) as these are +used to indicate history-style modifiers. +) xitem(tt(${)var(name)tt(/)var(pattern)tt(/)var(repl)tt(})) item(tt(${)var(name)tt(//)var(pattern)tt(/)var(repl)tt(}))( Replace the longest possible match of var(pattern) in the expansion of diff --git a/Src/lex.c b/Src/lex.c index 28899fef0..fdb4b98ac 100644 --- a/Src/lex.c +++ b/Src/lex.c @@ -1398,7 +1398,12 @@ gettokstr(int c, int sub) } -/* Return non-zero for error (character to unget), else zero */ +/* + * Parse input as if in double quotes. + * endchar is the end character to expect. + * sub has got something to do with whether we are doing quoted substitution. + * Return non-zero for error (character to unget), else zero + */ /**/ static int @@ -1591,14 +1596,20 @@ parsestrnoerr(char *s) return err; } +/* + * Parse a subscript in string s. + * sub is passed down to dquote_parse(). + * endchar is the final character. + * Return the next character, or NULL. 
+ */ /**/ mod_export char * -parse_subscript(char *s, int sub) +parse_subscript(char *s, int sub, int endchar) { int l = strlen(s), err; char *t; - if (!*s || *s == ']') + if (!*s || *s == endchar) return 0; lexsave(); untokenize(t = dupstring(s)); @@ -1607,15 +1618,16 @@ parse_subscript(char *s, int sub) len = 0; bptr = tokstr = s; bsiz = l + 1; - err = dquote_parse(']', sub); + err = dquote_parse(endchar, sub); if (err) { err = *bptr; - *bptr = 0; + *bptr = '\0'; untokenize(s); *bptr = err; - s = 0; - } else + s = NULL; + } else { s = bptr; + } strinend(); inpop(); DPUTS(cmdsp, "BUG: parse_subscript: cmdstack not empty."); diff --git a/Src/params.c b/Src/params.c index 7ac33b912..92e0e5368 100644 --- a/Src/params.c +++ b/Src/params.c @@ -1013,7 +1013,7 @@ isident(char *s) return 0; /* Require balanced [ ] pairs with something between */ - if (!(ss = parse_subscript(++ss, 1))) + if (!(ss = parse_subscript(++ss, 1, ']'))) return 0; untokenize(s); return !ss[1]; @@ -1628,7 +1628,7 @@ getindex(char **pptr, Value v, int flags) *s++ = '['; /* Error handled after untokenizing */ - s = parse_subscript(s, flags & SCANPM_DQUOTED); + s = parse_subscript(s, flags & SCANPM_DQUOTED, ']'); /* Now we untokenize everything except inull() markers so we can check * * for the '*' and '@' special subscripts. The inull()s are removed * * in getarg() after we know whether we're doing reverse indexing. */ diff --git a/Src/subst.c b/Src/subst.c index 5f65945a5..c0fb38a48 100644 --- a/Src/subst.c +++ b/Src/subst.c @@ -1371,6 +1371,43 @@ untok_and_escape(char *s, int escapes, int tok_arg) return dst; } +/* + * See if an argument str looks like a subscript or length following + * a colon and parse it. It must be followed by a ':' or nothing. + * If this succeeds, expand and return the evaluated expression if + * found, else return NULL. + * + * We assume this is what is meant if the first character is not + * an alphabetic character or '&', which signify modifiers. 
+ * + * Set *endp to point to the next character following. + */ +static char * +check_colon_subscript(char *str, char **endp) +{ + int sav; + + /* Could this be a modifier (or empty)? */ + if (!*str || ialpha(*str) || *str == '&') + return NULL; + + *endp = parse_subscript(str, 0, ':'); + if (!*endp) { + /* No trailing colon? */ + *endp = parse_subscript(str, 0, '\0'); + if (!*endp) + return NULL; + } + sav = **endp; + **endp = '\0'; + if (parsestr(str = dupstring(str))) + return NULL; + singsub(&str); + + **endp = sav; + return str; +} + /* parameter substitution */ #define isstring(c) ((c) == '$' || (char)(c) == String || (char)(c) == Qstring) @@ -2683,6 +2720,97 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) } val = dupstring(""); } + if (colf && inbrace) { + /* + * Look for ${PARAM:OFFSET} or ${PARAM:OFFSET:LENGTH}. + * This must appear before modifiers. For compatibility + * with bash we perform both standard string substitutions + * and math eval. + */ + char *check_offset2; + char *check_offset = check_colon_subscript(s, &check_offset2); + if (check_offset) { + zlong offset = mathevali(check_offset); + zlong length = (zlong)-1; + if (errflag) + return NULL; + if ((*check_offset2 && *check_offset2 != ':')) { + zerr("invalid subscript: %s", check_offset); + return NULL; + } + if (*check_offset2) { + check_offset = check_colon_subscript(check_offset2 + 1, + &check_offset2); + if (*check_offset2 && *check_offset2 != ':') { + zerr("invalid length: %s", check_offset); + return NULL; + } + length = mathevali(check_offset); + if (errflag) + return NULL; + if (length < (zlong)0) { + zerr("invalid length: %s", check_offset); + return NULL; + } + } + if (!isset(KSHARRAYS) && offset > 0) + offset--; + if (isarr) { + int alen = arrlen(aval), count; + char **srcptr, **dstptr, **newarr; + + if (offset < 0) { + offset += alen; + if (offset < 0) + offset = 0; + } + if (length < 0) + length = alen; + if (offset > alen) + offset = alen; + if (offset + 
length > alen) + length = alen - offset; + count = length; + srcptr = aval + offset; + newarr = dstptr = (char **) + zhalloc((length+1)*sizeof(char *)); + while (count--) + *dstptr++ = dupstring(*srcptr++); + *dstptr = (char *)NULL; + aval = newarr; + } else { + char *sptr, *eptr; + if (offset < 0) { + MB_METACHARINIT(); + for (sptr = val; *sptr; ) { + sptr += MB_METACHARLEN(sptr); + offset++; + } + if (offset < 0) + offset = 0; + } + MB_METACHARINIT(); + for (sptr = val; *sptr && offset; ) { + sptr += MB_METACHARLEN(sptr); + offset--; + } + if (length >= 0) { + for (eptr = sptr; *eptr && length; ) { + eptr += MB_METACHARLEN(eptr); + length--; + } + val = dupstrpfx(sptr, eptr - sptr); + } else { + val = dupstring(sptr); + } + } + if (!*check_offset2) { + colf = 0; + } else { + s = check_offset2 + 1; + } + } + } if (colf) { /* * History style colon modifiers. May need to apply diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst index fe978263f..7c6a465af 100644 --- a/Test/D04parameter.ztst +++ b/Test/D04parameter.ztst @@ -1256,3 +1256,49 @@ 0:$ZSH_EVAL_CONTEXT and $zsh_eval_context >toplevel >shfunc cmdsubst + + foo="123456789" + print ${foo:3} + print ${foo: 1 + 3} + print ${foo:$(( 2 + 3))} + print ${foo:$(echo 3 + 3)} + print ${foo:3:1} + print ${foo: 1 + 3:(4-2)/2} + print ${foo:$(( 2 + 3)):$(( 7 - 6 ))} + print ${foo:$(echo 3 + 3):`echo 4 - 3`} + print ${foo: -1} + print ${foo: -10} +0:Bash-style subscripts, scalar +>3456789 +>456789 +>56789 +>6789 +>3 +>4 +>5 +>6 +>9 +>123456789 + + foo=(1 2 3 4 5 6 7 8 9) + print ${foo:3} + print ${foo: 1 + 3} + print ${foo:$(( 2 + 3))} + print ${foo:$(echo 3 + 3)} + print ${foo:3:1} + print ${foo: 1 + 3:(4-2)/2} + print ${foo:$(( 2 + 3)):$(( 7 - 6 ))} + print ${foo:$(echo 3 + 3):`echo 4 - 3`} + print ${foo: -1} + print ${foo: -10} +0:Bash-style subscripts, array +>3 4 5 6 7 8 9 +>4 5 6 7 8 9 +>5 6 7 8 9 +>6 7 8 9 +>3 +>4 +>5 +>6 +>9 +>1 2 3 4 5 6 7 8 9 |