From ca0cb170119b3cef4ccdfc743b6c225e9e6eed53 Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Tue, 17 Nov 2015 17:44:12 +0000 Subject: 37128: work around alias expansion trashing subscript parsing --- ChangeLog | 3 +++ Src/lex.c | 28 ++++++++++++++++++++++------ Test/D06subscript.ztst | 17 +++++++++++++++++ 3 files changed, 42 insertions(+), 6 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8b11e2d09..1000ae611 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,8 @@ 2015-11-17 Peter Stephenson + * 37128: Src/lex.c, Test/D06subscript.ztst: work around alias + expansion trashing subscript being parsed. + * 37127 (minor tweak): Doc/Zsh/params.yo: document indexing of $signals. diff --git a/Src/lex.c b/Src/lex.c index 89af96123..81904c171 100644 --- a/Src/lex.c +++ b/Src/lex.c @@ -1617,7 +1617,7 @@ parsestrnoerr(char **s) mod_export char * parse_subscript(char *s, int sub, int endchar) { - int l = strlen(s), err; + int l = strlen(s), err, toklen; char *t; if (!*s || *s == endchar) @@ -1626,18 +1626,34 @@ parse_subscript(char *s, int sub, int endchar) untokenize(t = dupstring(s)); inpush(t, 0, NULL); strinbeg(0); + /* + * Warning to Future Generations: + * + * This way of passing the subscript through the lexer is brittle. + * Code above this for several layers assumes that when we tokenise + * the input it goes into the same place as the original string. + * However, the lexer may overwrite later bits of the string or + * reallocate it, in particular when expanding aliases. To get + * around this, we copy the string and then copy it back. This is a + * bit more robust but still relies on the underlying assumption of + * length preservation. 
+ */ lexbuf.len = 0; - lexbuf.ptr = tokstr = s; + lexbuf.ptr = tokstr = dupstring(s); lexbuf.siz = l + 1; err = dquote_parse(endchar, sub); + toklen = (int)(lexbuf.ptr - tokstr); + DPUTS(toklen > l, "Bad length for parsed subscript"); + memcpy(s, tokstr, toklen); if (err) { - err = *lexbuf.ptr; - *lexbuf.ptr = '\0'; + char *strend = s + toklen; + err = *strend; + *strend = '\0'; untokenize(s); - *lexbuf.ptr = err; + *strend = err; s = NULL; } else { - s = lexbuf.ptr; + s += toklen; } strinend(); inpop(); diff --git a/Test/D06subscript.ztst b/Test/D06subscript.ztst index cffca742e..144923667 100644 --- a/Test/D06subscript.ztst +++ b/Test/D06subscript.ztst @@ -249,3 +249,20 @@ string[0]=! 1:Can't set only element zero of string ?(eval):1: string: assignment to invalid subscript range + + typeset -A assoc=(leader topcat officer dibble sidekick choochoo) + alias myind='echo leader' myletter='echo 1' myletter2='echo 4' + print ${assoc[$(myind)]} + print $assoc[$(myind)] + print ${assoc[$(myind)][$(myletter)]}${assoc[$(myind)][$(myletter2)]} + assoc[$(myind)]='of the gang' + print ${assoc[$(myind)]} + print $assoc[$(myind)] + print $assoc[leader] +0: Parsing subscript with non-trivial tokenisation +>topcat +>topcat +>tc +>of the gang +>of the gang +>of the gang -- cgit 1.4.1