7 files changed, 242 insertions, 51 deletions
diff --git a/ChangeLog b/ChangeLog
index 2203b9dc8..774d7044b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2001-04-19  Bart Schaefer  <schaefer@zsh.org>
+
+	* 14008, 1405x: Src/lex.c, Src/math.c, Src/params.c,
+	Test/D06subscript.ztst: Improve parsing of subscripts so that
+	many forms that failed before, work now, particularly for assoc
+	array keys; create a test suite.
+
+	* 14008: Src/hist.c: Don't getsparam("HISTFILE") until it is
+	actually needed, because other parameter expansions reset the
+	history mechanism for subscript parsing.
+
+	* 14008: Src/builtin.c: Keep track of the new Param created when
+	typeset_single() calls setsparam() in some circumstances; drop a
+	redundant isident() test.
+
 2001-04-19  Peter Stephenson  <pws@pwstephenson.fsnet.co.uk>
 
 	* 14046: Doc/Zle/zle.yo, Src/Zle/zle_keymap.c: bindkey -rp removes
diff --git a/Src/builtin.c b/Src/builtin.c
index 627a3b82c..0f02ae0a9 100644
--- a/Src/builtin.c
+++ b/Src/builtin.c
@@ -1690,8 +1690,8 @@ typeset_single(char *cname, char *pname, Param pm, int func,
 		delenv(pm->env);
 		pm->env = NULL;
 	    }
-	    if (value)
-		setsparam(pname, ztrdup(value));
+	    if (value && !(pm = setsparam(pname, ztrdup(value))))
+		return 0;
 	} else if (value) {
 	    zwarnnam(cname, "can't assign new value for array %s", pname, 0);
 	    return NULL;
@@ -1807,9 +1807,10 @@ typeset_single(char *cname, char *pname, Param pm, int func,
 	pm->level = keeplocal;
     else if (on & PM_LOCAL)
 	pm->level = locallevel;
-    if (value && !(pm->flags & (PM_ARRAY|PM_HASHED)))
-	setsparam(pname, ztrdup(value));
-    else if (newspecial && !(pm->old->flags & PM_NORESTORE)) {
+    if (value && !(pm->flags & (PM_ARRAY|PM_HASHED))) {
+	if (!(pm = setsparam(pname, ztrdup(value))))
+	    return 0;
+    } else if (newspecial && !(pm->old->flags & PM_NORESTORE)) {
 	/*
 	 * We need to use the special setting function to re-initialise
 	 * the special parameter to empty.
@@ -2061,12 +2062,6 @@ bin_typeset(char *name, char **argv, char *ops, int func)
 
     /* Take arguments literally.  Don't glob */
     while ((asg = getasg(*argv++))) {
-	/* check if argument is a valid identifier */
-	if (!isident(asg->name)) {
-	    zerr("not an identifier: %s", asg->name, 0);
-	    returnval = 1;
-	    continue;
-	}
 	if (!typeset_single(name, asg->name,
 			    (Param) (paramtab == realparamtab ?
 				     gethashnode2(paramtab, asg->name) :
diff --git a/Src/hist.c b/Src/hist.c
index a80a21967..58fe748be 100644
--- a/Src/hist.c
+++ b/Src/hist.c
@@ -1011,7 +1011,6 @@ hend(Eprog prog)
     DPUTS(stophist != 2 && !(inbufflags & INP_ALIAS) && !chline,
 	  "BUG: chline is NULL in hend()");
     queue_signals();
-    hf = getsparam("HISTFILE");
     if (histdone & HISTFLAG_SETTY)
 	settyinfo(&shttyinfo);
     if (!(histactive & HA_NOINC))
@@ -1028,6 +1027,7 @@ hend(Eprog prog)
      && (hist_ignore_all_dups = isset(HISTIGNOREALLDUPS)) != 0)
 	histremovedups();
     /* For history sharing, lock history file once for both read and write */
+    hf = getsparam("HISTFILE");
     if (isset(SHAREHISTORY) && lockhistfile(hf, 0)) {
 	readhistfile(hf, 0, HFILE_USE_OPTIONS | HFILE_FAST);
 	curline.histnum = curhist+1;
diff --git a/Src/lex.c b/Src/lex.c
index de58ade7a..46d83cb21 100644
--- a/Src/lex.c
+++ b/Src/lex.c
@@ -1302,10 +1302,13 @@ dquote_parse(char endchar, int sub)
 	    c = hgetc();
 	    if (c != '\n') {
 		if (c == '$' || c == '\\' || (c == '}' && !intick && bct) ||
-		    c == endchar || c == '`')
+		    c == endchar || c == '`' ||
+		    (math && (c == '[' || c == ']' ||
+			      c == '(' || c == ')' ||
+			      c == '{' || c == '}')))
 		    add(Bnull);
 		else {
-		    /* lexstop is implicitely handled here */
+		    /* lexstop is implicitly handled here */
 		    add('\\');
 		    goto cont;
 		}
@@ -1458,6 +1461,38 @@ parsestrnoerr(char *s)
     return err;
 }
 
+/**/
+mod_export char *
+parse_subscript(char *s)
+{
+    int l = strlen(s), err;  /* l: length of s on entry, used for bsiz below */
+    char *t;  /* duplicate of s that is handed to inpush() */
+
+    if (!*s || *s == ']')  /* reject an empty string or one that starts with ']' */
+	return 0;  /* 0 is the null pointer here: no subscript to parse */
+    lexsave();  /* paired with lexrestore() before returning */
+    untokenize(t = dupstring(s));  /* untokenize() is applied to the duplicate t, not to s */
+    inpush(t, 0, NULL);  /* paired with inpop() below */
+    strinbeg(0);  /* paired with strinend() below */
+    len = 0;  /* len, bptr, tokstr, bsiz are not declared in this function */
+    bptr = tokstr = s;  /* presumably the lexer's output buffer, so the parsed text overwrites s in place -- TODO confirm */
+    bsiz = l + 1;  /* strlen(s) + 1 */
+    err = dquote_parse(']', 1);  /* ']' is passed as dquote_parse()'s endchar argument */
+    if (err) {  /* nonzero result is treated as failure */
+	err = *bptr;  /* err is reused to hold the byte currently at bptr */
+	*bptr = 0;  /* temporarily terminate the string at bptr */
+	untokenize(s);  /* presumably undoes tokenization of s up to that terminator */
+	*bptr = err;  /* put the saved byte back */
+	s = 0;  /* failure: the function returns the null pointer */
+    } else
+	s = bptr;  /* success: return bptr; isident() and getindex() in this patch treat it as the closing-bracket position */
+    strinend();
+    inpop();
+    DPUTS(cmdsp, "BUG: parse_subscript: cmdstack not empty.");  /* debug check that cmdsp is zero at this point */
+    lexrestore();
+    return s;  /* null pointer on failure, otherwise the final value of bptr */
+}
+
 /* Tokenize a string given in s. Parsing is done as if s were a normal *
  * command-line argument but it may contain separators.  This is used  *
  * to parse the right-hand side of ${...%...} substitutions.           */
diff --git a/Src/math.c b/Src/math.c
index bad958243..0ea16e7ba 100644
--- a/Src/math.c
+++ b/Src/math.c
@@ -517,6 +517,7 @@ setvar(char *s, mnumber v)
     }
     if (noeval)
 	return v;
+    untokenize(s);
     setnparam(s, v);
     return v;
 }
diff --git a/Src/params.c b/Src/params.c
index 90dd5f3c7..aaeebce76 100644
--- a/Src/params.c
+++ b/Src/params.c
@@ -770,38 +770,18 @@ isident(char *s)
 	if (!iident(*ss))
 	    break;
 
-#if 0
-    /* If this exhaust `s' or the next two characters *
-     * are [(, then it is a valid identifier.         */
-    if (!*ss || (*ss == '[' && ss[1] == '('))
-	return 1;
-
-    /* Else if the next character is not [, then it is *
-     * definitely not a valid identifier.              */
-    if (*ss != '[')
-	return 0;
-
-    noeval = 1;
-    (void)mathevalarg(++ss, &ss);
-    if (*ss == ',')
-	(void)mathevalarg(++ss, &ss);
-    noeval = ne;		/* restore the value of noeval */
-    if (*ss != ']' || ss[1])
-	return 0;
-    return 1;
-#else
     /* If the next character is not [, then it is *
-     * definitely not a valid identifier.              */
+     * definitely not a valid identifier.         */
     if (!*ss)
 	return 1;
     if (*ss != '[')
 	return 0;
 
-    /* Require balanced [ ] pairs */
-    if (skipparens('[', ']', &ss))
+    /* Require balanced [ ] pairs with something between */
+    if (!(ss = parse_subscript(++ss)))
 	return 0;
-    return !*ss;
-#endif
+    untokenize(s);
+    return !ss[1];
 }
 
 /**/
@@ -933,8 +913,21 @@ getarg(char **str, int *inv, Value v, int a2, zlong *w)
     }
 
     for (t = s, i = 0;
-	 (c = *t) && ((c != ']' && c != Outbrack &&
+	 (c = *t) && ((c != Outbrack &&
 		       (ishash || c != ',')) || i); t++) {
+	/* Untokenize INULL() except before brackets, for parsestr() */
+	if (INULL(c)) {
+	    if (t[1] == '[' || t[1] == ']') {
+		/* This test handles nested subscripts in hash keys */
+		if (ishash && i)
+		    *t = ztokens[c - Pound];
+		needtok = 1;
+		++t;
+	    } else
+		*t = ztokens[c - Pound];
+	    continue;
+	}
+	/* Inbrack and Outbrack are probably never found here ... */
 	if (c == '[' || c == Inbrack)
 	    i++;
 	else if (c == ']' || c == Outbrack)
@@ -946,11 +939,18 @@ getarg(char **str, int *inv, Value v, int a2, zlong *w)
 	return 0;
     s = dupstrpfx(s, t - s);
     *str = tt = t;
+    /* If we're NOT reverse subscripting, strip the INULL()s so brackets *
+     * are not backslashed after parsestr().  Otherwise leave them alone *
+     * so that the brackets will be escaped when we patcompile() or when *
+     * subscript arithmetic is performed (for nested subscripts).        */
+    if (ishash && !rev)
+	remnulargs(s);
     if (needtok) {
 	if (parsestr(s))
 	    return 0;
 	singsub(&s);
-    }
+    } else if (rev)
+	remnulargs(s);	/* This is probably always a no-op, but ... */
     if (!rev) {
 	if (ishash) {
 	    HashTable ht = v->pm->gets.hfn(v->pm);
@@ -1019,7 +1019,7 @@ getarg(char **str, int *inv, Value v, int a2, zlong *w)
 		    s = d;
 		}
 	    } else {
-		if (!l || s[l - 1] != '*') {
+		if (!l || s[l - 1] != '*' || (l > 1 && s[l - 2] == '\\')) {
 		    d = (char *) hcalloc(l + 2);
 		    strcpy(d, s);
 		    strcat(d, "*");
@@ -1028,6 +1028,7 @@ getarg(char **str, int *inv, Value v, int a2, zlong *w)
 	    }
 	}
 	tokenize(s);
+	remnulargs(s);
 
 	if (keymatch || (pprog = patcompile(s, 0, NULL))) {
 	    int len;
@@ -1156,7 +1157,7 @@ getarg(char **str, int *inv, Value v, int a2, zlong *w)
 				    return r;
 		    }
 		}
-		return 0;
+		return down ? 0 : len + 1;
 	    }
 	}
     }
@@ -1170,13 +1171,27 @@ getindex(char **pptr, Value v)
     int start, end, inv = 0;
     char *s = *pptr, *tbrack;
 
-    *s++ = '[';
-    for (tbrack = s; *tbrack && *tbrack != ']' && *tbrack != Outbrack; tbrack++)
+    *s++ = Inbrack;
+    s = parse_subscript(s);	/* Error handled after untokenizing */
+    /* Now untokenize everything except INULL() markers so we can check   *
+     * for the '*' and '@' special subscripts.  The INULL()s are removed  *
+     * in getarg() after we know whether we're doing reverse indexing.    */
+    for (tbrack = *pptr + 1; *tbrack && tbrack != s; tbrack++) {
+	if (INULL(*tbrack) && !*++tbrack)
+	    break;
 	if (itok(*tbrack))
 	    *tbrack = ztokens[*tbrack - Pound];
-    if (*tbrack == Outbrack)
-	*tbrack = ']';
-    if ((s[0] == '*' || s[0] == '@') && s[1] == ']') {
+    }
+    /* If we reached the end of the string (s == NULL) we have an error */
+    if (*tbrack)
+	*tbrack = Outbrack;
+    else {
+	zerr("invalid subscript", NULL, 0);
+	*pptr = tbrack;
+	return 1;
+    }
+    s = *pptr + 1;
+    if ((s[0] == '*' || s[0] == '@') && s[1] == Outbrack) {
 	if ((v->isarr || IS_UNSET_VALUE(v)) && s[0] == '@')
 	    v->isarr |= SCANPM_ISVAR_AT;
 	v->start = 0;
@@ -1208,12 +1223,12 @@ getindex(char **pptr, Value v)
 	    }
 	    if (*s == ',') {
 		zerr("invalid subscript", NULL, 0);
-		while (*s != ']' && *s != Outbrack)
+		while (*s && *s != Outbrack)
 		    s++;
 		*pptr = s;
 		return 1;
 	    }
-	    if (*s == ']' || *s == Outbrack)
+	    if (*s == Outbrack)
 		s++;
 	} else {
 	    int com;
@@ -1228,7 +1243,7 @@ getindex(char **pptr, Value v)
 		start--;
 	    else if (start == 0 && end == 0)
 		end++;
-	    if (*s == ']' || *s == Outbrack) {
+	    if (*s == Outbrack) {
 		s++;
 		if (v->isarr && start == end-1 && !com &&
 		    (!(v->isarr & SCANPM_MATCHMANY) ||
diff --git a/Test/D06subscript.ztst b/Test/D06subscript.ztst
new file mode 100644
index 000000000..6438dc040
--- /dev/null
+++ b/Test/D06subscript.ztst
@@ -0,0 +1,130 @@
+# Test parameter subscripting.
+
+%prep
+
+  s='Twinkle, twinkle, little *, [how] I [wonder] what?  You are!'
+  a=('1' ']'  '?' '\2'  '\]' '\?'  '\\3' '\\]'  '\\?' '\\\4'  '\\\]' '\\\?')
+  typeset -g -A A
+  A=($a)
+
+%test
+
+  x=','
+  print $s[(i)winkle] $s[(I)winkle]
+  print ${s[(i)You are]} $#s
+  print ${s[(r)$x,(R)$x]}
+0:Scalar pattern subscripts without wildcards
+>2 11
+>53 60
+>, twinkle, little *,
+
+  x='*'
+  print $s[(i)*] $s[(i)\*] $s[(i)$x*] $s[(i)${(q)x}*] $s[(I)$x\*]
+  print $s[(r)?,(R)\?] $s[(r)\?,(R)?]
+  print $s[(r)\*,(R)*]
+  print $s[(r)\],(R)\[]
+0:Scalar pattern subscripts with wildcards
+>1 26 1 26 26
+>Twinkle, twinkle, little *, [how] I [wonder] what? ?  You are!
+>*, [how] I [wonder] what?  You are!
+>] I [
+
+  # $s[(R)x] actually is $s[0], but zsh treats 0 as 1 for subscripting.
+  print $s[(i)x] : $s[(I)x]
+  print $s[(r)x] : $s[(R)x]
+0:Scalar pattern subscripts that do not match
+>61 : 0
+>: T
+
+  print -R $s[$s[(i)\[]] $s[(i)$s[(r)\*]] $s[(i)${(q)s[(r)\]]}]
+0:Scalar subscripting using a pattern subscript to get the index
+>[ 1 33
+
+  print -R $a[(r)?] $a[(R)?]
+  print $a[(n:2:i)?] $a[(n:2:I)?]
+  print $a[(i)\?] $a[(I)\?]
+  print $a[(i)*] $a[(i)\*]
+0:Array pattern subscripts
+>1 ?
+>2 2
+>3 3
+>1 13
+
+  # It'd be nice to do some of the following with (r), but we run into
+  # limitations of the ztst script parsing of backslashes in the output.
+  print -R $a[(i)\\\\?] $a[(i)\\\\\?]
+  print -R $a[(i)\\\\\\\\?] $a[(i)\\\\\\\\\?]
+  print -R ${a[(i)\\\\\\\\?]} ${a[(i)\\\\\\\\\?]}
+  print -R "$a[(i)\\\\\\\\?] $a[(i)\\\\\\\\\?]"
+  print -R $a[(i)\]] $a[(i)\\\\\]] $a[(i)\\\\\\\\\]] $a[(i)\\\\\\\\\\\\\]]
+  print -R $a[(i)${(q)a[5]}] $a[(i)${(q)a[8]}] $a[(i)${(q)a[11]}]
+  print -R $a[(i)${a[3]}] $a[(i)${a[6]}] $a[(i)${a[9]}] $a[(i)${a[12]}]
+0:Array pattern subscripts with multiple backslashes
+>4 6
+>7 9
+>7 9
+>7 9
+>2 5 8 11
+>5 8 11
+>1 3 4 6
+
+  print -R $A[1] $A[?] $A[\\\\3] $A[\\\]]
+  print -R $A[$a[11]]
+  print -R $A[${(q)a[5]}]
+0:Associative array lookup (direct subscripting)
+>] \2 \\] \?
+>\\\?
+>\\\?
+
+  # The (o) is necessary here for predictable output ordering
+  print -R $A[(I)\?] ${(o)A[(I)?]}
+  print -R $A[(i)\\\\\\\\3]
+  print -R $A[(I)\\\\\\\\\?] ${(o)A[(I)\\\\\\\\?]}
+0:Associative array lookup (pattern subscripting)
+>? 1 ?
+>\\3
+>\\? \\3 \\?
+
+  print -R $A[(R)\?] : ${(o)A[(R)?]}
+  print -R $A[(R)\\\\\?] ${(o)A[(R)\\\\?]} ${(o)A[(R)\\\\\?]}
+  print -R ${(o)A[(R)\\\\\\\\\]]}
+0:Associative array lookup (reverse subscripting)
+>: ]
+>\? \2 \? \?
+>\\]
+
+  x='*'
+  A[$x]=xstar
+  A[${(q)x}]=qxstar
+  print -R ${(k)A[(r)xstar]} $A[$x]
+  print -R ${(k)A[(r)qxstar]} $A[${(q)x}]
+  # A[*] is interpreted specially, assignment to it fails silently (oops)
+  A[*]=star
+  A[\*]=backstar
+  print -R ${(k)A[(r)star]} $A[$x]
+  print -R ${(k)A[(r)backstar]} $A[\*]
+0:Associative array assignment
+>* xstar
+>\* qxstar
+>xstar
+>\* backstar
+
+  o='['
+  c=']'
+  A[\]]=cbrack
+  A[\[]=obrack
+  A[\\\[]=backobrack
+  A[\\\]]=backcbrack
+  print -R $A[$o] $A[$c] $A[\[] $A[\]] $A[\\\[] $A[\\\]]
+  print -R $A[(i)\[] $A[(i)\]] $A[(i)\\\\\[] $A[(i)\\\\\]]
+0:Associative array keys with open and close brackets
+>obrack cbrack obrack cbrack backobrack backcbrack
+>[ ] \[ \]
+
+  print -R $A[$o] $A[$s[(r)\[]]
+  print -R $A[(r)$c] $A[(r)$s[(r)\]]]
+  print -R $A[$A[(i)\\\\\]]]
+0:Associative array lookup using a pattern subscript to get the key
+>obrack obrack
+>] ]
+>backcbrack