From f2f3b86a5f15500dfee707d27eee9784a1626e4d Mon Sep 17 00:00:00 2001
From: Paul Ackersviller <packersv@users.sourceforge.net>
Date: Mon, 25 Jun 2007 03:11:44 +0000
Subject: Merge of 21862/21863/21870: GLOB_SUBST shouldn't swallow up
 backslashes in parameter substitutions that don't match anything.

---
 Src/glob.c             |   40 +-
 Src/lex.c              |  304 +++++++++-----
 Src/pattern.c          |   13 +-
 Src/subst.c            | 1054 +++++++++++++++++++++++++++++++++++++++++++-----
 Src/zsh.h              |   29 +-
 Test/D04parameter.ztst |   14 +
 Test/ztst.zsh          |  184 +++++++--
 7 files changed, 1395 insertions(+), 243 deletions(-)

diff --git a/Src/glob.c b/Src/glob.c
index 93d5e3312..be2dcd5ec 100644
--- a/Src/glob.c
+++ b/Src/glob.c
@@ -2487,19 +2487,29 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
 mod_export void
 tokenize(char *s)
 {
-    zshtokenize(s, 0);
+    zshtokenize(s, 0, 0);
 }
 
+/*
+ * shtokenize is used when we tokenize a string with GLOB_SUBST set.
+ * In that case we need to retain backslashes when we turn the
+ * pattern back into a string, so that the string is not
+ * modified if it failed to match a pattern.
+ *
+ * It may be modified by the effect of SH_GLOB which turns off
+ * various zsh-specific options.
+ */
+
 /**/
 mod_export void
 shtokenize(char *s)
 {
-    zshtokenize(s, isset(SHGLOB));
+    zshtokenize(s, 1, isset(SHGLOB));
 }
 
 /**/
 static void
-zshtokenize(char *s, int shglob)
+zshtokenize(char *s, int glbsbst, int shglob)
 {
     char *t;
     int bslash = 0;
@@ -2508,9 +2518,10 @@ zshtokenize(char *s, int shglob)
       cont:
 	switch (*s) {
 	case Bnull:
+	case Bnullkeep:
 	case '\\':
 	    if (bslash) {
-		s[-1] = Bnull;
+		s[-1] = glbsbst ? Bnullkeep : Bnull;
 		break;
 	    }
 	    bslash = 1;
@@ -2519,7 +2530,7 @@ zshtokenize(char *s, int shglob)
 	    if (shglob)
 		break;
 	    if (bslash) {
-		s[-1] = Bnull;
+		s[-1] = glbsbst ? Bnullkeep : Bnull;
 		break;
 	    }
 	    t = s;
@@ -2549,7 +2560,7 @@ zshtokenize(char *s, int shglob)
 	    for (t = ztokens; *t; t++)
 		if (*t == *s) {
 		    if (bslash)
-			s[-1] = Bnull;
+			s[-1] = glbsbst ? Bnullkeep : Bnull;
 		    else
 			*s = (t - ztokens) + Pound;
 		    break;
@@ -2569,12 +2580,23 @@ remnulargs(char *s)
 	char *o = s, c;
 
 	while ((c = *s++))
-	    if (INULL(c)) {
+	    if (c == Bnullkeep) {
+		/*
+		 * An active backslash that needs to be turned back into
+		 * a real backslash for output.  However, we don't
+		 * do that yet since we need to ignore it during
+		 * pattern matching.
+		 */
+		continue;
+	    } else if (INULL(c)) {
 		char *t = s - 1;
 
-		while ((c = *s++))
-		    if (!INULL(c))
+		while ((c = *s++)) {
+		    if (c == Bnullkeep)
+			*t++ = '\\';
+		    else if (!INULL(c))
 			*t++ = c;
+		}
 		*t = '\0';
 		if (!*o) {
 		    o[0] = Nularg;
diff --git a/Src/lex.c b/Src/lex.c
index 147bea598..52b6885af 100644
--- a/Src/lex.c
+++ b/Src/lex.c
@@ -33,7 +33,7 @@
 /* tokens */
 
 /**/
-mod_export char ztokens[] = "#$^*()$=|{}[]`<>?~`,'\"\\";
+mod_export char ztokens[] = "#$^*()$=|{}[]`<>?~`,'\"\\\\";
 
 /* parts of the current token */
 
@@ -44,7 +44,7 @@ mod_export char *tokstr;
 /**/
 mod_export int tok;
 /**/
-int tokfd;
+mod_export int tokfd;
 
 /* lexical analyzer error flag */
  
@@ -93,6 +93,11 @@ mod_export int inwhat;
 /**/
 mod_export int addedx;
 
+/* wb and we hold the beginning/end position of the word we are completing. */
+
+/**/
+mod_export int wb, we;
+
 /* 1 if aliases should not be expanded */
  
 /**/
@@ -111,8 +116,13 @@ mod_export int parbegin;
 
 /**/
 mod_export int parend;
+
+/* don't recognize comments */
  
-/* text of puctuation tokens */
+/**/
+mod_export int nocomments;
+
+/* text of punctuation tokens */
 
 /**/
 mod_export char *tokstrings[WHILE + 1] = {
@@ -167,7 +177,6 @@ struct lexstack {
     int isfirstch;
     int histactive;
     int histdone;
-    int spaceflag;
     int stophist;
     int hlinesz;
     char *hline;
@@ -178,6 +187,7 @@ struct lexstack {
     char *yytext;
     char *bptr;
     int bsiz;
+    int len;
     short *chwords;
     int chwordlen;
     int chwordpos;
@@ -222,7 +232,6 @@ lexsave(void)
     ls->isfirstch = isfirstch;
     ls->histactive = histactive;
     ls->histdone = histdone;
-    ls->spaceflag = spaceflag;
     ls->stophist = stophist;
     ls->hline = chline;
     ls->hptr = hptr;
@@ -236,6 +245,7 @@ lexsave(void)
     ls->yytext = yytext;
     ls->bptr = bptr;
     ls->bsiz = bsiz;
+    ls->len = len;
     ls->chwords = chwords;
     ls->chwordlen = chwordlen;
     ls->chwordpos = chwordpos;
@@ -260,6 +270,7 @@ lexsave(void)
     inredir = 0;
     hdocs = NULL;
     histactive = 0;
+    ecbuf = NULL;
 
     ls->next = lstack;
     lstack = ls;
@@ -282,7 +293,6 @@ lexrestore(void)
     isfirstch = lstack->isfirstch;
     histactive = lstack->histactive;
     histdone = lstack->histdone;
-    spaceflag = lstack->spaceflag;
     stophist = lstack->stophist;
     chline = lstack->hline;
     hptr = lstack->hptr;
@@ -296,6 +306,7 @@ lexrestore(void)
     yytext = lstack->yytext;
     bptr = lstack->bptr;
     bsiz = lstack->bsiz;
+    len = lstack->len;
     chwords = lstack->chwords;
     chwordlen = lstack->chwordlen;
     chwordpos = lstack->chwordpos;
@@ -308,6 +319,8 @@ lexrestore(void)
     hwbegin = lstack->hwbegin;
     hwend = lstack->hwend;
     addtoline = lstack->addtoline;
+    if (ecbuf)
+	zfree(ecbuf, eclen);
     eclen = lstack->eclen;
     ecused = lstack->ecused;
     ecnpats = lstack->ecnpats;
@@ -339,13 +352,13 @@ yylex(void)
 	    char *name;
 
 	    hwbegin(0);
-	    cmdpush(hdocs->type == HEREDOC ? CS_HEREDOC : CS_HEREDOCD);
+	    cmdpush(hdocs->type == REDIR_HEREDOC ? CS_HEREDOC : CS_HEREDOCD);
 	    STOPHIST
 	    name = gethere(hdocs->str, hdocs->type);
 	    ALLOWHIST
 	    cmdpop();
 	    hwend();
-	    setheredoc(hdocs->pc, HERESTR, name);
+	    setheredoc(hdocs->pc, REDIR_HERESTR, name);
 	    zfree(hdocs, sizeof(struct heredocs));
 	    hdocs = next;
 	}
@@ -380,7 +393,7 @@ ctxtlex(void)
     case BAR:
     case BARAMP:
     case INOUTPAR:
-    case DO:
+    case DOLOOP:
     case THEN:
     case ELIF:
     case ELSE:
@@ -569,6 +582,43 @@ cmd_or_math_sub(void)
     return skipcomm();
 }
 
+/* Check whether we're looking at valid numeric globbing syntax      *
+ * (/\<[0-9]*-[0-9]*\>/).  Call pointing just after the opening "<". *
+ * Leaves the input in the same place, returning 0 or 1.             */
+
+/**/
+static int
+isnumglob(void)
+{
+    int c, ec = '-', ret = 0;	/* ec: the non-digit character expected next */
+    int tbs = 256, n = 0;	/* tbs: size of tbuf; n: characters saved */
+    char *tbuf = (char *)zalloc(tbs);
+
+    while(1) {
+	c = hgetc();
+	if(lexstop) {
+	    lexstop = 0;	/* input ran out: clear the flag, no match */
+	    break;
+	}
+	tbuf[n++] = c;		/* save it so it can be pushed back below */
+	if(!idigit(c)) {
+	    if(c != ec)
+		break;		/* unexpected non-digit: not a numeric glob */
+	    if(ec == '>') {
+		ret = 1;	/* seen digits, '-', digits, '>' */
+		break;
+	    }
+	    ec = '>';		/* that was the '-'; now expect the '>' */
+	}
+	if(n == tbs)
+	    tbuf = (char *)realloc(tbuf, tbs *= 2); /* NOTE(review): result not checked for NULL */
+    }
+    while(n--)
+	hungetc(tbuf[n]);	/* push everything back, last character first */
+    zfree(tbuf, tbs);
+    return ret;
+}
+
 /**/
 int
 gettok(void)
@@ -605,7 +655,18 @@ gettok(void)
 	return DOUTPAR;
     } else if (idigit(c)) {	/* handle 1< foo */
 	d = hgetc();
-	if (d == '>' || d == '<') {
+	if(d == '&') {
+	    d = hgetc();
+	    if(d == '>') {
+		peekfd = c - '0';
+		hungetc('>');
+		c = '&';
+	    } else {
+		hungetc(d);
+		lexstop = 0;
+		hungetc('&');
+	    }
+	} else if (d == '>' || d == '<') {
 	    peekfd = c - '0';
 	    c = d;
 	} else {
@@ -616,7 +677,7 @@ gettok(void)
 
     /* chars in initial position in word */
 
-    if (c == hashchar &&
+    if (c == hashchar && !nocomments &&
 	(isset(INTERACTIVECOMMENTS) ||
 	 (!zleparse && !expanding &&
 	  (!interact || unset(SHINSTDIN) || strin)))) {
@@ -665,6 +726,7 @@ gettok(void)
 	else if (d == '!' || d == '|')
 	    return AMPERBANG;
 	else if (d == '>') {
+	    tokfd = peekfd;
 	    d = hgetc();
 	    if (d == '!' || d == '|')
 		return OUTANGAMPBANG;
@@ -678,7 +740,6 @@ gettok(void)
 	    }
 	    hungetc(d);
 	    lexstop = 0;
-	    tokfd = -1;
 	    return AMPOUTANG;
 	}
 	hungetc(d);
@@ -719,41 +780,15 @@ gettok(void)
 	if (!incmdpos && d == '(') {
 	    hungetc(d);
 	    lexstop = 0;
+	    unpeekfd:
+	    if(peekfd != -1) {
+		hungetc(c);
+		c = '0' + peekfd;
+	    }
 	    break;
 	}
-	if (d == '>')
+	if (d == '>') {
 	    peek = INOUTANG;
-	else if (idigit(d) || d == '-') {
-	    int tbs = 256, n = 0, nc;
-	    char *tbuf, *tbp, *ntb;
-
-	    tbuf = tbp = (char *)zalloc(tbs);
-	    hungetc(d);
-
-	    while ((nc = hgetc()) && !lexstop) {
-		if (!idigit(nc) && nc != '-')
-		    break;
-		*tbp++ = (char)nc;
-		if (++n == tbs) {
-		    ntb = (char *)realloc(tbuf, tbs *= 2);
-		    tbp += ntb - tbuf;
-		    tbuf = ntb;
-		}
-	    }
-	    if (nc == '>' && !lexstop) {
-		hungetc(nc);
-		while (n--)
-		    hungetc(*--tbp);
-		zfree(tbuf, tbs);
-		break;
-	    }
-	    if (nc && !lexstop)
-		hungetc(nc);
-	    lexstop = 0;
-	    while (n--)
-		hungetc(*--tbp);
-	    zfree(tbuf, tbs);
-	    peek = INANG;
 	} else if (d == '<') {
 	    int e = hgetc();
 
@@ -770,12 +805,13 @@ gettok(void)
 		lexstop = 0;
 		peek = DINANG;
 	    }
-	} else if (d == '&')
+	} else if (d == '&') {
 	    peek = INANGAMP;
-	else {
-	    peek = INANG;
+	} else {
 	    hungetc(d);
-	    lexstop = 0;
+	    if(isnumglob())
+		goto unpeekfd;
+	    peek = INANG;
 	}
 	tokfd = peekfd;
 	return peek;
@@ -783,7 +819,7 @@ gettok(void)
 	d = hgetc();
 	if (d == '(') {
 	    hungetc(d);
-	    break;
+	    goto unpeekfd;
 	} else if (d == '&') {
 	    d = hgetc();
 	    if (d == '!' || d == '|')
@@ -957,8 +993,12 @@ gettokstr(int c, int sub)
 	    c = Outbrack;
 	    break;
 	case LX2_INPAR:
-	    if ((sub || in_brace_param) && isset(SHGLOB))
-		break;
+	    if (isset(SHGLOB)) {
+		if (sub || in_brace_param)
+		    break;
+		if (incasepat && !len)
+		    return INPAR;
+	    }
 	    if (!in_brace_param) {
 		if (!sub) {
 		    e = hgetc();
@@ -1056,29 +1096,27 @@ gettokstr(int c, int sub)
 	    if (isset(SHGLOB) && sub)
 		break;
 	    e = hgetc();
-	    if (!(idigit(e) || e == '-' || (e == '(' && intpos))) {
-		hungetc(e);
-		lexstop = 0;
-		if (in_brace_param || sub)
-		    break;
-		goto brk;
-	    }
-	    c = Inang;
-	    if (e == '(') {
-		add(c);
+	    if(e == '(' && intpos) {
+		add(Inang);
 		if (skipcomm()) {
 		    peek = LEXERR;
 		    goto brk;
 		}
 		c = Outpar;
-	    } else {
-		add(c);
-		c = e;
-		while (c != '>' && !lexstop)
-		    add(c), c = hgetc();
+		break;
+	    }
+	    hungetc(e);
+	    if(isnumglob()) {
+		add(Inang);
+		while ((c = hgetc()) != '>')
+		    add(c);
 		c = Outang;
+		break;
 	    }
-	    break;
+	    lexstop = 0;
+	    if (in_brace_param || sub)
+		break;
+	    goto brk;
 	case LX2_EQUALS:
 	    if (intpos) {
 		e = hgetc();
@@ -1106,6 +1144,8 @@ gettokstr(int c, int sub)
 			skipparens(Inbrack, Outbrack, &t);
 		    }
 		}
+		if (*t == '+')
+                    t++;
 		if (t == bptr) {
 		    e = hgetc();
 		    if (e == '(' && incmdpos) {
@@ -1161,7 +1201,7 @@ gettokstr(int c, int sub)
 		    goto brk;
 		}
 		e = hgetc();
-		if (e != '\'' || unset(RCQUOTES))
+		if (e != '\'' || unset(RCQUOTES) || strquote)
 		    break;
 		add(c);
 	    }
@@ -1268,10 +1308,14 @@ dquote_parse(char endchar, int sub)
 	    c = hgetc();
 	    if (c != '\n') {
 		if (c == '$' || c == '\\' || (c == '}' && !intick && bct) ||
-		    c == endchar || c == '`')
+		    c == endchar || c == '`' ||
+		    (endchar == ']' && (c == '[' || c == ']' ||
+					c == '(' || c == ')' ||
+					c == '{' || c == '}' ||
+					(c == '"' && sub))))
 		    add(Bnull);
 		else {
-		    /* lexstop is implicitely handled here */
+		    /* lexstop is implicitly handled here */
 		    add('\\');
 		    goto cont;
 		}
@@ -1353,11 +1397,13 @@ dquote_parse(char endchar, int sub)
 		err = (!brct-- && math);
 	    break;
 	case '"':
-	    if (intick || (!endchar && !bct))
+	    if (intick || ((endchar == ']' || !endchar) && !bct))
 		break;
 	    if (bct) {
 		add(Dnull);
+		cmdpush(CS_DQUOTE);
 		err = dquote_parse('"', sub);
+		cmdpop();
 		c = Dnull;
 	    } else
 		err = 1;
@@ -1389,6 +1435,22 @@ dquote_parse(char endchar, int sub)
 /**/
 mod_export int
 parsestr(char *s)
+{
+    int err;
+
+    if ((err = parsestrnoerr(s))) {
+	untokenize(s);
+	if (err > 32 && err < 127)
+	    zerr("parse error near `%c'", NULL, err);
+	else
+	    zerr("parse error", NULL, 0);
+    }
+    return err;
+}
+
+/**/
+mod_export int
+parsestrnoerr(char *s)
 {
     int l = strlen(s), err;
 
@@ -1405,14 +1467,39 @@ parsestr(char *s)
     inpop();
     DPUTS(cmdsp, "BUG: parsestr: cmdstack not empty.");
     lexrestore();
+    return err;
+}
+
+/**/
+mod_export char *
+parse_subscript(char *s, int sub)
+{
+    int l = strlen(s), err;
+    char *t;
+
+    if (!*s || *s == ']')
+	return 0;
+    lexsave();
+    untokenize(t = dupstring(s));
+    inpush(t, 0, NULL);
+    strinbeg(0);
+    len = 0;
+    bptr = tokstr = s;
+    bsiz = l + 1;
+    err = dquote_parse(']', sub);
     if (err) {
+	err = *bptr;
+	*bptr = 0;
 	untokenize(s);
-	if (err > 32 && err < 127)
-	    zerr("parse error near `%c'", NULL, err);
-	else
-	    zerr("parse error", NULL, 0);
-    }
-    return err;
+	*bptr = err;
+	s = 0;
+    } else
+	s = bptr;
+    strinend();
+    inpop();
+    DPUTS(cmdsp, "BUG: parse_subscript: cmdstack not empty.");
+    lexrestore();
+    return s;
 }
 
 /* Tokenize a string given in s. Parsing is done as if s were a normal *
@@ -1420,12 +1507,12 @@ parsestr(char *s)
  * to parse the right-hand side of ${...%...} substitutions.           */
 
 /**/
-int
+mod_export int
 parse_subst_string(char *s)
 {
-    int c, l = strlen(s), err;
+    int c, l = strlen(s), err, olen, lexstop_ret;
 
-    if (! *s)
+    if (!*s || !strcmp(s, nulstring))
 	return 0;
     lexsave();
     untokenize(s);
@@ -1435,11 +1522,13 @@ parse_subst_string(char *s)
     bptr = tokstr = s;
     bsiz = l + 1;
     c = hgetc();
+    lexstop_ret = lexstop;
     c = gettokstr(c, 1);
     err = errflag;
     strinend();
     inpop();
     DPUTS(cmdsp, "BUG: parse_subst_string: cmdstack not empty.");
+    olen = len;
     lexrestore();
     errflag = err;
     if (c == LEXERR) {
@@ -1447,9 +1536,9 @@ parse_subst_string(char *s)
 	return 1;
     }
 #ifdef DEBUG
-    if (c != STRING || len != l || errflag) {
+    if (c != STRING || olen != l || errflag) {
 	fprintf(stderr, "Oops. Bug in parse_subst_string: %s\n",
-		len < l ? "len < l" : errflag ? "errflag" : "c != STRING");
+		olen < l ? "len < l" : errflag ? "errflag" : "c != STRING");
 	fflush(stderr);
 	untokenize(s);
 	return 1;
@@ -1458,6 +1547,19 @@ parse_subst_string(char *s)
     return 0;
 }
 
+/* Called below to report word positions. */
+
+/**/
+mod_export void
+gotword(void)
+{
+    we = ll + 1 - inbufct + (addedx == 2 ? 1 : 0);
+    if (cs <= we) {
+	wb = ll - wordbeg + addedx;
+	zleparse = 0;
+    }
+}
+
 /* expand aliases and reserved words */
 
 /**/
@@ -1502,18 +1604,32 @@ exalias(void)
 
 	if (tok == STRING) {
 	    /* Check for an alias */
-	    an = noaliases ? NULL :
-		(Alias) aliastab->getnode(aliastab, yytext);
-	    if (an && !an->inuse && ((an->flags & ALIAS_GLOBAL) || incmdpos ||
-				     inalmore)) {
-		inpush(an->text, INP_ALIAS, an);
-		/* remove from history if it begins with space */
-		if (isset(HISTIGNORESPACE) && an->text[0] == ' ')
-		    remhist();
-		lexstop = 0;
-		if (yytext == copy)
-		    yytext = tokstr;
-		return 1;
+	    if (!noaliases && isset(ALIASESOPT)) {
+		char *suf;
+		
+		an = (Alias) aliastab->getnode(aliastab, yytext);
+		if (an && !an->inuse &&
+		    ((an->flags & ALIAS_GLOBAL) || incmdpos || inalmore)) {
+		    inpush(an->text, INP_ALIAS, an);
+		    if (an->text[0] == ' ')
+			aliasspaceflag = 1;
+		    lexstop = 0;
+		    if (yytext == copy)
+			yytext = tokstr;
+		    return 1;
+		}
+		if ((suf = strrchr(yytext, '.')) && suf[1] &&
+		    suf > yytext && suf[-1] != Meta &&
+		    (an = (Alias)sufaliastab->getnode(sufaliastab, suf+1)) &&
+		    !an->inuse && incmdpos) {
+		    inpush(dupstring(yytext), INP_ALIAS, NULL);
+		    inpush(" ", INP_ALIAS, NULL);
+		    inpush(an->text, INP_ALIAS, an);
+		    lexstop = 0;
+		    if (yytext == copy)
+			yytext = tokstr;
+		    return 1;
+		}
 	    }
 
 	    /* Then check for a reserved word */
diff --git a/Src/pattern.c b/Src/pattern.c
index 393d9bf41..d8bd9ef98 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -260,13 +260,13 @@ static char endseg[] = {
 
 static char endstr[] = {
     '/',			/* file only */
-    '\0', Bar, Outpar, Quest, Star, Inbrack, Inpar, Inang,
+    '\0', Bar, Outpar, Quest, Star, Inbrack, Inpar, Inang, Bnullkeep,
 				/* all patterns */
     Tilde, Hat, Pound		/* extended glob only */
 };
 
-#define PATENDSTRLEN_NORM 9
-#define PATENDSTRLEN_EXT  12
+#define PATENDSTRLEN_NORM 10
+#define PATENDSTRLEN_EXT  13
 
 
 /* Default size for pattern buffer */
@@ -1240,6 +1240,13 @@ patcomppiece(int *flagp)
 	     */
 	    return 0;
 	    break;
+	case Bnullkeep:
+	    /*
+	     * Marker for restoring a backslash in output:
+	     * does not match a character.
+	     */
+	    return patcomppiece(flagp);
+	    break;
 #ifdef DEBUG
 	default:
 	    dputs("BUG: character not handled in patcomppiece");
diff --git a/Src/subst.c b/Src/subst.c
index 408a9d406..67de61418 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -49,8 +49,10 @@ char nulstring[] = {Nularg, '\0'};
 mod_export void
 prefork(LinkList list, int flags)
 {
-    LinkNode node;
+    LinkNode node, stop = 0;
+    int keep = 0, asssub = (flags & PF_TYPESET) && isset(KSHTYPESET);
 
+    queue_signals();
     for (node = firstnode(list); node; incnode(node)) {
 	char *str, c;
 
@@ -61,35 +63,49 @@ prefork(LinkList list, int flags)
 		setdata(node, (void *) getproc(str));	/* <(...) or >(...) */
 	    else
 		setdata(node, (void *) getoutputfile(str));	/* =(...) */
-	    if (!getdata(node))
+	    if (!getdata(node)) {
+		unqueue_signals();
 		return;
+	    }
 	} else {
 	    if (isset(SHFILEEXPANSION))
 		filesub((char **)getaddrdata(node),
 			flags & (PF_TYPESET|PF_ASSIGN));
-	    if (!(node = stringsubst(list, node, flags & PF_SINGLE)))
+	    if (!(node = stringsubst(list, node, flags & PF_SINGLE, asssub))) {
+		unqueue_signals();
 		return;
+	    }
 	}
     }
     for (node = firstnode(list); node; incnode(node)) {
+	if (node == stop)
+	    keep = 0;
 	if (*(char *)getdata(node)) {
 	    remnulargs(getdata(node));
-	    if (unset(IGNOREBRACES) && !(flags & PF_SINGLE))
-		while (hasbraces(getdata(node)))
+	    if (unset(IGNOREBRACES) && !(flags & PF_SINGLE)) {
+		if (!keep)
+		    stop = nextnode(node);
+		while (hasbraces(getdata(node))) {
+		    keep = 1;
 		    xpandbraces(list, &node);
+		}
+	    }
 	    if (unset(SHFILEEXPANSION))
 		filesub((char **)getaddrdata(node),
 			flags & (PF_TYPESET|PF_ASSIGN));
-	} else if (!(flags & PF_SINGLE))
+	} else if (!(flags & PF_SINGLE) && !keep)
 	    uremnode(list, node);
-	if (errflag)
+	if (errflag) {
+	    unqueue_signals();
 	    return;
+	}
     }
+    unqueue_signals();
 }
 
 /**/
 static LinkNode
-stringsubst(LinkList list, LinkNode node, int ssub)
+stringsubst(LinkList list, LinkNode node, int ssub, int asssub)
 {
     int qt;
     char *str3 = (char *)getdata(node);
@@ -124,7 +140,7 @@ stringsubst(LinkList list, LinkNode node, int ssub)
 		str3 = (char *)getdata(node);
 		continue;
 	    }
-	} else if ((qt = c == Qtick) || c == Tick)
+	} else if ((qt = c == Qtick) || (c == Tick ? (mult_isarr = 1) : 0))
 	  comsub: {
 	    LinkList pl;
 	    char *s, *str2 = str;
@@ -181,7 +197,7 @@ stringsubst(LinkList list, LinkNode node, int ssub)
 		continue;
 	    }
 	    if (!qt && ssub && isset(GLOBSUBST))
-		tokenize(s);
+		shtokenize(s);
 	    l1 = str2 - str3;
 	    l2 = strlen(s);
 	    if (nonempty(pl)) {
@@ -203,12 +219,48 @@ stringsubst(LinkList list, LinkNode node, int ssub)
 	    str3 = str2;
 	    setdata(node, str3);
 	    continue;
+	} else if (asssub && ((c == '=') || c == Equals) && str != str3) {
+	    /*
+	     * We are in a normal argument which looks like an assignment
+	     * and is to be treated like one, with no word splitting.
+	     */
+	    ssub = 1;
 	}
 	str++;
     }
     return errflag ? NULL : node;
 }
 
+/*
+ * Simplified version of the prefork/singsub processing where
+ * we only do substitutions appropriate to quoting.  Currently
+ * this means only the expansions in $'....'.  This is used
+ * for the end tag for here documents.  As we are not doing
+ * `...` expansions, we just use those for quoting.  However,
+ * they stay in the text.  This is weird, but that's not
+ * my fault.
+ *
+ * The remnulargs() makes this consistent with the other forms
+ * of substitution, indicating that quotes have been fully
+ * processed.
+ */
+
+/**/
+void
+quotesubst(char *str)
+{
+    char *s = str;
+
+    while (*s) {
+	if (*s == String && s[1] == Snull) {
+	    s = getkeystring(s, NULL, 4, NULL);
+	} else {
+	    s++;
+	}
+    }
+    remnulargs(str);
+}
+
 /**/
 mod_export void
 globlist(LinkList list, int nountok)
@@ -218,7 +270,7 @@ globlist(LinkList list, int nountok)
     badcshglob = 0;
     for (node = firstnode(list); !errflag && node; node = next) {
 	next = nextnode(node);
-	glob(list, node, nountok);
+	zglob(list, node, nountok);
     }
     if (badcshglob == 1)
 	zerr("no match", NULL, 0);
@@ -230,11 +282,13 @@ globlist(LinkList list, int nountok)
 mod_export void
 singsub(char **s)
 {
+    int omi = mult_isarr;
     local_list1(foo);
 
     init_list1(foo, *s);
 
     prefork(&foo, PF_SINGLE);
+    mult_isarr = omi;
     if (errflag)
 	return;
     *s = (char *) ugetnode(&foo);
@@ -256,7 +310,7 @@ static int mult_isarr;
 
 /**/
 static int
-multsub(char **s, char ***a, int *isarr, char *sep)
+multsub(char **s, char ***a, int *isarr, UNUSED(char *sep))
 {
     int l, omi = mult_isarr;
     char **r, **p;
@@ -276,6 +330,15 @@ multsub(char **s, char ***a, int *isarr, char *sep)
 	while (nonempty(&foo))
 	    *p++ = (char *)ugetnode(&foo);
 	*p = NULL;
+	/*
+	 * This is the most obscure way of deciding whether a value is
+	 * an array it would be possible to imagine.  It seems to result
+	 * partly because we don't pass down the qt and ssub flags from
+	 * paramsubst() through prefork() properly, partly because we
+	 * don't tidy up to get back the return type from multsub we
+	 * need properly.  The crux of neatening this up is to get rid
+	 * of the following test.
+	 */
 	if (a && mult_isarr) {
 	    *a = r;
 	    *isarr = SCANPM_MATCHMANY;
@@ -307,7 +370,7 @@ multsub(char **s, char ***a, int *isarr, char *sep)
 mod_export void
 filesub(char **namptr, int assign)
 {
-    char *sub = NULL, *str, *ptr;
+    char *eql = NULL, *sub = NULL, *str, *ptr;
     int len;
 
     filesubstr(namptr, assign);
@@ -316,7 +379,7 @@ filesub(char **namptr, int assign)
 	return;
 
     if (assign & PF_TYPESET) {
-	if ((*namptr)[1] && (sub = strchr(*namptr + 1, Equals))) {
+	if ((*namptr)[1] && (eql = sub = strchr(*namptr + 1, Equals))) {
 	    str = sub + 1;
 	    if ((sub[1] == Tilde || sub[1] == Equals) && filesubstr(&str, assign)) {
 		sub[1] = '\0';
@@ -330,7 +393,9 @@ filesub(char **namptr, int assign)
     while ((sub = strchr(ptr, ':'))) {
 	str = sub + 1;
 	len = sub - *namptr;
-	if ((sub[1] == Tilde || sub[1] == Equals) && filesubstr(&str, assign)) {
+	if (sub > eql &&
+	    (sub[1] == Tilde || sub[1] == Equals) &&
+	    filesubstr(&str, assign)) {
 	    sub[1] = '\0';
 	    *namptr = dyncat(*namptr, str);
 	}
@@ -397,15 +462,9 @@ filesubstr(char **namptr, int assign)
 	sav = *pp;
 	*pp = 0;
 	if (!(cnam = findcmd(str + 1, 1))) {
-	    Alias a = (Alias) aliastab->getnode(aliastab, str + 1);
-	    
-	    if (a)
-		cnam = a->text;
-	    else {
-		if (isset(NOMATCH))
-		    zerr("%s not found", str + 1, 0);
-		return 0;
-	    }
+	    if (isset(NOMATCH))
+		zerr("%s not found", str + 1, 0);
+	    return 0;
 	}
 	*namptr = dupstring(cnam);
 	if (sav) {
@@ -430,14 +489,14 @@ strcatsub(char **d, char *pb, char *pe, char *src, int l, char *s, int glbsub,
     if (!pl && (!s || !*s)) {
 	*d = dest = (copied ? src : dupstring(src));
 	if (glbsub)
-	    tokenize(dest);
+	    shtokenize(dest);
     } else {
 	*d = dest = hcalloc(pl + l + (s ? strlen(s) : 0) + 1);
 	strncpy(dest, pb, pl);
 	dest += pl;
 	strcpy(dest, src);
 	if (glbsub)
-	    tokenize(dest);
+	    shtokenize(dest);
 	dest += l;
 	if (s)
 	    strcpy(dest, s);
@@ -519,6 +578,72 @@ invcstrpcmp(const void *a, const void *b)
 #endif
 }
 
+/**/
+int
+nstrpcmp(const void *a, const void *b)
+{
+    char *c = *(char **)a, *d = *(char **)b;
+    int cmp;
+    /* String comparison in which runs of digits are compared numerically. */
+#ifdef HAVE_STRCOLL
+    cmp = strcoll(c, d);	/* default result: whatever strcoll() says */
+#endif
+    for (; *c == *d && *c; c++, d++);	/* skip the common prefix */
+#ifndef HAVE_STRCOLL
+    cmp = (int)STOUC(*c) - (int)STOUC(*d);	/* default: first differing byte */
+#endif
+    if (idigit(*c) || idigit(*d)) {
+	for (; c > *(char **)a && idigit(c[-1]); c--, d--); /* back to start of shared digits */
+	if (idigit(*c) && idigit(*d)) {
+	    while (*c == '0')	/* leading zeros are skipped on both sides */
+		c++;
+	    while (*d == '0')
+		d++;
+	    for (; idigit(*c) && *c == *d; c++, d++);	/* skip equal digits */
+	    if (idigit(*c) || idigit(*d)) {
+		cmp = (int)STOUC(*c) - (int)STOUC(*d); /* used if both runs end together */
+		while (idigit(*c) && idigit(*d))
+		    c++, d++;
+		if (idigit(*c) && !idigit(*d))
+		    return 1;	/* *a still has digits left: it is the larger */
+		if (idigit(*d) && !idigit(*c))
+		    return -1;	/* *b still has digits left: it is the larger */
+	    }
+	}
+    }
+    return cmp;
+}
+
+/**/
+int
+invnstrpcmp(const void *a, const void *b)
+{
+    return -nstrpcmp(a, b);
+}
+
+/**/
+int
+instrpcmp(const void *a, const void *b)
+{
+    /* nstrpcmp() applied to copies of *a and *b passed through tulower(). */
+    VARARR(char, c, strlen(*(char **) a) + 1);
+    VARARR(char, d, strlen(*(char **) b) + 1);
+    char *e = c, *f = d;	/* right whether VARARR makes an array or a pointer */
+    char *s, *t;
+
+    for (s = *(char **) a, t = c; (*t++ = tulower(*s++)););
+    for (s = *(char **) b, t = d; (*t++ = tulower(*s++)););
+    /* nstrpcmp() takes pointers to char *, like elements of a sorted array */
+    return nstrpcmp(&e, &f);
+}
+
+/**/
+int
+invinstrpcmp(const void *a, const void *b)
+{
+    return -instrpcmp(a, b);
+}
+
 /**/
 static char *
 dopadding(char *str, int prenum, int postnum, char *preone, char *postone, char *premul, char *postmul)
@@ -702,17 +827,24 @@ get_intarg(char **s)
 /* Parsing for the (e) flag. */
 
 static int
-subst_parse_str(char **sp, int single)
+subst_parse_str(char **sp, int single, int err)
 {
     char *s;
 
     *sp = s = dupstring(*sp);
 
-    if (!parsestr(s)) {
+    if (!(err ? parsestr(s) : parsestrnoerr(s))) {
 	if (!single) {
+	    int qt = 0;	/* nonzero while between a pair of Dnull markers */
 	    for (; *s; s++)
-		if (*s == Qstring)
-		    *s = String;
+		if (*s == Dnull) {
+		    qt = !qt;	/* must toggle in both states, or qt stays 0 */
+		} else if (!qt) {
+		    if (*s == Qstring)	/* outside Dnull pairs only */
+			*s = String;
+		    else if (*s == Qtick)
+			*s = Tick;
+		}
 	}
 	return 0;
     }
@@ -724,6 +856,23 @@ subst_parse_str(char **sp, int single)
 #define	isstring(c) ((c) == '$' || (char)(c) == String || (char)(c) == Qstring)
 #define isbrack(c)  ((c) == '[' || (char)(c) == Inbrack)
 
+/*
+ * Given a linked list l with node n, perform parameter substitution
+ * starting from *str.  Return the node with the substitution performed
+ * or NULL if it failed.
+ *
+ * If qt is true, the `$' was quoted.  TODO: why can't we just look
+ * to see if the first character was String or Qstring?
+ *
+ * If ssub is true, we are being called via singsub(), which means
+ * the result will be a single word.  TODO: can we generate the
+ * single word at the end?  TODO: if not, or maybe in any case,
+ * can we pass down the ssub flag from prefork with the other flags
+ * instead of pushing it into different arguments?  (How exactly
+ * do qt and ssub differ?  Are both necessary, if so is there some
+ * better way of separating the two?)
+ */
+
 /**/
 LinkNode
 paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
@@ -731,41 +880,207 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
     char *aptr = *str, c, cc;
     char *s = aptr, *fstr, *idbeg, *idend, *ostr = (char *) getdata(n);
     int colf;			/* != 0 means we found a colon after the name */
+    /*
+     * There are far too many flags.  They need to be grouped
+     * together into some structure which ties them to where they
+     * came from.
+     *
+     * Some flags have an obscure relationship to their effect which
+     * depends on incrementing them to particular values in particular
+     * ways.
+     */
+    /*
+     * Whether the value is an array (in aval) or not (in val).  There's
+     * a movement from storing the value in the stuff read from the
+     * parameter (the value v) to storing them in val and aval.
+     * However, sometimes you find v reappearing temporarily.
+     *
+     * The values -1 and 2 are special to isarr.  It looks like 2 is
+     * some kind of an internal flag to do with whether the array's been
+     * copied, in which case I don't know why we don't use the copied
+     * flag, but they do both occur close together so they presumably
+     * have different effects.  The value -1 is used to force us to
+     * keep an empty array.  It's tested in the YUK chunk (I mean the
+     * one explicitly marked as such).
+     */
     int isarr = 0;
+    /*
+     * This is just the setting of the option except we need to
+     * take account of ^ and ^^.
+     */
     int plan9 = isset(RCEXPANDPARAM);
+    /*
+     * Likewise, but with ~ and ~~.  Also, we turn it off later
+     * on if qt is passed down.
+     */
     int globsubst = isset(GLOBSUBST);
+    /*
+     * Indicates ${#pm}, massaged by whichlen which is set by
+     * the (c), (w), and (W) flags to indicate how we take the length.
+     */
     int getlen = 0;
     int whichlen = 0;
+    /*
+     * Indicates ${+pm}: a simple boolean for once.
+     */
     int chkset = 0;
+    /*
+     * Indicates we have tried to get a value in v but that was
+     * unset.  I don't quite understand why (v == NULL) isn't
+     * good enough, but there are places where we seem to need
+     * to second guess whether a value is a real value or not.
+     */
     int vunset = 0;
+    /*
+     * Indicates (t) flag, i.e. print out types.  The code for
+     * this actually isn't too horrifically inbred compared with
+     * that for (P).
+     */
     int wantt = 0;
+    /*
+     * Indicates splitting a string into an array.  There aren't
+     * actually that many special cases for this --- which may
+     * be why it doesn't work properly; we split in some cases
+     * where we shouldn't, in particular on the multsubs for
+     * handling embedded values for ${...=...} and the like.
+     */
     int spbreak = isset(SHWORDSPLIT) && !ssub && !qt;
+    /* Scalar and array value, see isarr above */
     char *val = NULL, **aval = NULL;
+    /*
+     * Padding based on setting in parameter rather than substitution
+     * flags.  This is only used locally.
+     */
     unsigned int fwidth = 0;
+    /*
+     * vbuf and v are both used to retrieve parameter values; this
+     * is a kludge, we pass down vbuf and it may or may not return v.
+     */
     struct value vbuf;
     Value v = NULL;
+    /*
+     * This expressive name refers to the set of flags which
+     * is applied to matching for #, %, / and their doubled variants:
+     * (M), (R), (B), (E), (N), (S).
+     */
     int flags = 0;
+    /* Value from (I) flag, used for ditto. */
     int flnum = 0;
-    int sortit = 0, casind = 0;
+    /*
+     * sortit is an obscure combination of the settings for (o), (O),
+     * (i) and (n). casind is (i) and numord is (n); these are
+     * separate so we can have fun doing the obscure combinatorics later.
+     * indord is the (a) flag, which for consistency doesn't get
+     * combined into sortit.
+     */
+    int sortit = 0, casind = 0, numord = 0, indord = 0;
+    /* (u): straightforward. */
+    int unique = 0;
+    /* combination of (L), (U) and (C) flags. */
     int casmod = 0;
+    /*
+     * quotemod says we are doing either (q) (positive), (Q) (negative)
+     * or not (0).  quotetype counts the q's for the first case.
+     * quoteerr is simply (X) but gets passed around a lot because the
+     * combination (eX) needs it.
+     */
     int quotemod = 0, quotetype = 0, quoteerr = 0;
+    /*
+     * (V) flag: fairly straightforward, except that as with so
+     * many flags it's not easy to decide where to put it in the order.
+     */
     int visiblemod = 0;
+    /*
+     * The (z) flag, nothing to do with SH_WORD_SPLIT which is tied to
+     * spbreak, see above; fairly straightforward in use but c.f.
+     * the comment for visiblemod.
+     */
+    int shsplit = 0;
+    /*
+     * The separator from (j) and (s) respectively, or (F) and (f)
+     * respectively (hardwired to "\n" in that case).  Slightly
+     * confusingly also used for ${#pm}, though that's at least
+     * documented in the manual.
+     */
     char *sep = NULL, *spsep = NULL;
+    /*
+     * Padding strings.  The left and right padding strings which
+     * are repeated, then the ones which only occur once, for
+     * the (l) and (r) flags.
+     */
     char *premul = NULL, *postmul = NULL, *preone = NULL, *postone = NULL;
-    char *replstr = NULL;	/* replacement string for /orig/repl */
+    /* Replacement string for /orig/repl and //orig/repl */
+    char *replstr = NULL;
+    /* The numbers for (l) and (r) */
     zlong prenum = 0, postnum = 0;
+    /*
+     * Whether the value has been copied.  Optimisation:  if we
+     * are modifying an expression, we only need to copy it the
+     * first time, and if we don't modify it we can just use the
+     * value from the parameter or input.
+     */
     int copied = 0;
+    /*
+     * The (A) flag for array assignment, with consequences for
+     * splitting and joining; (AA) gives arrasg == 2 for associative
+     * arrays.
+     */
     int arrasg = 0;
+    /*
+     * The (e) flag.  As we need to do extra work not quite
+     * at the end, the effect of this is kludged in in several places.
+     */
     int eval = 0;
+    /*
+     * The (P) flag.  This interacts a bit obscurely with whether
+     * or not we are dealing with a sub expression (subexp).
+     */
     int aspar = 0;
+    /*
+     * The (%) flag, c.f. visiblemod again.
+     */	
     int presc = 0;
+    /*
+     * The (@) flag; interacts obscurely with qt and isarr.
+     * This is one of the things that decides whether multsub
+     * will produce an array, but in an extremely indirect fashion.
+     */
     int nojoin = 0;
-    char inbrace = 0;		/* != 0 means ${...}, otherwise $... */
+    /*
+     * != 0 means ${...}, otherwise $...  What works without braces
+     * is largely a historical artefact (everything works with braces,
+     * I sincerely hope).
+     */
+    char inbrace = 0;
+    /*
+     * Used for the (k) flag.  Goes down into the parameter code,
+     * sometimes.
+     */
     char hkeys = 0;
+    /*
+     * Used for the (v) flag, ditto.  Not quite sure why they're
+     * separate, but the tradition seems to be that things only
+     * get combined when that makes the result more obscure rather
+     * than less.
+     */
     char hvals = 0;
+    /*
+     * Whether we had to evaluate a subexpression, i.e. an
+     * internal ${...} or $(...) or plain $pm.  We almost don't
+     * need to remember this (which would be neater), but the (P)
+     * flag means the subexp and !subexp code is obscurely combined,
+     * and the argument passing to fetchvalue has another kludge.
+     */
     int subexp;
 
     *s++ = '\0';
+    /*
+     * Nothing to do unless the character following the $ is
+     * something we recognise.
+     *
+     * Shouldn't this be a table or something?  We test for all
+     * these later on, too.
+     */
     if (!ialnum(c = *s) && c != '#' && c != Pound && c != '-' &&
 	c != '!' && c != '$' && c != String && c != Qstring &&
 	c != '?' && c != Quest && c != '_' &&
@@ -777,9 +1092,21 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 	return n;
     }
     DPUTS(c == '{', "BUG: inbrace == '{' in paramsubst()");
+    /*
+     * Extra processing if there is an opening brace: mostly
+     * flags in parentheses, but also one ksh hack.
+     */
     if (c == Inbrace) {
 	inbrace = 1;
 	s++;
+	/*
+	 * In ksh emulation a leading `!' is a special flag working
+	 * sort of like our (k).
+	 * TODO: this is one of very few cases tied directly to
+	 * the emulation mode rather than an option.  Since ksh
+	 * doesn't have parameter flags it might be neater to
+	 * handle this with the ^, =, ~ stuff, below.
+	 */
 	if ((c = *s) == '!' && s[1] != Outbrace && emulation == EMULATE_KSH) {
 	    hkeys = SCANPM_WANTKEYS;
 	    s++;
@@ -787,6 +1114,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 	    char *t, sav;
 	    int tt = 0;
 	    zlong num;
+	    /*
+	     * The (p) flag is (uniquely) only remembered within
+	     * this block.  It says we do print-style handling
+	     * on the values for flags, but only on those.
+	     * This explains the ghastly macro, but why can't it
+	     * be a function?  UNTOK_AND_ESCAPE is defined
+	     * so that the argument must be an lvalue.
+	     */
 	    int escapes = 0;
 	    int klen;
 #define UNTOK(C)  (itok(C) ? ztokens[(C) - Pound] : (C))
@@ -852,6 +1187,12 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 		case 'i':
 		    casind = 1;
 		    break;
+		case 'n':
+		    numord = 1;
+		    break;
+		case 'a':
+		    indord = 1;
+		    break;
 
 		case 'V':
 		    visiblemod++;
@@ -971,6 +1312,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 		    presc++;
 		    break;
 
+		case 'z':
+		    shsplit = 1;
+		    break;
+
+		case 'u':
+		    unique = 1;
+		    break;
+
 		default:
 		  flagerr:
 		    zerr("error in flags", NULL, 0);
@@ -980,44 +1329,76 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 	    s++;
 	}
     }
+    /* Sort is done by indexing on sortit-1:
+     *   bit 1: ascending (o)/descending (O)
+     *   bit 2: case sensitive/independent (i)
+     *   bit 3: strict order/numeric (n)
+     * unless indord (a) is set, in which case only test for
+     * descending by assuming only (O) is possible (not verified).
+     */
     if (sortit)
-	sortit += (casind << 1);
+	sortit += (casind << 1) + (numord << 2);
 
+    /*
+     * premul, postmul specify the padding character to be used
+     * multiple times with the (l) and (r) flags respectively.
+     */
     if (!premul)
 	premul = " ";
     if (!postmul)
 	postmul = " ";
 
+    /*
+     * Look for special unparenthesised flags.
+     * TODO: could make these able to appear inside parentheses, too,
+     * i.e. ${(^)...} etc.
+     */
     for (;;) {
 	if ((c = *s) == '^' || c == Hat) {
+	    /* RC_EXPAND_PARAM on or off (doubled) */
 	    if ((c = *++s) == '^' || c == Hat) {
 		plan9 = 0;
 		s++;
 	    } else
 		plan9 = 1;
 	} else if ((c = *s) == '=' || c == Equals) {
+	    /* SH_WORD_SPLIT on or off (doubled). spbreak = 2 means force */
 	    if ((c = *++s) == '=' || c == Equals) {
 		spbreak = 0;
 		s++;
 	    } else
-		spbreak = 1;
+		spbreak = 2;
 	} else if ((c == '#' || c == Pound) &&
 		   (iident(cc = s[1])
 		    || cc == '*' || cc == Star || cc == '@'
 		    || cc == '-' || (cc == ':' && s[2] == '-')
-		    || (isstring(cc) && (s[2] == Inbrace || s[2] == Inpar))))
+		    || (isstring(cc) && (s[2] == Inbrace || s[2] == Inpar)))) {
 	    getlen = 1 + whichlen, s++;
-	else if (c == '~' || c == Tilde) {
+	    /*
+	     * Return the length of the parameter.
+	     * getlen can be more than 1 to indicate characters (2),
+	     * words ignoring multiple delimiters (3), words taking
+	     * account of multiple delimiters.  delimiter is in
+	     * spsep, NULL means $IFS.
+	     */
+	} else if (c == '~' || c == Tilde) {
+	    /* GLOB_SUBST on or off (doubled) */
 	    if ((c = *++s) == '~' || c == Tilde) {
 		globsubst = 0;
 		s++;
 	    } else
 		globsubst = 1;
 	} else if (c == '+') {
+	    /*
+	     * Return whether indicated parameter is set. 
+	     * Try to handle this when parameter is named
+	     * by (P) (second part of test).
+	     */
 	    if (iident(s[1]) || (aspar && isstring(s[1]) &&
 				 (s[2] == Inbrace || s[2] == Inpar)))
 		chkset = 1, s++;
 	    else if (!inbrace) {
+		/* Special case for `$+' on its own --- leave unmodified */
 		*aptr = '$';
 		*str = aptr + 1;
 		return n;
@@ -1025,13 +1406,31 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 		zerr("bad substitution", NULL, 0);
 		return NULL;
 	    }
-	} else if (inbrace && INULL(*s))
+	} else if (inbrace && INULL(*s)) {
+	    /*
+	     * Handles things like ${(f)"$(<file)"} by skipping 
+	     * the double quotes.  We don't need to know what was
+	     * actually there; the presence of a String or Qstring
+	     * is good enough.
+	     */
 	    s++;
-	else
+	} else
 	    break;
     }
+    /* Don't activate special pattern characters if inside quotes */
     globsubst = globsubst && !qt;
 
+    /*
+     * At this point, we usually expect a parameter name.
+     * However, there may be a nested ${...} or $(...).
+     * These say that the parameter itself is somewhere inside,
+     * or that there isn't a parameter and we will get the values
+     * from a command substitution itself.  In either case,
+     * the current instance of paramsubst() doesn't fetch a value,
+     * it just operates on what gets passed up.
+     * (The first ought to have been {...}, reserving ${...}
+     * for substituting a value at that point, but it's too late now.)
+     */
     idbeg = s;
     if ((subexp = (inbrace && s[-1] && isstring(*s) &&
 		   (s[1] == Inbrace || s[1] == Inpar)))) {
@@ -1042,34 +1441,92 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 	skipparens(*s, *s == Inpar ? Outpar : Outbrace, &s);
 	sav = *s;
 	*s = 0;
+	/*
+	 * This handles arrays.  TODO: this is not the most obscure call to
+	 * multsub() (see below) but even so it would be nicer to pass down
+	 * and back the arrayness more rationally.  In that case, we should
+	 * remove the aspar test and extract a value from an array, if
+	 * necessary, when we handle (P) lower down.
+	 */
 	if (multsub(&val, (aspar ? NULL : &aval), &isarr, NULL) && quoted) {
+	    /* Empty quoted string --- treat as null string, not elided */
 	    isarr = -1;
 	    aval = (char **) hcalloc(sizeof(char *));
 	    aspar = 0;
 	} else if (aspar)
 	    idbeg = val;
 	*s = sav;
+	/*
+	 * This tests for the second double quote in an expression
+	 * like ${(f)"$(<file)"}, compare above.
+	 */
 	while (INULL(*s))
 	    s++;
 	v = (Value) NULL;
     } else if (aspar) {
-	if ((v = getvalue(&vbuf, &s, 1))) {
+	/*
+	 * No subexpression, but in any case the value is going
+	 * to give us the name of a parameter on which we do
+	 * our remaining processing.  In other words, this
+	 * makes ${(P)param} work like ${(P)${param}}.  (Probably
+	 * better looked at, this is the basic code for ${(P)param}
+	 * and it's been kludged into the subexp code because no
+	 * opportunity for a kludge has been neglected.)
+	 */
+	if ((v = fetchvalue(&vbuf, &s, 1, (qt ? SCANPM_DQUOTED : 0)))) {
 	    val = idbeg = getstrvalue(v);
 	    subexp = 1;
 	} else
 	    vunset = 1;
     }
+    /*
+     * We need to retrieve a value either if we haven't already
+     * got it from a subexpression, or if the processing so
+     * far has just yielded us a parameter name to be processed
+     * with (P).
+     */
     if (!subexp || aspar) {
 	char *ov = val;
 
+	/*
+	 * Second argument: decide whether to use the subexpression or
+	 *   the string next on the line as the parameter name.
+	 * Third argument:  decide how to process brackets:
+	 *   1 means full processing
+	 *   -1 appears to mean something along the lines of
+	 *     only handle single digits and don't handle brackets.
+	 *     I *think* (but it's really only a guess) that this
+	 *     is used by the test below the wantt handling, so
+	 *     that in certain cases we handle brackets there.
+	 *   0 would apparently mean something like we know we
+	 *     should have the name of a scalar and we get cross
+	 *     if there's anything present which disagrees with that
+	 * but you will search fetchvalue() in vain for comments on this.
+	 * Fourth argument gives flags to do with keys, values, quoting,
+	 * assigning depending on context and parameter flags.
+	 *
+	 * This is the last mention of subexp, so presumably this
+	 * is the reason for the code which makes sure subexp is set if
+	 * aspar (the (P) flag) is set.  I *think* what's going on here is the
+	 * second argument is for both input and output: with
+	 * subexp, we only want the input effect, whereas normally
+	 * we let fetchvalue set the main string pointer s to
+	 * the end of the bit it's fetched.
+	 */
 	if (!(v = fetchvalue(&vbuf, (subexp ? &ov : &s),
 			     (wantt ? -1 :
 			      ((unset(KSHARRAYS) || inbrace) ? 1 : -1)),
-			     hkeys|hvals|(arrasg ? SCANPM_ASSIGNING : 0))) ||
+			     hkeys|hvals|
+			     (arrasg ? SCANPM_ASSIGNING : 0)|
+			     (qt ? SCANPM_DQUOTED : 0))) ||
 	    (v->pm && (v->pm->flags & PM_UNSET)))
 	    vunset = 1;
 
 	if (wantt) {
+	    /*
+	     * Handle the (t) flag: value now becomes the type
+	     * information for the parameter.
+	     */
 	    if (v && v->pm && !(v->pm->flags & PM_UNSET)) {
 		int f = v->pm->flags;
 
@@ -1104,6 +1561,8 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 		    val = dyncat(val, "-unique");
 		if (f & PM_HIDE)
 		    val = dyncat(val, "-hide");
+		if (f & PM_HIDEVAL)	/* hide-value attribute, distinct from PM_HIDE */
+		    val = dyncat(val, "-hideval");
 		if (f & PM_SPECIAL)
 		    val = dyncat(val, "-special");
 		vunset = 0;
@@ -1114,8 +1573,24 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 	    isarr = 0;
 	}
     }
+    /*
+     * We get in here two ways; either we need to convert v into
+     * the local value system, or we need to get rid of brackets
+     * even if there isn't a v.
+     */
     while (v || ((inbrace || (unset(KSHARRAYS) && vunset)) && isbrack(*s))) {
 	if (!v) {
+	    /*
+	     * Index applied to non-existent parameter; we may or may
+	     * not have a value to index, however.  Create a temporary
+	     * empty parameter as a trick, and index on that.  This
+	     * usually happens the second time around the loop when
+	     * we've used up the original parameter value and want to
+	     * apply a subscript to what's left.  However, it's also
+	     * possible it's got something to do with some of that murky
+	     * passing of -1's as the third argument to fetchvalue() to
+	     * inhibit bracket parsing at that stage.
+	     */
 	    Param pm;
 	    char *os = s;
 
@@ -1126,6 +1601,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 		isarr = 0;
 	    }
 	    pm = createparam(nulstring, isarr ? PM_ARRAY : PM_SCALAR);
+	    DPUTS(!pm, "BUG: parameter not created");
 	    if (isarr)
 		pm->u.arr = aval;
 	    else
@@ -1133,10 +1609,25 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 	    v = (Value) hcalloc(sizeof *v);
 	    v->isarr = isarr;
 	    v->pm = pm;
-	    v->b = -1;
-	    if (getindex(&s, v) || s == os)
+	    v->end = -1;
+	    if (getindex(&s, v, qt) || s == os)
 		break;
 	}
+	/*
+	 * This is where we extract a value (we know now we have
+	 * one) into the local parameters for a scalar (val) or
+	 * array (aval) value.  TODO: move val and aval into
+	 * a structure with a discriminator.  Hope we can make
+	 * more things array values at this point and dearrayify later.
+	 * v->isarr tells us whether the stuff from down below looks
+	 * like an array.  Unlike multsub() this is probably clean
+	 * enough to keep, although possibly the parameter passing
+	 * needs reorganising.
+	 *
+	 * I think we get to discard the existing value of isarr
+	 * here because it's already been taken account of, either
+	 * in the subexp stuff or immediately above.
+	 */
 	if ((isarr = v->isarr)) {
 	    /* No way to get here with v->inv != 0, so getvaluearr() *
 	     * is called by getarrvalue(); needn't test PM_HASHED.   */
@@ -1146,17 +1637,37 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 	    } else
 		aval = getarrvalue(v);
 	} else {
+	    /* Value retrieved from parameter/subexpression is scalar */
 	    if (v->pm->flags & PM_ARRAY) {
-		int tmplen = arrlen(v->pm->gets.afn(v->pm));
-
-		if (v->a < 0)
-		    v->a += tmplen + v->inv;
-		if (!v->inv && (v->a >= tmplen || v->a < 0))
+		/*
+		 * Although the value is a scalar, the parameter
+		 * itself is an array.  Presumably this is due to
+		 * being quoted, or doing single substitution or something.
+		 * TODO: we're about to do some definitely stringy
+		 * stuff, so something like this bit is probably
+		 * necessary.  However, I'd like to leave any
+		 * necessary joining of arrays until this point
+		 * to avoid the multsub() horror.
+		 */
+		int tmplen = arrlen(v->pm->gsu.a->getfn(v->pm));
+
+		if (v->start < 0)
+		    v->start += tmplen + v->inv;
+		if (!v->inv && (v->start >= tmplen || v->start < 0))
 		    vunset = 1;
 	    }
 	    if (!vunset) {
+		/*
+		 * There really is a value.  Apply any necessary
+		 * padding or case transformation.  Note these
+		 * are the per-parameter transformations specified
+		 * with typeset, not the per-substitution ones set
+		 * by flags.  TODO: maybe therefore this would
+		 * be more consistent if moved into getstrvalue()?
+		 * Bet that's easier said than done.
+		 */
 		val = getstrvalue(v);
-		fwidth = v->pm->ct ? v->pm->ct : strlen(val);
+		fwidth = v->pm->width ? v->pm->width : (int)strlen(val);
 		switch (v->pm->flags & (PM_LEFT | PM_RIGHT_B | PM_RIGHT_Z)) {
 		    char *t;
 		    unsigned int t0;
@@ -1184,17 +1695,67 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 			int zero = 1;
 
 			if (strlen(val) < fwidth) {
+			    char *valprefend = val;
 			    if (v->pm->flags & PM_RIGHT_Z) {
-				for (t = val; iblank(*t); t++);
-				if (!*t || !idigit(*t))
+				/*
+				 * This is a documented feature: when deciding
+				 * whether to pad with zeroes, ignore
+				 * leading blanks already in the value;
+				 * only look for numbers after that.
+				 * Not sure how useful this really is.
+				 * It's certainly confusing to code around.
+				 */
+				for (t = val; iblank(*t); t++)
+				    ;
+				/*
+				 * Allow padding after initial minus
+				 * for numeric variables.
+				 */
+				if ((v->pm->flags &
+				     (PM_INTEGER|PM_EFLOAT|PM_FFLOAT)) &&
+				    *t == '-')
+				    t++;
+				/*
+				 * Allow padding after initial 0x or
+				 * base# for integer variables.
+				 */
+				if (v->pm->flags & PM_INTEGER) {
+				    if (isset(CBASES) &&
+					t[0] == '0' && t[1] == 'x')
+					t += 2;
+				    else if ((valprefend = strchr(t, '#')))
+					t = valprefend + 1;
+				}
+				valprefend = t;
+				if (!*t)
+				    zero = 0;
+				else if (v->pm->flags &
+					 (PM_INTEGER|PM_EFLOAT|PM_FFLOAT)) {
+				    /* zero always OK */
+				} else if (!idigit(*t))
 				    zero = 0;
 			    }
 			    t = (char *) hcalloc(fwidth + 1);
 			    memset(t, (((v->pm->flags & PM_RIGHT_B) || !zero) ?
 				       ' ' : '0'), fwidth);
+			    /*
+			     * How can the following trigger?  We
+			     * haven't altered val or fwidth since
+			     * the last time we tested this.
+			     */
 			    if ((t0 = strlen(val)) > fwidth)
 				t0 = fwidth;
-			    strcpy(t + (fwidth - t0), val);
+			    /*
+			     * Copy - or 0x or base# before any padding
+			     * zeroes.
+			     */
+			    if (zero && val != valprefend) {
+				int preflen = valprefend - val;
+				memcpy(t, val, preflen);
+				strcpy(t + (fwidth - t0) + preflen,
+				       valprefend);
+			    } else
+				strcpy(t + (fwidth - t0), val);
 			    val = t;
 			} else {
 			    t = (char *) hcalloc(fwidth + 1);
@@ -1221,10 +1782,58 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 		}
 	    }
 	}
+	/*
+	 * Finished with the original parameter and its indices;
+	 * carry on looping to see if we need to do more indexing.
+	 * This means we finally get rid of v in favour of val and
+	 * aval.  We could do with somehow encapsulating the bit
+	 * where we need v.
+	 */
 	v = NULL;
 	if (!inbrace)
 	    break;
     }
+    /*
+     * We're now past the name or subexpression; the only things
+     * which can happen now are a closing brace, one of the standard
+     * parameter postmodifiers, or a history-style colon-modifier.
+     *
+     * Again, this duplicates tests for characters we're about to
+     * examine properly later on.
+     */
+    if (inbrace &&
+	(c = *s) != '-' && c != '+' && c != ':' && c != '%'  && c != '/' &&
+	c != '=' && c != Equals &&
+	c != '#' && c != Pound &&
+	c != '?' && c != Quest &&
+	c != '}' && c != Outbrace) {
+	zerr("bad substitution", NULL, 0);
+	return NULL;
+    }
+    /*
+     * Join arrays up if we're in quotes and there isn't some
+     * override such as (@).
+     * TODO: hmm, if we're called as part of some recursive
+     * substitution do we want to delay this until we get back to
+     * the top level?  Or is if there's a qt (i.e. this parameter
+     * substitution is in quotes) always good enough?  Potentially
+     * we may be OK by now --- all potential `@'s and subexpressions
+     * have been handled, including any [@] index which comes up
+     * by virtue of v->isarr being set to SCANPM_ISVAR_AT which
+     * is now in isarr.
+     *
+     * However, if we are replacing multsub() with something that
+     * doesn't mangle arrays, we may need to delay this step until after
+     * the foo:- or foo:= or whatever that causes that.  Note the value
+     * (string or array) at this point is irrelevant if we are going to
+     * be doing that.  This would mean // and stuff get applied
+     * arraywise even if quoted.  That's probably wrong, so maybe
+     * this just stays.
+     *
+     * We do a separate stage of dearrayification in the YUK chunk,
+     * I think mostly because of the way we make array or scalar
+     * values appear to the caller.
+     */
     if (isarr) {
 	if (nojoin)
 	    isarr = -1;
@@ -1235,9 +1844,20 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
     }
 
     idend = s;
-    if (inbrace)
+    if (inbrace) {
+	/*
+	 * This is to match a closing double quote in case
+	 * we didn't have a subexpression, e.g. ${"foo"}.
+	 * This form is pointless, but logically it ought to work.
+	 */
 	while (INULL(*s))
 	    s++;
+    }
+    /*
+     * We don't yet know whether a `:' introduces a history-style
+     * colon modifier or qualifies something like ${...:=...}.
+     * But if we remember the colon here it's easy to check later.
+     */
     if ((colf = *s == ':'))
 	s++;
 
@@ -1268,13 +1888,18 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 
     if (inbrace && ((c = *s) == '-' ||
 		    c == '+' ||
-		    c == ':' ||
+		    c == ':' ||	/* i.e. a doubled colon */
 		    c == '=' || c == Equals ||
 		    c == '%' ||
 		    c == '#' || c == Pound ||
 		    c == '?' || c == Quest ||
 		    c == '/')) {
 
+	/*
+	 * Default index is 1 if no (I) or (I) gave zero.   But
+	 * why don't we set the default explicitly at the start
+	 * and massage any passed index where we set flnum anyway?
+	 */
 	if (!flnum)
 	    flnum++;
 	if (c == '%')
@@ -1297,7 +1922,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 	    if ((c = *s) == '/') {
 		/* doubled, so replace all occurrences */
 		flags |= SUB_GLOBAL;
-		s++;
+		c = *++s;
 	    }
 	    /* Check for anchored substitution */
 	    if (c == '%') {
@@ -1314,19 +1939,25 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 	     * If there isn't one, we're just going to delete that,
 	     * i.e. replace it with an empty string.
 	     *
-	     * This allows quotation of the slash with '\\/'. Why
-	     * two?  Well, for a non-quoted string we can check for
-	     * Bnull+/, which is what you get from `\/', but inside
-	     * double quotes the Bnull isn't there, so it's not
-	     * consistent.
+	     * We used to use double backslashes to quote slashes,
+	     * but actually that was buggy and using a single backslash
+	     * is easier and more obvious.
 	     */
 	    for (ptr = s; (c = *ptr) && c != '/'; ptr++)
-		if (c == '\\' && ptr[1] == '/')
-		    chuck(ptr);
+	    {
+		if ((c == Bnull || c == Bnullkeep || c == '\\') && ptr[1])
+		{
+		    if (ptr[1] == '/')
+			chuck(ptr);
+		    else
+			ptr++;
+		}
+	    }
 	    replstr = (*ptr && ptr[1]) ? ptr+1 : "";
 	    *ptr = '\0';
 	}
 
+	/* See if this was ${...:-...}, ${...:=...}, etc. */
 	if (colf)
 	    flags |= SUB_ALL;
 	/*
@@ -1353,11 +1984,29 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 	case '-':
 	    if (vunset) {
 		val = dupstring(s);
-		multsub(&val, NULL, &isarr, NULL);
+		/*
+		 * This is not good enough for sh emulation!  Sh would
+		 * split unquoted substrings, yet not split quoted ones
+		 * (except according to $@ rules); but this leaves the
+		 * unquoted substrings unsplit, and other code below
+		 * for spbreak splits even within the quoted substrings.
+		 *
+		 * TODO: I think multsub needs to be told enough to
+		 * decide about splitting with spbreak at this point
+		 * (and equally in the `=' handler below).  Then
+		 * we can turn off spbreak to avoid the join & split
+		 * nastiness later.
+		 *
+		 * What we really want to do is make this look as
+		 * if it were the result of an assignment from
+		 * the same value, taking account of quoting.
+		 */
+		multsub(&val, (aspar ? NULL : &aval), &isarr, NULL);
 		copied = 1;
 	    }
 	    break;
 	case ':':
+	    /* this must be `::=', unconditional assignment */
 	    if (*s != '=' && *s != Equals)
 		goto noclosebrace;
 	    vunset = 1;
@@ -1372,17 +2021,26 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 		*idend = '\0';
 		val = dupstring(s);
 		isarr = 0;
+		/*
+		 * TODO: this is one of those places where I don't
+		 * think we want to do the joining until later on.
+		 * We also need to handle spbreak and spsep at this
+		 * point and unset them.
+		 */
 		if (spsep || spbreak || !arrasg)
 		    multsub(&val, NULL, NULL, sep);
 		else
 		    multsub(&val, &aval, &isarr, NULL);
 		if (arrasg) {
+		    /*
+		     * This is an array assignment in a context
+		     * where we have no syntactic way of finding
+		     * out what an array element is.  So we just guess.
+		     */
 		    char *arr[2], **t, **a, **p;
 		    if (spsep || spbreak) {
 			aval = sepsplit(val, spsep, 0, 1);
 			isarr = 2;
-			sep = spsep = NULL;
-			spbreak = 0;
 			l = arrlen(aval);
 			if (l && !*(aval[l-1]))
 			    l--;
@@ -1411,7 +2069,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 		    if (arrasg > 1) {
 			Param pm = sethparam(idbeg, a);
 			if (pm)
-			    aval = paramvalarr(pm->gets.hfn(pm), hkeys|hvals);
+			    aval = paramvalarr(pm->gsu.h->getfn(pm), hkeys|hvals);
 		    } else
 			setaparam(idbeg, a);
 		} else {
@@ -1420,6 +2078,16 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 		}
 		*idend = sav;
 		copied = 1;
+		if (isarr) {
+		  if (nojoin)
+		    isarr = -1;
+		  if (qt && !getlen && isarr > 0 && !spsep && spbreak < 2) {
+		    val = sepjoin(aval, sep, 1);
+		    isarr = 0;
+		  }
+		  sep = spsep = NULL;
+		  spbreak = 0;
+		}
 	    }
 	    break;
 	case '?':
@@ -1440,7 +2108,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 	case '#':
 	case Pound:
 	case '/':
-	    if (qt) {
+            /* This once was executed only `if (qt) ...'. But with that,
+             * patterns in an expansion resulting from a ${(e)...} aren't
+             * tokenized even though this function thinks they are (it thinks
+             * they are because subst_parse_str() turns Qstring tokens
+             * into String tokens and for unquoted parameter expansions the
+             * lexer normally does tokenize patterns inside parameter
+             * expansions). */
+            {
 		int one = noerrs, oef = errflag, haserr;
 
 		if (!quoteerr)
@@ -1450,7 +2125,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 		if (!quoteerr) {
 		    errflag = oef;
 		    if (haserr)
-			tokenize(s);
+			shtokenize(s);
 		} else if (haserr || errflag) {
 		    zerr("parse error in ${...%c...} substitution",
 			 NULL, s[-1]);
@@ -1458,9 +2133,18 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 		}
 	    }
 	    {
+#if 0
+		/*
+		 * This allows # and % to be at the start of
+		 * a parameter in the substitution, which is
+		 * a bit nasty, and can be done (although
+		 * less efficiently) with anchors.
+		 */
+
 		char t = s[-1];
 
 		singsub(&s);
+
 		if (t == '/' && (flags & SUB_SUBSTR)) {
 		    if ((c = *s) == '#' || c == '%') {
 			flags &= ~SUB_SUBSTR;
@@ -1471,8 +2155,15 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 			s++;
 		    }
 		}
+#else
+		singsub(&s);
+#endif
 	    }
 
+	    /*
+	     * Either loop over an array doing replacements or
+	     * do the replacement on a string.
+	     */
 	    if (!vunset && isarr) {
 		getmatcharr(&aval, s, flags, flnum, replstr);
 		copied = 1;
@@ -1485,6 +2176,11 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 	    break;
 	}
     } else {			/* no ${...=...} or anything, but possible modifiers. */
+	/*
+	 * Handle ${+...}.  TODO: strange, why do we handle this only
+	 * if there isn't a trailing modifier?  Why don't we do this
+	 * e.g. when we handle the ${(t)...} flag?
+	 */
 	if (chkset) {
 	    val = dupstring(vunset ? "0" : "1");
 	    isarr = 0;
@@ -1497,6 +2193,10 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 	    val = dupstring("");
 	}
 	if (colf) {
+	    /*
+	     * History style colon modifiers.  May need to apply
+	     * on multiple elements of an array.
+	     */
 	    s--;
 	    if (unset(KSHARRAYS) || inbrace) {
 		if (!isarr)
@@ -1533,6 +2233,13 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
     }
     if (errflag)
 	return NULL;
+    /*
+     * This handles taking a length with ${#foo} and variations.
+     * TODO: again, one might naively have thought this had the
+     * same sort of effect as the ${(t)...} flag and the ${+...}
+     * test, although in this case we do need the value rather
+     * than the parameter, so maybe it's a bit different.
+     */
     if (getlen) {
 	long len = 0;
 	char buf[14];
@@ -1563,6 +2270,23 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 	val = dupstring(buf);
 	isarr = 0;
     }
+    /*
+     * I think this mult_isarr stuff here is used to pass back
+     * the setting of whether we are an array to multsub, and
+     * thence to the top-level paramsubst().  The way the
+     * setting is passed back is completely obscure, however.
+     * It's presumably at this point because we try to remember
+     * whether the value was `really' an array before massaging
+     * some special cases.
+     *
+     * TODO: YUK.  This is not the right place to turn arrays into
+     * scalars; we should pass back as an array, and let the calling
+     * code decide how to deal with it.  This is almost certainly
+     * a lot harder than it sounds.  Do we really need to handle
+     * one-element arrays as scalars at this point?  Couldn't
+     * we just test for it later rather than having a multiple-valued
+     * wave-function for isarr?
+     */
     mult_isarr = isarr;
     if (isarr > 0 && !plan9 && (!aval || !aval[0])) {
 	val = dupstring("");
@@ -1577,6 +2301,12 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
     }
     /* ssub is true when we are called from singsub (via prefork).
      * It means that we must join arrays and should not split words. */
+    /*
+     * TODO: this is what is screwing up the use of SH_WORD_SPLIT
+     * after `:-' etc.  If we fix multsub(), we might get away
+     * with simply unsetting the appropriate flags when they
+     * get handled.
+     */
     if (ssub || spbreak || spsep || sep) {
 	if (isarr)
 	    val = sepjoin(aval, sep, 1), isarr = 0;
@@ -1591,6 +2321,9 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 	}
 	mult_isarr = isarr;
     }
+    /*
+     * Perform case modifications.
+     */
     if (casmod) {
 	if (isarr) {
 	    char **ap;
@@ -1620,6 +2353,9 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 		makecapitals(&val);
 	}
     }
+    /*
+     * Perform prompt-style modifications.
+     */
     if (presc) {
 	int ops = opts[PROMPTSUBST], opb = opts[PROMPTBANG];
 	int opp = opts[PROMPTPERCENT], len;
@@ -1628,6 +2364,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 	    opts[PROMPTPERCENT] = 1;
 	    opts[PROMPTSUBST] = opts[PROMPTBANG] = 0;
 	}
+	/*
+	 * TODO:  It would be really quite nice to abstract the
+	 * isarr and !isarr code into a function which gets
+	 * passed a pointer to a function with the effect of
+	 * the promptexpand bit.  Then we could use this for
+	 * a lot of stuff and bury val/aval/isarr inside a structure
+	 * which gets passed to it.
+	 */
 	if (isarr) {
 	    char **ap;
 
@@ -1635,23 +2379,33 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 		aval = arrdup(aval), copied = 1;
 	    ap = aval;
 	    for (; *ap; ap++) {
+		char *tmps;
 		unmetafy(*ap, &len);
 		untokenize(*ap);
-		*ap = unmetafy(promptexpand(metafy(*ap, len, META_NOALLOC),
-					    0, NULL, NULL), &len);
+		tmps = unmetafy(promptexpand(metafy(*ap, len, META_NOALLOC),
+					     0, NULL, NULL), &len);
+		*ap = dupstring(tmps);
+		free(tmps);
 	    }
 	} else {
+	    char *tmps;
 	    if (!copied)
 		val = dupstring(val), copied = 1;
 	    unmetafy(val, &len);
 	    untokenize(val);
-	    val = unmetafy(promptexpand(metafy(val, len, META_NOALLOC),
+	    tmps = unmetafy(promptexpand(metafy(val, len, META_NOALLOC),
 					0, NULL, NULL), &len);
+	    val = dupstring(tmps);
+	    free(tmps);
 	}
 	opts[PROMPTSUBST] = ops;
 	opts[PROMPTBANG] = opb;
 	opts[PROMPTPERCENT] = opp;
     }
+    /*
+     * One of the possible set of quotes to apply, depending on
+     * the repetitions of the (q) flag.
+     */
     if (quotemod) {
 	if (--quotetype > 3)
 	    quotetype = 3;
@@ -1735,6 +2489,10 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 	    }
 	}
     }
+    /*
+     * Transform special characters in the string to make them
+     * printable.
+     */
     if (visiblemod) {
 	if (isarr) {
 	    char **ap;
@@ -1748,6 +2506,54 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 	    val = nicedupstring(val);
 	}
     }
+    /*
+     * Nothing particularly to do with SH_WORD_SPLIT --- this
+     * performs lexical splitting on a string as specified by
+     * the (z) flag.
+     */
+    if (shsplit) {
+	LinkList list = NULL;
+
+	if (isarr) {
+	    char **ap;
+	    for (ap = aval; *ap; ap++)
+		list = bufferwords(list, *ap, NULL);
+	    isarr = 0;
+	} else
+	    list = bufferwords(NULL, val, NULL);
+
+	if (!list || !firstnode(list))
+	    val = dupstring("");
+	else if (!nextnode(firstnode(list)))
+	    val = getdata(firstnode(list));
+	else {
+	    char **ap;
+	    LinkNode node;
+
+	    aval = ap = (char **) zhalloc((countlinknodes(list) + 1) *
+					  sizeof(char *));
+	    for (node = firstnode(list); node; incnode(node))
+		*ap++ = (char *) getdata(node);
+	    *ap = NULL;
+	    mult_isarr = isarr = 2;
+	}
+	copied = 1;
+    }
+    /*
+     * TODO: hmm.  At this point we have to be on our toes about
+     * whether we're putting stuff into a line or not, i.e.
+     * we don't want to do this from a recursive call; this is
+     * probably part of the point of the mult_isarr monkey business.
+     * Rather than passing back flags in a non-trivial way, maybe
+     * we could decide on the basis of flags passed down to us.
+     *
+     * This is the ideal place to do any last-minute conversion from
+     * array to strings.  However, given all the transformations we've
+     * already done, probably if it's going to be done it will already
+     * have been.  (I'd really like to keep everything in aval or
+     * equivalent and only locally decide if we need to treat it
+     * as a scalar.)
+     */
     if (isarr) {
 	char *x;
 	char *y;
@@ -1755,36 +2561,78 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 	int i;
 	LinkNode on = n;
 
-	if (!aval[0] && !plan9) {
+	/* Handle the (u) flag; we need this before the next test */
+	if (unique) {
+	    if(!copied)
+		aval = arrdup(aval);
+
+	    i = arrlen(aval);
+	    if (i > 1)
+		zhuniqarray(aval);
+	}
+	if ((!aval[0] || !aval[1]) && !plan9) {
+	    /*
+	     * Empty array or single element.  Currently you only
+	     * get a single element array at this point from the
+	     * unique expansion above, but we can potentially
+	     * have other reasons.
+	     *
+	     * The following test removes the markers
+	     * from surrounding double quotes, but I don't know why
+	     * that's necessary.
+	     */
+	    int vallen;
 	    if (aptr > (char *) getdata(n) &&
 		aptr[-1] == Dnull && *fstr == Dnull)
 		*--aptr = '\0', fstr++;
-	    y = (char *) hcalloc((aptr - ostr) + strlen(fstr) + 1);
+	    vallen = aval[0] ? strlen(aval[0]) : 0;
+	    y = (char *) hcalloc((aptr - ostr) + vallen + strlen(fstr) + 1);
 	    strcpy(y, ostr);
 	    *str = y + (aptr - ostr);
+	    if (vallen)
+	    {
+		strcpy(*str, aval[0]);
+		*str += vallen;
+	    }
 	    strcpy(*str, fstr);
 	    setdata(n, y);
 	    return n;
 	}
+	/* Handle (o) and (O) and their variants */
 	if (sortit) {
-	    static CompareFn sortfn[] = {
-		strpcmp, invstrpcmp, cstrpcmp, invcstrpcmp
-	    };
-
 	    if (!copied)
 		aval = arrdup(aval);
-
-	    i = arrlen(aval);
-	    if (i && (*aval[i-1] || --i))
-		qsort(aval, i, sizeof(char *), sortfn[sortit-1]);
+	    if (indord) {
+		if (sortit & 2) {
+		    char *copy;
+		    char **end = aval + arrlen(aval) - 1, **start = aval;
+
+		    /* reverse the array */
+		    while (start < end) {
+			copy = *end;
+			*end-- = *start;
+			*start++ = copy;
+		    }
+		}
+	    } else {
+		static CompareFn sortfn[] = {
+		    strpcmp, invstrpcmp, cstrpcmp, invcstrpcmp,
+		    nstrpcmp, invnstrpcmp, instrpcmp, invinstrpcmp
+		};
+
+		i = arrlen(aval);
+		if (i && (*aval[i-1] || --i))
+		    qsort(aval, i, sizeof(char *), sortfn[sortit-1]);
+	    }
 	}
 	if (plan9) {
+	    /* Handle RC_EXPAND_PARAM */
 	    LinkNode tn;
 	    local_list1(tl);
 
 	    *--fstr = Marker;
 	    init_list1(tl, fstr);
-	    if (!eval && !stringsubst(&tl, firstnode(&tl), ssub))
+	    if (!eval && !stringsubst(&tl, firstnode(&tl), ssub, 0))
 		return NULL;
 	    *str = aptr;
 	    tn = firstnode(&tl);
@@ -1792,7 +2640,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 		if (prenum || postnum)
 		    x = dopadding(x, prenum, postnum, preone, postone,
 				  premul, postmul);
-		if (eval && subst_parse_str(&x, (qt && !nojoin)))
+		if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr))
 		    return NULL;
 		xlen = strlen(x);
 		for (tn = firstnode(&tl);
@@ -1824,11 +2672,19 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 		return n;
 	    }
 	} else {
+	    /*
+	     * Not RC_EXPAND_PARAM: simply join the first and
+	     * last values.
+	     * TODO: how about removing the restriction that
+	     * aval[1] is non-NULL to promote consistency?, or
+	     * simply changing the test so that we drop into
+	     * the scalar branch, instead of tricking isarr?
+	     */
 	    x = aval[0];
 	    if (prenum || postnum)
 		x = dopadding(x, prenum, postnum, preone, postone,
 			      premul, postmul);
-	    if (eval && subst_parse_str(&x, (qt && !nojoin)))
+	    if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr))
 		return NULL;
 	    xlen = strlen(x);
 	    strcatsub(&y, ostr, aptr, x, xlen, NULL, globsubst, copied);
@@ -1843,14 +2699,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 		if (prenum || postnum)
 		    x = dopadding(x, prenum, postnum, preone, postone,
 				  premul, postmul);
-		if (eval && subst_parse_str(&x, (qt && !nojoin)))
+		if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr))
 		    return NULL;
 		if (qt && !*x && isarr != 2)
 		    y = dupstring(nulstring);
 		else {
 		    y = dupstring(x);
 		    if (globsubst)
-			tokenize(y);
+			shtokenize(y);
 		}
 		insertlinknode(l, n, (void *) y), incnode(n);
 	    }
@@ -1859,7 +2715,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 	    if (prenum || postnum)
 		x = dopadding(x, prenum, postnum, preone, postone,
 			      premul, postmul);
-	    if (eval && subst_parse_str(&x, (qt && !nojoin)))
+	    if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr))
 		return NULL;
 	    xlen = strlen(x);
 	    *str = strcatsub(&y, aptr, aptr, x, xlen, fstr, globsubst, copied);
@@ -1870,6 +2726,11 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 	if (eval)
 	    n = on;
     } else {
+	/*
+	 * Scalar value.  Handle last minute transformations
+	 * such as left- or right-padding and the (e) flag to
+	 * re-evaluate the result.
+	 */
 	int xlen;
 	char *x;
 	char *y;
@@ -1878,7 +2739,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 	if (prenum || postnum)
 	    x = dopadding(x, prenum, postnum, preone, postone,
 			  premul, postmul);
-	if (eval && subst_parse_str(&x, (qt && !nojoin)))
+	if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr))
 	    return NULL;
 	xlen = strlen(x);
 	*str = strcatsub(&y, ostr, aptr, x, xlen, fstr, globsubst, copied);
@@ -1905,15 +2766,18 @@ static char *
 arithsubst(char *a, char **bptr, char *rest)
 {
     char *s = *bptr, *t;
-    char buf[DIGBUFSIZE], *b = buf;
+    char buf[BDIGBUFSIZE], *b = buf;
     mnumber v;
 
     singsub(&a);
     v = matheval(a);
-    if (v.type & MN_FLOAT)
+    if ((v.type & MN_FLOAT) && !outputradix)
 	b = convfloat(v.u.d, 0, 0, NULL);
-    else
-	convbase(buf, v.u.l, 0);
+    else {
+	if (v.type & MN_FLOAT)
+	    v.u.l = (zlong) v.u.d;
+	convbase(buf, v.u.l, outputradix);
+    }
     t = *bptr = (char *) hcalloc(strlen(*bptr) + strlen(b) + 
 				 strlen(rest) + 1);
     t--;
@@ -1975,18 +2839,18 @@ modify(char **str, char **ptr)
 		if (*ptr1) {
 		    zsfree(hsubl);
 		    hsubl = ztrdup(ptr1);
-		}
+ 		}
 		if (!hsubl) {
 		    zerr("no previous substitution", NULL, 0);
 		    return;
 		}
 		zsfree(hsubr);
 		for (tt = hsubl; *tt; tt++)
-		    if (INULL(*tt))
+		    if (INULL(*tt) && *tt != Bnullkeep)
 			chuck(tt--);
 		untokenize(hsubl);
 		for (tt = hsubr = ztrdup(ptr2); *tt; tt++)
-		    if (INULL(*tt))
+		    if (INULL(*tt) && *tt != Bnullkeep)
 			chuck(tt--);
 		ptr2[-1] = del;
 		if (sav)
diff --git a/Src/zsh.h b/Src/zsh.h
index 4500cb21e..a0959456a 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -128,7 +128,10 @@ struct mathfunc {
 
 #define DEFAULT_IFS	" \t\n\203 "
 
-/* Character tokens */
+/*
+ * Character tokens.
+ * These should match the characters in ztokens, defined in lex.c
+ */
 #define Pound		((char) 0x84)
 #define String		((char) 0x85)
 #define Hat		((char) 0x86)
@@ -149,15 +152,33 @@ struct mathfunc {
 #define Tilde		((char) 0x95)
 #define Qtick		((char) 0x96)
 #define Comma		((char) 0x97)
+/*
+ * Null arguments: placeholders for single and double quotes
+ * and backslashes.
+ */
 #define Snull		((char) 0x98)
 #define Dnull		((char) 0x99)
 #define Bnull		((char) 0x9a)
-#define Nularg		((char) 0x9b)
+/*
+ * Backslash which will be returned to "\" instead of being stripped
+ * when we turn the string into a printable format.
+ */
+#define Bnullkeep       ((char) 0x9b)
+/*
+ * Null argument that does not correspond to any character.
+ * This should be last as it does not appear in ztokens and
+ * is used to initialise the IMETA type in inittyptab().
+ */
+#define Nularg		((char) 0x9c)
 
-#define INULL(x)	(((x) & 0xfc) == 0x98)
+#define INULL(x)	((x) >= Snull && (x) <= Nularg)
 
+/*
+ * Take care to update the use of IMETA appropriately when adding
+ * tokens here.
+ */
 /* Marker used in paramsubst for rc_expand_param */
-#define Marker		((char) 0x9c)
+#define Marker		((char) 0xa0)
 
 /* chars that need to be quoted if meant literally */
 
diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst
index 3ad19368d..6abba0ab3 100644
--- a/Test/D04parameter.ztst
+++ b/Test/D04parameter.ztst
@@ -196,6 +196,20 @@
 >* boringfile evenmoreboringfile boringfile evenmoreboringfile
 >boringfile evenmoreboringfile
 
+# The following tests a bug where globsubst didn't preserve
+# backslashes when printing out the original string.
+  str1='\\*\\'
+  (
+  setopt globsubst nonomatch
+  [[ \\\\ = $str1 ]] && print -r '\\ matched by' $str1
+  [[ \\foo\\ = $str1 ]] && print -r '\\foo matched by' $str1
+  [[ a\\b\\ = $str1 ]] || print -r 'a\\b not matched by' $str1
+  )
+0:globsubst with backslashes
+>\\ matched by \\*\\
+>\\foo matched by \\*\\
+>a\\b not matched by \\*\\
+
   print -l "${$(print one word)}" "${=$(print two words)}"
 0:splitting of $(...) inside ${...}
 >one word
diff --git a/Test/ztst.zsh b/Test/ztst.zsh
index fe996832c..c0fbc179f 100755
--- a/Test/ztst.zsh
+++ b/Test/ztst.zsh
@@ -22,9 +22,17 @@
 # still not be good enough.  Maybe we should trick it somehow.
 emulate -R zsh
 
+# Ensure the locale does not screw up sorting.  Don't supply a locale
+# unless there's one set, to minimise problems.
+[[ -n $LC_ALL ]] && LC_ALL=C
+[[ -n $LC_COLLATE ]] && LC_COLLATE=C
+[[ -n $LANG ]] && LANG=C
+
 # Set the module load path to correspond to this build of zsh.
 # This Modules directory should have been created by "make check".
 [[ -d Modules/zsh ]] && module_path=( $PWD/Modules )
+# Allow this to be passed down.
+export MODULE_PATH
 
 # We need to be able to save and restore the options used in the test.
 # We use the $options variable of the parameter module for this.
@@ -47,12 +55,48 @@ ZTST_mainopts=(${(kv)options})
 ZTST_testdir=$PWD
 ZTST_testname=$1
 
-# The source directory is not necessarily the current directory
-ZTST_srcdir=${0%/*}
+integer ZTST_testfailed
+
+# This is POSIX nonsense.  Because of the vague feeling someone, somewhere
+# may one day need to examine the arguments of "tail" using a standard
+# option parser, every Unix user in the world is expected to switch
+# to using "tail -n NUM" instead of "tail -NUM".  Older versions of
+# tail don't support this.
+tail() {
+  emulate -L zsh
+
+  if [[ -z $TAIL_SUPPORTS_MINUS_N ]]; then
+    local test
+    test=$(echo "foo\nbar" | command tail -n 1 2>/dev/null)
+    if [[ $test = bar ]]; then
+      TAIL_SUPPORTS_MINUS_N=1
+    else
+      TAIL_SUPPORTS_MINUS_N=0
+    fi
+  fi
+
+  integer argi=${argv[(i)-<->]}
+
+  if [[ $argi -le $# && $TAIL_SUPPORTS_MINUS_N = 1 ]]; then
+    argv[$argi]=(-n ${argv[$argi][2,-1]})
+  fi
+
+  command tail "$argv[@]"
+}
+
+# The source directory is not necessarily the current directory,
+# but if $0 doesn't contain a `/' assume it is.
+if [[ $0 = */* ]]; then
+  ZTST_srcdir=${0%/*}
+else
+  ZTST_srcdir=$PWD
+fi
 [[ $ZTST_srcdir = /* ]] || ZTST_srcdir="$ZTST_testdir/$ZTST_srcdir"
 
 # Set the function autoload paths to correspond to this build of zsh.
-fpath=( $ZTST_srcdir/../(Completion|Functions)/*~*/CVS(/) )
+fpath=( $ZTST_srcdir/../Functions/*~*/CVS(/)
+        $ZTST_srcdir/../Completion
+        $ZTST_srcdir/../Completion/*/*~*/CVS(/) )
 
 : ${TMPPREFIX:=/tmp/zsh}
 # Temporary files for redirection inside tests.
@@ -66,14 +110,15 @@ ZTST_terr=${TMPPREFIX}.ztst.terr.$$
 
 ZTST_cleanup() {
   cd $ZTST_testdir
-  rm -rf $ZTST_testdir/dummy.tmp $ZTST_testdir/*.tmp ${TMPPREFIX}.ztst*$$
+  rm -rf $ZTST_testdir/dummy.tmp $ZTST_testdir/*.tmp(N) \
+    ${TMPPREFIX}.ztst*$$(N)
 }
 
 # This cleanup always gets performed, even if we abort.  Later,
 # we should try and arrange that any test-specific cleanup
 # always gets called as well.
-trap - 'print cleaning up...
-ZTST_cleanup' INT QUIT TERM
+##trap 'print cleaning up...
+##ZTST_cleanup' INT QUIT TERM
 # Make sure it's clean now.
 rm -rf dummy.tmp *.tmp
 
@@ -85,20 +130,31 @@ ZTST_testfailed() {
     print -r "Was testing: $ZTST_message"
   fi
   print -r "$ZTST_testname: test failed."
-  ZTST_cleanup
-  exit 1
+  if [[ -n $ZTST_failmsg ]]; then
+    print -r "The following may (or may not) help identifying the cause:
+$ZTST_failmsg"
+  fi
+  ZTST_testfailed=1
+  return 1
 }
 
 # Print messages if $ZTST_verbose is non-empty
 ZTST_verbose() {
   local lev=$1
   shift
-  [[ -n $ZTST_verbose && $ZTST_verbose -ge $lev ]] && print -- $* >&8
+  [[ -n $ZTST_verbose && $ZTST_verbose -ge $lev ]] && print -r -- $* >&8
+}
+ZTST_hashmark() {
+  [[ ZTST_verbose -le 0 && -t 8 ]] && print -nu8 ${(pl:SECONDS::\#::\#\r:)}
+  (( SECONDS > COLUMNS+1 && (SECONDS -= COLUMNS) ))
 }
 
-[[ ! -r $ZTST_testname ]] && ZTST_testfailed "can't read test file."
+if [[ ! -r $ZTST_testname ]]; then
+  ZTST_testfailed "can't read test file."
+  exit 1
+fi
 
-[[ -n $ZTST_verbose && $ZTST_verbose -ge 0 ]] && exec 8>&1
+exec 8>&1
 exec 9<$ZTST_testname
 
 # The current line read from the test file.
@@ -118,15 +174,18 @@ ZTST_getline() {
 
 # Get the name of the section.  It may already have been read into
 # $curline, or we may have to skip some initial comments to find it.
+# If argument present, it's OK to skip the rest of the current section,
+# so no error if we find garbage.
 ZTST_getsect() {
   local match mbegin mend
 
   while [[ $ZTST_curline != '%'(#b)([[:alnum:]]##)* ]]; do
     ZTST_getline || return 1
     [[ $ZTST_curline = [[:blank:]]# ]] && continue
-    if [[ $ZTST_curline != '%'[[:alnum:]]##* ]]; then
+    if [[ $# -eq 0 && $ZTST_curline != '%'[[:alnum:]]##* ]]; then
       ZTST_testfailed "bad line found before or after section:
 $ZTST_curline"
+      exit 1
     fi
   done
   # have the next line ready waiting
@@ -169,13 +228,14 @@ ${ZTST_curline[2,-1]}"
 $ZTST_redir"
 
 case $char in
-  '<') fn=$ZTST_in
+  ('<') fn=$ZTST_in
        ;;
-  '>') fn=$ZTST_out
+  ('>') fn=$ZTST_out
        ;;
-  '?') fn=$ZTST_err
+  ('?') fn=$ZTST_err
        ;;
-   *)  ZTST_testfailed "bad redir operator: $char"
+   (*)  ZTST_testfailed "bad redir operator: $char"
+       return 1
        ;;
 esac
 if [[ $ZTST_flags = *q* ]]; then
@@ -183,6 +243,8 @@ if [[ $ZTST_flags = *q* ]]; then
 else
   print -r -- "$ZTST_redir" >>$fn
 fi
+
+return 0
 }
 
 # Execute an indented chunk.  Redirections will already have
@@ -191,9 +253,10 @@ ZTST_execchunk() {
   options=($ZTST_testopts)
   eval "$ZTST_code"
   ZTST_status=$?
+  # careful... ksh_arrays may be in effect.
+  ZTST_testopts=(${(kv)options[*]})
+  options=(${ZTST_mainopts[*]})
   ZTST_verbose 2 "ZTST_execchunk: status $ZTST_status"
-  ZTST_testopts=(${(kv)options})
-  options=($ZTST_mainopts)
   return $ZTST_status
 }
 
@@ -202,12 +265,27 @@ ZTST_execchunk() {
 ZTST_prepclean() {
   # Execute indented code chunks.
   while ZTST_getchunk; do
-    ZTST_execchunk >/dev/null || [[ -n $1 ]] ||
-    ZTST_testfailed "non-zero status from preparation code:
-$ZTST_code"
+    ZTST_execchunk >/dev/null || [[ -n $1 ]] || {
+      [[ -n "$ZTST_unimplemented" ]] ||
+      ZTST_testfailed "non-zero status from preparation code:
+$ZTST_code" && return 0
+    }
   done
 }
 
+# diff wrapper
+ZTST_diff() {
+  local diff_out diff_ret
+
+  diff_out=$(diff "$@")
+  diff_ret="$?"
+  if [[ "$diff_ret" != "0" ]]; then
+    print -r "$diff_out"
+  fi
+
+  return "$diff_ret"
+}
+    
 ZTST_test() {
   local last match mbegin mend found
 
@@ -215,6 +293,7 @@ ZTST_test() {
     rm -f $ZTST_in $ZTST_out $ZTST_err
     touch $ZTST_in $ZTST_out $ZTST_err
     ZTST_message=''
+    ZTST_failmsg=''
     found=0
 
     ZTST_verbose 2 "ZTST_test: looking for new test"
@@ -223,14 +302,14 @@ ZTST_test() {
       ZTST_verbose 2 "ZTST_test: examining line:
 $ZTST_curline"
       case $ZTST_curline in
-	%*) if [[ $found = 0 ]]; then
+	(%*) if [[ $found = 0 ]]; then
 	      break 2
 	    else
 	      last=1
 	      break
 	    fi
 	    ;;
-	[[:space:]]#)
+	([[:space:]]#)
 	    if [[ $found = 0 ]]; then
 	      ZTST_getline || break 2
 	      continue
@@ -238,7 +317,7 @@ $ZTST_curline"
 	      break
 	    fi
 	    ;;
-	[[:space:]]##[^[:space:]]*) ZTST_getchunk
+	([[:space:]]##[^[:space:]]*) ZTST_getchunk
 	  if [[ $ZTST_curline == (#b)([-0-9]##)([[:alpha:]]#)(:*)# ]]; then
 	    ZTST_xstatus=$match[1]
 	    ZTST_flags=$match[2]
@@ -246,29 +325,38 @@ $ZTST_curline"
 	  else
 	    ZTST_testfailed "expecting test status at:
 $ZTST_curline"
+	    return 1
 	  fi
 	  ZTST_getline
 	  found=1
 	  ;;
-	'<'*) ZTST_getredir
+	('<'*) ZTST_getredir || return 1
 	  found=1
 	  ;;
-	'>'*) ZTST_getredir
+	('>'*) ZTST_getredir || return 1
 	  found=1
 	  ;;
-	'?'*) ZTST_getredir
+	('?'*) ZTST_getredir || return 1
 	  found=1
 	  ;;
-	*) ZTST_testfailed "bad line in test block:
+	('F:'*) ZTST_failmsg="${ZTST_failmsg:+${ZTST_failmsg}
+}  ${ZTST_curline[3,-1]}"
+	  ZTST_getline
+	  found=1
+          ;;
+	(*) ZTST_testfailed "bad line in test block:
 $ZTST_curline"
+	  return 1
           ;;
       esac
     done
 
     # If we found some code to execute...
     if [[ -n $ZTST_code ]]; then
+      ZTST_hashmark
       ZTST_verbose 1 "Running test: $ZTST_message"
       ZTST_verbose 2 "ZTST_test: expecting status: $ZTST_xstatus"
+      ZTST_verbose 2 "Input: $ZTST_in, output: $ZTST_out, error: $ZTST_terr"
 
       ZTST_execchunk <$ZTST_in >$ZTST_tout 2>$ZTST_terr
 
@@ -278,6 +366,7 @@ $ZTST_curline"
 $ZTST_code${$(<$ZTST_terr):+
 Error output:
 $(<$ZTST_terr)}"
+	return 1
       fi
 
       ZTST_verbose 2 "ZTST_test: test produced standard output:
@@ -286,15 +375,17 @@ ZTST_test: and standard error:
 $(<$ZTST_terr)"
 
       # Now check output and error.
-      if [[ $ZTST_flags != *d* ]] && ! diff -c $ZTST_out $ZTST_tout; then
+      if [[ $ZTST_flags != *d* ]] && ! ZTST_diff -c $ZTST_out $ZTST_tout; then
 	ZTST_testfailed "output differs from expected as shown above for:
 $ZTST_code${$(<$ZTST_terr):+
 Error output:
 $(<$ZTST_terr)}"
+	return 1
       fi
-      if [[ $ZTST_flags != *D* ]] && ! diff -c $ZTST_err $ZTST_terr; then
+      if [[ $ZTST_flags != *D* ]] && ! ZTST_diff -c $ZTST_err $ZTST_terr; then
 	ZTST_testfailed "error output differs from expected as shown above for:
 $ZTST_code"
+	return 1
       fi
     fi
     ZTST_verbose 1 "Test successful."
@@ -312,35 +403,52 @@ $ZTST_code"
 typeset -A ZTST_sects
 ZTST_sects=(prep 0 test 0 clean 0)
 
+print "$ZTST_testname: starting."
+
 # Now go through all the different sections until the end.
-while ZTST_getsect; do
+# prep section may set ZTST_unimplemented; in that case the actual
+# tests will be skipped
+ZTST_skipok=
+ZTST_unimplemented=
+while [[ -z "$ZTST_unimplemented" ]] && ZTST_getsect $ZTST_skipok; do
   case $ZTST_cursect in
-    prep) if (( ${ZTST_sects[prep]} + ${ZTST_sects[test]} + \
+    (prep) if (( ${ZTST_sects[prep]} + ${ZTST_sects[test]} + \
 	        ${ZTST_sects[clean]} )); then
 	    ZTST_testfailed "\`prep' section must come first"
+            exit 1
 	  fi
 	  ZTST_prepclean
 	  ZTST_sects[prep]=1
 	  ;;
-    test)
+    (test)
 	  if (( ${ZTST_sects[test]} + ${ZTST_sects[clean]} )); then
 	    ZTST_testfailed "bad placement of \`test' section"
+	    exit 1
 	  fi
+	  # careful here: we can't execute ZTST_test before || or &&
+	  # because that affects the behaviour of traps in the tests.
 	  ZTST_test
+	  (( $? )) && ZTST_skipok=1
 	  ZTST_sects[test]=1
 	  ;;
-    clean)
+    (clean)
 	   if (( ${ZTST_sects[test]} == 0 || ${ZTST_sects[clean]} )); then
 	     ZTST_testfailed "bad use of \`clean' section"
+	   else
+	     ZTST_prepclean 1
+	     ZTST_sects[clean]=1
 	   fi
-	   ZTST_prepclean 1
-	   ZTST_sects[clean]=1
+	   ZTST_skipok=
 	   ;;
     *) ZTST_testfailed "bad section name: $ZTST_cursect"
        ;;
   esac
 done
 
-print "$ZTST_testname: all tests successful."
+if [[ -n "$ZTST_unimplemented" ]]; then
+  print "$ZTST_testname: skipped ($ZTST_unimplemented)"
+elif (( ! $ZTST_testfailed )); then
+  print "$ZTST_testname: all tests successful."
+fi
 ZTST_cleanup
-exit 0
+exit $(( ZTST_testfailed ))
-- 
cgit 1.4.1