From 25413054c11fc4b67284d4d6db48272182da1d26 Mon Sep 17 00:00:00 2001
From: Peter Stephenson <pws@users.sourceforge.net>
Date: Sat, 8 Mar 2008 01:20:49 +0000
Subject: 24699: bug with ${(Q)...} on initial ">"; bug with ${(z)...} on string
 with unterminated "("

---
 ChangeLog              |  8 ++++++++
 Src/hist.c             | 33 +++++++++++++++++++++++++++++----
 Src/input.c            | 12 ++++++++++++
 Src/lex.c              | 18 +++++++++++++++++-
 Test/D04parameter.ztst | 28 ++++++++++++++++++++++++++++
 5 files changed, 94 insertions(+), 5 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 10a7b8092..70ee0fedb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2008-03-08  Peter Stephenson  <p.w.stephenson@ntlworld.com>
+
+	* 24699: Src/hist.c, Src/input.c, Src/lex.c,
+	Test/D04parameter.ztst: fix two bugs in the hacked up parsing
+	of strings for parameter substitution: ${(Q)...} lost an initial
+	">" and ${(z)...} on a string with an opening parenthesis but
+	no closing parenthesis appended an extra space.
+
 2008-03-07  Peter Stephenson  <p.w.stephenson@ntlworld.com>
 
 	* users/12699: Src/Zle/computil.c: don't complete normal
diff --git a/Src/hist.c b/Src/hist.c
index 1e4e6f3ba..806bd9be3 100644
--- a/Src/hist.c
+++ b/Src/hist.c
@@ -2456,7 +2456,7 @@ bufferwords(LinkList list, char *buf, int *index)
     int num = 0, cur = -1, got = 0, ne = noerrs;
     int owb = wb, owe = we, oadx = addedx, ozp = zleparse, onc = nocomments;
     int ona = noaliases, ocs = zlemetacs, oll = zlemetall;
-    char *p;
+    char *p, *addedspaceptr;
 
     if (!list)
 	list = newlinklist();
@@ -2470,7 +2470,15 @@ bufferwords(LinkList list, char *buf, int *index)
 
 	p = (char *) zhalloc(l + 2);
 	memcpy(p, buf, l);
-	p[l] = ' ';
+	/*
+	 * I'm sure this space is here for a reason, but it's
+	 * a pain in the neck:  when we get back a string that's
+	 * not finished it's very hard to tell if a space at the
+	 * end is this one or not.  We use two tricks below to
+	 * work around this.
+	 */
+	addedspaceptr = p + l;
+	*addedspaceptr = ' ';
 	p[l + 1] = '\0';
 	inpush(p, 0, NULL);
 	zlemetall = strlen(p) ;
@@ -2493,7 +2501,8 @@ bufferwords(LinkList list, char *buf, int *index)
 	    p = (char *) zhalloc(hptr - chline + ll + 2);
 	    memcpy(p, chline, hptr - chline);
 	    memcpy(p + (hptr - chline), linein, ll);
-	    p[(hptr - chline) + ll] = ' ';
+	    addedspaceptr = p + (hptr - chline) + ll;
+	    *addedspaceptr = ' ';
 	    p[(hptr - chline) + zlemetall] = '\0';
 	    inpush(p, 0, NULL);
 
@@ -2506,7 +2515,8 @@ bufferwords(LinkList list, char *buf, int *index)
 	} else {
 	    p = (char *) zhalloc(ll + 2);
 	    memcpy(p, linein, ll);
-	    p[ll] = ' ';
+	    addedspaceptr = p + ll;
+	    *addedspaceptr = ' ';
 	    p[zlemetall] = '\0';
 	    inpush(p, 0, NULL);
 	}
@@ -2526,6 +2536,21 @@ bufferwords(LinkList list, char *buf, int *index)
 	    break;
 	if (tokstr && *tokstr) {
 	    untokenize((p = dupstring(tokstr)));
+	    if (ingetptr() > addedspaceptr) {
+		/*
+		 * Whoops, we've read past the space we added, probably
+		 * because we were expecting a terminator but when
+		 * it didn't turn up we shrugged our shoulders thinking
+		 * it might as well be a complete string anyway.
+		 * So remove the space.  Cf. below for the case
+		 * where the missing terminator caused a lex error.
+		 * We use the same paranoid test.
+		 */
+		int plen = strlen(p);
+		if (plen && p[plen-1] == ' ' &&
+		    (plen == 1 || p[plen-2] != Meta))
+		    p[plen-1] = '\0';
+	    }
 	    addlinknode(list, p);
 	    num++;
 	} else if (buf) {
diff --git a/Src/input.c b/Src/input.c
index 99db53e54..d6fd8d089 100644
--- a/Src/input.c
+++ b/Src/input.c
@@ -566,3 +566,15 @@ inpopalias(void)
     while (inbufflags & INP_ALIAS)
 	inpoptop();
 }
+
+
+/*
+ * Get pointer to remaining string to read.
+ */
+
+/**/
+char *
+ingetptr(void)
+{
+    return inbufptr;
+}
diff --git a/Src/lex.c b/Src/lex.c
index 4128f109a..739a6f391 100644
--- a/Src/lex.c
+++ b/Src/lex.c
@@ -914,6 +914,19 @@ gettok(void)
     return gettokstr(c, 0);
 }
 
+/*
+ * Get the remains of a token string.  This has two uses.
+ * When called from gettok(), with sub = 0, we have already identified
+ * any interesting initial character and want to get the rest of
+ * what we now know is a string.  However, the string may still include
+ * metacharacters and potentially substitutions.
+ *
+ * When called from parse_subst_string() with sub = 1, we are not
+ * fully parsing a command line, merely tokenizing a string.
+ * In this case we always add characters to the parsed string
+ * unless there is a parse error.
+ */
+
 /**/
 static int
 gettokstr(int c, int sub)
@@ -1134,7 +1147,10 @@ gettokstr(int c, int sub)
 	    if (e != '(') {
 		hungetc(e);
 		lexstop = 0;
-		goto brk;
+		if (in_brace_param || sub)
+		    break;
+		else
+		    goto brk;
 	    }
 	    add(Outang);
 	    if (skipcomm()) {
diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst
index 6775803a7..57b87d687 100644
--- a/Test/D04parameter.ztst
+++ b/Test/D04parameter.ztst
@@ -333,6 +333,34 @@
 0:${(Q)...} with handling of $'...'
 >XABY
 
+  # The following may look a bit random.
+  # For the split we are checking that anything that
+  # would normally be followed by a different word has
+  # an argument break after it and anything that doesn't doesn't.
+  # For the (Q) we are simply checking that nothing disappears
+  # in the parsing.
+  foo='<five> {six} (seven) >eight< }nine{ |forty-two| $many$ )ten( more'
+  array=(${(z)foo})
+  print -l ${(Q)array}
+0:${(z)...} and ${(Q)...} for some hard to parse cases
+><
+>five
+>>
+>{six}
+>(
+>seven
+>)
+>>
+>eight
+><
+>}nine{
+>|
+>forty-two
+>|
+>$many$
+>)
+>ten( more
+
   psvar=(dog)
   setopt promptsubst
   foo='It shouldn'\''t $(happen) to a %1v.'
-- 
cgit 1.4.1