From 25413054c11fc4b67284d4d6db48272182da1d26 Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Sat, 8 Mar 2008 01:20:49 +0000 Subject: 24699: bug with ${(Q)...} on initial ">" bug with ${(z)...} on string with unterminated "(" --- ChangeLog | 8 ++++++++ Src/hist.c | 33 +++++++++++++++++++++++++++++---- Src/input.c | 12 ++++++++++++ Src/lex.c | 18 +++++++++++++++++- Test/D04parameter.ztst | 28 ++++++++++++++++++++++++++++ 5 files changed, 94 insertions(+), 5 deletions(-) diff --git a/ChangeLog b/ChangeLog index 10a7b8092..70ee0fedb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2008-03-08 Peter Stephenson + + * 24699: Src/hist.c, Src/input.c, Src/lex.c, + Test/D04parameter.ztst: fix two bugs in the hacked up parsing + of strings for parameter substitution: ${(Q)...} lost an initial + ">" and ${(z)...} on a string with an opening parenthesis but + no closing parenthesis appended an extra space. + 2008-03-07 Peter Stephenson * users/12699: Src/Zle/computil.c: don't complete normal diff --git a/Src/hist.c b/Src/hist.c index 1e4e6f3ba..806bd9be3 100644 --- a/Src/hist.c +++ b/Src/hist.c @@ -2456,7 +2456,7 @@ bufferwords(LinkList list, char *buf, int *index) int num = 0, cur = -1, got = 0, ne = noerrs; int owb = wb, owe = we, oadx = addedx, ozp = zleparse, onc = nocomments; int ona = noaliases, ocs = zlemetacs, oll = zlemetall; - char *p; + char *p, *addedspaceptr; if (!list) list = newlinklist(); @@ -2470,7 +2470,15 @@ bufferwords(LinkList list, char *buf, int *index) p = (char *) zhalloc(l + 2); memcpy(p, buf, l); - p[l] = ' '; + /* + * I'm sure this space is here for a reason, but it's + * a pain in the neck: when we get back a string that's + * not finished it's very hard to tell if a space at the + * end is this one or not. We use two tricks below to + * work around this. 
+ */ + addedspaceptr = p + l; + *addedspaceptr = ' '; p[l + 1] = '\0'; inpush(p, 0, NULL); zlemetall = strlen(p) ; @@ -2493,7 +2501,8 @@ bufferwords(LinkList list, char *buf, int *index) p = (char *) zhalloc(hptr - chline + ll + 2); memcpy(p, chline, hptr - chline); memcpy(p + (hptr - chline), linein, ll); - p[(hptr - chline) + ll] = ' '; + addedspaceptr = p + (hptr - chline) + ll; + *addedspaceptr = ' '; p[(hptr - chline) + zlemetall] = '\0'; inpush(p, 0, NULL); @@ -2506,7 +2515,8 @@ bufferwords(LinkList list, char *buf, int *index) } else { p = (char *) zhalloc(ll + 2); memcpy(p, linein, ll); - p[ll] = ' '; + addedspaceptr = p + ll; + *addedspaceptr = ' '; p[zlemetall] = '\0'; inpush(p, 0, NULL); } @@ -2526,6 +2536,21 @@ bufferwords(LinkList list, char *buf, int *index) break; if (tokstr && *tokstr) { untokenize((p = dupstring(tokstr))); + if (ingetptr() > addedspaceptr) { + /* + * Whoops, we've read past the space we added, probably + * because we were expecting a terminator but when + * it didn't turn up we shrugged our shoulders thinking + * it might as well be a complete string anyway. + * So remove the space. Cf. below for the case + * where the missing terminator caused a lex error. + * We use the same paranoid test. + */ + int plen = strlen(p); + if (plen && p[plen-1] == ' ' && + (plen == 1 || p[plen-2] != Meta)) + p[plen-1] = '\0'; + } addlinknode(list, p); num++; } else if (buf) { diff --git a/Src/input.c b/Src/input.c index 99db53e54..d6fd8d089 100644 --- a/Src/input.c +++ b/Src/input.c @@ -566,3 +566,15 @@ inpopalias(void) while (inbufflags & INP_ALIAS) inpoptop(); } + + +/* + * Get pointer to remaining string to read. + */ + +/**/ +char * +ingetptr(void) +{ + return inbufptr; +} diff --git a/Src/lex.c b/Src/lex.c index 4128f109a..739a6f391 100644 --- a/Src/lex.c +++ b/Src/lex.c @@ -914,6 +914,19 @@ gettok(void) return gettokstr(c, 0); } +/* + * Get the remains of a token string. This has two uses. 
+ * When called from gettok(), with sub = 0, we have already identified + * any interesting initial character and want to get the rest of + * what we now know is a string. However, the string may still include + * metacharacters and potentially substitutions. + * + * When called from parse_subst_string() with sub = 1, we are not + * fully parsing a command line, merely tokenizing a string. + * In this case we always add characters to the parsed string + * unless there is a parse error. + */ + /**/ static int gettokstr(int c, int sub) @@ -1134,7 +1147,10 @@ gettokstr(int c, int sub) if (e != '(') { hungetc(e); lexstop = 0; - goto brk; + if (in_brace_param || sub) + break; + else + goto brk; } add(Outang); if (skipcomm()) { diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst index 6775803a7..57b87d687 100644 --- a/Test/D04parameter.ztst +++ b/Test/D04parameter.ztst @@ -333,6 +333,34 @@ 0:${(Q)...} with handling of $'...' >XABY + # The following may look a bit random. + # For the split we are checking that anything that + # would normally be followed by a different word has + # an argument break after it and anything that doesn't doesn't. + # For the (Q) we are simply checking that nothing disappears + # in the parsing. + foo=' {six} (seven) >eight< }nine{ |forty-two| $many$ )ten( more' + array=(${(z)foo}) + print -l ${(Q)array} +0:${(z)...} and ${(Q)...} for some hard to parse cases +>< +>five +>> +>{six} +>( +>seven +>) +>> +>eight +>< +>}nine{ +>| +>forty-two +>| +>$many$ +>) +>ten( more + psvar=(dog) setopt promptsubst foo='It shouldn'\''t $(happen) to a %1v.' -- cgit 1.4.1