diff options
-rw-r--r--  ChangeLog               |   6
-rw-r--r--  Src/utils.c             | 207
-rw-r--r--  Test/A03quoting.ztst    |  10
-rw-r--r--  Test/D07multibyte.ztst  |  10
4 files changed, 160 insertions, 73 deletions
diff --git a/ChangeLog b/ChangeLog index cbe2ac8ce..b1761d64f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2007-11-06 Peter Stephenson <p.w.stephenson@ntlworld.com> + + * 24070: Src/utils.c, Test/A03quoting.ztst, + Test/D07multibyte.ztst: Some fixes for \u handling in + getkeystring(). + 2007-11-06 Peter Stephenson <pws@csr.com> * 24069: Doc/Zsh/mod_curses.yo, Src/Modules/curses.c: add diff --git a/Src/utils.c b/Src/utils.c index 9825dfbc2..60aa730e5 100644 --- a/Src/utils.c +++ b/Src/utils.c @@ -4578,6 +4578,31 @@ ucs4toutf8(char *dest, unsigned int wval) } #endif + +/* + * The following only occurs once or twice in the code, but in different + * places depending how character set conversion is implemented. + */ +#define CHARSET_FAILED() \ + if (how & GETKEY_DOLLAR_QUOTE) { \ + while ((*tdest++ = *++s)) { \ + if (how & GETKEY_UPDATE_OFFSET) { \ + if (s - sstart > *misc) \ + (*misc)++; \ + } \ + if (*s == Snull) { \ + *len = (s - sstart) + 1; \ + *tdest = '\0'; \ + return buf; \ + } \ + } \ + *len = tdest - buf; \ + return buf; \ + } \ + *t = '\0'; \ + *len = t - buf; \ + return buf + /* * Decode a key string, turning it into the literal characters. * The value returned is a newly allocated string from the heap. 
@@ -4622,7 +4647,7 @@ mod_export char * getkeystring(char *s, int *len, int how, int *misc) { char *buf, tmp[1]; - char *t, *tdest = NULL, *u = NULL, *sstart = s; + char *t, *tdest = NULL, *u = NULL, *sstart = s, *tbuf; char svchar = '\0'; int meta = 0, control = 0; int i; @@ -4642,38 +4667,69 @@ getkeystring(char *s, int *len, int how, int *misc) #endif DPUTS((how & GETKEY_UPDATE_OFFSET) && - (how & ~(GETKEY_DOLLAR_QUOTE|GETKEY_UPDATE_OFFSET)), + (how & ~(GETKEYS_DOLLARS_QUOTE|GETKEY_UPDATE_OFFSET)), "BUG: offset updating in getkeystring only supported with $'."); + DPUTS((how & (GETKEY_DOLLAR_QUOTE|GETKEY_SINGLE_CHAR)) == + (GETKEY_DOLLAR_QUOTE|GETKEY_SINGLE_CHAR), + "BUG: incompatible options in getkeystring"); if (how & GETKEY_SINGLE_CHAR) t = buf = tmp; - else - t = buf = zhalloc(strlen(s) + 1); - if (how & GETKEY_DOLLAR_QUOTE) { + else { + /* Length including terminating NULL */ + int maxlen = 1; /* - * TODO: we're not necessarily guaranteed the output string will + * We're not necessarily guaranteed the output string will * be no longer than the input with \u and \U when output - * characters need to be metafied: should check the maximum - * length. - * - * We're going to unmetafy into the original string, but - * to get a proper metafied input we're going to metafy - * into an allocated buffer. This is necessary if we have - * \u and \U's with multiple metafied bytes. We can't - * simply remetafy the entire string because there may - * be tokens (indeed, we know there are lexical nulls floating - * around), so we have to be aware character by character - * what we are converting. + * characters need to be metafied. As this is the only + * case where the string can get longer (?I think), + * include it in the allocation length here but don't + * bother taking account of other factors. 
*/ - tdest = t; - t = s; + for (t = s; *t; t++) { + if (*t == '\\') { + if (!t[1]) { + maxlen++; + break; + } + if (t[1] == 'u' || t[1] == 'U') + maxlen += MB_CUR_MAX * 2; + else + maxlen += 2; + /* skip the backslash and the following character */ + t++; + } else + maxlen++; + } + if (how & GETKEY_DOLLAR_QUOTE) { + /* + * We're going to unmetafy into a new string, but + * to get a proper metafied input we're going to metafy + * into an intermediate buffer. This is necessary if we have + * \u and \U's with multiple metafied bytes. We can't + * simply remetafy the entire string because there may + * be tokens (indeed, we know there are lexical nulls floating + * around), so we have to be aware character by character + * what we are converting. + * + * In this case, buf is the final buffer (as usual), + * but t points into a temporary buffer that just has + * to be long enough to hold the result of one escape + * code transformation. We count this is a full multibyte + * character (MB_CUR_MAX) with every character metafied + * (*2) plus a little bit of fuzz (for e.g. the odd backslash). 
+ */ + buf = tdest = zhalloc(maxlen); + t = tbuf = zhalloc(MB_CUR_MAX * 3 + 1); + } else { + t = buf = zhalloc(maxlen); + } } for (; *s; s++) { - char *torig = t; if (*s == '\\' && s[1]) { int miscadded; - if ((how & GETKEY_UPDATE_OFFSET) && s - sstart > *misc) { - (*misc)++; + if ((how & GETKEY_UPDATE_OFFSET) && s - sstart < *misc) { + (*misc)--; miscadded = 1; } else miscadded = 0; @@ -4707,7 +4763,7 @@ getkeystring(char *s, int *len, int how, int *misc) if (!(how & GETKEY_EMACS)) { *t++ = '\\', s--; if (miscadded) - (*misc)--; + (*misc)++; continue; } /* FALL THROUGH */ @@ -4715,30 +4771,32 @@ getkeystring(char *s, int *len, int how, int *misc) *t++ = '\033'; break; case 'M': + /* HERE: GETKEY_UPDATE_OFFSET */ if (how & GETKEY_EMACS) { if (s[1] == '-') s++; meta = 1 + control; /* preserve the order of ^ and meta */ } else { if (miscadded) - (*misc)--; + (*misc)++; *t++ = '\\', s--; } continue; case 'C': + /* HERE: GETKEY_UPDATE_OFFSET */ if (how & GETKEY_EMACS) { if (s[1] == '-') s++; control = 1; } else { if (miscadded) - (*misc)--; + (*misc)++; *t++ = '\\', s--; } continue; case Meta: if (miscadded) - (*misc)--; + (*misc)++; *t++ = '\\', s--; break; case '-': @@ -4755,15 +4813,16 @@ getkeystring(char *s, int *len, int how, int *misc) return buf; } goto def; - case 'u': - if ((how & GETKEY_UPDATE_OFFSET) && s - sstart > *misc) - (*misc) += 4; case 'U': - if ((how & GETKEY_UPDATE_OFFSET) && s - sstart > *misc) { - (*misc) += 6; + if ((how & GETKEY_UPDATE_OFFSET) && s - sstart < *misc) + (*misc) -= 4; + /* FALLTHROUGH */ + case 'u': + if ((how & GETKEY_UPDATE_OFFSET) && s - sstart < *misc) { + (*misc) -= 6; /* HERE don't really believe this */ /* * We've now adjusted the offset for all the input - * characters, so we need to subtract for each + * characters, so we need to add for each * byte of output below. 
*/ } @@ -4787,31 +4846,18 @@ getkeystring(char *s, int *len, int how, int *misc) count = wctomb(t, (wchar_t)wval); if (count == -1) { zerr("character not in range"); - if (how & GETKEY_DOLLAR_QUOTE) { - /* HERE new convention */ - for (u = t; (*u++ = *++s);) { - if ((how & GETKEY_UPDATE_OFFSET) && - s - sstart > *misc) - (*misc)++; - } - return t; - } - *t = '\0'; - *len = t - buf; - return buf; + CHARSET_FAILED(); } - if ((how & GETKEY_UPDATE_OFFSET) && s - sstart > *misc) + if ((how & GETKEY_UPDATE_OFFSET) && s - sstart < *misc) (*misc) += count; t += count; - continue; # else # if defined(HAVE_NL_LANGINFO) && defined(CODESET) if (!strcmp(nl_langinfo(CODESET), "UTF-8")) { count = ucs4toutf8(t, wval); t += count; - if ((how & GETKEY_UPDATE_OFFSET) && s - sstart > *misc) + if ((how & GETKEY_UPDATE_OFFSET) && s - sstart < *misc) (*misc) += count; - continue; } else { # ifdef HAVE_ICONV ICONV_CONST char *inptr = inbuf; @@ -4826,46 +4872,55 @@ getkeystring(char *s, int *len, int how, int *misc) cd = iconv_open(nl_langinfo(CODESET), "UCS-4BE"); if (cd == (iconv_t)-1) { zerr("cannot do charset conversion"); - if (how & GETKEY_DOLLAR_QUOTE) { - /* HERE: new convention */ - for (u = t; (*u++ = *++s);) { - if ((how & GETKEY_UPDATE_OFFSET) && - s - sstart > *misc) - (*misc)++; - } - return t; - } - *t = '\0'; - *len = t - buf; - return buf; + CHARSET_FAILED(); } count = iconv(cd, &inptr, &inbytes, &t, &outbytes); iconv_close(cd); if (count == (size_t)-1) { zerr("character not in range"); - *t = '\0'; - *len = t - buf; - return buf; + CHARSET_FAILED(); } - if ((how & GETKEY_UPDATE_OFFSET) && s - sstart > *misc) + if ((how & GETKEY_UPDATE_OFFSET) && s - sstart < *misc) (*misc) += count; - continue; # else zerr("cannot do charset conversion"); - *t = '\0'; - *len = t - buf; - return buf; + CHARSET_FAILED(); # endif } # else zerr("cannot do charset conversion"); - *t = '\0'; - *len = t - buf; - return buf; + CHARSET_FAILED(); # endif # endif + if (how & GETKEY_DOLLAR_QUOTE) { + 
char *t2; + for (t2 = tbuf; t2 < t; t2++) { + if (imeta(*t2)) { + *tdest++ = Meta; + *tdest++ = *t2 ^ 32; + } else + *tdest++ = *t2; + } + /* reset temporary buffer after handling */ + t = tbuf; + } + continue; + case '\'': + case '\\': + if (how & GETKEY_DOLLAR_QUOTE) { + /* + * Usually \' and \\ will have the initial + * \ turned into a Bnull, however that's not + * necessarily the case when called from + * completion. + */ + *t++ = *s; + break; + } + /* FALLTHROUGH */ default: def: + /* HERE: GETKEY_UPDATE_OFFSET? */ if ((idigit(*s) && *s < '8') || *s == 'x') { if (!(how & GETKEY_OCTAL_ESC)) { if (*s == '0') @@ -4890,7 +4945,7 @@ getkeystring(char *s, int *len, int how, int *misc) } else { if (!(how & GETKEY_EMACS) && *s != '\\') { if (miscadded) - (*misc)--; + (*misc)++; *t++ = '\\'; } *t++ = *s; @@ -4961,6 +5016,8 @@ getkeystring(char *s, int *len, int how, int *misc) */ *tdest++ = *++s; } + /* reset temporary buffer, now handled */ + t = tbuf; continue; } else *t++ = *s; @@ -4984,13 +5041,17 @@ getkeystring(char *s, int *len, int how, int *misc) } if (how & GETKEY_DOLLAR_QUOTE) { char *t2; - for (t2 = torig; t2 < t; t2++) { + for (t2 = tbuf; t2 < t; t2++) { if (imeta(*t2)) { *tdest++ = Meta; *tdest++ = *t2 ^ 32; } else *tdest++ = *t2; } + /* + * Reset use of temporary buffer. 
+ */ + t = tbuf; } if ((how & GETKEY_SINGLE_CHAR) && t != tmp) { *misc = STOUC(tmp[0]); diff --git a/Test/A03quoting.ztst b/Test/A03quoting.ztst index 54005ebe3..f0f86e0b2 100644 --- a/Test/A03quoting.ztst +++ b/Test/A03quoting.ztst @@ -42,3 +42,13 @@ unsetopt rcquotes 0:Yes RC_QUOTES with single quotes >' + + print '<\u0041>' + printf '%s\n' $'<\u0042>' + print '<\u0043>' + printf '%s\n' $'<\u0044>' +0:\u in both print and printf +><A> +><B> +><C> +><D> diff --git a/Test/D07multibyte.ztst b/Test/D07multibyte.ztst index 7348bbc2e..993e85b9b 100644 --- a/Test/D07multibyte.ztst +++ b/Test/D07multibyte.ztst @@ -384,3 +384,13 @@ print -r ${(q)foo} 0:Backslash-quoting of unprintable/invalid characters uses $'...' >X$'\300'Y$'\a'Z$'\177'T + +# This also isn't strictly multibyte and is here to reduce the +# likelihood of a "can't do character set conversion" error. + testfn() { (LC_ALL=C; print $'\u00e9') } + repeat 4 testfn +1:error handling in Unicode quoting +?testfn: character not in range +?testfn: character not in range +?testfn: character not in range +?testfn: character not in range |