From 20607774dc14faaa514623ef2a2f666911aa8b66 Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Mon, 17 Dec 2007 17:11:29 +0000 Subject: 24275: fixes for multibyte characters on Solaris --- ChangeLog | 5 +++++ Src/Zle/zle_utils.c | 16 ++++++++++++++++ Src/builtin.c | 10 +++++++--- Test/D07multibyte.ztst | 21 +++++++++++++++------ 4 files changed, 43 insertions(+), 9 deletions(-) diff --git a/ChangeLog b/ChangeLog index a74daa4d6..11d8293fd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,10 @@ 2007-12-17 Peter Stephenson + * 24275: Src/builtin.c, Src/Zle/zle_utils.c, + Test/D07multibyte.ztst: Solaris returns the full character + length from mbrlen() etc. even if the call started in the + middle; bad characters are silently converted to a question mark. + * unposted: Config/version.mk: 4.3.4-dev-5. * unposted: Src/lex.c: minor typo diff --git a/Src/Zle/zle_utils.c b/Src/Zle/zle_utils.c index 6583ef503..a146b67c0 100644 --- a/Src/Zle/zle_utils.c +++ b/Src/Zle/zle_utils.c @@ -294,6 +294,16 @@ stringaszleline(char *instr, int incs, int *outll, int *outsz, int *outcs) * (certainly true for Unicode and unlikely to be false * in any non-pathological multibyte representation). */ cnt = 1; + } else if (cnt > ll) { + /* + * Some multibyte implementations return the + * full length of a previous incomplete character + * instead of the remaining length. + * This is paranoia: it only applies if we start + * midway through a multibyte character, which + * presumably can't happen. + */ + cnt = ll; } if (outcs) { @@ -843,6 +853,12 @@ showmsg(char const *msg) cnt = 1; /* FALL THROUGH */ default: + /* + * Paranoia: only needed if we start in the middle + * of a multibyte string and only in some implementations. + */ + if (cnt > ulen) + cnt = ulen; n = wcs_nicechar(c, &width, NULL); break; } diff --git a/Src/builtin.c b/Src/builtin.c index 7bd4c6d83..8ded1c131 100644 --- a/Src/builtin.c +++ b/Src/builtin.c @@ -4927,7 +4927,7 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func)) break; } *bptr = (char) val; -#ifdef MULTIBYTE_SUPPORT +#ifdef MULTIBYTE_SUPPORT if (isset(MULTIBYTE)) { ret = mbrlen(bptr++, 1, &mbs); if (ret == MB_INVALID) @@ -4954,8 +4954,8 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func)) eof = 1; break; } - -#ifdef MULTIBYTE_SUPPORT + +#ifdef MULTIBYTE_SUPPORT if (isset(MULTIBYTE)) { while (val > 0) { ret = mbrlen(bptr, val, &mbs); @@ -4970,6 +4970,10 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func)) } else if (ret == 0) /* handle null as normal char */ ret = 1; + else if (ret > val) { + /* Some mbrlen()s return the full char len */ + ret = val; + } nchars--; val -= ret; bptr += ret; diff --git a/Test/D07multibyte.ztst b/Test/D07multibyte.ztst index 993e85b9b..2ebea2e10 100644 --- a/Test/D07multibyte.ztst +++ b/Test/D07multibyte.ztst @@ -388,9 +388,18 @@ # This also isn't strictly multibyte and is here to reduce the # likelihood of a "can't do character set conversion" error. testfn() { (LC_ALL=C; print $'\u00e9') } - repeat 4 testfn -1:error handling in Unicode quoting -?testfn: character not in range -?testfn: character not in range -?testfn: character not in range -?testfn: character not in range + repeat 4 testfn 2>&1 | while read line; do + if [[ $line = *"character not in range"* ]]; then + print OK + elif [[ $line = "?" ]]; then + print OK + else + print Failed: no error message and no question mark + fi + done + true +0:error handling in Unicode quoting +>OK +>OK +>OK +>OK -- cgit 1.4.1