From 20607774dc14faaa514623ef2a2f666911aa8b66 Mon Sep 17 00:00:00 2001
From: Peter Stephenson <pws@users.sourceforge.net>
Date: Mon, 17 Dec 2007 17:11:29 +0000
Subject: 24275: fixes for multibyte characters on Solaris

---
 ChangeLog              |  5 +++++
 Src/Zle/zle_utils.c    | 16 ++++++++++++++++
 Src/builtin.c          | 10 +++++++---
 Test/D07multibyte.ztst | 21 +++++++++++++++------
 4 files changed, 43 insertions(+), 9 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index a74daa4d6..11d8293fd 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
 2007-12-17  Peter Stephenson  <pws@csr.com>
 
+	* 24275: Src/builtin.c, Src/Zle/zle_utils.c,
+	Test/D07multibyte.ztst: Solaris returns the full character length
+	from mbrlen() etc. even if the call started in mid-character;
+	bad characters are silently converted to a question mark.
+
 	* unposted: Config/version.mk: 4.3.4-dev-5.
 
 	* unposted: Src/lex.c: minor typo
diff --git a/Src/Zle/zle_utils.c b/Src/Zle/zle_utils.c
index 6583ef503..a146b67c0 100644
--- a/Src/Zle/zle_utils.c
+++ b/Src/Zle/zle_utils.c
@@ -294,6 +294,16 @@ stringaszleline(char *instr, int incs, int *outll, int *outsz, int *outcs)
 		 * (certainly true for Unicode and unlikely to be false
 		 * in any non-pathological multibyte representation). */
 		cnt = 1;
+	    } else if (cnt > ll) {
+		/*
+		 * Some multibyte implementations return the
+		 * full length of a previous incomplete character
+		 * instead of the remaining length.
+		 * This is paranoia: it only applies if we start
+		 * midway through a multibyte character, which
+		 * presumably can't happen.
+		 */
+		cnt = ll;
 	    }
 
 	    if (outcs) {
@@ -843,6 +853,12 @@ showmsg(char const *msg)
 		cnt = 1;
 		/* FALL THROUGH */
 	    default:
+		/*
+		 * Paranoia: only needed if we start in the middle
+		 * of a multibyte character and only in some implementations.
+		 */
+		if (cnt > ulen)
+		    cnt = ulen;
 		n = wcs_nicechar(c, &width, NULL);
 		break;
 	    }
diff --git a/Src/builtin.c b/Src/builtin.c
index 7bd4c6d83..8ded1c131 100644
--- a/Src/builtin.c
+++ b/Src/builtin.c
@@ -4927,7 +4927,7 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
 		    break;
 		}
 		*bptr = (char) val;
-#ifdef MULTIBYTE_SUPPORT	
+#ifdef MULTIBYTE_SUPPORT
 		if (isset(MULTIBYTE)) {
 		    ret = mbrlen(bptr++, 1, &mbs);
 		    if (ret == MB_INVALID)
@@ -4954,8 +4954,8 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
 		    eof = 1;
 		    break;
 		}
-	    
-#ifdef MULTIBYTE_SUPPORT	
+
+#ifdef MULTIBYTE_SUPPORT
 		if (isset(MULTIBYTE)) {
 		    while (val > 0) {
 			ret = mbrlen(bptr, val, &mbs);
@@ -4970,6 +4970,10 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
 			    }
 			    else if (ret == 0) /* handle null as normal char */
 				ret = 1;
+			    else if (ret > val) {
+				/* Some mbrlen()s return the full char len */
+				ret = val;
+			    }
 			    nchars--;
 			    val -= ret;
 			    bptr += ret;
diff --git a/Test/D07multibyte.ztst b/Test/D07multibyte.ztst
index 993e85b9b..2ebea2e10 100644
--- a/Test/D07multibyte.ztst
+++ b/Test/D07multibyte.ztst
@@ -388,9 +388,18 @@
 # This also isn't strictly multibyte and is here to reduce the
 # likelihood of a "can't do character set conversion" error.
   testfn() { (LC_ALL=C; print $'\u00e9') }
-  repeat 4 testfn
-1:error handling in Unicode quoting
-?testfn: character not in range
-?testfn: character not in range
-?testfn: character not in range
-?testfn: character not in range
+  repeat 4 testfn 2>&1 | while read line; do
+    if [[ $line = *"character not in range"* ]]; then
+      print OK
+    elif [[ $line = "?" ]]; then
+      print OK
+    else
+      print Failed: no error message and no question mark
+    fi
+  done
+  true
+0:error handling in Unicode quoting
+>OK
+>OK
+>OK
+>OK
-- 
cgit 1.4.1