24856: add IS_COMBINING() and IS_BASECHAR() and widen tests

author: Peter Stephenson <pws@users.sourceforge.net> 2008-04-21 11:49:55 +0000
committer: Peter Stephenson <pws@users.sourceforge.net> 2008-04-21 11:49:55 +0000
commit: 5a0c547e919bded1d4966213beb9a3ae89b08698 (patch)
tree: 2dcda2ad23f564cb78685345dc86336d2d87a584
parent: 500c402380849d40d6e0eabd6cc063add47a57af (diff)
download: zsh-5a0c547e919bded1d4966213beb9a3ae89b08698.tar.gz
zsh-5a0c547e919bded1d4966213beb9a3ae89b08698.tar.xz
zsh-5a0c547e919bded1d4966213beb9a3ae89b08698.zip
5 files changed, 50 insertions, 26 deletions
diff --git a/ChangeLog b/ChangeLog
index a53b11dd1..8d0711630 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
 2008-04-21  Peter Stephenson  <pws@csr.com>
 
+	* 24856: Src/utils.c, Src/zsh.h, Src/Zle/zle_move.c,
+	Src/Zle/zle_refresh.c: use IS_COMBINING() and IS_BASECHAR()
+	tests for combining characters.  Widen definitions of characters
+	to the least restrictive ones we think are likely to work.
+
 	* 24855: Src/Zle/zle_utils.c: tests for combining char aligments
 	should only be on metafied command line.
 
diff --git a/Src/Zle/zle_move.c b/Src/Zle/zle_move.c
index 2db703051..9b35660f1 100644
--- a/Src/Zle/zle_move.c
+++ b/Src/Zle/zle_move.c
@@ -54,22 +54,20 @@ alignmultiwordleft(int *pos, int setpos)
     if (!isset(COMBININGCHARS) || loccs == zlell || loccs == 0)
 	return 0;
 
-    /* need to be on zero-width punctuation character */
-    if (!iswpunct(zleline[loccs]) || wcwidth(zleline[loccs]) != 0)
+    /* need to be on combining character */
+    if (!IS_COMBINING(zleline[loccs]))
 	 return 0;
 
     /* yes, go left */
     loccs--;
 
     for (;;) {
-	/* second test here is paranoia */
-	if (iswalnum(zleline[loccs]) && wcwidth(zleline[loccs]) > 0) {
+	if (IS_BASECHAR(zleline[loccs])) {
 	    /* found start position */
 	    if (setpos)
 		*pos = loccs;
 	    return 1;
-	} else if (!iswpunct(zleline[loccs]) ||
-		   wcwidth(zleline[loccs]) != 0) {
+	} else if (!IS_COMBINING(zleline[loccs])) {
 	    /* no go */
 	    return 0;
 	}
@@ -103,7 +101,7 @@ alignmultiwordright(int *pos, int setpos)
 
     while (loccs < zlell) {
 	/* Anything other than a combining char will do here */
-	if (!iswpunct(zleline[loccs]) || wcwidth(zleline[loccs]) != 0) {
+	if (!IS_COMBINING(zleline[loccs])) {
 	    if (setpos)
 		*pos = loccs;
 	    return 1;
@@ -221,16 +219,14 @@ backwardmetafiedchar(char *start, char *endptr, convchar_t *retchr)
 		    *retchr = wc;
 		return ptr;
 	    }
- 	    /* HERE: test for combining char, fix when test changes */
-	    if (!iswpunct(wc) || wcwidth(wc) != 0) {
+	    if (!IS_COMBINING(wc)) {
 		/* not a combining character... */
 		if (last) {
 		    /*
 		     * ... but we were looking for a suitable base character,
 		     * test it.
 		     */
-		    /* HERE this test will change too */
-		    if (iwsalnum(wc) && wcwidth(wc) > 0) {
+		    if (IS_BASECHAR(wc)) {
 			/*
 			 * Yes, this will do.
 			 */
diff --git a/Src/Zle/zle_refresh.c b/Src/Zle/zle_refresh.c
index b9e5723c9..a9bc017f8 100644
--- a/Src/Zle/zle_refresh.c
+++ b/Src/Zle/zle_refresh.c
@@ -1245,13 +1245,12 @@ zrefresh(void)
 		    rpms.nvcs = rpms.s - nbuf[rpms.nvln = rpms.ln];
 		}
 	    }
-	    if (isset(COMBININGCHARS) && iswalnum(*t)) {
+	    if (isset(COMBININGCHARS) && IS_BASECHAR(*t)) {
 		/*
-		 * Look for combining characters:  trailing punctuation
-		 * characters with printing width zero.
+		 * Look for combining characters.
 		 */
 		for (ichars = 1; tmppos + ichars < tmpll; ichars++) {
-		    if (!iswpunct(t[ichars]) || wcwidth(t[ichars]) != 0)
+		    if (!IS_COMBINING(t[ichars]))
 			break;
 		}
 	    } else
@@ -2267,9 +2266,8 @@ singlerefresh(ZLE_STRING_T tmpline, int tmpll, int tmpcs)
 #ifdef MULTIBYTE_SUPPORT
 	else if (iswprint(tmpline[t0]) && (width = wcwidth(tmpline[t0]) > 0)) {
 	    vsiz += width;
-	    if (isset(COMBININGCHARS) && iswalnum(tmpline[t0])) {
-		while (t0 < tmpll-1 && iswpunct(tmpline[t0+1]) &&
-		       wcwidth(tmpline[t0+1]) == 0)
+	    if (isset(COMBININGCHARS) && IS_BASECHAR(tmpline[t0])) {
+		while (t0 < tmpll-1 && IS_COMBINING(tmpline[t0+1]))
 		    t0++;
 	    }
 	}
@@ -2344,14 +2342,12 @@ singlerefresh(ZLE_STRING_T tmpline, int tmpll, int tmpcs)
 	} else if (iswprint(tmpline[t0]) &&
 		   (width = wcwidth(tmpline[t0])) > 0) {
 	    int ichars;
-	    if (isset(COMBININGCHARS) && iswalnum(tmpline[t0])) {
+	    if (isset(COMBININGCHARS) && IS_BASECHAR(tmpline[t0])) {
 		/*
-		 * Look for combining characters:  trailing printable
-		 * characters with printing width zero.
+		 * Look for combining characters.
 		 */
 		for (ichars = 1; t0 + ichars < tmpll; ichars++) {
-		    if (!iswpunct(tmpline[t0+ichars]) ||
-			wcwidth(tmpline[t0+ichars]) != 0)
+		    if (!IS_COMBINING(tmpline[t0+ichars]))
 			break;
 		}
 	    } else
diff --git a/Src/utils.c b/Src/utils.c
index 59a496fcf..24e709c24 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -3088,7 +3088,7 @@ wcsitype(wchar_t c, int itype)
 	     * logically they are still part of the word, even if they
 	     * don't get displayed properly, so always do this.
 	     */
-	    if (iswpunct(c) && wcwidth(c) == 0)
+	    if (IS_COMBINING(c))
 		return 1;
 	    return !!wmemchr(wordchars_wide.chars, c, wordchars_wide.len);
 
diff --git a/Src/zsh.h b/Src/zsh.h
index f91d27680..eaeacad8e 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -2281,7 +2281,34 @@ typedef wint_t convchar_t;
 #define ZWC(c)	L ## c
 #define ZWS(s)	L ## s
 
-#else
+/*
+ * Test for a combining character.
+ *
+ * wc is assumed to be a wchar_t (i.e. we don't need zwcwidth).
+ *
+ * This may need to be more careful if we import a wcwidth() for
+ * compatibility to try to avoid clashes with the system library.
+ *
+ * Pedantic note: in Unicode, a combining character need not be
+ * zero width.  However, we are concerned here about display;
+ * we simply need to know whether the character will be displayed
+ * on top of another one.  We use "combining character" in this
+ * sense throughout the shell.  I am not aware of a way of
+ * detecting the Unicode trait in standard libraries.
+ */
+#define IS_COMBINING(wc)	(wcwidth(wc) == 0)
+/*
+ * Test for the base of a combining character.
+ *
+ * We assume a combining character can be successfully displayed with
+ * any non-space printable character, which is what a graphic character
+ * is, as long as it has non-zero width.  We need to avoid all forms of
+ * space because the shell will split words on any whitespace.
+ */
+#define IS_BASECHAR(wc)		(iswgraph(wc) && wcwidth(wc) > 0)
+
+#else /* not MULTIBYTE_SUPPORT */
+
 #define MB_METACHARINIT()
 typedef int convchar_t;
 #define MB_METACHARLENCONV(str, cp)	metacharlenconv((str), (cp))
@@ -2296,4 +2323,4 @@ typedef int convchar_t;
 #define ZWC(c)	c
 #define ZWS(s)	s
 
-#endif
+#endif /* MULTIBYTE_SUPPORT */
author	Peter Stephenson <pws@users.sourceforge.net>	2008-04-21 11:49:55 +0000
committer	Peter Stephenson <pws@users.sourceforge.net>	2008-04-21 11:49:55 +0000
commit	5a0c547e919bded1d4966213beb9a3ae89b08698 (patch)
tree	2dcda2ad23f564cb78685345dc86336d2d87a584
parent	500c402380849d40d6e0eabd6cc063add47a57af (diff)
download	zsh-5a0c547e919bded1d4966213beb9a3ae89b08698.tar.gz zsh-5a0c547e919bded1d4966213beb9a3ae89b08698.tar.xz zsh-5a0c547e919bded1d4966213beb9a3ae89b08698.zip