about summary refs log tree commit diff
path: root/Src
diff options
context:
space:
mode:
authorPeter Stephenson <pws@users.sourceforge.net>2006-09-13 20:55:29 +0000
committerPeter Stephenson <pws@users.sourceforge.net>2006-09-13 20:55:29 +0000
commita242b1eb35863b73cbc63699fafe920e8b92c858 (patch)
tree141db2c3c4a20d1a44d7fe357a39d0ba4aab9d4f /Src
parentefd061cdc9bdc0ba692387ec25eb6d01616d0425 (diff)
downloadzsh-a242b1eb35863b73cbc63699fafe920e8b92c858.tar.gz
zsh-a242b1eb35863b73cbc63699fafe920e8b92c858.tar.xz
zsh-a242b1eb35863b73cbc63699fafe920e8b92c858.zip
22705: make ${(l...)...} and ${(r...)...} handle multibyte characters
Diffstat (limited to 'Src')
-rw-r--r--Src/prompt.c18
-rw-r--r--Src/subst.c413
-rw-r--r--Src/utils.c88
-rw-r--r--Src/zsh.h2
4 files changed, 394 insertions, 127 deletions
diff --git a/Src/prompt.c b/Src/prompt.c
index 21dff16e0..974f70e40 100644
--- a/Src/prompt.c
+++ b/Src/prompt.c
@@ -1058,12 +1058,7 @@ prompttrunc(int arg, int truncchar, int doprint, int endchar)
 	    int twidth, maxwidth;
 	    int ntrunc = strlen(t);
 
-#ifdef MULTIBYTE_SUPPORT
-	    /* Use screen width of string */
-	    twidth = mb_width(t);
-#else
-	    twidth = ztrlen(t);
-#endif
+	    twidth = MB_METASTRWIDTH(t);
 	    if (twidth < truncwidth) {
 		maxwidth = truncwidth - twidth;
 		/*
@@ -1130,7 +1125,7 @@ prompttrunc(int arg, int truncchar, int doprint, int endchar)
 			     * Normal text: build up a multibyte character.
 			     */
 			    char inchar;
-			    wchar_t cc;
+			    wchar_t cc, wcw;
 
 			    /*
 			     * careful: string is still metafied (we
@@ -1156,7 +1151,9 @@ prompttrunc(int arg, int truncchar, int doprint, int endchar)
 				remw--;
 				break;
 			    default:
-				remw -= wcwidth(cc);
+				wcw = wcwidth(cc);
+				if (wcw > 0)
+				    remw -= wcw;
 				break;
 			    }
 #else
@@ -1197,6 +1194,7 @@ prompttrunc(int arg, int truncchar, int doprint, int endchar)
 #ifdef MULTIBYTE_SUPPORT
 			    char inchar;
 			    wchar_t cc;
+			    int wcw;
 
 			    if (*skiptext == Meta)
 				inchar = *++skiptext ^ 32;
@@ -1216,7 +1214,9 @@ prompttrunc(int arg, int truncchar, int doprint, int endchar)
 				maxwidth--;
 				break;
 			    default:
-				maxwidth -= wcwidth(cc);
+				wcw = wcwidth(cc);
+				if (wcw > 0)
+				    maxwidth -= wcw;
 				break;
 			    }
 #else
diff --git a/Src/subst.c b/Src/subst.c
index 3a2c3e111..2be854524 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -718,12 +718,34 @@ invinstrpcmp(const void *a, const void *b)
     return -instrpcmp(a, b);
 }
 
+/*
+ * Pad the string str, returning a result from the heap (or str itself,
+ * if it didn't need padding).  If str is too large, it will be truncated.
+ * Calculations are in terms of width if MULTIBYTE is in effect, else
+ * characters.
+ *
+ * prenum and postnum are the width to which the string needs padding
+ * on the left and right.
+ *
+ * preone and postone are strings to insert once only before and after
+ * str.  They will be truncated on the left or right, respectively,
+ * if necessary to fit the width.  Either or both may be NULL in which
+ * case they will not be used.
+ *
+ * premul and postmul are the padding strings to be repeated
+ * on the left (if prenum is non-zero) and right (if postnum is non-zero).  If
+ * NULL the first character of IFS (typically but not necessarily a space)
+ * will be used.
+ */
+
 /**/
 static char *
-dopadding(char *str, int prenum, int postnum, char *preone, char *postone, char *premul, char *postmul)
+dopadding(char *str, int prenum, int postnum, char *preone, char *postone,
+	  char *premul, char *postmul)
 {
     char *def, *ret, *t, *r;
-    int ls, ls2, lpreone, lpostone, lpremul, lpostmul, lr, f, m, c, cc;
+    int ls, ls2, lpreone, lpostone, lpremul, lpostmul, lr, f, m, c, cc, cl;
+    convchar_t cchar;
 
     MB_METACHARINIT();
     if (*ifs)
@@ -739,89 +761,357 @@ dopadding(char *str, int prenum, int postnum, char *preone, char *postone, char
     if (!postmul || !*postmul)
 	postmul = def;
 
-    ls = strlen(str);
-    lpreone = preone ? strlen(preone) : 0;
-    lpostone = postone ? strlen(postone) : 0;
-    lpremul = strlen(premul);
-    lpostmul = strlen(postmul);
+    ls = MB_METASTRWIDTH(str);
+    lpreone = preone ? MB_METASTRWIDTH(preone) : 0;
+    lpostone = postone ? MB_METASTRWIDTH(postone) : 0;
+    lpremul = MB_METASTRWIDTH(premul);
+    lpostmul = MB_METASTRWIDTH(postmul);
 
-    lr = prenum + postnum;
-
-    if (lr == ls)
+    if (prenum + postnum == ls)
 	return str;
 
+    /*
+     * Try to be careful with allocated lengths.  The following
+     * is a maximum, in case we need the entire repeated string
+     * for each repetition.  We probably don't, but in case the user
+     * has given us something pathological which doesn't convert
+     * easily into a width we'd better be safe.
+     */
+    lr = strlen(str) + strlen(premul) * prenum + strlen(postmul) * postnum;
+    /*
+     * Same logic for preone and postone, except those may be NULL.
+     */
+    if (preone)
+	lr += strlen(preone);
+    if (postone)
+	lr += strlen(postone);
     r = ret = (char *)zhalloc(lr + 1);
 
     if (prenum) {
+	/*
+	 * Pad on the left.
+	 */
 	if (postnum) {
+	    /*
+	     * Pad on both right and left.
+	     * The strategy is to divide the string into two halves.
+	     * The first half is dealt with by the left hand padding
+	     * code, the second by the right hand.
+	     */
 	    ls2 = ls / 2;
 
+	    /* The width left to pad for the first half. */
 	    f = prenum - ls2;
-	    if (f <= 0)
-		for (str -= f, c = prenum; c--; *r++ = *str++);
-	    else {
-		if (f <= lpreone)
-		    for (c = f, t = preone + lpreone - f; c--; *r++ = *t++);
-		else {
+	    if (f <= 0) {
+		/* First half doesn't fit.  Skip the first -f width. */
+		f = -f;
+		MB_METACHARINIT();
+		while (f > 0) {
+		    str += MB_METACHARLENCONV(str, &cchar);
+		    f -= WCWIDTH(cchar);
+		}
+		/* Now finish the first half. */
+		for (c = prenum; c > 0; ) {
+		    cl = MB_METACHARLENCONV(str, &cchar);
+		    while (cl--)
+			*r++ = *str++;
+		    c -= WCWIDTH(cchar);
+		}
+	    } else {
+		if (f <= lpreone) {
+		    if (preone) {
+			/*
+			 * The unrepeated string doesn't fit.
+			 */
+			MB_METACHARINIT();
+			/* The width we need to skip */
+			f = lpreone - f;
+			/* So skip. */
+			for (t = preone; f > 0; ) {
+			    t += MB_METACHARLENCONV(t, &cchar);
+			    f -= WCWIDTH(cchar);
+			}
+			/* Then copy the entire remainder. */
+			while (*t)
+			    *r++ = *t++;
+		    }
+		} else {
 		    f -= lpreone;
-		    if ((m = f % lpremul))
-			for (c = m, t = premul + lpremul - m; c--; *r++ = *t++);
-		    for (cc = f / lpremul; cc--;)
-			for (c = lpremul, t = premul; c--; *r++ = *t++);
-		    for (c = lpreone; c--; *r++ = *preone++);
+		    if ((m = f % lpremul)) {
+			/*
+			 * Left over fraction of repeated string.
+			 */
+			MB_METACHARINIT();
+			/* Skip this much. */
+			m = lpremul - m;
+			for (t = premul; m > 0; ) {
+			    t += MB_METACHARLENCONV(t, &cchar);
+			    m -= WCWIDTH(cchar);
+			}
+			/* Output the rest. */
+			while (*t)
+			    *r++ = *t++;
+		    }
+		    for (cc = f / lpremul; cc--;) {
+			/* Repeat the repeated string */
+			MB_METACHARINIT();
+			for (c = lpremul, t = premul; c > 0; ) {
+			    cl = MB_METACHARLENCONV(t, &cchar);
+			    while (cl--)
+				*r++ = *t++;
+			    c -= WCWIDTH(cchar);
+			}
+		    }
+		    if (preone) {
+			/* Output the full unrepeated string */
+			while (*preone)
+			    *r++ = *preone++;
+		    }
+		}
+		/* Output the first half width of the original string. */
+		for (c = ls2; c > 0; ) {
+		    cl = MB_METACHARLENCONV(str, &cchar);
+		    c -= WCWIDTH(cchar);
+		    while (cl--)
+			*r++ = *str++;
 		}
-		for (c = ls2; c--; *r++ = *str++);
 	    }
+	    /* Other half.  In case the string had an odd length... */
 	    ls2 = ls - ls2;
+	    /* Width that needs padding... */
 	    f = postnum - ls2;
-	    if (f <= 0)
-		for (c = postnum; c--; *r++ = *str++);
-	    else {
-		for (c = ls2; c--; *r++ = *str++);
-		if (f <= lpostone)
-		    for (c = f; c--; *r++ = *postone++);
-		else {
-		    f -= lpostone;
-		    for (c = lpostone; c--; *r++ = *postone++);
-		    for (cc = f / lpostmul; cc--;)
-			for (c = lpostmul, t = postmul; c--; *r++ = *t++);
-		    if ((m = f % lpostmul))
-			for (; m--; *r++ = *postmul++);
+	    if (f <= 0) {
+		/* ...is negative, truncate original string */
+		MB_METACHARINIT();
+		for (c = postnum; c > 0; ) {
+		    cl = MB_METACHARLENCONV(str, &cchar);
+		    c -= WCWIDTH(cchar);
+		    while (cl--)
+			*r++ = *str++;
+		}
+	    } else {
+		/* Rest of original string fits, output it complete */
+		while (*str)
+		    *r++ = *str++;
+		if (f <= lpostone) {
+		    if (postone) {
+			/* Can't fit unrepeated string, truncate it */
+			for (c = f; c > 0; ) {
+			    cl = MB_METACHARLENCONV(postone, &cchar);
+			    c -= WCWIDTH(cchar);
+			    while (cl--)
+				*r++ = *postone++;
+			}
+		    }
+		} else {
+		    if (postone) {
+			f -= lpostone;
+			/* Output entire unrepeated string */
+			while (*postone)
+			    *r++ = *postone++;
+		    }
+		    for (cc = f / lpostmul; cc--;) {
+			/* Begin the beguine */
+			for (t = postmul; *t; )
+			    *r++ = *t++;
+		    }
+		    if ((m = f % lpostmul)) {
+			/* Fill leftovers with chunk of repeated string */
+			MB_METACHARINIT();
+			while (m > 0) {
+			    cl = MB_METACHARLENCONV(postmul, &cchar);
+			    m -= WCWIDTH(cchar);
+			    while (cl--)
+				*r++ = *postmul++;
+			}
+		    }
 		}
 	    }
 	} else {
+	    /*
+	     * Pad only on the left.
+	     */
 	    f = prenum - ls;
-	    if (f <= 0)
-		for (c = prenum, str -= f; c--; *r++ = *str++);
-	    else {
-		if (f <= lpreone)
-		    for (c = f, t = preone + lpreone - f; c--; *r++ = *t++);
-		else {
+	    if (f <= 0) {
+		/*
+		 * Original string is at least as wide as padding.
+		 * Truncate original string to width.
+		 * Truncate on left, so skip the characters we
+		 * don't need.
+		 */
+		f = -f;
+		MB_METACHARINIT();
+		while (f > 0) {
+		    str += MB_METACHARLENCONV(str, &cchar);
+		    f -= WCWIDTH(cchar);
+		}
+		/* Copy the rest of the original string */
+		for (c = prenum; c > 0; ) {
+		    cl = MB_METACHARLENCONV(str, &cchar);
+		    while (cl--)
+			*r++ = *str++;
+		    c -= WCWIDTH(cchar);
+		}
+	    } else {
+		/*
+		 * We can fit the entire string...
+		 */
+		if (f <= lpreone) {
+		    if (preone) {
+			/*
+			 * ...with some fraction of the unrepeated string.
+			 */
+			/* We need this width of characters. */
+			c = f;
+			/*
+			 * We therefore need to skip this width of
+			 * characters.
+			 */
+			f = lpreone - f;
+			MB_METACHARINIT();
+			for (t = preone; f > 0; ) {
+			    t += MB_METACHARLENCONV(t, &cchar);
+			    f -= WCWIDTH(cchar);
+			}
+			/* Copy the rest of preone */
+			while (*t)
+			    *r++ = *t++;
+		    }
+		} else {
+		    /*
+		     * We can fit the whole of preone, needing this width
+		     * first
+		     */
 		    f -= lpreone;
-		    if ((m = f % lpremul))
-			for (c = m, t = premul + lpremul - m; c--; *r++ = *t++);
-		    for (cc = f / lpremul; cc--;)
-			for (c = lpremul, t = premul; c--; *r++ = *t++);
-		    for (c = lpreone; c--; *r++ = *preone++);
+		    if ((m = f % lpremul)) {
+			/*
+			 * Some fraction of the repeated string needed.
+			 */
+			/* Need this much... */
+			c = m;
+			/* ...skipping this much first. */
+			m = lpremul - m;
+			MB_METACHARINIT();
+			for (t = premul; m > 0; ) {
+			    t += MB_METACHARLENCONV(t, &cchar);
+			    m -= WCWIDTH(cchar);
+			}
+			/* Now the rest of the repeated string. */
+			while (c > 0) {
+			    cl = MB_METACHARLENCONV(t, &cchar);
+			    while (cl--)
+				*r++ = *t++;
+			    c -= WCWIDTH(cchar);
+			}
+		    }
+		    for (cc = f / lpremul; cc--;) {
+			/*
+			 * Repeat the repeated string.
+			 */
+			MB_METACHARINIT();
+			for (c = lpremul, t = premul; c > 0; ) {
+			    cl = MB_METACHARLENCONV(t, &cchar);
+			    while (cl--)
+				*r++ = *t++;
+			    c -= WCWIDTH(cchar);
+			}
+		    }
+		    if (preone) {
+			/*
+			 * Now the entire unrepeated string.  Don't
+			 * count the width, just dump it.  This is
+			 * significant if there are special characters
+			 * in this string.  It's sort of a historical
+			 * accident that this worked, but there's nothing
+			 * to stop us just dumping the thing out and assuming
+			 * the user knows what they're doing.
+			 */
+			while (*preone)
+			    *r++ = *preone++;
+		    }
 		}
-		for (c = ls; c--; *r++ = *str++);
+		/* Now the string being padded */
+		while (*str)
+		    *r++ = *str++;
 	    }
 	}
     } else if (postnum) {
+	/*
+	 * Pad on the right.
+	 */
 	f = postnum - ls;
-	if (f <= 0)
-	    for (c = postnum; c--; *r++ = *str++);
-	else {
-	    for (c = ls; c--; *r++ = *str++);
-	    if (f <= lpostone)
-		for (c = f; c--; *r++ = *postone++);
-	    else {
-		f -= lpostone;
-		for (c = lpostone; c--; *r++ = *postone++);
-		for (cc = f / lpostmul; cc--;)
-		    for (c = lpostmul, t = postmul; c--; *r++ = *t++);
-		if ((m = f % lpostmul))
-		    for (; m--; *r++ = *postmul++);
+	MB_METACHARINIT();
+	if (f <= 0) {
+	    /*
+	     * Original string is at least as wide as padding.
+	     * Truncate original string to width.
+	     */
+	    for (c = postnum; c > 0; ) {
+		cl = MB_METACHARLENCONV(str, &cchar);
+		while (cl--)
+		    *r++ = *str++;
+		c -= WCWIDTH(cchar);
+	    }
+	} else {
+	    /*
+	     * There's some space to fill.  First copy the original
+	     * string, counting the width.  Make sure we copy the
+	     * entire string.
+	     */
+	    for (c = ls; *str; ) {
+		cl = MB_METACHARLENCONV(str, &cchar);
+		while (cl--)
+		    *r++ = *str++;
+		c -= WCWIDTH(cchar);
+	    }
+	    MB_METACHARINIT();
+	    if (f <= lpostone) {
+		if (postone) {
+		    /*
+		     * Not enough or only just enough space to fit
+		     * the unrepeated string.  Truncate as necessary.
+		     */
+		    for (c = f; c > 0; ) {
+			cl = MB_METACHARLENCONV(postone, &cchar);
+			while (cl--)
+			    *r++ = *postone++;
+			c -= WCWIDTH(cchar);
+		    }
+		}
+	    } else {
+		if (postone) {
+		    f -= lpostone;
+		    /* Copy the entire unrepeated string */
+		    for (c = lpostone; *postone; ) {
+			cl = MB_METACHARLENCONV(postone, &cchar);
+			while (cl--)
+			    *r++ = *postone++;
+			c -= WCWIDTH(cchar);
+		    }
+		}
+		/* Repeat the repeated string */
+		for (cc = f / lpostmul; cc--;) {
+		    MB_METACHARINIT();
+		    for (c = lpostmul, t = postmul; *t; ) {
+			cl = MB_METACHARLENCONV(t, &cchar);
+			while (cl--)
+			    *r++ = *t++;
+			c -= WCWIDTH(cchar);
+		    }
+		}
+		/*
+		 * See if there's any fraction of the repeated
+		 * string needed to fill up the remaining space.
+		 */
+		if ((m = f % lpostmul)) {
+		    MB_METACHARINIT();
+		    while (m > 0) {
+			cl = MB_METACHARLENCONV(postmul, &cchar);
+			while (cl--)
+			    *r++ = *postmul++;
+			m -= WCWIDTH(cchar);
+		    }
+		}
 	    }
 	}
     }
@@ -1779,6 +2069,9 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 		 * by flags.  TODO: maybe therefore this would
 		 * be more consistent if moved into getstrvalue()?
 		 * Bet that's easier said than done.
+		 *
+		 * TODO: use string widths.  In fact, shouldn't the
+		 * strlen()s be ztrlen()s anyway?
 		 */
 		val = getstrvalue(v);
 		fwidth = v->pm->width ? v->pm->width : (int)strlen(val);
diff --git a/Src/utils.c b/Src/utils.c
index a72ddfcc5..37017bdc7 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -524,8 +524,12 @@ wcs_nicechar(wchar_t c, size_t *widthp, char **swidep)
 	return buf;
     }
 
-    if (widthp)
-	*widthp = (s - buf) + wcwidth(c);
+    if (widthp) {
+	int wcw = wcwidth(c);
+	*widthp = (s - buf);
+	if (wcw > 0)
+	    *widthp += wcw;
+    }
     if (swidep)
 	*swidep = s;
     for (mbptr = mbstr; ret; s++, mbptr++, ret--) {
@@ -539,6 +543,22 @@ wcs_nicechar(wchar_t c, size_t *widthp, char **swidep)
     *s = 0;
     return buf;
 }
+
+/**/
+mod_export int
+zwcwidth(wint_t wc)
+{
+    int wcw;
+    /* assume a single-byte character if not valid */
+    if (wc == WEOF)
+	return 1;
+    wcw = wcwidth(wc);
+    /* if not printable, assume zero width */
+    if (wcw <= 0)
+	return 0;
+    return wcw;
+}
+
 /**/
 #endif /* MULTIBYTE_SUPPORT */
 
@@ -3953,58 +3973,6 @@ nicedup(const char *s, int heap)
     return retstr;
 }
 
-/*
- * Return the screen width of a multibyte string.  The input
- * string is metafied.
- */
-/**/
-mod_export int
-mb_width(const char *s)
-{
-    char *ums = ztrdup(s), *umptr;
-    int umlen, eol = 0;
-    int width = 0;
-    mbstate_t mbs;
-
-    memset(&mbs, 0, sizeof mbs);
-    umptr = unmetafy(ums, &umlen);
-    /*
-     * Convert one wide character at a time.  We could convet
-     * the entire string using mbsrtowcs(), but that terminates on
-     * a NUL and we might have embedded NULs.
-     */
-    while (umlen > 0) {
-	int wret;
-	wchar_t cc;
-	size_t cnt = eol ? MB_INVALID : mbrtowc(&cc, umptr, umlen, &mbs);
-
-	switch (cnt) {
-	case MB_INCOMPLETE:
-	    eol = 1;
-	    /* FALL THROUGH */
-	case MB_INVALID:
-	    memset(&mbs, 0, sizeof mbs);
-	    /* FALL THROUGH */
-	case 0:
-	    /* Assume a single-width character. */
-	    width++;
-	    cnt = 1;
-	    break;
-	default:
-	    wret = wcwidth(cc);
-	    if (wret > 0)
-		width += wret;
-	    break;
-	}
-
-	umlen -= cnt;
-	umptr += cnt;
-    }
-
-    free(ums);
-
-    return width;
-}
 
 /*
  * Length of metafied string s which contains the next multibyte
@@ -4107,9 +4075,15 @@ mb_metastrlen(char *ptr, int width)
 		memset(&mb_shiftstate, 0, sizeof(mb_shiftstate));
 		ptr = laststart + (*laststart == Meta) + 1;
 		num++;
-	    } else if (width)
-		num += wcwidth(wc);
-	    else
+	    } else if (width) {
+		/*
+		 * Returns -1 if not a printable character; best
+		 * just to ignore these.
+		 */
+		int wcw = wcwidth(wc);
+		if (wcw > 0)
+		    num += wcw;
+	    } else
 		num++;
 	    laststart = ptr;
 	    num_in_char = 0;
diff --git a/Src/zsh.h b/Src/zsh.h
index 3cb006cbf..27bb96493 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -2012,7 +2012,7 @@ typedef wint_t convchar_t;
  * It's written to use the wint_t from mb_metacharlenconv() without
  * further tests.
  */
-#define WCWIDTH(wc)	((wc == WEOF) ? 1 : wcwidth(wc))
+#define WCWIDTH(wc)	zwcwidth(wc)
 
 #define MB_INCOMPLETE	((size_t)-2)
 #define MB_INVALID	((size_t)-1)