summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 5
-rw-r--r-- Src/builtin.c 78
-rw-r--r-- Test/D07multibyte.ztst 4
3 files changed, 75 insertions, 12 deletions
diff --git a/ChangeLog b/ChangeLog
index 3e5c5d4d2..850059322 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2007-01-09  Peter Stephenson  <p.w.stephenson@ntlworld.com>
+
+	* 23098: Src/builtin.c, Test/D07multibyte.ztst: print widths
+	in printf take account of multibyte characters.
+
 2007-01-08  Peter Stephenson  <p.w.stephenson@ntlworld.com>
 
 	* 23097: Src/lex.c, Src/utils.c, Src/zsh.h, Src/Zle/compcore.c:
diff --git a/Src/builtin.c b/Src/builtin.c
index 8e579d24e..260ba603b 100644
--- a/Src/builtin.c
+++ b/Src/builtin.c
@@ -3792,6 +3792,12 @@ bin_print(char *name, char **args, Options ops, int func)
 	return ret;
     }
     
+    /*
+     * All the remaining code in this function is for printf-style
+     * output (printf itself, or print -f).  We still have to handle
+     * special cases of printing to a ZLE buffer or the history, however.
+     */
+
     if (OPT_ISSET(ops,'z') || OPT_ISSET(ops,'s')) {
 #ifdef HAVE_OPEN_MEMSTREAM
     	if ((fout = open_memstream(&buf, &mcount)) == NULL)
@@ -3948,26 +3954,74 @@ bin_print(char *name, char **args, Options ops, int func)
 	    case 's':
 	    case 'b':
 		if (curarg) {
-		    char *b;
-		    int l;
+		    char *b, *ptr;
+		    int lbytes, lchars, lleft;
+#ifdef MULTIBYTE_SUPPORT
+		    mbstate_t mbs;
+#endif
+
 		    if (*c == 'b') {
 			b = getkeystring(metafy(curarg, curlen, META_USEHEAP),
-					 &l,
+					 &lbytes,
 					 OPT_ISSET(ops,'b') ? GETKEYS_BINDKEY :
 					 GETKEYS_PRINTF_ARG, &nnl);
 		    } else {
 			b = curarg;
-			l = curlen;
+			lbytes = curlen;
+		    }
+		    /*
+		     * Handle width/precision here and use fwrite so that
+		     * nul characters can be output.
+		     *
+		     * First, examine width of string given that it
+		     * may contain multibyte characters.  The output
+		     * widths are for characters, so we need to count
+		     * (in lchars).  However, if we need to truncate
+		     * the string we need the width in bytes (in lbytes).
+		     */
+		    ptr = b;
+#ifdef MULTIBYTE_SUPPORT
+		    memset(&mbs, 0, sizeof(mbs));
+#endif
+
+		    for (lchars = 0, lleft = lbytes; lleft > 0; lchars++) {
+			int chars;
+
+			if (lchars == prec) {
+			    /* Truncate at this point. */
+			    lbytes = ptr - b;
+			    break;
+			}
+#ifdef MULTIBYTE_SUPPORT
+			if (isset(MULTIBYTE)) {
+			    chars = mbrlen(ptr, lleft, &mbs);
+			    if (chars < 0) {
+				/*
+				 * Invalid/incomplete character at this
+				 * point.  Assume all the rest are a
+				 * single byte.  That's about the best we
+				 * can do.
+				 */
+				lchars += lleft;
+				lbytes = (ptr - b) + lleft;
+				break;
+			    } else if (chars == 0) {
+				/* NUL, handle as real character */
+				chars = 1;
+			    }
+			}
+			else	/* use the non-multibyte code below */
+#endif
+			    chars = 1; /* compiler can optimise this...*/
+			lleft -= chars;
+			ptr += chars;
 		    }
-		    /* handle width/precision here and use fwrite so that
-		     * nul characters can be output */
-		    if (prec >= 0 && prec < l) l = prec;
 		    if (width > 0 && flags[2]) width = -width;
-		    if (width > 0 && l < width)
-		    	count += fprintf(fout, "%*c", width - l, ' ');
-		    count += fwrite(b, 1, l, fout);
-		    if (width < 0 && l < -width)
-		    	count += fprintf(fout, "%*c", -width - l, ' ');
+		    if (width > 0 && lchars < width)
+		    	count += fprintf(fout, "%*c", width - lchars, ' ');
+		    count += fwrite(b, 1, lbytes, fout);
+		    if (width < 0 && lchars < -width)
+		    	count += fprintf(fout, "%*c", -width - lchars, ' ');
 		    if (nnl) {
 			/* If the %b arg had a \c escape, truncate the fmt. */
 			flen = c - fmt + 1;
diff --git a/Test/D07multibyte.ztst b/Test/D07multibyte.ztst
index 828a5c573..ecac737a1 100644
--- a/Test/D07multibyte.ztst
+++ b/Test/D07multibyte.ztst
@@ -311,3 +311,7 @@
 0:Delimiters in parameter flags
 >barXX
 >YYYYYHIbar
+
+  printf "%4.3s\n" főobar
+0:Multibyte characters in printf widths
+> főo