summary refs log tree commit diff
diff options
context:
space:
mode:
author Peter Stephenson <pws@users.sourceforge.net> 2006-12-13 22:30:37 +0000
committer Peter Stephenson <pws@users.sourceforge.net> 2006-12-13 22:30:37 +0000
commit d8e36bffa29b31c3533b4877701e37e3cffe44fb (patch)
tree 1b6c3cb8bc7c42ebed3b20fe08377aad663354f0
parent f50dfd61f66d61d17c2ae14e03f32a502da577fa (diff)
download zsh-d8e36bffa29b31c3533b4877701e37e3cffe44fb.tar.gz
zsh-d8e36bffa29b31c3533b4877701e37e3cffe44fb.tar.xz
zsh-d8e36bffa29b31c3533b4877701e37e3cffe44fb.zip
23052: multibyte characters in typeset -L/R/Z padding
-rw-r--r-- ChangeLog 5
-rw-r--r-- Doc/Zsh/builtins.yo 9
-rw-r--r-- Src/subst.c 76
3 files changed, 60 insertions, 30 deletions
diff --git a/ChangeLog b/ChangeLog
index 390e0f017..a15e690e9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2006-12-13  Peter Stephenson  <p.w.stephenson@ntlworld.com>
+
+	* 23052: Doc/Zsh/builtins.yo, Src/subst.c: multibyte
+	characters in typeset -L/R/Z padding.
+
 2006-12-13  Peter Stephenson  <pws@csr.com>
 
 	* 23051: Src/lex.c: using ${(Q)...} on an expression with
diff --git a/Doc/Zsh/builtins.yo b/Doc/Zsh/builtins.yo
index 27f91e8b6..51c1fd00c 100644
--- a/Doc/Zsh/builtins.yo
+++ b/Doc/Zsh/builtins.yo
@@ -1405,6 +1405,15 @@ If var(n) is zero, the width is determined by the width of the value of
 the first assignment.  In the case of numeric parameters, the length of the
 complete value assigned to the parameter is used to determine the width,
 not the value that would be output.
+
+The width is the count of characters, which may be multibyte characters
+if the tt(MULTIBYTE) option is in effect.  Note that the screen
+width of the character is not taken into account; if this is required,
+use padding with parameter expansion flags
+tt(${+LPAR()ml)var(...)tt(RPAR())var(...)tt(}) as described in
+`Parameter Expansion Flags' in
+ifzman(zmanref(zshexpn))ifnzman(noderef(Parameter Expansion)).
+
 When the parameter is expanded, it is filled on the right with
 blanks or truncated if necessary to fit the field.
 Note truncation can lead to unexpected results with numeric parameters.
diff --git a/Src/subst.c b/Src/subst.c
index 45c54b9f0..2bc6d75a6 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -761,8 +761,8 @@ invinstrpcmp(const void *a, const void *b)
 /*
  * Pad the string str, returning a result from the heap (or str itself,
  * if it didn't need padding).  If str is too large, it will be truncated.
- * Calculations are in terms of width if MULTIBYTE is in effect, else
- * characters.
+ * Calculations are in terms of width if MULTIBYTE is in effect and
+ * multi_width is non-zero, else characters.
  *
  * prenum and postnum are the width to which the string needs padding
  * on the left and right.
@@ -2211,7 +2211,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 		val = getstrvalue(v);
 		fwidth = v->pm->width ? v->pm->width : (int)strlen(val);
 		switch (v->pm->node.flags & (PM_LEFT | PM_RIGHT_B | PM_RIGHT_Z)) {
-		    char *t;
+		    char *t, *tend;
 		    unsigned int t0;
 
 		case PM_LEFT:
@@ -2223,21 +2223,39 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 		    else
 			while (iblank(*t))
 			    t++;
-		    val = (char *) hcalloc(fwidth + 1);
-		    val[fwidth] = '\0';
-		    if ((t0 = strlen(t)) > fwidth)
-			t0 = fwidth;
-		    memset(val, ' ', fwidth);
-		    strncpy(val, t, t0);
+		    MB_METACHARINIT();
+		    for (tend = t, t0 = 0; t0 < fwidth && *tend; t0++)
+			tend += MB_METACHARLEN(tend);
+		    /*
+		     * t0 is the number of characters from t used,
+		     * hence (fwidth - t0) is the number of padding
+		     * characters.  fwidth is a misnomer: we use
+		     * character counts, not character widths.
+		     *
+		     * (tend - t) is the number of bytes we need
+		     * to get fwidth characters or the entire string;
+		     * the characters may be multiple bytes.
+		     */
+		    fwidth -= t0; /* padding chars remaining */
+		    t0 = tend - t; /* bytes to copy from string */
+		    val = (char *) hcalloc(t0 + fwidth + 1);
+		    memcpy(val, t, t0);
+		    if (fwidth)
+			memset(val + t0, ' ', fwidth);
+		    val[t0 + fwidth] = '\0';
+		    copied = 1;
 		    break;
 		case PM_RIGHT_B:
 		case PM_RIGHT_Z:
 		case PM_RIGHT_Z | PM_RIGHT_B:
 		    {
 			int zero = 1;
+			/* Calculate length in possibly multibyte chars */
+			int charlen = MB_METASTRLEN(val);
 
-			if (strlen(val) < fwidth) {
+			if (charlen < fwidth) {
 			    char *valprefend = val;
+			    int preflen;
 			    if (v->pm->node.flags & PM_RIGHT_Z) {
 				/*
 				 * This is a documented feature: when deciding
@@ -2277,33 +2295,31 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 				} else if (!idigit(*t))
 				    zero = 0;
 			    }
-			    t = (char *) hcalloc(fwidth + 1);
-			    memset(t, (((v->pm->node.flags & PM_RIGHT_B) || !zero) ?
-				       ' ' : '0'), fwidth);
-			    /*
-			     * How can the following trigger?  We
-			     * haven't altered val or fwidth since
-			     * the last time we tested this.
-			     */
-			    if ((t0 = strlen(val)) > fwidth)
-				t0 = fwidth;
+			    /* number of characters needed for padding */
+			    fwidth -= charlen;
+			    /* bytes from original string */
+			    t0 = strlen(val);
+			    t = (char *) hcalloc(fwidth + t0 + 1);
+			    /* prefix guaranteed to be single byte chars */
+			    preflen = valprefend - val;
+			    memset(t + preflen, 
+				   (((v->pm->node.flags & PM_RIGHT_B)
+				     || !zero) ?       ' ' : '0'), fwidth);
 			    /*
 			     * Copy - or 0x or base# before any padding
 			     * zeroes.
 			     */
-			    if (zero && val != valprefend) {
-				int preflen = valprefend - val;
+			    if (preflen)
 				memcpy(t, val, preflen);
-				strcpy(t + (fwidth - t0) + preflen,
-				       valprefend);
-			    } else
-				strcpy(t + (fwidth - t0), val);
+			    memcpy(t + preflen + fwidth,
+				   valprefend, t0 - preflen);
+			    t[fwidth + t0] = '\0';
 			    val = t;
+			    copied = 1;
 			} else {
-			    t = (char *) hcalloc(fwidth + 1);
-			    t[fwidth] = '\0';
-			    strncpy(t, val + strlen(val) - fwidth, fwidth);
-			    val = t;
+			    /* Need to skip (charlen - fwidth) chars */
+			    for (t0 = charlen - fwidth; t0; t0--)
+				val += MB_METACHARLEN(val);
 			}
 		    }
 		    break;