From d8e36bffa29b31c3533b4877701e37e3cffe44fb Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Wed, 13 Dec 2006 22:30:37 +0000 Subject: 23052: multibyte characters in typeset -L/R/Z padding --- ChangeLog | 5 ++++ Doc/Zsh/builtins.yo | 9 +++++++ Src/subst.c | 76 ++++++++++++++++++++++++++++++++--------------------- 3 files changed, 60 insertions(+), 30 deletions(-) diff --git a/ChangeLog b/ChangeLog index 390e0f017..a15e690e9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2006-12-13 Peter Stephenson + + * 23052: Doc/Zsh/builtins.yo, Src/subst.c: multibyte + characters in typeset -L/R/Z padding. + 2006-12-13 Peter Stephenson * 23051: Src/lex.c: using ${(Q)...} on an expression with diff --git a/Doc/Zsh/builtins.yo b/Doc/Zsh/builtins.yo index 27f91e8b6..51c1fd00c 100644 --- a/Doc/Zsh/builtins.yo +++ b/Doc/Zsh/builtins.yo @@ -1405,6 +1405,15 @@ If var(n) is zero, the width is determined by the width of the value of the first assignment. In the case of numeric parameters, the length of the complete value assigned to the parameter is used to determine the width, not the value that would be output. + +The width is the count of characters, which may be multibyte characters +if the tt(MULTIBYTE) option is in effect. Note that the screen +width of the character is not taken into account; if this is required, +use padding with parameter expansion flags +tt(${+LPAR()ml)var(...)tt(RPAR())var(...)tt(}) as described in +`Parameter Expansion Flags' in +ifzman(zmanref(zshexpn))ifnzman(noderef(Parameter Expansion)). + When the parameter is expanded, it is filled on the right with blanks or truncated if necessary to fit the field. Note truncation can lead to unexpected results with numeric parameters. diff --git a/Src/subst.c b/Src/subst.c index 45c54b9f0..2bc6d75a6 100644 --- a/Src/subst.c +++ b/Src/subst.c @@ -761,8 +761,8 @@ invinstrpcmp(const void *a, const void *b) /* * Pad the string str, returning a result from the heap (or str itself, * if it didn't need padding). If str is too large, it will be truncated. - * Calculations are in terms of width if MULTIBYTE is in effect, else - * characters. + * Calculations are in terms of width if MULTIBYTE is in effect and + * multi_width is non-zero, else characters. * * prenum and postnum are the width to which the string needs padding * on the left and right. @@ -2211,7 +2211,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) val = getstrvalue(v); fwidth = v->pm->width ? v->pm->width : (int)strlen(val); switch (v->pm->node.flags & (PM_LEFT | PM_RIGHT_B | PM_RIGHT_Z)) { - char *t; + char *t, *tend; unsigned int t0; case PM_LEFT: @@ -2223,21 +2223,39 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) else while (iblank(*t)) t++; - val = (char *) hcalloc(fwidth + 1); - val[fwidth] = '\0'; - if ((t0 = strlen(t)) > fwidth) - t0 = fwidth; - memset(val, ' ', fwidth); - strncpy(val, t, t0); + MB_METACHARINIT(); + for (tend = t, t0 = 0; t0 < fwidth && *tend; t0++) + tend += MB_METACHARLEN(tend); + /* + * t0 is the number of characters from t used, + * hence (fwidth - t0) is the number of padding + * characters. fwidth is a misnomer: we use + * character counts, not character widths. + * + * (tend - t) is the number of bytes we need + * to get fwidth characters or the entire string; + * the characters may be multiple bytes. + */ + fwidth -= t0; /* padding chars remaining */ + t0 = tend - t; /* bytes to copy from string */ + val = (char *) hcalloc(t0 + fwidth + 1); + memcpy(val, t, t0); + if (fwidth) + memset(val + t0, ' ', fwidth); + val[t0 + fwidth] = '\0'; + copied = 1; break; case PM_RIGHT_B: case PM_RIGHT_Z: case PM_RIGHT_Z | PM_RIGHT_B: { int zero = 1; + /* Calculate length in possibly multibyte chars */ + int charlen = MB_METASTRLEN(val); - if (strlen(val) < fwidth) { + if (charlen < fwidth) { char *valprefend = val; + int preflen; if (v->pm->node.flags & PM_RIGHT_Z) { /* * This is a documented feature: when deciding @@ -2277,33 +2295,31 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) } else if (!idigit(*t)) zero = 0; } - t = (char *) hcalloc(fwidth + 1); - memset(t, (((v->pm->node.flags & PM_RIGHT_B) || !zero) ? - ' ' : '0'), fwidth); - /* - * How can the following trigger? We - * haven't altered val or fwidth since - * the last time we tested this. - */ - if ((t0 = strlen(val)) > fwidth) - t0 = fwidth; + /* number of characters needed for padding */ + fwidth -= charlen; + /* bytes from original string */ + t0 = strlen(val); + t = (char *) hcalloc(fwidth + t0 + 1); + /* prefix guaranteed to be single byte chars */ + preflen = valprefend - val; + memset(t + preflen, + (((v->pm->node.flags & PM_RIGHT_B) + || !zero) ? ' ' : '0'), fwidth); /* * Copy - or 0x or base# before any padding * zeroes. */ - if (zero && val != valprefend) { - int preflen = valprefend - val; + if (preflen) memcpy(t, val, preflen); - strcpy(t + (fwidth - t0) + preflen, - valprefend); - } else - strcpy(t + (fwidth - t0), val); + memcpy(t + preflen + fwidth, + valprefend, t0 - preflen); + t[fwidth + t0] = '\0'; val = t; + copied = 1; } else { - t = (char *) hcalloc(fwidth + 1); - t[fwidth] = '\0'; - strncpy(t, val + strlen(val) - fwidth, fwidth); - val = t; + /* Need to skip (charlen - fwidth) chars */ + for (t0 = charlen - fwidth; t0; t0--) + val += MB_METACHARLEN(val); } } break; -- cgit 1.4.1