diff options
author | Peter Stephenson <pws@users.sourceforge.net> | 2006-09-13 20:55:29 +0000 |
---|---|---|
committer | Peter Stephenson <pws@users.sourceforge.net> | 2006-09-13 20:55:29 +0000 |
commit | a242b1eb35863b73cbc63699fafe920e8b92c858 (patch) | |
tree | 141db2c3c4a20d1a44d7fe357a39d0ba4aab9d4f /Src | |
parent | efd061cdc9bdc0ba692387ec25eb6d01616d0425 (diff) | |
download | zsh-a242b1eb35863b73cbc63699fafe920e8b92c858.tar.gz zsh-a242b1eb35863b73cbc63699fafe920e8b92c858.tar.xz zsh-a242b1eb35863b73cbc63699fafe920e8b92c858.zip |
22705: make ${(l...)...} and ${(r...)...} handle multibyte characters
Diffstat (limited to 'Src')
-rw-r--r-- | Src/prompt.c | 18 | ||||
-rw-r--r-- | Src/subst.c | 413 | ||||
-rw-r--r-- | Src/utils.c | 88 | ||||
-rw-r--r-- | Src/zsh.h | 2 |
4 files changed, 394 insertions, 127 deletions
diff --git a/Src/prompt.c b/Src/prompt.c index 21dff16e0..974f70e40 100644 --- a/Src/prompt.c +++ b/Src/prompt.c @@ -1058,12 +1058,7 @@ prompttrunc(int arg, int truncchar, int doprint, int endchar) int twidth, maxwidth; int ntrunc = strlen(t); -#ifdef MULTIBYTE_SUPPORT - /* Use screen width of string */ - twidth = mb_width(t); -#else - twidth = ztrlen(t); -#endif + twidth = MB_METASTRWIDTH(t); if (twidth < truncwidth) { maxwidth = truncwidth - twidth; /* @@ -1130,7 +1125,7 @@ prompttrunc(int arg, int truncchar, int doprint, int endchar) * Normal text: build up a multibyte character. */ char inchar; - wchar_t cc; + wchar_t cc, wcw; /* * careful: string is still metafied (we @@ -1156,7 +1151,9 @@ prompttrunc(int arg, int truncchar, int doprint, int endchar) remw--; break; default: - remw -= wcwidth(cc); + wcw = wcwidth(cc); + if (wcw > 0) + remw -= wcw; break; } #else @@ -1197,6 +1194,7 @@ prompttrunc(int arg, int truncchar, int doprint, int endchar) #ifdef MULTIBYTE_SUPPORT char inchar; wchar_t cc; + int wcw; if (*skiptext == Meta) inchar = *++skiptext ^ 32; @@ -1216,7 +1214,9 @@ prompttrunc(int arg, int truncchar, int doprint, int endchar) maxwidth--; break; default: - maxwidth -= wcwidth(cc); + wcw = wcwidth(cc); + if (wcw > 0) + maxwidth -= wcw; break; } #else diff --git a/Src/subst.c b/Src/subst.c index 3a2c3e111..2be854524 100644 --- a/Src/subst.c +++ b/Src/subst.c @@ -718,12 +718,34 @@ invinstrpcmp(const void *a, const void *b) return -instrpcmp(a, b); } +/* + * Pad the string str, returning a result from the heap (or str itself, + * if it didn't need padding). If str is too large, it will be truncated. + * Calculations are in terms of width if MULTIBYTE is in effect, else + * characters. + * + * prenum and postnum are the width to which the string needs padding + * on the left and right. + * + * preone and postone are string to insert once only before and after + * str. 
They will be truncated on the left or right, respectively, + * if necessary to fit the width. Either or both may be NULL in which + * case they will not be used. + * + * premul and postmul are the padding strings to be repeated before + * on the left (if prenum is non-zero) and right (if postnum is non-zero). If + * NULL the first character of IFS (typically but not necessarily a space) + * will be used. + */ + /**/ static char * -dopadding(char *str, int prenum, int postnum, char *preone, char *postone, char *premul, char *postmul) +dopadding(char *str, int prenum, int postnum, char *preone, char *postone, + char *premul, char *postmul) { char *def, *ret, *t, *r; - int ls, ls2, lpreone, lpostone, lpremul, lpostmul, lr, f, m, c, cc; + int ls, ls2, lpreone, lpostone, lpremul, lpostmul, lr, f, m, c, cc, cl; + convchar_t cchar; MB_METACHARINIT(); if (*ifs) @@ -739,89 +761,357 @@ dopadding(char *str, int prenum, int postnum, char *preone, char *postone, char if (!postmul || !*postmul) postmul = def; - ls = strlen(str); - lpreone = preone ? strlen(preone) : 0; - lpostone = postone ? strlen(postone) : 0; - lpremul = strlen(premul); - lpostmul = strlen(postmul); + ls = MB_METASTRWIDTH(str); + lpreone = preone ? MB_METASTRWIDTH(preone) : 0; + lpostone = postone ? MB_METASTRWIDTH(postone) : 0; + lpremul = MB_METASTRWIDTH(premul); + lpostmul = MB_METASTRWIDTH(postmul); - lr = prenum + postnum; - - if (lr == ls) + if (prenum + postnum == ls) return str; + /* + * Try to be careful with allocated lengths. The following + * is a maximum, in case we need the entire repeated string + * for each repetition. We probably don't, but in case the user + * has given us something pathological which doesn't convert + * easily into a width we'd better be safe. + */ + lr = strlen(str) + strlen(premul) * prenum + strlen(postmul) * postnum; + /* + * Same logic for preone and postone, except those may be NULL. 
+ */ + if (preone) + lr += strlen(preone); + if (postone) + lr += strlen(postone); r = ret = (char *)zhalloc(lr + 1); if (prenum) { + /* + * Pad on the left. + */ if (postnum) { + /* + * Pad on both right and left. + * The strategy is to divide the string into two halves. + * The first half is dealt with by the left hand padding + * code, the second by the right hand. + */ ls2 = ls / 2; + /* The width left to pad for the first half. */ f = prenum - ls2; - if (f <= 0) - for (str -= f, c = prenum; c--; *r++ = *str++); - else { - if (f <= lpreone) - for (c = f, t = preone + lpreone - f; c--; *r++ = *t++); - else { + if (f <= 0) { + /* First half doesn't fit. Skip the first -f width. */ + f = -f; + MB_METACHARINIT(); + while (f > 0) { + str += MB_METACHARLENCONV(str, &cchar); + f -= WCWIDTH(cchar); + } + /* Now finish the first half. */ + for (c = prenum; c > 0; ) { + cl = MB_METACHARLENCONV(str, &cchar); + while (cl--) + *r++ = *str++; + c -= WCWIDTH(cchar); + } + } else { + if (f <= lpreone) { + if (preone) { + /* + * The unrepeated string doesn't fit. + */ + MB_METACHARINIT(); + /* The width we need to skip */ + f = lpreone - f; + /* So skip. */ + for (t = preone; f > 0; ) { + t += MB_METACHARLENCONV(t, &cchar); + f -= WCWIDTH(cchar); + } + /* Then copy the entire remainder. */ + while (*t) + *r++ = *t++; + } + } else { f -= lpreone; - if ((m = f % lpremul)) - for (c = m, t = premul + lpremul - m; c--; *r++ = *t++); - for (cc = f / lpremul; cc--;) - for (c = lpremul, t = premul; c--; *r++ = *t++); - for (c = lpreone; c--; *r++ = *preone++); + if ((m = f % lpremul)) { + /* + * Left over fraction of repeated string. + */ + MB_METACHARINIT(); + /* Skip this much. */ + m = lpremul - m; + for (t = premul; m > 0; ) { + t += MB_METACHARLENCONV(t, &cchar); + m -= WCWIDTH(cchar); + } + /* Output the rest. 
*/ + while (*t) + *r++ = *t++; + } + for (cc = f / lpremul; cc--;) { + /* Repeat the repeated string */ + MB_METACHARINIT(); + for (c = lpremul, t = premul; c > 0; ) { + cl = MB_METACHARLENCONV(t, &cchar); + while (cl--) + *r++ = *t++; + c -= WCWIDTH(cchar); + } + } + if (preone) { + /* Output the full unrepeated string */ + while (*preone) + *r++ = *preone++; + } + } + /* Output the first half width of the original string. */ + for (c = ls2; c > 0; ) { + cl = MB_METACHARLENCONV(str, &cchar); + c -= WCWIDTH(cchar); + while (cl--) + *r++ = *str++; } - for (c = ls2; c--; *r++ = *str++); } + /* Other half. In case the string had an odd length... */ ls2 = ls - ls2; + /* Width that needs padding... */ f = postnum - ls2; - if (f <= 0) - for (c = postnum; c--; *r++ = *str++); - else { - for (c = ls2; c--; *r++ = *str++); - if (f <= lpostone) - for (c = f; c--; *r++ = *postone++); - else { - f -= lpostone; - for (c = lpostone; c--; *r++ = *postone++); - for (cc = f / lpostmul; cc--;) - for (c = lpostmul, t = postmul; c--; *r++ = *t++); - if ((m = f % lpostmul)) - for (; m--; *r++ = *postmul++); + if (f <= 0) { + /* ...is negative, truncate original string */ + MB_METACHARINIT(); + for (c = postnum; c > 0; ) { + cl = MB_METACHARLENCONV(str, &cchar); + c -= WCWIDTH(cchar); + while (cl--) + *r++ = *str++; + } + } else { + /* Rest of original string fits, output it complete */ + while (*str) + *r++ = *str++; + if (f <= lpostone) { + if (postone) { + /* Can't fit unrepeated string, truncate it */ + for (c = f; c > 0; ) { + cl = MB_METACHARLENCONV(postone, &cchar); + c -= WCWIDTH(cchar); + while (cl--) + *r++ = *postone++; + } + } + } else { + if (postone) { + f -= lpostone; + /* Output entire unrepeated string */ + while (*postone) + *r++ = *postone++; + } + for (cc = f / lpostmul; cc--;) { + /* Begin the beguine */ + for (t = postmul; *t; ) + *r++ = *t++; + } + if ((m = f % lpostmul)) { + /* Fill leftovers with chunk of repeated string */ + MB_METACHARINIT(); + while (m > 0) { 
+ cl = MB_METACHARLENCONV(postmul, &cchar); + m -= WCWIDTH(cchar); + while (cl--) + *r++ = *postmul++; + } + } } } } else { + /* + * Pad only on the left. + */ f = prenum - ls; - if (f <= 0) - for (c = prenum, str -= f; c--; *r++ = *str++); - else { - if (f <= lpreone) - for (c = f, t = preone + lpreone - f; c--; *r++ = *t++); - else { + if (f <= 0) { + /* + * Original string is at least as wide as padding. + * Truncate original string to width. + * Truncate on left, so skip the characters we + * don't need. + */ + f = -f; + MB_METACHARINIT(); + while (f > 0) { + str += MB_METACHARLENCONV(str, &cchar); + f -= WCWIDTH(cchar); + } + /* Copy the rest of the original string */ + for (c = prenum; c > 0; ) { + cl = MB_METACHARLENCONV(str, &cchar); + while (cl--) + *r++ = *str++; + c -= WCWIDTH(cchar); + } + } else { + /* + * We can fit the entire string... + */ + if (f <= lpreone) { + if (preone) { + /* + * ...with some fraction of the unrepeated string. + */ + /* We need this width of characters. */ + c = f; + /* + * We therefore need to skip this width of + * characters. + */ + f = lpreone - f; + MB_METACHARINIT(); + for (t = preone; f > 0; ) { + t += MB_METACHARLENCONV(t, &cchar); + f -= WCWIDTH(cchar); + } + /* Copy the rest of preone */ + while (*t) + *r++ = *t++; + } + } else { + /* + * We can fit the whole of preone, needing this width + * first + */ f -= lpreone; - if ((m = f % lpremul)) - for (c = m, t = premul + lpremul - m; c--; *r++ = *t++); - for (cc = f / lpremul; cc--;) - for (c = lpremul, t = premul; c--; *r++ = *t++); - for (c = lpreone; c--; *r++ = *preone++); + if ((m = f % lpremul)) { + /* + * Some fraction of the repeated string needed. + */ + /* Need this much... */ + c = m; + /* ...skipping this much first. */ + m = lpremul - m; + MB_METACHARINIT(); + for (t = premul; m > 0; ) { + t += MB_METACHARLENCONV(t, &cchar); + m -= WCWIDTH(cchar); + } + /* Now the rest of the repeated string. 
*/ + while (c > 0) { + cl = MB_METACHARLENCONV(t, &cchar); + while (cl--) + *r++ = *t++; + c -= WCWIDTH(cchar); + } + } + for (cc = f / lpremul; cc--;) { + /* + * Repeat the repeated string. + */ + MB_METACHARINIT(); + for (c = lpremul, t = premul; c > 0; ) { + cl = MB_METACHARLENCONV(t, &cchar); + while (cl--) + *r++ = *t++; + c -= WCWIDTH(cchar); + } + } + if (preone) { + /* + * Now the entire unrepeated string. Don't + * count the width, just dump it. This is + * significant if there are special characters + * in this string. It's sort of a historical + * accident that this worked, but there's nothing + * to stop us just dumping the thing out and assuming + * the user knows what they're doing. + */ + while (*preone) + *r++ = *preone++; + } } - for (c = ls; c--; *r++ = *str++); + /* Now the string being padded */ + while (*str) + *r++ = *str++; } } } else if (postnum) { + /* + * Pad on the right. + */ f = postnum - ls; - if (f <= 0) - for (c = postnum; c--; *r++ = *str++); - else { - for (c = ls; c--; *r++ = *str++); - if (f <= lpostone) - for (c = f; c--; *r++ = *postone++); - else { - f -= lpostone; - for (c = lpostone; c--; *r++ = *postone++); - for (cc = f / lpostmul; cc--;) - for (c = lpostmul, t = postmul; c--; *r++ = *t++); - if ((m = f % lpostmul)) - for (; m--; *r++ = *postmul++); + MB_METACHARINIT(); + if (f <= 0) { + /* + * Original string is at least as wide as padding. + * Truncate original string to width. + */ + for (c = postnum; c > 0; ) { + cl = MB_METACHARLENCONV(str, &cchar); + while (cl--) + *r++ = *str++; + c -= WCWIDTH(cchar); + } + } else { + /* + * There's some space to fill. First copy the original + * string, counting the width. Make sure we copy the + * entire string. + */ + for (c = ls; *str; ) { + cl = MB_METACHARLENCONV(str, &cchar); + while (cl--) + *r++ = *str++; + c -= WCWIDTH(cchar); + } + MB_METACHARINIT(); + if (f <= lpostone) { + if (postone) { + /* + * Not enough or only just enough space to fit + * the unrepeated string. 
Truncate as necessary. + */ + for (c = f; c > 0; ) { + cl = MB_METACHARLENCONV(postone, &cchar); + while (cl--) + *r++ = *postone++; + c -= WCWIDTH(cchar); + } + } + } else { + if (postone) { + f -= lpostone; + /* Copy the entire unrepeated string */ + for (c = lpostone; *postone; ) { + cl = MB_METACHARLENCONV(postone, &cchar); + while (cl--) + *r++ = *postone++; + c -= WCWIDTH(cchar); + } + } + /* Repeat the repeated string */ + for (cc = f / lpostmul; cc--;) { + MB_METACHARINIT(); + for (c = lpostmul, t = postmul; *t; ) { + cl = MB_METACHARLENCONV(t, &cchar); + while (cl--) + *r++ = *t++; + c -= WCWIDTH(cchar); + } + } + /* + * See if there's any fraction of the repeated + * string needed to fill up the remaining space. + */ + if ((m = f % lpostmul)) { + MB_METACHARINIT(); + while (m > 0) { + cl = MB_METACHARLENCONV(postmul, &cchar); + while (cl--) + *r++ = *postmul++; + m -= WCWIDTH(cchar); + } + } } } } @@ -1779,6 +2069,9 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) * by flags. TODO: maybe therefore this would * be more consistent if moved into getstrvalue()? * Bet that's easier said than done. + * + * TODO: use string widths. In fact, shouldn't the + * strlen()s be ztrlen()s anyway? */ val = getstrvalue(v); fwidth = v->pm->width ? 
v->pm->width : (int)strlen(val); diff --git a/Src/utils.c b/Src/utils.c index a72ddfcc5..37017bdc7 100644 --- a/Src/utils.c +++ b/Src/utils.c @@ -524,8 +524,12 @@ wcs_nicechar(wchar_t c, size_t *widthp, char **swidep) return buf; } - if (widthp) - *widthp = (s - buf) + wcwidth(c); + if (widthp) { + int wcw = wcwidth(c); + *widthp = (s - buf); + if (wcw > 0) + *widthp += wcw; + } if (swidep) *swidep = s; for (mbptr = mbstr; ret; s++, mbptr++, ret--) { @@ -539,6 +543,22 @@ wcs_nicechar(wchar_t c, size_t *widthp, char **swidep) *s = 0; return buf; } + +/**/ +mod_export int +zwcwidth(wint_t wc) +{ + int wcw; + /* assume a single-byte character if not valid */ + if (wc == WEOF) + return 1; + wcw = wcwidth(wc); + /* if not printable, assume zero width */ + if (wcw <= 0) + return 0; + return wcw; +} + /**/ #endif /* MULTIBYTE_SUPPORT */ @@ -3953,58 +3973,6 @@ nicedup(const char *s, int heap) return retstr; } -/* - * Return the screen width of a multibyte string. The input - * string is metafied. - */ -/**/ -mod_export int -mb_width(const char *s) -{ - char *ums = ztrdup(s), *umptr; - int umlen, eol = 0; - int width = 0; - mbstate_t mbs; - - memset(&mbs, 0, sizeof mbs); - umptr = unmetafy(ums, &umlen); - /* - * Convert one wide character at a time. We could convet - * the entire string using mbsrtowcs(), but that terminates on - * a NUL and we might have embedded NULs. - */ - while (umlen > 0) { - int wret; - wchar_t cc; - size_t cnt = eol ? MB_INVALID : mbrtowc(&cc, umptr, umlen, &mbs); - - switch (cnt) { - case MB_INCOMPLETE: - eol = 1; - /* FALL THROUGH */ - case MB_INVALID: - memset(&mbs, 0, sizeof mbs); - /* FALL THROUGH */ - case 0: - /* Assume a single-width character. 
*/ - width++; - cnt = 1; - break; - default: - wret = wcwidth(cc); - if (wret > 0) - width += wret; - break; - } - - umlen -= cnt; - umptr += cnt; - } - - free(ums); - - return width; -} /* * Length of metafied string s which contains the next multibyte @@ -4107,9 +4075,15 @@ mb_metastrlen(char *ptr, int width) memset(&mb_shiftstate, 0, sizeof(mb_shiftstate)); ptr = laststart + (*laststart == Meta) + 1; num++; - } else if (width) - num += wcwidth(wc); - else + } else if (width) { + /* + * Returns -1 if not a printable character; best + * just to ignore these. + */ + int wcw = wcwidth(wc); + if (wcw > 0) + num += wcw; + } else num++; laststart = ptr; num_in_char = 0; diff --git a/Src/zsh.h b/Src/zsh.h index 3cb006cbf..27bb96493 100644 --- a/Src/zsh.h +++ b/Src/zsh.h @@ -2012,7 +2012,7 @@ typedef wint_t convchar_t; * It's written to use the wint_t from mb_metacharlenconv() without * further tests. */ -#define WCWIDTH(wc) ((wc == WEOF) ? 1 : wcwidth(wc)) +#define WCWIDTH(wc) zwcwidth(wc) #define MB_INCOMPLETE ((size_t)-2) #define MB_INVALID ((size_t)-1) |