From de815e3d22be721c09d6f0bc1896207cc3e3827b Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Tue, 9 Jan 2007 21:45:45 +0000 Subject: 23098: printf multibyte character widths --- ChangeLog | 5 ++++ Src/builtin.c | 78 ++++++++++++++++++++++++++++++++++++++++++-------- Test/D07multibyte.ztst | 4 +++ 3 files changed, 75 insertions(+), 12 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3e5c5d4d2..850059322 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2007-01-09 Peter Stephenson + + * 23098: Src/builtin.c, Test/D07multibyte.ztst: print widths + in printf take account of multibyte characters. + 2007-01-08 Peter Stephenson * 23097: Src/lex.c, Src/utils.c, Src/zsh.h, Src/Zle/compcore.c: diff --git a/Src/builtin.c b/Src/builtin.c index 8e579d24e..260ba603b 100644 --- a/Src/builtin.c +++ b/Src/builtin.c @@ -3792,6 +3792,12 @@ bin_print(char *name, char **args, Options ops, int func) return ret; } + /* + * All the remaining code in this function is for printf-style + * output (printf itself, or print -f). We still have to handle + * special cases of printing to a ZLE buffer or the history, however. + */ + if (OPT_ISSET(ops,'z') || OPT_ISSET(ops,'s')) { #ifdef HAVE_OPEN_MEMSTREAM if ((fout = open_memstream(&buf, &mcount)) == NULL) @@ -3948,26 +3954,74 @@ bin_print(char *name, char **args, Options ops, int func) case 's': case 'b': if (curarg) { - char *b; - int l; + char *b, *ptr; + int lbytes, lchars, lleft; +#ifdef MULTIBYTE_SUPPORT + mbstate_t mbs; +#endif + if (*c == 'b') { b = getkeystring(metafy(curarg, curlen, META_USEHEAP), - &l, + &lbytes, OPT_ISSET(ops,'b') ? GETKEYS_BINDKEY : GETKEYS_PRINTF_ARG, &nnl); } else { b = curarg; - l = curlen; + lbytes = curlen; + } + /* + * Handle width/precision here and use fwrite so that + * nul characters can be output. + * + * First, examine width of string given that it + * may contain multibyte characters. The output + * widths are for characters, so we need to count + * (in lchars). However, if we need to truncate + * the string we need the width in bytes (in lbytes). + */ + ptr = b; +#ifdef MULTIBYTE_SUPPORT + memset(&mbs, 0, sizeof(mbs)); +#endif + + for (lchars = 0, lleft = lbytes; lleft > 0; lchars++) { + int chars; + + if (lchars == prec) { + /* Truncate at this point. */ + lbytes = ptr - b; + break; + } +#ifdef MULTIBYTE_SUPPORT + if (isset(MULTIBYTE)) { + chars = mbrlen(ptr, lleft, &mbs); + if (chars < 0) { + /* + * Invalid/incomplete character at this + * point. Assume all the rest are a + * single byte. That's about the best we + * can do. + */ + lchars += lleft; + lbytes = (ptr - b) + lleft; + break; + } else if (chars == 0) { + /* NUL, handle as real character */ + chars = 1; + } + } + else /* use the non-multibyte code below */ +#endif + chars = 1; /* compiler can optimise this...*/ + lleft -= chars; + ptr += chars; } - /* handle width/precision here and use fwrite so that - * nul characters can be output */ - if (prec >= 0 && prec < l) l = prec; if (width > 0 && flags[2]) width = -width; - if (width > 0 && l < width) - count += fprintf(fout, "%*c", width - l, ' '); - count += fwrite(b, 1, l, fout); - if (width < 0 && l < -width) - count += fprintf(fout, "%*c", -width - l, ' '); + if (width > 0 && lchars < width) + count += fprintf(fout, "%*c", width - lchars, ' '); + count += fwrite(b, 1, lbytes, fout); + if (width < 0 && lchars < -width) + count += fprintf(fout, "%*c", -width - lchars, ' '); if (nnl) { /* If the %b arg had a \c escape, truncate the fmt. */ flen = c - fmt + 1; diff --git a/Test/D07multibyte.ztst b/Test/D07multibyte.ztst index 828a5c573..ecac737a1 100644 --- a/Test/D07multibyte.ztst +++ b/Test/D07multibyte.ztst @@ -311,3 +311,7 @@ 0:Delimiters in parameter flags >barXX >YYYYYHIbar + + printf "%4.3s\n" főobar +0:Multibyte characters in printf widths +> főo -- cgit 1.4.1