From bd678526b3360851468555fca8bb5d53fef11218 Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Sat, 17 Sep 2005 21:39:07 +0000 Subject: 21731: handle multibyte characters in prompts correctly --- Src/Zle/zle_refresh.c | 49 +++++++++++++++----- Src/prompt.c | 121 +++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 133 insertions(+), 37 deletions(-) (limited to 'Src') diff --git a/Src/Zle/zle_refresh.c b/Src/Zle/zle_refresh.c index ce90be845..a8c47e843 100644 --- a/Src/Zle/zle_refresh.c +++ b/Src/Zle/zle_refresh.c @@ -30,7 +30,13 @@ #include "zle.mdh" #include "zle_refresh.pro" -/* Expanded prompts */ +/* + * Expanded prompts. + * + * These are always output from the start, except in the special + * case where we are sure each character in the prompt corresponds + * to a character on screen. + */ /**/ char *lpromptbuf, *rpromptbuf; @@ -202,7 +208,9 @@ resetvideo(void) } } - /* TODO currently zsh core is not using widechars */ + /* + * countprompt() now correctly handles multibyte input. + */ countprompt(lpromptbuf, &lpromptwof, &lprompth, 1); countprompt(rpromptbuf, &rpromptw, &rprompth, 0); if (lpromptwof != winw) @@ -312,7 +320,11 @@ static int cleareol, /* clear to end-of-line (if can't cleareod) */ oxtabs, /* oxtabs - tabs expand to spaces if set */ numscrolls, onumscrolls; -/* TODO currently it assumes sceenwidth 1 for every character */ +/* + * TODO currently it assumes screen width 1 for every character + * (except for characters in the prompt which are correctly handled + * by wcwidth()).
+ */ /**/ mod_export void zrefresh(void) @@ -449,7 +461,7 @@ zrefresh(void) if (termflags & TERM_SHORT) vcs = 0; else if (!clearflag && lpromptbuf[0]) { - zputs(lpromptbuf, shout); /* TODO convert to wide characters */ + zputs(lpromptbuf, shout); if (lpromptwof == winw) zputs("\n", shout); /* works with both hasam and !hasam */ } else { @@ -622,7 +634,6 @@ zrefresh(void) if (trashedzle && opts[TRANSIENTRPROMPT]) put_rpmpt = 0; else - /* TODO (r)promptbuf will be widechar */ put_rpmpt = rprompth == 1 && rpromptbuf[0] && !strchr(rpromptbuf, '\t') && (int)ZS_strlen(nbuf[0]) + rpromptw < winw - 1; @@ -677,7 +688,6 @@ zrefresh(void) /* output the right-prompt if appropriate */ if (put_rpmpt && !ln && !oput_rpmpt) { moveto(0, winw - 1 - rpromptw); - /* TODO it will be wide char at some point */ zputs(rpromptbuf, shout); vcs = winw - 1; /* reset character attributes to that set by the main prompt */ @@ -1114,11 +1124,28 @@ tc_rightcurs(int ct) /* otherwise _carefully_ write the contents of the video buffer. if we're anywhere in the prompt, goto the left column and write the whole - prompt out unless ztrlen(lpromptbuf) == lpromptw : we can cheat then */ + prompt out. + + If strlen(lpromptbuf) == lpromptw, we can cheat and output + the appropriate chunk of the string. This test relies on the + fact that any funny business will always make the length of + the string larger than the printing width, so if they're the same + we have only ASCII characters or a single-byte extension of ASCII. + Unfortunately this trick won't work if there are potentially + characters occupying more than one column. We could flag that + this has happened (since it's not that common to have characters + wider than one column), but for now it's easier not to use the + trick if we are using wcwidth() on the prompt. It's not that + common to be editing in the middle of the prompt anyway, I would + think. 
+ */ if (vln == 0 && i < lpromptw && !(termflags & TERM_SHORT)) { +#ifndef ZLE_UNICODE_SUPPORT if ((int)strlen(lpromptbuf) == lpromptw) fputs(lpromptbuf + i, shout); - else if (tccan(TCRIGHT) && (tclen[TCRIGHT] * ct <= ztrlen(lpromptbuf))) + else +#endif + if (tccan(TCRIGHT) && (tclen[TCRIGHT] * ct <= ztrlen(lpromptbuf))) /* it is cheaper to send TCRIGHT than reprint the whole prompt */ for (ct = lpromptw - i; ct--; ) tcout(TCRIGHT); @@ -1126,7 +1153,7 @@ tc_rightcurs(int ct) if (i != 0) zputc('\r'); tc_upcurs(lprompth - 1); - zputs(lpromptbuf, shout); /* TODO wide character */ + zputs(lpromptbuf, shout); if (lpromptwof == winw) zputs("\n", shout); /* works with both hasam and !hasam */ } @@ -1238,9 +1265,6 @@ singlerefresh(ZLE_STRING_T tmpline, int tmpll, int tmpcs) /* * Convert the entire lprompt so that we know how to count * characters. - * - * TODO screen widths are still not correct, indeed lpromptw knows - * nothing about multibyte characters so may be too long. */ lpend = strchr(lpromptbuf, 0); /* Worst case number of characters, not null-terminated */ @@ -1258,6 +1282,7 @@ singlerefresh(ZLE_STRING_T tmpline, int tmpll, int tmpcs) /* dunno, try to recover */ lpptr++; *lpwp++ = ZWC('?'); + memset(&ps, '\0', sizeof(ps)); } } if (lpwp - lpwbuf < lpromptw) { diff --git a/Src/prompt.c b/Src/prompt.c index fd2977ab6..27a07c574 100644 --- a/Src/prompt.c +++ b/Src/prompt.c @@ -804,10 +804,15 @@ putstr(int d) return 0; } -/* Count height etc. of a prompt string returned by promptexpand(). * - * This depends on the current terminal width, and tabs and * - * newlines require nontrivial processing. * - * Passing `overf' as -1 means to ignore columns (absolute width). */ +/* + * Count height etc. of a prompt string returned by promptexpand(). + * This depends on the current terminal width, and tabs and + * newlines require nontrivial processing. + * Passing `overf' as -1 means to ignore columns (absolute width). 
+ * + * If multibyte is enabled, take account of multibyte characters + * by locating them and finding out their screen width. + */ /**/ mod_export void @@ -815,29 +820,92 @@ countprompt(char *str, int *wp, int *hp, int overf) { int w = 0, h = 1; int s = 1; - for(; *str; str++) { - if(w >= columns && overf >= 0) { +#ifdef ZLE_UNICODE_SUPPORT + int mbret, wcw, multi = 0; + char inchar; + mbstate_t mbs; + wchar_t wc; + + memset(&mbs, 0, sizeof(mbs)); +#endif + + for (; *str; str++) { + if (w >= columns && overf >= 0) { w = 0; h++; } - if(*str == Meta) - str++; - if(*str == Inpar) + /* + * Input string should be metafied, so tokens in it should + * be real tokens, even if there are multibyte characters. + */ + if (*str == Inpar) s = 0; - else if(*str == Outpar) + else if (*str == Outpar) s = 1; - else if(*str == Nularg) + else if (*str == Nularg) + w++; + else if (s) { + if (*str == Meta) { +#ifdef ZLE_UNICODE_SUPPORT + inchar = *++str ^ 32; +#else + str++; +#endif + } else { +#ifdef ZLE_UNICODE_SUPPORT + /* + * Don't look for tab or newline in the middle + * of a multibyte character. Otherwise, we are + * relying on the character set being an extension + * of ASCII so it's safe to test a single byte. + */ + if (multi) { +#endif + if (*str == '\t') { + w = (w | 7) + 1; + continue; + } else if (*str == '\n') { + w = 0; + h++; + continue; + } +#ifdef ZLE_UNICODE_SUPPORT + } + + inchar = *str; +#endif + } + +#ifdef ZLE_UNICODE_SUPPORT + mbret = mbrtowc(&wc, &inchar, 1, &mbs); + if (mbret >= -1) { + if (mbret > 0) { + /* + * If the character isn't printable, this returns -1. + */ + wcw = wcwidth(wc); + if (wcw > 0) + w += wcw; + } + /* + * else invalid character or possibly null: assume no + * output + */ + multi = 0; + } else { + /* else character is incomplete, keep looking. 
*/ + multi = 1; + } +#else w++; - else if(s) { - if(*str == '\t') - w = (w | 7) + 1; - else if(*str == '\n') { - w = 0; - h++; - } else - w++; +#endif } } + /* + * multi may still be set if we were in the middle of the character. + * This isn't easy to handle generally; just assume there's no + * output. + */ if(w >= columns && overf >= 0) { if (!overf || w > columns) { w = 0; @@ -901,12 +969,15 @@ prompttrunc(int arg, int truncchar, int doprint, int endchar) countprompt(ptr, &w, 0, -1); if (w > trunclen) { /* - * We need to truncate. t points to the truncation string -- * - * which is inserted literally, without nice representation. * - * tlen is its length, and maxlen is the amount of the main * - * string that we want to keep. Note that if the truncation * - * string is longer than the truncation length (tlen > * - * trunclen), the truncation string is used in full. * + * We need to truncate. t points to the truncation string -- + * which is inserted literally, without nice representation. + * tlen is its length, and maxlen is the amount of the main + * string that we want to keep. Note that if the truncation + * string is longer than the truncation length (tlen > + * trunclen), the truncation string is used in full. + * + * TODO: we don't take account of multibyte characters + * in the string we're truncating. */ char *t = truncstr; int fullen = bp - ptr; -- cgit 1.4.1