From b16923b096b7678bbaa3cc28b216049d12563528 Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Mon, 22 Mar 2010 19:46:53 +0000 Subject: 27812: display invalid bytes in multibyte characters specially --- ChangeLog | 9 ++++++++- Doc/Zsh/zle.yo | 14 ++++++++++++++ Src/Zle/zle.h | 14 ++++++++++++++ Src/Zle/zle_refresh.c | 12 +++++++++++- Src/Zle/zle_utils.c | 47 +++++++++++++++++++++++++++++++++++------------ 5 files changed, 82 insertions(+), 14 deletions(-) diff --git a/ChangeLog b/ChangeLog index 222789bc2..a3f194dee 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2010-03-22 Peter Stephenson + + * 27812: Doc/Zsh/zle.yo, Src/Zle/zle.h, Src/Zle/zle_refresh.c, + Src/Zle/zle_utils.c: when wchar_t contains Unicode code points, + use private area to put bytes that don't form characters for + special display. + 2010-03-22 Peter Stephenson * 27822: Src/hist.c, Src/lex.c, Src/zle_params.c, @@ -12949,5 +12956,5 @@ ***************************************************** * This is used by the shell to define $ZSH_PATCHLEVEL -* $Revision: 1.4941 $ +* $Revision: 1.4942 $ ***************************************************** diff --git a/Doc/Zsh/zle.yo b/Doc/Zsh/zle.yo index 91c13a563..0e2fea5bd 100644 --- a/Doc/Zsh/zle.yo +++ b/Doc/Zsh/zle.yo @@ -2286,6 +2286,20 @@ angle brackets. The number is the code point of the character in the wide character set; this may or may not be Unicode, depending on the operating system. ) +item(Invalid multibyte characters)( +If the tt(MULTIBYTE) option is in effect, any sequence of one or more +bytes that does not form a valid character in the current character +set is treated as a series of bytes each shown as a special character. +This case can be distinguished from other unprintable characters +as the bytes are represented as two hexadecimal digits between angle +brackets, as distinct from the four or eight digits that are used for +unprintable characters that are nonetheless valid in the current +character set. 
+ +Not all systems support this: for it to work, the system's representation of +wide characters must be code values from the Universal Character Set, +as defined by ISO 10646 (also known as Unicode). +) enditem() If tt(zle_highlight) is not set or no value applies to a particular diff --git a/Src/Zle/zle.h b/Src/Zle/zle.h index 577a4442f..32f3e59f6 100644 --- a/Src/Zle/zle.h +++ b/Src/Zle/zle.h @@ -419,6 +419,20 @@ typedef struct { typedef REFRESH_ELEMENT *REFRESH_STRING; +#if defined(MULTIBYTE_SUPPORT) && defined(__STDC_ISO_10646__) +#define ZSH_INVALID_WCHAR_BASE (0xe000U) +#define ZSH_INVALID_WCHAR_TEST(x) \ + ((unsigned)(x) >= ZSH_INVALID_WCHAR_BASE && \ + (unsigned)(x) <= (ZSH_INVALID_WCHAR_BASE + 255u)) +#define ZSH_INVALID_WCHAR_TO_CHAR(x) \ + ((char)((unsigned)(x) - ZSH_INVALID_WCHAR_BASE)) +#define ZSH_INVALID_WCHAR_TO_INT(x) \ + ((int)((unsigned)(x) - ZSH_INVALID_WCHAR_BASE)) +#define ZSH_CHAR_TO_INVALID_WCHAR(x) \ + ((wchar_t)(STOUC(x) + ZSH_INVALID_WCHAR_BASE)) +#endif + + #ifdef DEBUG #define METACHECK() \ DPUTS(zlemetaline == NULL, "line not metafied") diff --git a/Src/Zle/zle_refresh.c b/Src/Zle/zle_refresh.c index 8604317f3..352dcf0d6 100644 --- a/Src/Zle/zle_refresh.c +++ b/Src/Zle/zle_refresh.c @@ -1263,7 +1263,11 @@ zrefresh(void) } } #ifdef MULTIBYTE_SUPPORT - else if (iswprint(*t) && (width = WCWIDTH(*t)) > 0) { + else if ( +#ifdef __STDC_ISO_10646__ + !ZSH_INVALID_WCHAR_TEST(*t) && +#endif + iswprint(*t) && (width = WCWIDTH(*t)) > 0) { int ichars; if (width > rpms.sen - rpms.s) { int started = 0; @@ -1367,6 +1371,12 @@ zrefresh(void) wchar_t wc; int started = 0; +#ifdef __STDC_ISO_10646__ + if (ZSH_INVALID_WCHAR_TEST(*t)) { + int c = ZSH_INVALID_WCHAR_TO_INT(*t); + sprintf(dispchars, "<%.02x>", c); + } else +#endif if ((unsigned)*t > 0xffffU) { sprintf(dispchars, "<%.08x>", (unsigned)*t); } else { diff --git a/Src/Zle/zle_utils.c b/Src/Zle/zle_utils.c index 2b2da7dcd..cc84eb8bb 100644 --- a/Src/Zle/zle_utils.c +++ b/Src/Zle/zle_utils.c @@ 
-120,11 +120,19 @@ zlecharasstring(ZLE_CHAR_T inchar, char *buf) size_t ret; char *ptr; - ret = wctomb(buf, inchar); - if (ret <= 0) { - /* Ick. */ - buf[0] = '?'; - return 1; +#ifdef __STDC_ISO_10646__ + if (ZSH_INVALID_WCHAR_TEST(inchar)) { + buf[0] = ZSH_INVALID_WCHAR_TO_CHAR(inchar); + ret = 1; + } else +#endif + { + ret = wctomb(buf, inchar); + if (ret <= 0) { + /* Ick. */ + buf[0] = '?'; + return 1; + } } ptr = buf + ret - 1; for (;;) { @@ -196,13 +204,20 @@ zlelineasstring(ZLE_STRING_T instr, int inll, int incs, int *outllp, for (i=0; i < inll; i++, incs--) { if (incs == 0) outcs = mb_len; - j = wcrtomb(s + mb_len, instr[i], &mbs); - if (j == -1) { - /* invalid char; what to do? */ - s[mb_len++] = ZWC('?'); - memset(&mbs, 0, sizeof(mbs)); - } else { - mb_len += j; +#ifdef __STDC_ISO_10646__ + if (ZSH_INVALID_WCHAR_TEST(instr[i])) { + s[mb_len++] = ZSH_INVALID_WCHAR_TO_CHAR(instr[i]); + } else +#endif + { + j = wcrtomb(s + mb_len, instr[i], &mbs); + if (j == -1) { + /* invalid char */ + s[mb_len++] = ZWC('?'); + memset(&mbs, 0, sizeof(mbs)); + } else { + mb_len += j; + } } } if (incs == 0) @@ -332,6 +347,13 @@ stringaszleline(char *instr, int incs, int *outll, int *outsz, int *outcs) while (ll > 0) { size_t cnt = mbrtowc(outptr, inptr, ll, &mbs); +#ifdef __STDC_ISO_10646__ + if (cnt == MB_INCOMPLETE || cnt == MB_INVALID) { + /* Use private encoding for invalid single byte */ + *outptr = ZSH_CHAR_TO_INVALID_WCHAR(*inptr); + cnt = 1; + } +#else /* * At this point we don't handle either incomplete (-2) or * invalid (-1) multibyte sequences. Use the current length @@ -339,6 +361,7 @@ stringaszleline(char *instr, int incs, int *outll, int *outsz, int *outcs) */ if (cnt == MB_INCOMPLETE || cnt == MB_INVALID) break; +#endif if (cnt == 0) { /* Converting '\0' returns 0, but a '\0' is a real -- cgit 1.4.1