diff options
author | Peter Stephenson <pws@users.sourceforge.net> | 2005-09-20 15:10:26 +0000 |
---|---|---|
committer | Peter Stephenson <pws@users.sourceforge.net> | 2005-09-20 15:10:26 +0000 |
commit | 409296e22fb1cef515ccfff507c265a5fee0ab28 (patch) | |
tree | 7f15b9e7e175dd6302bf4c434886b263ceac84d8 /Src | |
parent | ce43e4a22c250fbaee14580b167986615455f78f (diff) | |
download | zsh-409296e22fb1cef515ccfff507c265a5fee0ab28.tar.gz zsh-409296e22fb1cef515ccfff507c265a5fee0ab28.tar.xz zsh-409296e22fb1cef515ccfff507c265a5fee0ab28.zip |
21736: improve tests for word and identifier characters with multibyte input
Diffstat (limited to 'Src')
-rw-r--r-- | Src/Zle/zle.h | 7 | ||||
-rw-r--r-- | Src/Zle/zle_main.c | 19 | ||||
-rw-r--r-- | Src/init.c | 3 | ||||
-rw-r--r-- | Src/params.c | 1 | ||||
-rw-r--r-- | Src/pattern.c | 4 | ||||
-rw-r--r-- | Src/utils.c | 98 |
6 files changed, 97 insertions, 35 deletions
diff --git a/Src/Zle/zle.h b/Src/Zle/zle.h index 4b3f3f75a..fbfc02265 100644 --- a/Src/Zle/zle.h +++ b/Src/Zle/zle.h @@ -66,12 +66,7 @@ typedef wint_t ZLE_INT_T; #define ZC_iblank iswspace #define ZC_icntrl iswcntrl -/* - * TODO: doesn't work on arguments with side effects. - * Also YUK. Not even sure this is guaranteed to work. - * Should be easy to do along the lines of wcsiword. - */ -#define ZC_iident(x) (x < 256 && iident((int)x)) +#define ZC_iident wcsiident #define ZC_tolower towlower #define ZC_toupper towupper diff --git a/Src/Zle/zle_main.c b/Src/Zle/zle_main.c index 1b62ff027..923145710 100644 --- a/Src/Zle/zle_main.c +++ b/Src/Zle/zle_main.c @@ -106,11 +106,6 @@ mod_export ZLE_INT_T lastchar_wide; /**/ mod_export int lastchar_wide_valid; - -/**/ -mod_export ZLE_STRING_T zle_wordchars; -#else -# define zle_wordchars wordchars; #endif /* the bindings for the previous and for this key */ @@ -1558,17 +1553,6 @@ trashzle(void) kungetct = 0; } -/**/ -mod_export void -wordcharstrigger(void) -{ -#ifdef ZLE_UNICODE_SUPPORT - zrealloc(zle_wordchars, strlen(wordchars)*MB_CUR_MAX); - mbsrtowcs(zle_wordchars, (const char **)&wordchars, - strlen(wordchars), NULL); - /* TODO: error handling here */ -#endif -} /* Hook functions. Used to allow access to zle parameters if zle is * active. */ @@ -1636,8 +1620,6 @@ setup_(UNUSED(Module m)) kungetbuf = (char *) zalloc(kungetsz = 32); comprecursive = 0; rdstrs = NULL; - wordcharstriggerptr = wordcharstrigger; - wordcharstrigger(); /* initialise the keymap system */ init_keymaps(); @@ -1712,7 +1694,6 @@ finish_(UNUSED(Module m)) zlegetlineptr = NULL; zlereadptr = fallback_zleread; zlesetkeymapptr= noop_function_int; - wordcharstriggerptr = noop_function; getkeyptr = NULL; diff --git a/Src/init.c b/Src/init.c index de6d4efcb..716898e28 100644 --- a/Src/init.c +++ b/Src/init.c @@ -1180,9 +1180,6 @@ mod_export ZleVoidIntFn zlesetkeymapptr = noop_function_int; #endif /* !LINKED_XMOD_zshQszle */ /**/ -mod_export ZleVoidFn wordcharstriggerptr = noop_function; - -/**/ unsigned char * autoload_zleread(char **lp, char **rp, int ha, int con) { diff --git a/Src/params.c b/Src/params.c index 89d25afee..218744000 100644 --- a/Src/params.c +++ b/Src/params.c @@ -3346,7 +3346,6 @@ wordcharssetfn(UNUSED(Param pm), char *x) zsfree(wordchars); wordchars = x; inittyptab(); - wordcharstriggerptr(); } /* Function to get value for special parameter `_' */ diff --git a/Src/pattern.c b/Src/pattern.c index 393d9bf41..36578226c 100644 --- a/Src/pattern.c +++ b/Src/pattern.c @@ -2749,6 +2749,10 @@ patmatchrange(char *range, int ch) return 1; break; case PP_WORD: + /* + * HERE: when we support multibyte characters, + * this test needs to be wcsiword(). + */ if (iword(ch)) return 1; break; diff --git a/Src/utils.c b/Src/utils.c index 71af531c3..dce10beee 100644 --- a/Src/utils.c +++ b/Src/utils.c @@ -35,6 +35,16 @@ /**/ char *scriptname; +#ifdef ZLE_UNICODE_SUPPORT +/* + * The wordchars variable turned into a wide character array. + * This is much more convenient for testing. + */ + +/**/ +mod_export wchar_t *wordchars_wide; +#endif + /* Print an error */ /**/ @@ -2456,8 +2466,18 @@ inittyptab(void) typtab[t0] = IDIGIT | IALNUM | IWORD | IIDENT | IUSER; for (t0 = 'a'; t0 <= 'z'; t0++) typtab[t0] = typtab[t0 - 'a' + 'A'] = IALPHA | IALNUM | IIDENT | IUSER | IWORD; +#ifndef ZLE_UNICODE_SUPPORT + /* + * This really doesn't seem to me the right thing to do when + * we have multibyte character support... it was a hack to assume + * eight bit characters `worked' for some values of work before + * we could test for them properly. I'm not 100% convinced + * having IIDENT here is a good idea at all, but this code + * should disappear into history... + */ for (t0 = 0240; t0 != 0400; t0++) typtab[t0] = IALPHA | IALNUM | IIDENT | IUSER | IWORD; +#endif typtab['_'] = IIDENT | IUSER; typtab['-'] = IUSER; typtab[' '] |= IBLANK | INBLANK; @@ -2477,8 +2497,44 @@ inittyptab(void) } typtab[STOUC(*s == Meta ? *++s ^ 32 : *s)] |= ISEP; } - for (s = wordchars ? wordchars : DEFAULT_WORDCHARS; *s; s++) - typtab[STOUC(*s == Meta ? *++s ^ 32 : *s)] |= IWORD; +#ifdef ZLE_UNICODE_SUPPORT + if (wordchars) { + const char *wordchars_ptr = wordchars; + mbstate_t mbs; + int nchars; + + memset(&mbs, 0, sizeof(mbs)); + wordchars_wide = (wchar_t *) + zrealloc(wordchars_wide, (strlen(wordchars)+1)*sizeof(wchar_t)); + nchars = mbsrtowcs(wordchars_wide, &wordchars_ptr, strlen(wordchars), + &mbs); + if (nchars == -1) { + /* Conversion state is undefined: better just set to null */ + *wordchars_wide = L'\0'; + } else { + wordchars_wide[nchars] = L'\0'; + } + } else { + wordchars_wide = zrealloc(wordchars_wide, sizeof(wchar_t)); + *wordchars_wide = L'\0'; + } +#endif + for (s = wordchars ? wordchars : DEFAULT_WORDCHARS; *s; s++) { + int c = STOUC(*s == Meta ? *++s ^ 32 : *s); +#ifdef ZLE_UNICODE_SUPPORT + if (!isascii(c)) { + /* + * If we have support for multibyte characters, we don't + * handle non-ASCII characters here; instead, we turn + * wordchars into a wide character array. + * (We may actually have a single-byte 8-bit character set, + * but it works the same way.) + */ + continue; + } +#endif + typtab[c] |= IWORD; + } for (s = SPECCHARS; *s; s++) typtab[STOUC(*s)] |= ISPECIAL; if (isset(BANGHIST) && bangchar && interact && isset(SHINSTDIN)) @@ -2503,9 +2559,6 @@ wcsiword(wchar_t c) * produces an ASCII character. If it does, use iword on that. * If it doesn't, use iswalnum on the original character. This * is pretty good most of the time. - * - * TODO: extend WORDCHARS to handle multibyte chars by some kind - * of hierarchical list or hash table. */ len = wctomb(outstr, c); @@ -2515,7 +2568,40 @@ wcsiword(wchar_t c) } else if (len == 1 && isascii(*outstr)) { return iword(*outstr); } else { - return iswalnum(c); + return iswalnum(c) || wcschr(wordchars_wide, c); + } +} + +/* + * iident() macro extended to support wide characters. + * + * The macro is intended to test if a character is allowed in an + * internal zsh identifier. Until the main shell handles multibyte + * characters it's not a good idea to allow characters other than + * ASCII characters; it would cause zle to allow characters that + * the main shell would reject. Eventually we should be able + * to allow all alphanumerics. + * + * Otherwise similar to wcsiword. + */ + +/**/ +mod_export int +wcsiident(wchar_t c) +{ + int len; + VARARR(char, outstr, MB_CUR_MAX); + + len = wctomb(outstr, c); + + if (len == 0) { + /* NULL is special */ + return 0; + } else if (len == 1 && isascii(*outstr)) { + return iword(*outstr); + } else { + /* not currently allowed, see above */ + return 0; } } #endif |