diff options
author | Peter Stephenson <pws@users.sourceforge.net> | 2005-02-18 13:57:25 +0000 |
---|---|---|
committer | Peter Stephenson <pws@users.sourceforge.net> | 2005-02-18 13:57:25 +0000 |
commit | d7c13fb2c3b1b014acde9c1cb17a1e34239b9751 (patch) | |
tree | 7b31d7d08233e1cc1b1ab46af1ac44b25ed4f2c1 /Src/Zle/zle_main.c | |
parent | 294ef9e87237bf1dc12b17a26bc4b22aa5604282 (diff) | |
download | zsh-d7c13fb2c3b1b014acde9c1cb17a1e34239b9751.tar.gz zsh-d7c13fb2c3b1b014acde9c1cb17a1e34239b9751.tar.xz zsh-d7c13fb2c3b1b014acde9c1cb17a1e34239b9751.zip |
20822: Initial code for Unicode/multibyte input
20823: Debugging test in stat wrong for 64-bit systems
Diffstat (limited to 'Src/Zle/zle_main.c')
-rw-r--r-- | Src/Zle/zle_main.c | 151 |
1 files changed, 134 insertions, 17 deletions
diff --git a/Src/Zle/zle_main.c b/Src/Zle/zle_main.c index d25376862..1f7b7cbfa 100644 --- a/Src/Zle/zle_main.c +++ b/Src/Zle/zle_main.c @@ -78,10 +78,30 @@ int done; /**/ int mark; -/* last character pressed */ +/* + * Last character pressed. + * + * Depending on how far we are with processing, the last character may + * be a single byte read (lastchar_wide_valid is 0, lastchar_wide is not + * valid) or a full wide character. This is needed because we can't be + * sure whether the user is typing old \M-style commands or multibyte + * input. + * + * Calling getfullchar or getrestchar is guaranteed to ensure we have + * a valid wide character (although this may be WEOF). In many states + * we know this and don't need to test lastchar_wide_valid. + */ /**/ -mod_export int lastchar; +mod_export int +lastchar; +#ifdef ZLE_UNICODE_SUPPORT +/**/ +mod_export ZLE_INT_T lastchar_wide; +/**/ +mod_export int +lastchar_wide_valid; +#endif /* the bindings for the previous and for this key */ @@ -148,7 +168,7 @@ mod_export struct modifier zmod; /**/ int prefixflag; -/* Number of characters waiting to be read by the ungetkeys mechanism */ +/* Number of characters waiting to be read by the ungetbytes mechanism */ /**/ int kungetct; @@ -196,7 +216,7 @@ zsetterm(void) * we can't set up the terminal for zle *at all* until * we are sure there is no more typeahead to come. So * if there is typeahead, we set the flag delayzsetterm. - * Then getkey() performs another FIONREAD call; if that is + * Then getbyte() performs another FIONREAD call; if that is * 0, we have finally used up all the typeahead, and it is * safe to alter the terminal, which we do at that point. */ @@ -266,7 +286,7 @@ zsetterm(void) ti.tio.c_cc[VMIN] = 1; ti.tio.c_cc[VTIME] = 0; ti.tio.c_iflag |= (INLCR | ICRNL); - /* this line exchanges \n and \r; it's changed back in getkey + /* this line exchanges \n and \r; it's changed back in getbyte so that the net effect is no change at all inside the shell. 
This double swap is to allow typeahead in common cases, eg. @@ -275,12 +295,12 @@ zsetterm(void) echo foo<return> <--- typed before sleep returns The shell sees \n instead of \r, since it was changed by the kernel - while zsh wasn't looking. Then in getkey() \n is changed back to \r, + while zsh wasn't looking. Then in getbyte() \n is changed back to \r, and it sees "echo foo<accept line>", as expected. Without the double swap the shell would see "echo foo\n", which is translated to "echo fooecho foo<accept line>" because of the binding. Note that if you type <line-feed> during the sleep the shell just sees - \n, which is translated to \r in getkey(), and you just get another + \n, which is translated to \r in getbyte(), and you just get another prompt. For type-ahead to work in ALL cases you have to use stty inlcr. @@ -321,9 +341,16 @@ zsetterm(void) static char *kungetbuf; static int kungetsz; +/* + * Note on ungetbyte and ungetbytes for the confused (pws): + * these are low level and deal with bytes before they + * have been converted into (possibly wide) characters. + * Hence the names. + */ + /**/ void -ungetkey(int ch) +ungetbyte(int ch) { if (kungetct == kungetsz) kungetbuf = realloc(kungetbuf, kungetsz *= 2); @@ -332,11 +359,11 @@ ungetkey(int ch) /**/ void -ungetkeys(char *s, int len) +ungetbytes(char *s, int len) { s += len; while (len--) - ungetkey(*--s); + ungetbyte(*--s); } #if defined(pyr) && defined(HAVE_SELECT) @@ -356,7 +383,7 @@ breakread(int fd, char *buf, int n) #endif static int -raw_getkey(int keytmout, char *cptr) +raw_getbyte(int keytmout, char *cptr) { long exp100ths; int ret; @@ -591,13 +618,22 @@ raw_getkey(int keytmout, char *cptr) /**/ mod_export int -getkey(int keytmout) +getbyte(int keytmout) { char cc; unsigned int ret; int die = 0, r, icnt = 0; int old_errno = errno, obreaks = breaks; +#ifdef ZLE_UNICODE_SUPPORT + /* + * Reading a single byte always invalidates the status + * of lastchar_wide. 
We may fix this up in getrestchar + * if this is the last byte of a wide character. + */ + lastchar_wide_valid = 0; +#endif + if (kungetct) ret = STOUC(kungetbuf[--kungetct]); else { @@ -612,10 +648,10 @@ getkey(int keytmout) for (;;) { int q = queue_signal_level(); dont_queue_signals(); - r = raw_getkey(keytmout, &cc); + r = raw_getbyte(keytmout, &cc); restore_queue_signals(q); if (r == -2) /* timeout */ - return EOF; + return lastchar = EOF; if (r == 1) break; if (r == 0) { @@ -642,7 +678,7 @@ getkey(int keytmout) errflag = 0; breaks = obreaks; errno = old_errno; - return EOF; + return lastchar = EOF; } else if (errno == EWOULDBLOCK) { fcntl(0, F_SETFL, 0); } else if (errno == EIO && !die) { @@ -665,15 +701,96 @@ getkey(int keytmout) ret = STOUC(cc); } + /* + * TODO: if vichgbuf is to be characters instead of a multibyte + * string the following needs moving to getfullchar(). + */ if (vichgflag) { if (vichgbufptr == vichgbufsz) vichgbuf = realloc(vichgbuf, vichgbufsz *= 2); vichgbuf[vichgbufptr++] = ret; } errno = old_errno; - return ret; + return lastchar = ret; } + +/* + * Get a full character rather than just a single byte. + * (TODO: Strictly we ought to call this getbyte and the above + * function getbyte.) + */ + +/**/ +mod_export ZLE_INT_T +getfullchar(int keytmout) +{ + int inchar = getbyte(keytmout); + +#ifdef ZLE_UNICODE_SUPPORT + return getrestchar(inchar); +#else + return inchar; +#endif +} + + +/**/ +#ifdef ZLE_UNICODE_SUPPORT +/* + * Get the remainder of a character if we support multibyte + * input strings. It may not require any more input, but + * we haven't yet checked. The character previously returned + * by getbyte() is passed down as inchar. + */ + +/**/ +mod_export ZLE_INT_T +getrestchar(int inchar) +{ + char cnull = '\0'; + char buf[MB_CUR_MAX], *ptr; + wchar_t outchar; + int ret; + + /* + * We are guaranteed to set a valid wide last character, + * although it may be WEOF (which is technically not + * a wide character at all...) 
+ */ + lastchar_wide_valid = 1; + + if (inchar == EOF) + return lastchar_wide = WEOF; + + /* reset shift state by converting null */ + mbrtowc(&outchar, &cnull, 1, &ps); + + ptr = buf; + *ptr++ = inchar; + /* + * Return may be zero if we have a NUL byte; handle this like + * any other character. + */ + while ((ret = mbrtowc(&outchar, buf, ptr - buf, &ps)) < 0) { + if (ret == -1) { + /* + * Invalid input. Hmm, what's the right thing to do here? + */ + return lastchar_wide = WEOF; + } + /* No timeout here as we really need the character. */ + inchar = getbyte(0); + if (inchar == EOF) + return lastchar_wide = WEOF; + *ptr++ = inchar; + } + return lastchar_wide = (wint_t)outchar; +} +/**/ +#endif + + /**/ void zlecore(void) @@ -1445,7 +1562,7 @@ setup_(UNUSED(Module m)) zlereadptr = zleread; zlesetkeymapptr = zlesetkeymap; - getkeyptr = getkey; + getkeyptr = getbyte; /* initialise the thingies */ init_thingies(); |