From d7c13fb2c3b1b014acde9c1cb17a1e34239b9751 Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Fri, 18 Feb 2005 13:57:25 +0000 Subject: 20822: Initial code for Unicode/multibyte input 20823: Debugging test in stat wrong for 64-bit systems --- Src/Zle/complist.c | 8 ++- Src/Zle/deltochar.c | 3 +- Src/Zle/zle.h | 69 +++++++++++++++++++++++ Src/Zle/zle_hist.c | 47 +++++++++++----- Src/Zle/zle_keymap.c | 37 +++++++++++-- Src/Zle/zle_main.c | 151 +++++++++++++++++++++++++++++++++++++++++++++------ Src/Zle/zle_misc.c | 139 +++++++++++++++++++++++++++++++++++------------ Src/Zle/zle_move.c | 29 +++++----- Src/Zle/zle_thingy.c | 2 +- Src/Zle/zle_tricky.c | 17 +++++- Src/Zle/zle_utils.c | 14 ++--- Src/Zle/zle_vi.c | 72 ++++++++++++++++-------- 12 files changed, 466 insertions(+), 122 deletions(-) (limited to 'Src/Zle') diff --git a/Src/Zle/complist.c b/Src/Zle/complist.c index 1cc709817..748b1fdf7 100644 --- a/Src/Zle/complist.c +++ b/Src/Zle/complist.c @@ -1869,6 +1869,10 @@ msearch(Cmatch **ptr, int ins, int back, int rep, int *wrapp) msearchpush(ptr, back); if (ins) { + /* + * TODO: probably need to convert back to multibyte character + * string? Who knows... + */ s[0] = lastchar; s[1] = '\0'; @@ -2802,9 +2806,7 @@ domenuselect(Hookdef dummy, Chdata dat) } } if (cmd == Th(z_selfinsertunmeta)) { - lastchar &= 0x7f; - if (lastchar == '\r') - lastchar = '\n'; + fixunmeta(); } wrap = 0; np = msearch(p, ins, (ins ? 
(mode == MM_BSEARCH) : back), diff --git a/Src/Zle/deltochar.c b/Src/Zle/deltochar.c index 545c34c9f..d25f99680 100644 --- a/Src/Zle/deltochar.c +++ b/Src/Zle/deltochar.c @@ -37,7 +37,8 @@ static Widget w_zaptochar; static int deltochar(UNUSED(char **args)) { - int c = getkey(0), dest = zlecs, ok = 0, n = zmult; + ZLE_INT_T c = getfullchar(0); + int dest = zlecs, ok = 0, n = zmult; int zap = (bindk->widget == w_zaptochar); if (n > 0) { diff --git a/Src/Zle/zle.h b/Src/Zle/zle.h index 3b9845f8a..a76d08e2d 100644 --- a/Src/Zle/zle.h +++ b/Src/Zle/zle.h @@ -27,6 +27,75 @@ * */ +#ifdef ZLE_UNICODE_SUPPORT +typedef wchar_t ZLE_CHAR_T; +typedef wchar_t *ZLE_STRING_T; +typedef wint_t ZLE_INT_T; +#define ZLE_CHAR_SIZE sizeof(wchar_t) + +/* + * MB_CUR_MAX is the maximum number of bytes that a single wide + * character will convert into. We use it to keep strings + * sufficiently long. It should always be defined, but if it isn't + * just assume we are using Unicode which requires 6 characters. + * (Note that it's not necessarily defined to a constant.) 
+ */ +#ifndef MB_CUR_MAX +#define MB_CUR_MAX 6 +#endif + +#define ZLENL L'\n' +#define ZLENUL L'\0' +#define ZLETAB L'\t' + +#define DIGIT_1 L'1' +#define DIGIT_9 L'9' +#define LETTER_a L'a' +#define LETTER_z L'z' +#define LETTER_A L'A' +#define LETTER_Z L'Z' +#define LETTER_y L'y' +#define LETTER_n L'n' + +#define ZLENULSTR L"" +#define ZLEEOF WEOF +#define ZS_memcpy wmemcpy +#define ZS_memmove wmemmove +#define ZC_icntrl iswcntrl + +#define LASTFULLCHAR lastchar_wide + +#else /* Not ZLE_UNICODE_SUPPORT: old single-byte code */ + +typedef int ZLE_CHAR_T; +typedef unsigned char *ZLE_STRING_T; +typedef int ZLE_INT_T; +#define ZLE_CHAR_SIZE sizeof(unsigned char) + +#define ZLENL '\n' +#define ZLENUL '\0' +#define ZLETAB '\t' + +#define DIGIT_1 '1' +#define DIGIT_9 '9' +#define LETTER_a 'a' +#define LETTER_z 'z' +#define LETTER_A 'A' +#define LETTER_Z 'Z' +#define LETTER_y 'y' +#define LETTER_n 'n' + +#define ZLENULSTR "" +#define ZLEEOF EOF +#define ZS_memcpy memcpy +#define ZS_memmove memmove +#define ZC_icntrl icntrl + +#define LASTFULLCHAR lastchar + +#endif + + typedef struct widget *Widget; typedef struct thingy *Thingy; diff --git a/Src/Zle/zle_hist.c b/Src/Zle/zle_hist.c index 8cd082b72..afad7ae44 100644 --- a/Src/Zle/zle_hist.c +++ b/Src/Zle/zle_hist.c @@ -420,11 +420,12 @@ endofhistory(UNUSED(char **args)) int insertlastword(char **args) { - int n, nwords, histstep = -1, wordpos = 0, deleteword = 0; + int n, nwords, histstep = -1, wordpos = 0, deleteword = 0, len, sz; char *s, *t; Histent he = NULL; LinkList l = NULL; LinkNode node; + ZLE_STRING_T zs; static char *lastinsert; static int lasthist, lastpos, lastlen; @@ -554,7 +555,10 @@ insertlastword(char **args) memcpy(lastinsert, s, lastlen); n = zmult; zmult = 1; - doinsert(s); + + zs = stringaszleline((unsigned char *)s, &len, &sz); + doinsert(zs, len); + zfree(zs, sz); zmult = n; *t = save; return 0; @@ -780,7 +784,7 @@ doisearch(char **args, int dir) char *arg; savekeys = kungetct; arg = 
getkeystring(*args, &len, 2, NULL); - ungetkeys(arg, len); + ungetbytes(arg, len); } strcpy(ibuf, ISEARCH_PROMPT); @@ -951,18 +955,23 @@ doisearch(char **args, int dir) sbuf[sbptr] = '^'; zrefresh(); } - if ((lastchar = getkey(0)) == EOF) + if (getfullchar(0) == ZLEEOF) feep = 1; else goto ins; } else { if(cmd == Th(z_selfinsertunmeta)) { - lastchar &= 0x7f; - if(lastchar == '\r') - lastchar = '\n'; - } else if (cmd == Th(z_magicspace)) - lastchar = ' '; - else if (cmd != Th(z_selfinsert)) { + fixunmeta(); + } else if (cmd == Th(z_magicspace)) { + fixmagicspace(); + } else if (cmd == Th(z_selfinsert)) { +#ifdef ZLE_UNICODE_SUPPORT + if (!lastchar_wide_valid) + getrestchar(lastchar); +#else + ; +#endif + } else { ungetkeycmd(); if (cmd == Th(z_sendbreak)) sbptr = 0; @@ -979,6 +988,8 @@ doisearch(char **args, int dir) sbuf = ibuf + FIRST_SEARCH_CHAR; sibuf *= 2; } + /* TODO: use lastchar_wide if available, convert back to + * multibyte string. Yuk. */ sbuf[sbptr++] = lastchar; } if (feep) @@ -1093,7 +1104,7 @@ getvisrchstr(void) break; } if(cmd == Th(z_magicspace)) { - lastchar = ' '; + fixmagicspace(); cmd = Th(z_selfinsert); } if(cmd == Th(z_redisplay)) { @@ -1128,15 +1139,20 @@ getvisrchstr(void) sbuf[sptr] = '^'; zrefresh(); } - if ((lastchar = getkey(0)) == EOF) + if (getfullchar(0) == ZLEEOF) feep = 1; else goto ins; } else if(cmd == Th(z_selfinsertunmeta) || cmd == Th(z_selfinsert)) { if(cmd == Th(z_selfinsertunmeta)) { - lastchar &= 0x7f; - if(lastchar == '\r') - lastchar = '\n'; + fixunmeta(); + } else { +#ifdef ZLE_UNICODE_SUPPORT + if (!lastchar_wide_valid) + getrestchar(lastchar); +#else + ; +#endif } ins: if(sptr == ssbuf - 1) { @@ -1144,6 +1160,7 @@ getvisrchstr(void) strcpy(newbuf, sbuf); statusline = sbuf = newbuf; } + /* TODO: may be wide char, convert back to multibyte string */ sbuf[sptr++] = lastchar; } else { feep = 1; diff --git a/Src/Zle/zle_keymap.c b/Src/Zle/zle_keymap.c index 3aee499f0..9b83a4953 100644 --- a/Src/Zle/zle_keymap.c +++ 
b/Src/Zle/zle_keymap.c @@ -1272,7 +1272,21 @@ getkeymapcmd(Keymap km, Thingy *funcp, char **strp) keybuflen = 0; keybuf[0] = 0; - while((lastchar = getkeybuf(!!lastlen)) != EOF) { + /* + * getkeybuf returns multibyte strings, which may not + * yet correspond to complete wide characters, regardless + * of the locale. This is because we can't be sure whether + * the key bindings and keyboard input always return such + * characters. So we always look up bindings for each + * chunk of string. Intelligence within self-insert tries + * to fix up insertion of real wide characters properly. + * + * Note that this does not stop the user binding wide characters to + * arbitrary functions, just so long as the string used in the + * argument to bindkey is in the correct form for the locale. + * That's beyond our control. + */ + while(getkeybuf(!!lastlen) != EOF) { char *s; Thingy f; int loc = 1; @@ -1296,7 +1310,7 @@ getkeymapcmd(Keymap km, Thingy *funcp, char **strp) lastchar = lastc; if(lastlen != keybuflen) { unmetafy(keybuf + lastlen, &keybuflen); - ungetkeys(keybuf+lastlen, keybuflen); + ungetbytes(keybuf+lastlen, keybuflen); if(vichgflag) vichgbufptr -= keybuflen; keybuf[lastlen] = 0; @@ -1306,11 +1320,24 @@ getkeymapcmd(Keymap km, Thingy *funcp, char **strp) return keybuf; } +/* + * Add a (possibly metafied) byte to the key input so far. + * This handles individual bytes of a multibyte string separately; + * see note in getkeymapcmd. Hence there is no wide character + * support at this level. + * + * TODO: Need to be careful about whether we return EOF in the + * middle of a wide character. However, I think we're OK since + * EOF and 0xff are distinct and we're reading bytes from the + * lower level, so EOF really does mean something went wrong. Even so, + * I'm worried enough to leave this note here for now. 
+ */ + /**/ static int getkeybuf(int w) { - int c = getkey(w); + int c = getbyte(w); if(c < 0) return EOF; @@ -1332,7 +1359,7 @@ getkeybuf(int w) mod_export void ungetkeycmd(void) { - ungetkeys(keybuf, keybuflen); + ungetbytes(keybuf, keybuflen); } /* read a command from the current keymap, with widgets */ @@ -1359,7 +1386,7 @@ getkeycmd(void) return NULL; } pb = unmetafy(ztrdup(str), &len); - ungetkeys(pb, len); + ungetbytes(pb, len); zfree(pb, strlen(str) + 1); goto sentstring; } diff --git a/Src/Zle/zle_main.c b/Src/Zle/zle_main.c index d25376862..1f7b7cbfa 100644 --- a/Src/Zle/zle_main.c +++ b/Src/Zle/zle_main.c @@ -78,10 +78,30 @@ int done; /**/ int mark; -/* last character pressed */ +/* + * Last character pressed. + * + * Depending how far we are with processing, the last character may + * be a single byte read (lastchar_wide_valid is 0, lastchar_wide is not + * valid) or a full wide character. This is needed because we can't be + * sure whether the user is typing old \M-style commands or multibyte + * input. + * + * Calling getfullchar or getrestchar is guaranteed to ensure we have + * a valid wide character (although this may be WEOF). In many states + * we know this and don't need to test lastchar_wide_valid. + */ /**/ -mod_export int lastchar; +mod_export int +lastchar; +#ifdef ZLE_UNICODE_SUPPORT +/**/ +mod_export ZLE_INT_T lastchar_wide; +/**/ +mod_export int +lastchar_wide_valid; +#endif /* the bindings for the previous and for this key */ @@ -148,7 +168,7 @@ mod_export struct modifier zmod; /**/ int prefixflag; -/* Number of characters waiting to be read by the ungetkeys mechanism */ +/* Number of characters waiting to be read by the ungetbytes mechanism */ /**/ int kungetct; @@ -196,7 +216,7 @@ zsetterm(void) * we can't set up the terminal for zle *at all* until * we are sure there is no more typeahead to come. So * if there is typeahead, we set the flag delayzsetterm. 
- * Then getkey() performs another FIONREAD call; if that is + * Then getbyte() performs another FIONREAD call; if that is * 0, we have finally used up all the typeahead, and it is * safe to alter the terminal, which we do at that point. */ @@ -266,7 +286,7 @@ zsetterm(void) ti.tio.c_cc[VMIN] = 1; ti.tio.c_cc[VTIME] = 0; ti.tio.c_iflag |= (INLCR | ICRNL); - /* this line exchanges \n and \r; it's changed back in getkey + /* this line exchanges \n and \r; it's changed back in getbyte so that the net effect is no change at all inside the shell. This double swap is to allow typeahead in common cases, eg. @@ -275,12 +295,12 @@ zsetterm(void) echo foo <--- typed before sleep returns The shell sees \n instead of \r, since it was changed by the kernel - while zsh wasn't looking. Then in getkey() \n is changed back to \r, + while zsh wasn't looking. Then in getbyte() \n is changed back to \r, and it sees "echo foo", as expected. Without the double swap the shell would see "echo foo\n", which is translated to "echo fooecho foo" because of the binding. Note that if you type during the sleep the shell just sees - \n, which is translated to \r in getkey(), and you just get another + \n, which is translated to \r in getbyte(), and you just get another prompt. For type-ahead to work in ALL cases you have to use stty inlcr. @@ -321,9 +341,16 @@ zsetterm(void) static char *kungetbuf; static int kungetsz; +/* + * Note on ungetbyte and ungetbytes for the confused (pws): + * these are low level and deal with bytes before they + * have been converted into (possibly wide) characters. + * Hence the names. 
+ */ + /**/ void -ungetkey(int ch) +ungetbyte(int ch) { if (kungetct == kungetsz) kungetbuf = realloc(kungetbuf, kungetsz *= 2); @@ -332,11 +359,11 @@ ungetkey(int ch) /**/ void -ungetkeys(char *s, int len) +ungetbytes(char *s, int len) { s += len; while (len--) - ungetkey(*--s); + ungetbyte(*--s); } #if defined(pyr) && defined(HAVE_SELECT) @@ -356,7 +383,7 @@ breakread(int fd, char *buf, int n) #endif static int -raw_getkey(int keytmout, char *cptr) +raw_getbyte(int keytmout, char *cptr) { long exp100ths; int ret; @@ -591,13 +618,22 @@ raw_getkey(int keytmout, char *cptr) /**/ mod_export int -getkey(int keytmout) +getbyte(int keytmout) { char cc; unsigned int ret; int die = 0, r, icnt = 0; int old_errno = errno, obreaks = breaks; +#ifdef ZLE_UNICODE_SUPPORT + /* + * Reading a single byte always invalidates the status + * of lastchar_wide. We may fix this up in getrestchar + * if this is the last byte of a wide character. + */ + lastchar_wide_valid = 0; +#endif + if (kungetct) ret = STOUC(kungetbuf[--kungetct]); else { @@ -612,10 +648,10 @@ getkey(int keytmout) for (;;) { int q = queue_signal_level(); dont_queue_signals(); - r = raw_getkey(keytmout, &cc); + r = raw_getbyte(keytmout, &cc); restore_queue_signals(q); if (r == -2) /* timeout */ - return EOF; + return lastchar = EOF; if (r == 1) break; if (r == 0) { @@ -642,7 +678,7 @@ getkey(int keytmout) errflag = 0; breaks = obreaks; errno = old_errno; - return EOF; + return lastchar = EOF; } else if (errno == EWOULDBLOCK) { fcntl(0, F_SETFL, 0); } else if (errno == EIO && !die) { @@ -665,15 +701,96 @@ getkey(int keytmout) ret = STOUC(cc); } + /* + * TODO: if vichgbuf is to be characters instead of a multibyte + * string the following needs moving to getfullchar(). + */ if (vichgflag) { if (vichgbufptr == vichgbufsz) vichgbuf = realloc(vichgbuf, vichgbufsz *= 2); vichgbuf[vichgbufptr++] = ret; } errno = old_errno; - return ret; + return lastchar = ret; } + +/* + * Get a full character rather than just a single byte. 
+ * (TODO: Strictly we ought to call this getkey and the above + * function getbyte.) + */ + +/**/ +mod_export ZLE_INT_T +getfullchar(int keytmout) +{ + int inchar = getbyte(keytmout); + +#ifdef ZLE_UNICODE_SUPPORT + return getrestchar(inchar); +#else + return inchar; +#endif +} + + +/**/ +#ifdef ZLE_UNICODE_SUPPORT +/* + * Get the remainder of a character if we support multibyte + * input strings. It may not require any more input, but + * we haven't yet checked. The character previously returned + * by getbyte() is passed down as inchar. + */ + +/**/ +mod_export ZLE_INT_T +getrestchar(int inchar) +{ + char cnull = '\0'; + char buf[MB_CUR_MAX], *ptr; + wchar_t outchar; + int ret; + + /* + * We are guaranteed to set a valid wide last character, + * although it may be WEOF (which is technically not + * a wide character at all...) + */ + lastchar_wide_valid = 1; + + if (inchar == EOF) + return lastchar_wide = WEOF; + + /* reset shift state by converting null */ + mbrtowc(&outchar, &cnull, 1, &ps); + + ptr = buf; + *ptr++ = inchar; + /* + * Return may be zero if we have a NULL; handle this like + * any other character. + */ + while ((ret = mbrtowc(&outchar, buf, ptr - buf, &ps)) < 0) { + if (ret == -1) { + /* + * Invalid input. Hmm, what's the right thing to do here? + */ + return lastchar_wide = WEOF; + } + /* No timeout here as we really need the character. 
*/ + inchar = getbyte(0); + if (inchar == EOF) + return lastchar_wide = WEOF; + *ptr++ = inchar; + } + return lastchar_wide = (wint_t)outchar; +} +/**/ +#endif + + /**/ void zlecore(void) @@ -1445,7 +1562,7 @@ setup_(UNUSED(Module m)) zlereadptr = zleread; zlesetkeymapptr = zlesetkeymap; - getkeyptr = getkey; + getkeyptr = getbyte; /* initialise the thingies */ init_thingies(); diff --git a/Src/Zle/zle_misc.c b/Src/Zle/zle_misc.c index 86a0137b3..134ae21af 100644 --- a/Src/Zle/zle_misc.c +++ b/Src/Zle/zle_misc.c @@ -34,13 +34,13 @@ /**/ void -doinsert(char *str) +doinsert(ZLE_STRING_T zstr, int len) { - char *s; - int len = ztrlen(str); - int c1 = *str == Meta ? STOUC(str[1])^32 : STOUC(*str);/* first character */ + ZLE_STRING_T s; + ZLE_CHAR_T c1 = *zstr; /* first character */ int neg = zmult < 0; /* insert *after* the cursor? */ int m = neg ? -zmult : zmult; /* number of copies to insert */ + int count; iremovesuffix(c1, 0); invalidatelist(); @@ -50,8 +50,8 @@ doinsert(char *str) else if(zlecs + m * len > zlell) spaceinline(zlecs + m * len - zlell); while(m--) - for(s = str; *s; s++) - zleline[zlecs++] = *s == Meta ? *++s ^ 32 : *s; + for(s = zstr, count = len; count; s++, count--) + zleline[zlecs++] = *s; if(neg) zlecs += zmult * len; } @@ -60,25 +60,41 @@ doinsert(char *str) mod_export int selfinsert(UNUSED(char **args)) { - char s[3], *p = s; - - if(imeta(lastchar)) { - *p++ = Meta; - lastchar ^= 32; - } - *p++ = lastchar; - *p = 0; - doinsert(s); +#ifdef ZLE_UNICODE_SUPPORT + if (!lastchar_wide_valid) + getrestchar(lastchar); + doinsert(&lastchar_wide, 1); +#else + char s = lastchar; + doinsert(&s, 1); +#endif return 0; } /**/ -mod_export int -selfinsertunmeta(char **args) +mod_export void +fixunmeta(void) { lastchar &= 0x7f; if (lastchar == '\r') lastchar = '\n'; +#ifdef ZLE_UNICODE_SUPPORT + /* + * TODO: can we do this better? + * We need a wide character to insert. + * selfinsertunmeta is intrinsically problematic + * with multibyte input. 
+ */ + lastchar_wide = (ZLE_CHAR_T)lastchar; + lastchar_wide_valid = TRUE; +#endif +} + +/**/ +mod_export int +selfinsertunmeta(char **args) +{ + fixunmeta(); return selfinsert(args); } @@ -490,11 +506,11 @@ quotedinsert(char **args) sob.sg_flags = (sob.sg_flags | RAW) & ~ECHO; ioctl(SHTTY, TIOCSETN, &sob); #endif - lastchar = getkey(0); + getfullchar(0); #ifndef HAS_TIO zsetterm(); #endif - if (lastchar < 0) + if (LASTFULLCHAR == ZLEEOF) return 1; else return selfinsert(args); @@ -506,9 +522,20 @@ digitargument(UNUSED(char **args)) { int sign = (zmult < 0) ? -1 : 1; +#ifdef ZLE_UNICODE_SUPPORT + /* + * It's too dangerous to allow metafied input. See + * universalargument for comments on (possibly suboptimal) handling + * of digits. We are assuming ASCII is a subset of the multibyte + * encoding. + */ + if (lastchar < '0' || lastchar > '9') + return 1; +#else /* allow metafied as well as ordinary digits */ if ((lastchar & 0x7f) < '0' || (lastchar & 0x7f) > '9') return 1; +#endif if (!(zmod.flags & MOD_TMULT)) zmod.tmult = 0; @@ -546,7 +573,22 @@ universalargument(char **args) zmod.flags |= MOD_MULT; return 0; } - while ((gotk = getkey(0)) != EOF) { + /* + * TODO: this is quite tricky to do when trying to maintain + * compatibility between the old input system and Unicode. + * We don't know what follows the digits, so if we try to + * read wide characters we may fail (e.g. we may come across an old + * \M-style binding). + * + * If we assume individual bytes are either explicitly ASCII or + * not (a la UTF-8), we get away with it; we can back up individual + * bytes and everything will work. We may want to relax this + * assumption later. ("Much later" - (C) Steven Singer, + * CSR BlueCore firmware, ca. 2000.) + * + * Hence for now this remains byte-by-byte. 
+ */ + while ((gotk = getbyte(0)) != EOF) { if (gotk == '-' && !digcnt) { minus = -1; digcnt++; @@ -554,7 +596,7 @@ universalargument(char **args) pref = pref * 10 + (gotk & 0xf); digcnt++; } else { - ungetkey(gotk); + ungetbyte(gotk); break; } } @@ -765,24 +807,32 @@ executenamedcommand(char *prmt) } else if(cmd == Th(z_viquotedinsert)) { *ptr = '^'; zrefresh(); - lastchar = getkey(0); - if(lastchar == EOF || !lastchar || len == NAMLEN) + getfullchar(0); + if(LASTFULLCHAR == ZLEEOF || !LASTFULLCHAR || len == NAMLEN) feep = 1; - else + else { + /* TODO: convert back to multibyte string */ *ptr++ = lastchar, len++, curlist = 0; + } } else if(cmd == Th(z_quotedinsert)) { - if((lastchar = getkey(0)) == EOF || !lastchar || len == NAMLEN) + if(getfullchar(0) == ZLEEOF || + !LASTFULLCHAR || len == NAMLEN) feep = 1; - else + else { + /* TODO: convert back to multibyte string */ *ptr++ = lastchar, len++, curlist = 0; + } } else if(cmd == Th(z_backwarddeletechar) || cmd == Th(z_vibackwarddeletechar)) { - if (len) + if (len) { + /* TODO: backward full character in multibyte string. Yuk. */ len--, ptr--, curlist = 0; + } } else if(cmd == Th(z_killregion) || cmd == Th(z_backwardkillword) || cmd == Th(z_vibackwardkillword)) { if (len) curlist = 0; + /* TODO: backward full character in multibyte string. Yuk. 
*/ while (len && (len--, *--ptr != '-')); } else if(cmd == Th(z_killwholeline) || cmd == Th(z_vikillline) || cmd == Th(z_backwardkillline)) { @@ -812,9 +862,7 @@ executenamedcommand(char *prmt) unrefthingy(r); } if(cmd == Th(z_selfinsertunmeta)) { - lastchar &= 0x7f; - if(lastchar == '\r') - lastchar = '\n'; + fixunmeta(); cmd = Th(z_selfinsert); } if (cmd == Th(z_listchoices) || cmd == Th(z_deletecharorlist) || @@ -867,11 +915,24 @@ executenamedcommand(char *prmt) len = cmdambig; } } else { - if (len == NAMLEN || icntrl(lastchar) || - cmd != Th(z_selfinsert)) + if (len == NAMLEN || cmd != Th(z_selfinsert)) feep = 1; - else - *ptr++ = lastchar, len++, curlist = 0; + else { +#ifdef ZLE_UNICODE_SUPPORT + if (!lastchar_wide_valid) + getrestchar(lastchar); + if (iswcntrl(lastchar_wide)) +#else + if (icntrl(lastchar)) +#endif + { + feep = 1; + } + else { + /* TODO: convert back to multibyte string */ + *ptr++ = lastchar, len++, curlist = 0; + } + } } } if (feep) @@ -911,6 +972,9 @@ executenamedcommand(char *prmt) /* Length of suffix to remove when inserting each possible character value. * * suffixlen[256] is the length to remove for non-insertion editing actions. */ +/* + * TODO: Aargh, this is completely broken with wide characters. + */ /**/ mod_export int suffixlen[257]; @@ -1000,7 +1064,7 @@ makesuffixstr(char *f, char *s, int n) /**/ mod_export void -iremovesuffix(int c, int keep) +iremovesuffix(ZLE_CHAR_T c, int keep) { if (suffixfunc) { Eprog prog = getshfunc(suffixfunc); @@ -1024,7 +1088,12 @@ iremovesuffix(int c, int keep) zsfree(suffixfunc); suffixfunc = NULL; } else { +#ifdef ZLE_UNICODE_SUPPORT + /* TODO: best I can think of for now... */ + int sl = (unsigned int)c < 256 ? 
suffixlen[c] : 0; +#else int sl = suffixlen[c]; +#endif if(sl) { backdel(sl); if (!keep) diff --git a/Src/Zle/zle_move.c b/Src/Zle/zle_move.c index 48e9dd64f..b939df06b 100644 --- a/Src/Zle/zle_move.c +++ b/Src/Zle/zle_move.c @@ -353,13 +353,14 @@ vibeginningofline(UNUSED(char **args)) return 0; } -static int vfindchar, vfinddir, tailadd; +static ZLE_INT_T vfindchar; +static int vfinddir, tailadd; /**/ int vifindnextchar(char **args) { - if ((vfindchar = vigetkey()) != -1) { + if ((vfindchar = vigetkey()) != ZLEEOF) { vfinddir = 1; tailadd = 0; return virepeatfind(args); @@ -371,7 +372,7 @@ vifindnextchar(char **args) int vifindprevchar(char **args) { - if ((vfindchar = vigetkey()) != -1) { + if ((vfindchar = vigetkey()) != ZLEEOF) { vfinddir = -1; tailadd = 0; return virepeatfind(args); @@ -383,7 +384,7 @@ vifindprevchar(char **args) int vifindnextcharskip(char **args) { - if ((vfindchar = vigetkey()) != -1) { + if ((vfindchar = vigetkey()) != ZLEEOF) { vfinddir = 1; tailadd = -1; return virepeatfind(args); @@ -395,7 +396,7 @@ vifindnextcharskip(char **args) int vifindprevcharskip(char **args) { - if ((vfindchar = vigetkey()) != -1) { + if ((vfindchar = vigetkey()) != ZLEEOF) { vfinddir = -1; tailadd = 1; return virepeatfind(args); @@ -465,12 +466,12 @@ vifirstnonblank(UNUSED(char **args)) int visetmark(UNUSED(char **args)) { - int ch; + ZLE_INT_T ch; - ch = getkey(0); - if (ch < 'a' || ch > 'z') + ch = getfullchar(0); + if (ch < LETTER_a || ch > LETTER_z) return 1; - ch -= 'a'; + ch -= LETTER_a; vimarkcs[ch] = zlecs; vimarkline[ch] = histline; return 0; @@ -480,15 +481,15 @@ visetmark(UNUSED(char **args)) int vigotomark(UNUSED(char **args)) { - int ch; + ZLE_INT_T ch; - ch = getkey(0); - if (ch == lastchar) + ch = getfullchar(0); + if (ch == LASTFULLCHAR) ch = 26; else { - if (ch < 'a' || ch > 'z') + if (ch < LETTER_a || ch > LETTER_z) return 1; - ch -= 'a'; + ch -= LETTER_a; } if (!vimarkline[ch]) return 1; diff --git a/Src/Zle/zle_thingy.c 
b/Src/Zle/zle_thingy.c index 441d85b2c..494e1ade5 100644 --- a/Src/Zle/zle_thingy.c +++ b/Src/Zle/zle_thingy.c @@ -473,7 +473,7 @@ bin_zle_unget(char *name, char **args, UNUSED(Options ops), UNUSED(char func)) return 1; } while (p > b) - ungetkey((int) *--p); + ungetbyte((int) *--p); return 0; } diff --git a/Src/Zle/zle_tricky.c b/Src/Zle/zle_tricky.c index a4d87dfea..15c723d7a 100644 --- a/Src/Zle/zle_tricky.c +++ b/Src/Zle/zle_tricky.c @@ -2297,13 +2297,28 @@ doexpandhist(void) return 0; } +/**/ +void +fixmagicspace(void) +{ + lastchar = ' '; +#ifdef ZLE_UNICODE_SUPPORT + /* + * This is redundant if the multibyte encoding extends ASCII, + * since lastchar is a full character, but it's safer anyway... + */ + lastchar_wide = L' '; + lastchar_wide_valid = TRUE; +#endif +} + /**/ int magicspace(char **args) { char *bangq; int ret; - lastchar = ' '; + fixmagicspace(); for (bangq = (char *)zleline; (bangq = strchr(bangq, bangchar)); bangq += 2) if (bangq[1] == '"' && (bangq == (char *)zleline || bangq[-1] != '\\')) diff --git a/Src/Zle/zle_utils.c b/Src/Zle/zle_utils.c index ffd94def8..e6f696935 100644 --- a/Src/Zle/zle_utils.c +++ b/Src/Zle/zle_utils.c @@ -510,7 +510,7 @@ hstrnstr(char *haystack, int pos, char *needle, int len, int dir, int sens) mod_export int getzlequery(int yesno) { - int c; + ZLE_INT_T c; #ifdef FIONREAD int val; @@ -525,18 +525,18 @@ getzlequery(int yesno) #endif /* get a character from the tty and interpret it */ - c = getkey(0); + c = getfullchar(0); if (yesno) { - if (c == '\t') - c = 'y'; + if (c == ZLETAB) + c = LETTER_y; else if (icntrl(c) || c == EOF) - c = 'n'; + c = LETTER_n; else c = tulower(c); } /* echo response and return */ - if (c != '\n') - putc(c, shout); + if (c != ZLENL) + putc(c, shout); /* TODO: convert to multibyte */ return c; } diff --git a/Src/Zle/zle_vi.c b/Src/Zle/zle_vi.c index 15a2deb49..b45ccf10a 100644 --- a/Src/Zle/zle_vi.c +++ b/Src/Zle/zle_vi.c @@ -50,6 +50,11 @@ int vilinerange; /**/ int vichgbufsz, vichgbufptr, 
vichgflag; +/* + * TODO: need consistent handling of vichgbuf: ZLE_STRING_T or + * char *? Consequently, use of lastchar in this file needs fixing + * too. + */ /**/ char *vichgbuf; @@ -95,15 +100,15 @@ startvitext(int im) } /**/ -int +ZLE_INT_T vigetkey(void) { Keymap mn = openkeymap("main"); char m[3], *str; Thingy cmd; - if((lastchar = getkey(0)) == EOF) - return -1; + if(getbyte(0) == EOF) + return ZLEEOF; m[0] = lastchar; metafy(m, 1, META_NOALLOC); @@ -112,23 +117,35 @@ vigetkey(void) else cmd = t_undefinedkey; + /* + * TODO: if this was bound to self-insert, we may + * be on the first character of a multibyte string + * and need to acquire the rest. + */ if (!cmd || cmd == Th(z_sendbreak)) { - return -1; + return ZLEEOF; } else if (cmd == Th(z_quotedinsert)) { - if ((lastchar = getkey(0)) == EOF) - return -1; + if (getfullchar(0) == ZLEEOF) + return ZLEEOF; } else if(cmd == Th(z_viquotedinsert)) { - char sav = zleline[zlecs]; + ZLE_CHAR_T sav = zleline[zlecs]; zleline[zlecs] = '^'; zrefresh(); - lastchar = getkey(0); + getfullchar(0); zleline[zlecs] = sav; - if(lastchar == EOF) - return -1; - } else if (cmd == Th(z_vicmdmode)) - return -1; - return lastchar; + if(LASTFULLCHAR == ZLEEOF) + return ZLEEOF; + } else if (cmd == Th(z_vicmdmode)) { + return ZLEEOF; + } +#ifdef ZLE_UNICODE_SUPPORT + if (!lastchar_wide_valid) + { + getrestchar(lastchar); + } +#endif + return LASTFULLCHAR; } /**/ @@ -489,7 +506,7 @@ vireplacechars(UNUSED(char **args)) return 1; } /* get key */ - if((ch = vigetkey()) == -1) { + if((ch = vigetkey()) == ZLEEOF) { vichgflag = 0; return 1; } @@ -593,7 +610,7 @@ virepeatchange(UNUSED(char **args)) } /* repeat the command */ inrepeat = 1; - ungetkeys(vichgbuf, vichgbufptr); + ungetbytes(vichgbuf, vichgbufptr); return 0; } @@ -817,26 +834,35 @@ vicapslockpanic(UNUSED(char **args)) statusline = "press a lowercase key to continue"; statusll = strlen(statusline); zrefresh(); - while (!islower(getkey(0))); +#ifdef ZLE_UNICODE_SUPPORT + while 
(!iswlower(getfullchar(0))); +#else + while (!islower(getfullchar(0))); +#endif statusline = NULL; return 0; } +#ifdef ZLE_UNICODE_SUPPORT +#else +#endif + /**/ int visetbuffer(UNUSED(char **args)) { - int ch; + ZLE_INT_T ch; if ((zmod.flags & MOD_VIBUF) || - (((ch = getkey(0)) < '1' || ch > '9') && - (ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z'))) + (((ch = getfullchar(0)) < DIGIT_1 || ch > DIGIT_9) && + (ch < LETTER_a || ch > LETTER_z) && + (ch < LETTER_A || ch > LETTER_Z))) return 1; - if (ch >= 'A' && ch <= 'Z') /* needed in cut() */ + if (ch >= LETTER_A && ch <= LETTER_Z) /* needed in cut() */ zmod.flags |= MOD_VIAPP; else zmod.flags &= ~MOD_VIAPP; - zmod.vibuf = tulower(ch) + (idigit(ch) ? -'1' + 26 : -'a'); + zmod.vibuf = tulower(ch) + (idigit(ch) ? - DIGIT_1 + 26 : -LETTER_a); zmod.flags |= MOD_VIBUF; prefixflag = 1; return 0; @@ -897,12 +923,12 @@ viquotedinsert(char **args) sob.sg_flags = (sob.sg_flags | RAW) & ~ECHO; ioctl(SHTTY, TIOCSETN, &sob); #endif - lastchar = getkey(0); + getfullchar(0); #ifndef HAS_TIO zsetterm(); #endif foredel(1); - if(lastchar < 0) + if(LASTFULLCHAR == ZLEEOF) return 1; else return selfinsert(args); -- cgit 1.4.1