diff options
-rw-r--r-- | ChangeLog | 10 | ||||
-rw-r--r-- | Src/Modules/stat.c | 4 | ||||
-rw-r--r-- | Src/Zle/complist.c | 8 | ||||
-rw-r--r-- | Src/Zle/deltochar.c | 3 | ||||
-rw-r--r-- | Src/Zle/zle.h | 69 | ||||
-rw-r--r-- | Src/Zle/zle_hist.c | 47 | ||||
-rw-r--r-- | Src/Zle/zle_keymap.c | 37 | ||||
-rw-r--r-- | Src/Zle/zle_main.c | 151 | ||||
-rw-r--r-- | Src/Zle/zle_misc.c | 139 | ||||
-rw-r--r-- | Src/Zle/zle_move.c | 29 | ||||
-rw-r--r-- | Src/Zle/zle_thingy.c | 2 | ||||
-rw-r--r-- | Src/Zle/zle_tricky.c | 17 | ||||
-rw-r--r-- | Src/Zle/zle_utils.c | 14 | ||||
-rw-r--r-- | Src/Zle/zle_vi.c | 72 | ||||
-rw-r--r-- | Src/system.h | 37 |
15 files changed, 478 insertions, 161 deletions
diff --git a/ChangeLog b/ChangeLog index 88102f518..0bf98e7f0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,15 @@ 2005-02-18 Peter Stephenson <pws@csr.com> + * 20823: Src/Modules/stat.c: debugging test failed on 64-bit + machines. + + * 20822: system.h, Src/Zle/complist.c, Src/Zle/deltochar.c, + Src/Zle/zle.h, Src/Zle/zle_hist.c, Src/Zle/zle_keymap.c, + Src/Zle/zle_main.c, Src/Zle/zle_misc.c, Src/Zle/zle_move.c, + Src/Zle/zle_thingy.c, Src/Zle/zle_tricky.c, Src/Zle/zle_utils.c, + Src/Zle/zle_vi.c: improve input ready for multibyte/Unicode + handling, massaging use of getkey() and lastchar. + * Motoi Washida: users/8522: Completion/Darwin/Command/_defaults: Fix quotation in Darwin completion. diff --git a/Src/Modules/stat.c b/Src/Modules/stat.c index 29a4099d6..9f3d4a6de 100644 --- a/Src/Modules/stat.c +++ b/Src/Modules/stat.c @@ -239,7 +239,7 @@ statprint(struct stat *sbuf, char *outbuf, char *fname, int iwhich, int flags) #ifdef INO_T_IS_64_BIT convbase(optr, sbuf->st_ino, 0); #else - DPUTS(sizeof(sbuf->st_ino) > 4, + DPUTS(sizeof(sbuf->st_ino) > sizeof(unsigned long), "Shell compiled with wrong ino_t size"); statulprint((unsigned long)sbuf->st_ino, optr); #endif @@ -269,7 +269,7 @@ statprint(struct stat *sbuf, char *outbuf, char *fname, int iwhich, int flags) #ifdef OFF_T_IS_64_BIT convbase(optr, sbuf->st_size, 0); #else - DPUTS(sizeof(sbuf->st_size) > 4, + DPUTS(sizeof(sbuf->st_size) > sizeof(unsigned long), "Shell compiled with wrong off_t size"); statulprint((unsigned long)sbuf->st_size, optr); #endif diff --git a/Src/Zle/complist.c b/Src/Zle/complist.c index 1cc709817..748b1fdf7 100644 --- a/Src/Zle/complist.c +++ b/Src/Zle/complist.c @@ -1869,6 +1869,10 @@ msearch(Cmatch **ptr, int ins, int back, int rep, int *wrapp) msearchpush(ptr, back); if (ins) { + /* + * TODO: probably need to convert back to multibyte character + * string? Who knows... 
+ */ s[0] = lastchar; s[1] = '\0'; @@ -2802,9 +2806,7 @@ domenuselect(Hookdef dummy, Chdata dat) } } if (cmd == Th(z_selfinsertunmeta)) { - lastchar &= 0x7f; - if (lastchar == '\r') - lastchar = '\n'; + fixunmeta(); } wrap = 0; np = msearch(p, ins, (ins ? (mode == MM_BSEARCH) : back), diff --git a/Src/Zle/deltochar.c b/Src/Zle/deltochar.c index 545c34c9f..d25f99680 100644 --- a/Src/Zle/deltochar.c +++ b/Src/Zle/deltochar.c @@ -37,7 +37,8 @@ static Widget w_zaptochar; static int deltochar(UNUSED(char **args)) { - int c = getkey(0), dest = zlecs, ok = 0, n = zmult; + ZLE_INT_T c = getfullchar(0); + int dest = zlecs, ok = 0, n = zmult; int zap = (bindk->widget == w_zaptochar); if (n > 0) { diff --git a/Src/Zle/zle.h b/Src/Zle/zle.h index 3b9845f8a..a76d08e2d 100644 --- a/Src/Zle/zle.h +++ b/Src/Zle/zle.h @@ -27,6 +27,75 @@ * */ +#ifdef ZLE_UNICODE_SUPPORT +typedef wchar_t ZLE_CHAR_T; +typedef wchar_t *ZLE_STRING_T; +typedef int_t ZLE_INT_T; +#define ZLE_CHAR_SIZE sizeof(wchar_t) + +/* + * MB_CUR_MAX is the maximum number of bytes that a single wide + * character will convert into. We use it to keep strings + * sufficiently long. It should always be defined, but if it isn't + * just assume we are using Unicode which requires 6 characters. + * (Note that it's not necessarily defined to a constant.) 
+ */ +#ifndef MB_CUR_MAX +#define MB_CUR_MAX 6 +#endif + +#define ZLENL L'\n' +#define ZLENUL L'\0' +#define ZLETAB L'\t' + +#define DIGIT_1 L'1' +#define DIGIT_9 L'9' +#define LETTER_a L'a' +#define LETTER_z L'z' +#define LETTER_A L'A' +#define LETTER_Z L'Z' +#define LETTER_y L'y' +#define LETTER_n L'n' + +#define ZLENULSTR L"" +#define ZLEEOF WEOF +#define ZS_memcpy wmemcpy +#define ZS_memmove wmemmove +#define ZC_icntrl iswcntrl + +#define LASTFULLCHAR lastchar_wide + +#else /* Not ZLE_UNICODE_SUPPORT: old single-byte code */ + +typedef int ZLE_CHAR_T; +typedef unsigned char *ZLE_STRING_T; +typedef int ZLE_INT_T; +#define ZLE_CHAR_SIZE sizeof(unsigned char) + +#define ZLENL '\n' +#define ZLENUL '\0' +#define ZLETAB '\t' + +#define DIGIT_1 '1' +#define DIGIT_9 '9' +#define LETTER_a 'a' +#define LETTER_z 'z' +#define LETTER_A 'A' +#define LETTER_Z 'Z' +#define LETTER_y 'y' +#define LETTER_n 'n' + +#define ZLENULSTR "" +#define ZLEEOF EOF +#define ZS_memcpy memcpy +#define ZS_memmove memmove +#define ZC_icntrl icntrl + +#define LASTFULLCHAR lastchar + +#endif + + typedef struct widget *Widget; typedef struct thingy *Thingy; diff --git a/Src/Zle/zle_hist.c b/Src/Zle/zle_hist.c index 8cd082b72..afad7ae44 100644 --- a/Src/Zle/zle_hist.c +++ b/Src/Zle/zle_hist.c @@ -420,11 +420,12 @@ endofhistory(UNUSED(char **args)) int insertlastword(char **args) { - int n, nwords, histstep = -1, wordpos = 0, deleteword = 0; + int n, nwords, histstep = -1, wordpos = 0, deleteword = 0, len, sz; char *s, *t; Histent he = NULL; LinkList l = NULL; LinkNode node; + ZLE_STRING_T zs; static char *lastinsert; static int lasthist, lastpos, lastlen; @@ -554,7 +555,10 @@ insertlastword(char **args) memcpy(lastinsert, s, lastlen); n = zmult; zmult = 1; - doinsert(s); + + zs = stringaszleline((unsigned char *)s, &len, &sz); + doinsert(zs, len); + zfree(zs, sz); zmult = n; *t = save; return 0; @@ -780,7 +784,7 @@ doisearch(char **args, int dir) char *arg; savekeys = kungetct; arg = 
getkeystring(*args, &len, 2, NULL); - ungetkeys(arg, len); + ungetbytes(arg, len); } strcpy(ibuf, ISEARCH_PROMPT); @@ -951,18 +955,23 @@ doisearch(char **args, int dir) sbuf[sbptr] = '^'; zrefresh(); } - if ((lastchar = getkey(0)) == EOF) + if (getfullchar(0) == ZLEEOF) feep = 1; else goto ins; } else { if(cmd == Th(z_selfinsertunmeta)) { - lastchar &= 0x7f; - if(lastchar == '\r') - lastchar = '\n'; - } else if (cmd == Th(z_magicspace)) - lastchar = ' '; - else if (cmd != Th(z_selfinsert)) { + fixunmeta(); + } else if (cmd == Th(z_magicspace)) { + fixmagicspace(); + } else if (cmd == Th(z_selfinsert)) { +#ifdef ZLE_UNICODE_SUPPORT + if (!lastchar_wide_valid) + getfullcharrest(lastchar); +#else + ; +#endif + } else { ungetkeycmd(); if (cmd == Th(z_sendbreak)) sbptr = 0; @@ -979,6 +988,8 @@ doisearch(char **args, int dir) sbuf = ibuf + FIRST_SEARCH_CHAR; sibuf *= 2; } + /* TODO: use lastchar_wide if available, convert back to + * multibyte string. Yuk. */ sbuf[sbptr++] = lastchar; } if (feep) @@ -1093,7 +1104,7 @@ getvisrchstr(void) break; } if(cmd == Th(z_magicspace)) { - lastchar = ' '; + fixmagicspace(); cmd = Th(z_selfinsert); } if(cmd == Th(z_redisplay)) { @@ -1128,15 +1139,20 @@ getvisrchstr(void) sbuf[sptr] = '^'; zrefresh(); } - if ((lastchar = getkey(0)) == EOF) + if (getfullchar(0) == ZLEEOF) feep = 1; else goto ins; } else if(cmd == Th(z_selfinsertunmeta) || cmd == Th(z_selfinsert)) { if(cmd == Th(z_selfinsertunmeta)) { - lastchar &= 0x7f; - if(lastchar == '\r') - lastchar = '\n'; + fixunmeta(); + } else { +#ifdef ZLE_UNICODE_SUPPORT + if (!lastchar_wide_valid) + getrestchar(lastchar); +#else + ; +#endif } ins: if(sptr == ssbuf - 1) { @@ -1144,6 +1160,7 @@ getvisrchstr(void) strcpy(newbuf, sbuf); statusline = sbuf = newbuf; } + /* TODO: may be wide char, convert back to multibyte string */ sbuf[sptr++] = lastchar; } else { feep = 1; diff --git a/Src/Zle/zle_keymap.c b/Src/Zle/zle_keymap.c index 3aee499f0..9b83a4953 100644 --- a/Src/Zle/zle_keymap.c +++ 
b/Src/Zle/zle_keymap.c @@ -1272,7 +1272,21 @@ getkeymapcmd(Keymap km, Thingy *funcp, char **strp) keybuflen = 0; keybuf[0] = 0; - while((lastchar = getkeybuf(!!lastlen)) != EOF) { + /* + * getkeybuf returns multibyte strings, which may not + * yet correspond to complete wide characters, regardless + * of the locale. This is because we can't be sure whether + * the key bindings and keyboard input always return such + * characters. So we always look up bindings for each + * chunk of string. Intelligence within self-insert tries + * to fix up insertion of real wide characters properly. + * + * Note that this does not stop the user binding wide characters to + * arbitrary functions, just so long as the string used in the + * argument to bindkey is in the correct form for the locale. + * That's beyond our control. + */ + while(getkeybuf(!!lastlen) != EOF) { char *s; Thingy f; int loc = 1; @@ -1296,7 +1310,7 @@ getkeymapcmd(Keymap km, Thingy *funcp, char **strp) lastchar = lastc; if(lastlen != keybuflen) { unmetafy(keybuf + lastlen, &keybuflen); - ungetkeys(keybuf+lastlen, keybuflen); + ungetbytes(keybuf+lastlen, keybuflen); if(vichgflag) vichgbufptr -= keybuflen; keybuf[lastlen] = 0; @@ -1306,11 +1320,24 @@ getkeymapcmd(Keymap km, Thingy *funcp, char **strp) return keybuf; } +/* + * Add a (possibly metafied) byte to the key input so far. + * This handles individual bytes of a multibyte string separately; + * see note in getkeymapcmd. Hence there is no wide character + * support at this level. + * + * TODO: Need to be careful about whether we return EOF in the + * middle of a wide character. However, I think we're OK since + * EOF and 0xff are distinct and we're reading bytes from the + * lower level, so EOF really does mean something went wrong. Even so, + * I'm worried enough to leave this note here for now. 
+ */ + /**/ static int getkeybuf(int w) { - int c = getkey(w); + int c = getbyte(w); if(c < 0) return EOF; @@ -1332,7 +1359,7 @@ getkeybuf(int w) mod_export void ungetkeycmd(void) { - ungetkeys(keybuf, keybuflen); + ungetbytes(keybuf, keybuflen); } /* read a command from the current keymap, with widgets */ @@ -1359,7 +1386,7 @@ getkeycmd(void) return NULL; } pb = unmetafy(ztrdup(str), &len); - ungetkeys(pb, len); + ungetbytes(pb, len); zfree(pb, strlen(str) + 1); goto sentstring; } diff --git a/Src/Zle/zle_main.c b/Src/Zle/zle_main.c index d25376862..1f7b7cbfa 100644 --- a/Src/Zle/zle_main.c +++ b/Src/Zle/zle_main.c @@ -78,10 +78,30 @@ int done; /**/ int mark; -/* last character pressed */ +/* + * Last character pressed. + * + * Depending how far we are with processing, the lastcharacter may + * be a single byte read (lastchar_wide_valid is 0, lastchar_wide is not + * valid) or a full wide character. This is needed because we can't be + * sure whether the user is typing old \M-style commands or multibyte + * input. + * + * Calling getfullchar or getrestchar is guaranteed to ensure we have + * a valid wide character (although this may be WEOF). In many states + * we know this and don't need to test lastchar_wide_valid. + */ /**/ -mod_export int lastchar; +mod_export int +lastchar; +#ifdef ZLE_UNICODE_SUPPORT +/**/ +mod_export ZLE_INT_T lastchar_wide; +/**/ +mod_export int +lastchar_wide_valid; +#endif /* the bindings for the previous and for this key */ @@ -148,7 +168,7 @@ mod_export struct modifier zmod; /**/ int prefixflag; -/* Number of characters waiting to be read by the ungetkeys mechanism */ +/* Number of characters waiting to be read by the ungetbytes mechanism */ /**/ int kungetct; @@ -196,7 +216,7 @@ zsetterm(void) * we can't set up the terminal for zle *at all* until * we are sure there is no more typeahead to come. So * if there is typeahead, we set the flag delayzsetterm. 
- * Then getkey() performs another FIONREAD call; if that is + * Then getbyte() performs another FIONREAD call; if that is * 0, we have finally used up all the typeahead, and it is * safe to alter the terminal, which we do at that point. */ @@ -266,7 +286,7 @@ zsetterm(void) ti.tio.c_cc[VMIN] = 1; ti.tio.c_cc[VTIME] = 0; ti.tio.c_iflag |= (INLCR | ICRNL); - /* this line exchanges \n and \r; it's changed back in getkey + /* this line exchanges \n and \r; it's changed back in getbyte so that the net effect is no change at all inside the shell. This double swap is to allow typeahead in common cases, eg. @@ -275,12 +295,12 @@ zsetterm(void) echo foo<return> <--- typed before sleep returns The shell sees \n instead of \r, since it was changed by the kernel - while zsh wasn't looking. Then in getkey() \n is changed back to \r, + while zsh wasn't looking. Then in getbyte() \n is changed back to \r, and it sees "echo foo<accept line>", as expected. Without the double swap the shell would see "echo foo\n", which is translated to "echo fooecho foo<accept line>" because of the binding. Note that if you type <line-feed> during the sleep the shell just sees - \n, which is translated to \r in getkey(), and you just get another + \n, which is translated to \r in getbyte(), and you just get another prompt. For type-ahead to work in ALL cases you have to use stty inlcr. @@ -321,9 +341,16 @@ zsetterm(void) static char *kungetbuf; static int kungetsz; +/* + * Note on ungetbyte and ungetbytes for the confused (pws): + * these are low level and deal with bytes before they + * have been converted into (possibly wide) characters. + * Hence the names. 
+ */ + /**/ void -ungetkey(int ch) +ungetbyte(int ch) { if (kungetct == kungetsz) kungetbuf = realloc(kungetbuf, kungetsz *= 2); @@ -332,11 +359,11 @@ ungetkey(int ch) /**/ void -ungetkeys(char *s, int len) +ungetbytes(char *s, int len) { s += len; while (len--) - ungetkey(*--s); + ungetbyte(*--s); } #if defined(pyr) && defined(HAVE_SELECT) @@ -356,7 +383,7 @@ breakread(int fd, char *buf, int n) #endif static int -raw_getkey(int keytmout, char *cptr) +raw_getbyte(int keytmout, char *cptr) { long exp100ths; int ret; @@ -591,13 +618,22 @@ raw_getkey(int keytmout, char *cptr) /**/ mod_export int -getkey(int keytmout) +getbyte(int keytmout) { char cc; unsigned int ret; int die = 0, r, icnt = 0; int old_errno = errno, obreaks = breaks; +#ifdef ZLE_UNICODE_SUPPORT + /* + * Reading a single byte always invalidates the status + * of lastchar_wide. We may fix this up in getrestchar + * if this is the last byte of a wide character. + */ + lastchar_wide_valid = 0; +#endif + if (kungetct) ret = STOUC(kungetbuf[--kungetct]); else { @@ -612,10 +648,10 @@ getkey(int keytmout) for (;;) { int q = queue_signal_level(); dont_queue_signals(); - r = raw_getkey(keytmout, &cc); + r = raw_getbyte(keytmout, &cc); restore_queue_signals(q); if (r == -2) /* timeout */ - return EOF; + return lastchar = EOF; if (r == 1) break; if (r == 0) { @@ -642,7 +678,7 @@ getkey(int keytmout) errflag = 0; breaks = obreaks; errno = old_errno; - return EOF; + return lastchar = EOF; } else if (errno == EWOULDBLOCK) { fcntl(0, F_SETFL, 0); } else if (errno == EIO && !die) { @@ -665,15 +701,96 @@ getkey(int keytmout) ret = STOUC(cc); } + /* + * TODO: if vichgbuf is to be characters instead of a multibyte + * string the following needs moving to getfullchar(). + */ if (vichgflag) { if (vichgbufptr == vichgbufsz) vichgbuf = realloc(vichgbuf, vichgbufsz *= 2); vichgbuf[vichgbufptr++] = ret; } errno = old_errno; - return ret; + return lastchar = ret; } + +/* + * Get a full character rather than just a single byte. 
+ * (TODO: Strictly we ought to call this getkey and the above + * function getbyte.) + */ + +/**/ +mod_export ZLE_INT_T +getfullchar(int keytmout) +{ + int inchar = getbyte(keytmout); + +#ifdef ZLE_UNICODE_SUPPORT + return getrestchar(inchar); +#else + return inchar; +#endif +} + + +/**/ +#ifdef ZLE_UNICODE_SUPPORT +/* + * Get the remainder of a character if we support multibyte + * input strings. It may not require any more input, but + * we haven't yet checked. The character previously returned + * by getbyte() is passed down as inchar. + */ + +/**/ +mod_export ZLE_INT_T +getrestchar(int inchar) +{ + char cnull = '\0'; + char buf[MB_CUR_MAX], *ptr; + wchar_t outchar; + int ret; + + /* + * We are guaranteed to set a valid wide last character, + * although it may be WEOF (which is technically not + * a wide character at all...) + */ + lastchar_wide_valid = 1; + + if (inchar == EOF) + return lastchar_wide = WEOF; + + /* reset shift state by converting null */ + mbrtowc(&outchar, &cnull, 1, &ps); + + ptr = buf; + *ptr++ = inchar; + /* + * Return may be zero if we have a NUL; handle this like + * any other character. + */ + while ((ret = mbrtowc(&outchar, buf, ptr - buf, &ps)) < 0) { + if (ret == -1) { + /* + * Invalid input. Hmm, what's the right thing to do here? + */ + return lastchar_wide = WEOF; + } + /* No timeout here as we really need the character. 
*/ + inchar = getbyte(0); + if (inchar == EOF) + return lastchar_wide = WEOF; + *ptr++ = inchar; + } + return lastchar_wide = (wint_t)outchar; +} +/**/ +#endif + + /**/ void zlecore(void) @@ -1445,7 +1562,7 @@ setup_(UNUSED(Module m)) zlereadptr = zleread; zlesetkeymapptr = zlesetkeymap; - getkeyptr = getkey; + getkeyptr = getbyte; /* initialise the thingies */ init_thingies(); diff --git a/Src/Zle/zle_misc.c b/Src/Zle/zle_misc.c index 86a0137b3..134ae21af 100644 --- a/Src/Zle/zle_misc.c +++ b/Src/Zle/zle_misc.c @@ -34,13 +34,13 @@ /**/ void -doinsert(char *str) +doinsert(ZLE_STRING_T zstr, int len) { - char *s; - int len = ztrlen(str); - int c1 = *str == Meta ? STOUC(str[1])^32 : STOUC(*str);/* first character */ + ZLE_STRING_T s; + ZLE_CHAR_T c1 = *zstr; /* first character */ int neg = zmult < 0; /* insert *after* the cursor? */ int m = neg ? -zmult : zmult; /* number of copies to insert */ + int count; iremovesuffix(c1, 0); invalidatelist(); @@ -50,8 +50,8 @@ doinsert(char *str) else if(zlecs + m * len > zlell) spaceinline(zlecs + m * len - zlell); while(m--) - for(s = str; *s; s++) - zleline[zlecs++] = *s == Meta ? *++s ^ 32 : *s; + for(s = zstr, count = len; count; s++, count--) + zleline[zlecs++] = *s; if(neg) zlecs += zmult * len; } @@ -60,25 +60,41 @@ doinsert(char *str) mod_export int selfinsert(UNUSED(char **args)) { - char s[3], *p = s; - - if(imeta(lastchar)) { - *p++ = Meta; - lastchar ^= 32; - } - *p++ = lastchar; - *p = 0; - doinsert(s); +#ifdef ZLE_UNICODE_SUPPORT + if (!lastchar_wide_valid) + getrestchar(lastchar); + doinsert(&lastchar_wide, 1); +#else + char s = lastchar; + doinsert(&s, 1); +#endif return 0; } /**/ -mod_export int -selfinsertunmeta(char **args) +mod_export void +fixunmeta(void) { lastchar &= 0x7f; if (lastchar == '\r') lastchar = '\n'; +#ifdef ZLE_UNICODE_SUPPORT + /* + * TODO: can we do this better? + * We need a wide character to insert. + * selfinsertunmeta is intrinsically problematic + * with multibyte input. 
+ */ + lastchar_wide = (ZLE_CHAR_T)lastchar; + lastchar_wide_valid = TRUE; +#endif +} + +/**/ +mod_export int +selfinsertunmeta(char **args) +{ + fixunmeta(); return selfinsert(args); } @@ -490,11 +506,11 @@ quotedinsert(char **args) sob.sg_flags = (sob.sg_flags | RAW) & ~ECHO; ioctl(SHTTY, TIOCSETN, &sob); #endif - lastchar = getkey(0); + getfullchar(0); #ifndef HAS_TIO zsetterm(); #endif - if (lastchar < 0) + if (LASTFULLCHAR == ZLEEOF) return 1; else return selfinsert(args); @@ -506,9 +522,20 @@ digitargument(UNUSED(char **args)) { int sign = (zmult < 0) ? -1 : 1; +#ifdef ZLE_UNICODE_SUPPORT + /* + * It's too dangerous to allow metafied input. See + * universalargument for comments on (possibly suboptimal) handling + * of digits. We are assuming ASCII is a subset of the multibyte + * encoding. + */ + if (lastchar < '0' || lastchar > '9') + return 1; +#else /* allow metafied as well as ordinary digits */ if ((lastchar & 0x7f) < '0' || (lastchar & 0x7f) > '9') return 1; +#endif if (!(zmod.flags & MOD_TMULT)) zmod.tmult = 0; @@ -546,7 +573,22 @@ universalargument(char **args) zmod.flags |= MOD_MULT; return 0; } - while ((gotk = getkey(0)) != EOF) { + /* + * TODO: this is quite tricky to do when trying to maintain + * compatibility between the old input system and Unicode. + * We don't know what follows the digits, so if we try to + * read wide characters we may fail (e.g. we may come across an old + * \M-style binding). + * + * If we assume individual bytes are either explicitly ASCII or + * not (a la UTF-8), we get away with it; we can back up individual + * bytes and everything will work. We may want to relax this + * assumption later. ("Much later" - (C) Steven Singer, + * CSR BlueCore firmware, ca. 2000.) + * + * Hence for now this remains byte-by-byte. 
+ */ + while ((gotk = getbyte(0)) != EOF) { if (gotk == '-' && !digcnt) { minus = -1; digcnt++; @@ -554,7 +596,7 @@ universalargument(char **args) pref = pref * 10 + (gotk & 0xf); digcnt++; } else { - ungetkey(gotk); + ungetbyte(gotk); break; } } @@ -765,24 +807,32 @@ executenamedcommand(char *prmt) } else if(cmd == Th(z_viquotedinsert)) { *ptr = '^'; zrefresh(); - lastchar = getkey(0); - if(lastchar == EOF || !lastchar || len == NAMLEN) + getfullchar(0); + if(LASTFULLCHAR == ZLEEOF || !LASTFULLCHAR || len == NAMLEN) feep = 1; - else + else { + /* TODO: convert back to multibyte string */ *ptr++ = lastchar, len++, curlist = 0; + } } else if(cmd == Th(z_quotedinsert)) { - if((lastchar = getkey(0)) == EOF || !lastchar || len == NAMLEN) + if(getfullchar(0) == ZLEEOF || + !LASTFULLCHAR || len == NAMLEN) feep = 1; - else + else { + /* TODO: convert back to multibyte string */ *ptr++ = lastchar, len++, curlist = 0; + } } else if(cmd == Th(z_backwarddeletechar) || cmd == Th(z_vibackwarddeletechar)) { - if (len) + if (len) { + /* TODO: backward full character in multibyte string. Yuk. */ len--, ptr--, curlist = 0; + } } else if(cmd == Th(z_killregion) || cmd == Th(z_backwardkillword) || cmd == Th(z_vibackwardkillword)) { if (len) curlist = 0; + /* TODO: backward full character in multibyte string. Yuk. 
*/ while (len && (len--, *--ptr != '-')); } else if(cmd == Th(z_killwholeline) || cmd == Th(z_vikillline) || cmd == Th(z_backwardkillline)) { @@ -812,9 +862,7 @@ executenamedcommand(char *prmt) unrefthingy(r); } if(cmd == Th(z_selfinsertunmeta)) { - lastchar &= 0x7f; - if(lastchar == '\r') - lastchar = '\n'; + fixunmeta(); cmd = Th(z_selfinsert); } if (cmd == Th(z_listchoices) || cmd == Th(z_deletecharorlist) || @@ -867,11 +915,24 @@ executenamedcommand(char *prmt) len = cmdambig; } } else { - if (len == NAMLEN || icntrl(lastchar) || - cmd != Th(z_selfinsert)) + if (len == NAMLEN || cmd != Th(z_selfinsert)) feep = 1; - else - *ptr++ = lastchar, len++, curlist = 0; + else { +#ifdef ZLE_UNICODE_SUPPORT + if (!lastchar_wide_valid) + getrestchar(0); + if (iswcntrl(lastchar)) +#else + if (icntrl(lastchar)) +#endif + { + feep = 1; + } + else { + /* TODO: convert back to multibyte string */ + *ptr++ = lastchar, len++, curlist = 0; + } + } } } if (feep) @@ -911,6 +972,9 @@ executenamedcommand(char *prmt) /* Length of suffix to remove when inserting each possible character value. * * suffixlen[256] is the length to remove for non-insertion editing actions. */ +/* + * TODO: Aargh, this is completely broken with wide characters. + */ /**/ mod_export int suffixlen[257]; @@ -1000,7 +1064,7 @@ makesuffixstr(char *f, char *s, int n) /**/ mod_export void -iremovesuffix(int c, int keep) +iremovesuffix(ZLE_CHAR_T c, int keep) { if (suffixfunc) { Eprog prog = getshfunc(suffixfunc); @@ -1024,7 +1088,12 @@ iremovesuffix(int c, int keep) zsfree(suffixfunc); suffixfunc = NULL; } else { +#ifdef ZLE_UNICODE_SUPPORT + /* TODO: best I can think of for now... */ + int sl = (unsigned int)c < 256 ? 
suffixlen[c] : 0; +#else int sl = suffixlen[c]; +#endif if(sl) { backdel(sl); if (!keep) diff --git a/Src/Zle/zle_move.c b/Src/Zle/zle_move.c index 48e9dd64f..b939df06b 100644 --- a/Src/Zle/zle_move.c +++ b/Src/Zle/zle_move.c @@ -353,13 +353,14 @@ vibeginningofline(UNUSED(char **args)) return 0; } -static int vfindchar, vfinddir, tailadd; +static ZLE_INT_T vfindchar; +static int vfinddir, tailadd; /**/ int vifindnextchar(char **args) { - if ((vfindchar = vigetkey()) != -1) { + if ((vfindchar = vigetkey()) != ZLEEOF) { vfinddir = 1; tailadd = 0; return virepeatfind(args); @@ -371,7 +372,7 @@ vifindnextchar(char **args) int vifindprevchar(char **args) { - if ((vfindchar = vigetkey()) != -1) { + if ((vfindchar = vigetkey()) != ZLEEOF) { vfinddir = -1; tailadd = 0; return virepeatfind(args); @@ -383,7 +384,7 @@ vifindprevchar(char **args) int vifindnextcharskip(char **args) { - if ((vfindchar = vigetkey()) != -1) { + if ((vfindchar = vigetkey()) != ZLEEOF) { vfinddir = 1; tailadd = -1; return virepeatfind(args); @@ -395,7 +396,7 @@ vifindnextcharskip(char **args) int vifindprevcharskip(char **args) { - if ((vfindchar = vigetkey()) != -1) { + if ((vfindchar = vigetkey()) != ZLEEOF) { vfinddir = -1; tailadd = 1; return virepeatfind(args); @@ -465,12 +466,12 @@ vifirstnonblank(UNUSED(char **args)) int visetmark(UNUSED(char **args)) { - int ch; + ZLE_INT_T ch; - ch = getkey(0); - if (ch < 'a' || ch > 'z') + ch = getfullchar(0); + if (ch < LETTER_a || ch > LETTER_z) return 1; - ch -= 'a'; + ch -= LETTER_a; vimarkcs[ch] = zlecs; vimarkline[ch] = histline; return 0; @@ -480,15 +481,15 @@ visetmark(UNUSED(char **args)) int vigotomark(UNUSED(char **args)) { - int ch; + ZLE_INT_T ch; - ch = getkey(0); - if (ch == lastchar) + ch = getfullchar(0); + if (ch == LASTFULLCHAR) ch = 26; else { - if (ch < 'a' || ch > 'z') + if (ch < LETTER_a || ch > LETTER_z) return 1; - ch -= 'a'; + ch -= LETTER_a; } if (!vimarkline[ch]) return 1; diff --git a/Src/Zle/zle_thingy.c 
b/Src/Zle/zle_thingy.c index 441d85b2c..494e1ade5 100644 --- a/Src/Zle/zle_thingy.c +++ b/Src/Zle/zle_thingy.c @@ -473,7 +473,7 @@ bin_zle_unget(char *name, char **args, UNUSED(Options ops), UNUSED(char func)) return 1; } while (p > b) - ungetkey((int) *--p); + ungetbyte((int) *--p); return 0; } diff --git a/Src/Zle/zle_tricky.c b/Src/Zle/zle_tricky.c index a4d87dfea..15c723d7a 100644 --- a/Src/Zle/zle_tricky.c +++ b/Src/Zle/zle_tricky.c @@ -2298,12 +2298,27 @@ doexpandhist(void) } /**/ +void +fixmagicspace(void) +{ + lastchar = ' '; +#ifdef ZLE_UNICODE_SUPPORT + /* + * This is redundant if the multibyte encoding extends ASCII, + * since lastchar is a full character, but it's safer anyway... + */ + lastchar_wide = L' '; + lastchar_wide_valid = TRUE; +#endif +} + +/**/ int magicspace(char **args) { char *bangq; int ret; - lastchar = ' '; + fixmagicspace(); for (bangq = (char *)zleline; (bangq = strchr(bangq, bangchar)); bangq += 2) if (bangq[1] == '"' && (bangq == (char *)zleline || bangq[-1] != '\\')) diff --git a/Src/Zle/zle_utils.c b/Src/Zle/zle_utils.c index ffd94def8..e6f696935 100644 --- a/Src/Zle/zle_utils.c +++ b/Src/Zle/zle_utils.c @@ -510,7 +510,7 @@ hstrnstr(char *haystack, int pos, char *needle, int len, int dir, int sens) mod_export int getzlequery(int yesno) { - int c; + ZLE_INT_T c; #ifdef FIONREAD int val; @@ -525,18 +525,18 @@ getzlequery(int yesno) #endif /* get a character from the tty and interpret it */ - c = getkey(0); + c = getfullchar(0); if (yesno) { - if (c == '\t') - c = 'y'; + if (c == ZLETAB) + c = LETTER_y; else if (icntrl(c) || c == EOF) - c = 'n'; + c = LETTER_n; else c = tulower(c); } /* echo response and return */ - if (c != '\n') - putc(c, shout); + if (c != ZLENL) + putc(c, shout); /* TODO: convert to multibyte */ return c; } diff --git a/Src/Zle/zle_vi.c b/Src/Zle/zle_vi.c index 15a2deb49..b45ccf10a 100644 --- a/Src/Zle/zle_vi.c +++ b/Src/Zle/zle_vi.c @@ -50,6 +50,11 @@ int vilinerange; /**/ int vichgbufsz, vichgbufptr, 
vichgflag; +/* + * TODO: need consistent handling of vichgbuf: ZLE_STRING_T or + * char *? Consequently, use of lastchar in this file needs fixing + * too. + */ /**/ char *vichgbuf; @@ -95,15 +100,15 @@ startvitext(int im) } /**/ -int +ZLE_INT_T vigetkey(void) { Keymap mn = openkeymap("main"); char m[3], *str; Thingy cmd; - if((lastchar = getkey(0)) == EOF) - return -1; + if(getbyte(0) == EOF) + return ZLEEOF; m[0] = lastchar; metafy(m, 1, META_NOALLOC); @@ -112,23 +117,35 @@ vigetkey(void) else cmd = t_undefinedkey; + /* + * TODO: if this was bound to self-insert, we may + * be on the first character of a multibyte string + * and need to acquire the rest. + */ if (!cmd || cmd == Th(z_sendbreak)) { - return -1; + return ZLEEOF; } else if (cmd == Th(z_quotedinsert)) { - if ((lastchar = getkey(0)) == EOF) - return -1; + if (getfullchar(0) == ZLEEOF) + return ZLEEOF; } else if(cmd == Th(z_viquotedinsert)) { - char sav = zleline[zlecs]; + ZLE_CHAR_T sav = zleline[zlecs]; zleline[zlecs] = '^'; zrefresh(); - lastchar = getkey(0); + getfullchar(0); zleline[zlecs] = sav; - if(lastchar == EOF) - return -1; - } else if (cmd == Th(z_vicmdmode)) - return -1; - return lastchar; + if(LASTFULLCHAR == ZLEEOF) + return ZLEEOF; + } else if (cmd == Th(z_vicmdmode)) { + return ZLEEOF; + } +#ifdef ZLE_UNICODE_SUPPORT + if (!lastchar_wide_valid) + { + getrestchar(lastchar); + } +#endif + return LASTFULLCHAR; } /**/ @@ -489,7 +506,7 @@ vireplacechars(UNUSED(char **args)) return 1; } /* get key */ - if((ch = vigetkey()) == -1) { + if((ch = vigetkey()) == ZLEEOF) { vichgflag = 0; return 1; } @@ -593,7 +610,7 @@ virepeatchange(UNUSED(char **args)) } /* repeat the command */ inrepeat = 1; - ungetkeys(vichgbuf, vichgbufptr); + ungetbytes(vichgbuf, vichgbufptr); return 0; } @@ -817,26 +834,35 @@ vicapslockpanic(UNUSED(char **args)) statusline = "press a lowercase key to continue"; statusll = strlen(statusline); zrefresh(); - while (!islower(getkey(0))); +#ifdef ZLE_UNICODE_SUPPORT + while 
(!iswlower(getfullchar(0))); +#else + while (!islower(getfullchar(0))); +#endif statusline = NULL; return 0; } +#ifdef ZLE_UNICODE_SUPPORT +#else +#endif + /**/ int visetbuffer(UNUSED(char **args)) { - int ch; + ZLE_INT_T ch; if ((zmod.flags & MOD_VIBUF) || - (((ch = getkey(0)) < '1' || ch > '9') && - (ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z'))) + (((ch = getfullchar(0)) < DIGIT_1 || ch > DIGIT_9) && + (ch < LETTER_a || ch > LETTER_z) && + (ch < LETTER_A || ch > LETTER_Z))) return 1; - if (ch >= 'A' && ch <= 'Z') /* needed in cut() */ + if (ch >= LETTER_A && ch <= LETTER_Z) /* needed in cut() */ zmod.flags |= MOD_VIAPP; else zmod.flags &= ~MOD_VIAPP; - zmod.vibuf = tulower(ch) + (idigit(ch) ? -'1' + 26 : -'a'); + zmod.vibuf = tulower(ch) + (idigit(ch) ? - DIGIT_1 + 26 : -LETTER_a); zmod.flags |= MOD_VIBUF; prefixflag = 1; return 0; @@ -897,12 +923,12 @@ viquotedinsert(char **args) sob.sg_flags = (sob.sg_flags | RAW) & ~ECHO; ioctl(SHTTY, TIOCSETN, &sob); #endif - lastchar = getkey(0); + getfullchar(0); #ifndef HAS_TIO zsetterm(); #endif foredel(1); - if(lastchar < 0) + if(LASTFULLCHAR == ZLEEOF) return 1; else return selfinsert(args); diff --git a/Src/system.h b/Src/system.h index 37f7d9fe3..b1c7481b7 100644 --- a/Src/system.h +++ b/Src/system.h @@ -705,40 +705,3 @@ extern short ospeed; # endif # endif #endif - -#ifdef ZLE_UNICODE_SUPPORT -typedef wchar_t ZLE_CHAR_T; -typedef wchar_t *ZLE_STRING_T; -#define ZLE_CHAR_SIZE sizeof(wchar_t) - -/* - * MB_CUR_MAX is the maximum number of bytes that a single wide - * character will convert into. We use it to keep strings - * sufficiently long. It should always be defined, but if it isn't - * just assume we are using Unicode which requires 6 characters. - * (Note that it's not necessarily defined to a constant.) 
- */ -#ifndef MB_CUR_MAX -#define MB_CUR_MAX 6 -#endif - -#define ZLENL L'\n' -#define ZLENUL L'\0' -#define ZLETAB L'\t' -#define ZLENULSTR L"" -#define ZS_memcpy wmemcpy -#define ZS_memmove wmemmove -#define ZC_icntrl iswcntrl -#else -typedef int ZLE_CHAR_T; -typedef unsigned char *ZLE_STRING_T; -#define ZLE_CHAR_SIZE sizeof(unsigned char) - -#define ZLENL '\n' -#define ZLENUL '\0' -#define ZLETAB '\t' -#define ZLENULSTR "" -#define ZS_memcpy memcpy -#define ZS_memmove memmove -#define ZC_icntrl icntrl -#endif |