20822: Initial code for Unicode/multibyte input

20823: Debugging test in stat wrong for 64-bit systems
author: Peter Stephenson <pws@users.sourceforge.net> 2005-02-18 13:57:25 +0000
committer: Peter Stephenson <pws@users.sourceforge.net> 2005-02-18 13:57:25 +0000
commit: d7c13fb2c3b1b014acde9c1cb17a1e34239b9751 (patch)
tree: 7b31d7d08233e1cc1b1ab46af1ac44b25ed4f2c1 /Src/Zle
parent: 294ef9e87237bf1dc12b17a26bc4b22aa5604282 (diff)
download: zsh-d7c13fb2c3b1b014acde9c1cb17a1e34239b9751.tar.gz
zsh-d7c13fb2c3b1b014acde9c1cb17a1e34239b9751.tar.xz
zsh-d7c13fb2c3b1b014acde9c1cb17a1e34239b9751.zip
12 files changed, 466 insertions, 122 deletions
diff --git a/Src/Zle/complist.c b/Src/Zle/complist.c
index 1cc709817..748b1fdf7 100644
--- a/Src/Zle/complist.c
+++ b/Src/Zle/complist.c
@@ -1869,6 +1869,10 @@ msearch(Cmatch **ptr, int ins, int back, int rep, int *wrapp)
     msearchpush(ptr, back);
 
     if (ins) {
+	/*
+	 * TODO: probably need to convert back to multibyte character
+	 * string?  Who knows...
+	 */
         s[0] = lastchar;
         s[1] = '\0';
 
@@ -2802,9 +2806,7 @@ domenuselect(Hookdef dummy, Chdata dat)
                     }
                 }
                 if (cmd == Th(z_selfinsertunmeta)) {
-                    lastchar &= 0x7f;
-                    if (lastchar == '\r')
-                        lastchar = '\n';
+		    fixunmeta();
                 }
                 wrap = 0;
                 np = msearch(p, ins, (ins ? (mode == MM_BSEARCH) : back),
diff --git a/Src/Zle/deltochar.c b/Src/Zle/deltochar.c
index 545c34c9f..d25f99680 100644
--- a/Src/Zle/deltochar.c
+++ b/Src/Zle/deltochar.c
@@ -37,7 +37,8 @@ static Widget w_zaptochar;
 static int
 deltochar(UNUSED(char **args))
 {
-    int c = getkey(0), dest = zlecs, ok = 0, n = zmult;
+    ZLE_INT_T c = getfullchar(0);
+    int dest = zlecs, ok = 0, n = zmult;
     int zap = (bindk->widget == w_zaptochar);
 
     if (n > 0) {
diff --git a/Src/Zle/zle.h b/Src/Zle/zle.h
index 3b9845f8a..a76d08e2d 100644
--- a/Src/Zle/zle.h
+++ b/Src/Zle/zle.h
@@ -27,6 +27,75 @@
  *
  */
 
+#ifdef ZLE_UNICODE_SUPPORT
+typedef wchar_t ZLE_CHAR_T;
+typedef wchar_t *ZLE_STRING_T;
+typedef int_t   ZLE_INT_T;
+#define ZLE_CHAR_SIZE	sizeof(wchar_t)
+
+/*
+ * MB_CUR_MAX is the maximum number of bytes that a single wide
+ * character will convert into.  We use it to keep strings
+ * sufficiently long.  It should always be defined, but if it isn't
+ * just assume we are using Unicode which requires 6 characters.
+ * (Note that it's not necessarily defined to a constant.)
+ */
+#ifndef MB_CUR_MAX
+#define MB_CUR_MAX 6
+#endif
+
+#define ZLENL	L'\n'
+#define ZLENUL	L'\0'
+#define ZLETAB	L'\t'
+
+#define DIGIT_1		L'1'
+#define DIGIT_9		L'9'
+#define LETTER_a	L'a'
+#define LETTER_z	L'z'
+#define LETTER_A	L'A'
+#define LETTER_Z	L'Z'
+#define LETTER_y	L'y'
+#define LETTER_n	L'n'
+
+#define ZLENULSTR	L""
+#define ZLEEOF	WEOF
+#define ZS_memcpy wmemcpy
+#define ZS_memmove wmemmove
+#define ZC_icntrl iswcntrl
+
+#define LASTFULLCHAR	lastchar_wide
+
+#else  /* Not ZLE_UNICODE_SUPPORT: old single-byte code */
+
+typedef int ZLE_CHAR_T;
+typedef unsigned char *ZLE_STRING_T;
+typedef int ZLE_INT_T;
+#define ZLE_CHAR_SIZE	sizeof(unsigned char)
+
+#define ZLENL	'\n'
+#define ZLENUL	'\0'
+#define ZLETAB	'\t'
+
+#define DIGIT_1		'1'
+#define DIGIT_9		'9'
+#define LETTER_a	'a'
+#define LETTER_z	'z'
+#define LETTER_A	'A'
+#define LETTER_Z	'Z'
+#define LETTER_y	'y'
+#define LETTER_n	'n'
+
+#define ZLENULSTR	""
+#define ZLEEOF	EOF
+#define ZS_memcpy memcpy
+#define ZS_memmove memmove
+#define ZC_icntrl icntrl
+
+#define LASTFULLCHAR	lastchar
+
+#endif
+
+
 typedef struct widget *Widget;
 typedef struct thingy *Thingy;
 
diff --git a/Src/Zle/zle_hist.c b/Src/Zle/zle_hist.c
index 8cd082b72..afad7ae44 100644
--- a/Src/Zle/zle_hist.c
+++ b/Src/Zle/zle_hist.c
@@ -420,11 +420,12 @@ endofhistory(UNUSED(char **args))
 int
 insertlastword(char **args)
 {
-    int n, nwords, histstep = -1, wordpos = 0, deleteword = 0;
+    int n, nwords, histstep = -1, wordpos = 0, deleteword = 0, len, sz;
     char *s, *t;
     Histent he = NULL;
     LinkList l = NULL;
     LinkNode node;
+    ZLE_STRING_T zs;
 
     static char *lastinsert;
     static int lasthist, lastpos, lastlen;
@@ -554,7 +555,10 @@ insertlastword(char **args)
     memcpy(lastinsert, s, lastlen);
     n = zmult;
     zmult = 1;
-    doinsert(s);
+
+    zs = stringaszleline((unsigned char *)s, &len, &sz);
+    doinsert(zs, len);
+    zfree(zs, sz);
     zmult = n;
     *t = save;
     return 0;
@@ -780,7 +784,7 @@ doisearch(char **args, int dir)
 	char *arg;
 	savekeys = kungetct;
 	arg = getkeystring(*args, &len, 2, NULL);
-	ungetkeys(arg, len);
+	ungetbytes(arg, len);
     }
 
     strcpy(ibuf, ISEARCH_PROMPT);
@@ -951,18 +955,23 @@ doisearch(char **args, int dir)
 		sbuf[sbptr] = '^';
 		zrefresh();
 	    }
-	    if ((lastchar = getkey(0)) == EOF)
+	    if (getfullchar(0) == ZLEEOF)
 		feep = 1;
 	    else
 		goto ins;
 	} else {
 	    if(cmd == Th(z_selfinsertunmeta)) {
-		lastchar &= 0x7f;
-		if(lastchar == '\r')
-		    lastchar = '\n';
-	    } else if (cmd == Th(z_magicspace))
-		lastchar = ' ';
-	    else if (cmd != Th(z_selfinsert)) {
+		fixunmeta();
+	    } else if (cmd == Th(z_magicspace)) {
+		fixmagicspace();
+	    } else if (cmd == Th(z_selfinsert)) {
+#ifdef ZLE_UNICODE_SUPPORT
+		if (!lastchar_wide_valid)
+		    getfullcharrest(lastchar);
+#else
+		;
+#endif
+	    } else {
 		ungetkeycmd();
 		if (cmd == Th(z_sendbreak))
 		    sbptr = 0;
@@ -979,6 +988,8 @@ doisearch(char **args, int dir)
 		sbuf = ibuf + FIRST_SEARCH_CHAR;
 		sibuf *= 2;
 	    }
+	    /* TODO: use lastchar_wide if available, convert back to
+	     * multibyte string.  Yuk.  */
 	    sbuf[sbptr++] = lastchar;
 	}
 	if (feep)
@@ -1093,7 +1104,7 @@ getvisrchstr(void)
 	    break;
 	}
 	if(cmd == Th(z_magicspace)) {
-	    lastchar = ' ';
+	    fixmagicspace();
 	    cmd = Th(z_selfinsert);
 	}
 	if(cmd == Th(z_redisplay)) {
@@ -1128,15 +1139,20 @@ getvisrchstr(void)
 		sbuf[sptr] = '^';
 		zrefresh();
 	    }
-	    if ((lastchar = getkey(0)) == EOF)
+	    if (getfullchar(0) == ZLEEOF)
 		feep = 1;
 	    else
 		goto ins;
 	} else if(cmd == Th(z_selfinsertunmeta) || cmd == Th(z_selfinsert)) {
 	    if(cmd == Th(z_selfinsertunmeta)) {
-		lastchar &= 0x7f;
-		if(lastchar == '\r')
-		    lastchar = '\n';
+		fixunmeta();
+	    } else {
+#ifdef ZLE_UNICODE_SUPPORT
+		if (!lastchar_wide_valid)
+		    getrestchar(lastchar);
+#else
+		;
+#endif
 	    }
 	  ins:
 	    if(sptr == ssbuf - 1) {
@@ -1144,6 +1160,7 @@ getvisrchstr(void)
 		strcpy(newbuf, sbuf);
 		statusline = sbuf = newbuf;
 	    }
+	    /* TODO: may be wide char, convert back to multibyte string */
 	    sbuf[sptr++] = lastchar;
 	} else {
 	    feep = 1;
diff --git a/Src/Zle/zle_keymap.c b/Src/Zle/zle_keymap.c
index 3aee499f0..9b83a4953 100644
--- a/Src/Zle/zle_keymap.c
+++ b/Src/Zle/zle_keymap.c
@@ -1272,7 +1272,21 @@ getkeymapcmd(Keymap km, Thingy *funcp, char **strp)
 
     keybuflen = 0;
     keybuf[0] = 0;
-    while((lastchar = getkeybuf(!!lastlen)) != EOF) {
+    /*
+     * getkeybuf returns multibyte strings, which may not
+     * yet correspond to complete wide characters, regardless
+     * of the locale.  This is because we can't be sure whether
+     * the key bindings and keyboard input always return such
+     * characters.  So we always look up bindings for each
+     * chunk of string.  Intelligence within self-insert tries
+     * to fix up insertion of real wide characters properly.
+     *
+     * Note that this does not stop the user binding wide characters to
+     * arbitrary functions, just so long as the string used in the
+     * argument to bindkey is in the correct form for the locale.
+     * That's beyond our control.
+     */
+    while(getkeybuf(!!lastlen) != EOF) {
 	char *s;
 	Thingy f;
 	int loc = 1;
@@ -1296,7 +1310,7 @@ getkeymapcmd(Keymap km, Thingy *funcp, char **strp)
 	lastchar = lastc;
     if(lastlen != keybuflen) {
 	unmetafy(keybuf + lastlen, &keybuflen);
-	ungetkeys(keybuf+lastlen, keybuflen);
+	ungetbytes(keybuf+lastlen, keybuflen);
 	if(vichgflag)
 	    vichgbufptr -= keybuflen;
 	keybuf[lastlen] = 0;
@@ -1306,11 +1320,24 @@ getkeymapcmd(Keymap km, Thingy *funcp, char **strp)
     return keybuf;
 }
 
+/*
+ * Add a (possibly metafied) byte to the key input so far.
+ * This handles individual bytes of a multibyte string separately;
+ * see note in getkeymapcmd.  Hence there is no wide character
+ * support at this level.
+ *
+ * TODO: Need to be careful about whether we return EOF in the
+ * middle of a wide character.  However, I think we're OK since
+ * EOF and 0xff are distinct and we're reading bytes from the
+ * lower level, so EOF really does mean something went wrong.  Even so,
+ * I'm worried enough to leave this note here for now.
+ */
+
 /**/
 static int
 getkeybuf(int w)
 {
-    int c = getkey(w);
+    int c = getbyte(w);
 
     if(c < 0)
 	return EOF;
@@ -1332,7 +1359,7 @@ getkeybuf(int w)
 mod_export void
 ungetkeycmd(void)
 {
-    ungetkeys(keybuf, keybuflen);
+    ungetbytes(keybuf, keybuflen);
 }
 
 /* read a command from the current keymap, with widgets */
@@ -1359,7 +1386,7 @@ getkeycmd(void)
 	    return NULL;
 	}
 	pb = unmetafy(ztrdup(str), &len);
-	ungetkeys(pb, len);
+	ungetbytes(pb, len);
 	zfree(pb, strlen(str) + 1);
 	goto sentstring;
     }
diff --git a/Src/Zle/zle_main.c b/Src/Zle/zle_main.c
index d25376862..1f7b7cbfa 100644
--- a/Src/Zle/zle_main.c
+++ b/Src/Zle/zle_main.c
@@ -78,10 +78,30 @@ int done;
 /**/
 int mark;
 
-/* last character pressed */
+/*
+ * Last character pressed.
+ *
+ * Depending how far we are with processing, the lastcharacter may
+ * be a single byte read (lastchar_wide_valid is 0, lastchar_wide is not
+ * valid) or a full wide character.  This is needed because we can't be
+ * sure whether the user is typing old \M-style commands or multibyte
+ * input.
+ *
+ * Calling getfullchar or getrestchar is guaranteed to ensure we have
+ * a valid wide character (although this may be WEOF).  In many states
+ * we know this and don't need to test lastchar_wide_valid.
+ */
 
 /**/
-mod_export int lastchar;
+mod_export int
+lastchar;
+#ifdef ZLE_UNICODE_SUPPORT
+/**/
+mod_export ZLE_INT_T lastchar_wide;
+/**/
+mod_export int
+lastchar_wide_valid;
+#endif
 
 /* the bindings for the previous and for this key */
 
@@ -148,7 +168,7 @@ mod_export struct modifier zmod;
 /**/
 int prefixflag;
 
-/* Number of characters waiting to be read by the ungetkeys mechanism */
+/* Number of characters waiting to be read by the ungetbytes mechanism */
 /**/
 int kungetct;
 
@@ -196,7 +216,7 @@ zsetterm(void)
 	 * we can't set up the terminal for zle *at all* until
 	 * we are sure there is no more typeahead to come.  So
 	 * if there is typeahead, we set the flag delayzsetterm.
-	 * Then getkey() performs another FIONREAD call; if that is
+	 * Then getbyte() performs another FIONREAD call; if that is
 	 * 0, we have finally used up all the typeahead, and it is
 	 * safe to alter the terminal, which we do at that point.
 	 */
@@ -266,7 +286,7 @@ zsetterm(void)
     ti.tio.c_cc[VMIN] = 1;
     ti.tio.c_cc[VTIME] = 0;
     ti.tio.c_iflag |= (INLCR | ICRNL);
- /* this line exchanges \n and \r; it's changed back in getkey
+ /* this line exchanges \n and \r; it's changed back in getbyte
 	so that the net effect is no change at all inside the shell.
 	This double swap is to allow typeahead in common cases, eg.
 
@@ -275,12 +295,12 @@ zsetterm(void)
 	echo foo<return>  <--- typed before sleep returns
 
 	The shell sees \n instead of \r, since it was changed by the kernel
-	while zsh wasn't looking. Then in getkey() \n is changed back to \r,
+	while zsh wasn't looking. Then in getbyte() \n is changed back to \r,
 	and it sees "echo foo<accept line>", as expected. Without the double
 	swap the shell would see "echo foo\n", which is translated to
 	"echo fooecho foo<accept line>" because of the binding.
 	Note that if you type <line-feed> during the sleep the shell just sees
-	\n, which is translated to \r in getkey(), and you just get another
+	\n, which is translated to \r in getbyte(), and you just get another
 	prompt. For type-ahead to work in ALL cases you have to use
 	stty inlcr.
 
@@ -321,9 +341,16 @@ zsetterm(void)
 static char *kungetbuf;
 static int kungetsz;
 
+/*
+ * Note on ungetbyte and ungetbytes for the confused (pws):
+ * these are low level and deal with bytes before they
+ * have been converted into (possibly wide) characters.
+ * Hence the names.
+ */
+
 /**/
 void
-ungetkey(int ch)
+ungetbyte(int ch)
 {
     if (kungetct == kungetsz)
 	kungetbuf = realloc(kungetbuf, kungetsz *= 2);
@@ -332,11 +359,11 @@ ungetkey(int ch)
 
 /**/
 void
-ungetkeys(char *s, int len)
+ungetbytes(char *s, int len)
 {
     s += len;
     while (len--)
-	ungetkey(*--s);
+	ungetbyte(*--s);
 }
 
 #if defined(pyr) && defined(HAVE_SELECT)
@@ -356,7 +383,7 @@ breakread(int fd, char *buf, int n)
 #endif
 
 static int
-raw_getkey(int keytmout, char *cptr)
+raw_getbyte(int keytmout, char *cptr)
 {
     long exp100ths;
     int ret;
@@ -591,13 +618,22 @@ raw_getkey(int keytmout, char *cptr)
 
 /**/
 mod_export int
-getkey(int keytmout)
+getbyte(int keytmout)
 {
     char cc;
     unsigned int ret;
     int die = 0, r, icnt = 0;
     int old_errno = errno, obreaks = breaks;
 
+#ifdef ZLE_UNICODE_SUPPORT
+    /*
+     * Reading a single byte always invalidates the status
+     * of lastchar_wide.  We may fix this up in getrestchar
+     * if this is the last byte of a wide character.
+     */
+    lastchar_wide_valid = 0;
+#endif
+
     if (kungetct)
 	ret = STOUC(kungetbuf[--kungetct]);
     else {
@@ -612,10 +648,10 @@ getkey(int keytmout)
 	for (;;) {
 	    int q = queue_signal_level();
 	    dont_queue_signals();
-	    r = raw_getkey(keytmout, &cc);
+	    r = raw_getbyte(keytmout, &cc);
 	    restore_queue_signals(q);
 	    if (r == -2)	/* timeout */
-		return EOF;
+		return lastchar = EOF;
 	    if (r == 1)
 		break;
 	    if (r == 0) {
@@ -642,7 +678,7 @@ getkey(int keytmout)
 		errflag = 0;
 		breaks = obreaks;
 		errno = old_errno;
-		return EOF;
+		return lastchar = EOF;
 	    } else if (errno == EWOULDBLOCK) {
 		fcntl(0, F_SETFL, 0);
 	    } else if (errno == EIO && !die) {
@@ -665,15 +701,96 @@ getkey(int keytmout)
 
 	ret = STOUC(cc);
     }
+    /*
+     * TODO: if vichgbuf is to be characters instead of a multibyte
+     * string the following needs moving to getfullchar().
+     */
     if (vichgflag) {
 	if (vichgbufptr == vichgbufsz)
 	    vichgbuf = realloc(vichgbuf, vichgbufsz *= 2);
 	vichgbuf[vichgbufptr++] = ret;
     }
     errno = old_errno;
-    return ret;
+    return lastchar = ret;
 }
 
+
+/*
+ * Get a full character rather than just a single byte.
+ * (TODO: Strictly we ought to call this getbyte and the above
+ * function getbyte.)
+ */
+
+/**/
+mod_export ZLE_INT_T
+getfullchar(int keytmout)
+{
+    int inchar = getbyte(keytmout);
+
+#ifdef ZLE_UNICODE_SUPPORT
+    return getrestchar(inchar);
+#else
+    return inchar;
+#endif
+}
+
+
+/**/
+#ifdef ZLE_UNICODE_SUPPORT
+/*
+ * Get the remainder of a character if we support multibyte
+ * input strings.  It may not require any more input, but
+ * we haven't yet checked.  The character previously returned
+ * by getbyte() is passed down as inchar.
+ */
+
+/**/
+mod_export ZLE_INT_T
+getrestchar(int inchar)
+{
+    char cnull = '\0';
+    char buf[MB_CUR_MAX], *ptr;
+    wchar_t outchar;
+    int ret;
+
+    /*
+     * We are guaranteed to set a valid wide last character,
+     * although it may be WEOF (which is technically not
+     * a wide character at all...)
+     */
+    lastchar_wide_valid = 1;
+
+    if (inchar == EOF)
+	return lastchar_wide = WEOF;
+
+    /* reset shift state by converting null */
+    mbrtowc(&outchar, &cnull, 1, &ps);
+
+    ptr = buf;
+    *ptr++ = inchar;
+    /*
+     * Return may be zero if we have a NULL; handle this like
+     * any other character.
+     */
+    while ((ret = mbrtowc(&outchar, buf, ptr - buf, &ps)) < 0) {
+	if (ret == -1) {
+	    /*
+	     * Invalid input.  Hmm, what's the right thing to do here?
+	     */
+	    return lastchar_wide = WEOF;
+	}
+	/* No timeout here as we really need the character. */
+	inchar = getbyte(0);
+	if (inchar == EOF)
+	    return lastchar_wide = WEOF;
+	*ptr++ = inchar;
+    }
+    return lastchar_wide = (wint_t)outchar;
+}
+/**/
+#endif
+
+
 /**/
 void
 zlecore(void)
@@ -1445,7 +1562,7 @@ setup_(UNUSED(Module m))
     zlereadptr = zleread;
     zlesetkeymapptr = zlesetkeymap;
 
-    getkeyptr = getkey;
+    getkeyptr = getbyte;
 
     /* initialise the thingies */
     init_thingies();
diff --git a/Src/Zle/zle_misc.c b/Src/Zle/zle_misc.c
index 86a0137b3..134ae21af 100644
--- a/Src/Zle/zle_misc.c
+++ b/Src/Zle/zle_misc.c
@@ -34,13 +34,13 @@
 
 /**/
 void
-doinsert(char *str)
+doinsert(ZLE_STRING_T zstr, int len)
 {
-    char *s;
-    int len = ztrlen(str);
-    int c1 = *str == Meta ? STOUC(str[1])^32 : STOUC(*str);/* first character */
+    ZLE_STRING_T s;
+    ZLE_CHAR_T c1 = *zstr;	     /* first character */
     int neg = zmult < 0;             /* insert *after* the cursor? */
     int m = neg ? -zmult : zmult;    /* number of copies to insert */
+    int count;
 
     iremovesuffix(c1, 0);
     invalidatelist();
@@ -50,8 +50,8 @@ doinsert(char *str)
     else if(zlecs + m * len > zlell)
 	spaceinline(zlecs + m * len - zlell);
     while(m--)
-	for(s = str; *s; s++)
-	    zleline[zlecs++] = *s == Meta ? *++s ^ 32 : *s;
+	for(s = zstr, count = len; count; s++, count--)
+	    zleline[zlecs++] = *s;
     if(neg)
 	zlecs += zmult * len;
 }
@@ -60,25 +60,41 @@ doinsert(char *str)
 mod_export int
 selfinsert(UNUSED(char **args))
 {
-    char s[3], *p = s;
-
-    if(imeta(lastchar)) {
-	*p++ = Meta;
-	lastchar ^= 32;
-    }
-    *p++ = lastchar;
-    *p = 0;
-    doinsert(s);
+#ifdef ZLE_UNICODE_SUPPORT
+    if (!lastchar_wide_valid)
+	getrestchar(lastchar);
+    doinsert(&lastchar_wide, 1);
+#else
+    char s = lastchar;
+    doinsert(&s, 1);
+#endif
     return 0;
 }
 
 /**/
-mod_export int
-selfinsertunmeta(char **args)
+mod_export void
+fixunmeta(void)
 {
     lastchar &= 0x7f;
     if (lastchar == '\r')
 	lastchar = '\n';
+#ifdef ZLE_UNICODE_SUPPORT
+    /*
+     * TODO: can we do this better?
+     * We need a wide character to insert.
+     * selfinsertunmeta is intrinsically problematic
+     * with multibyte input.
+     */
+    lastchar_wide = (ZLE_CHAR_T)lastchar;
+    lastchar_wide_valid = TRUE;
+#endif
+}
+
+/**/
+mod_export int
+selfinsertunmeta(char **args)
+{
+    fixunmeta();
     return selfinsert(args);
 }
 
@@ -490,11 +506,11 @@ quotedinsert(char **args)
     sob.sg_flags = (sob.sg_flags | RAW) & ~ECHO;
     ioctl(SHTTY, TIOCSETN, &sob);
 #endif
-    lastchar = getkey(0);
+    getfullchar(0);
 #ifndef HAS_TIO
     zsetterm();
 #endif
-    if (lastchar < 0)
+    if (LASTFULLCHAR == ZLEEOF)
 	return 1;
     else
 	return selfinsert(args);
@@ -506,9 +522,20 @@ digitargument(UNUSED(char **args))
 {
     int sign = (zmult < 0) ? -1 : 1;
 
+#ifdef ZLE_UNICODE_SUPPORT
+    /*
+     * It's too dangerous to allow metafied input.  See
+     * universalargument for comments on (possibly suboptimal) handling
+     * of digits.  We are assuming ASCII is a subset of the multibyte
+     * encoding.
+     */
+    if (lastchar < '0' || lastchar > '9')
+	return 1;
+#else
     /* allow metafied as well as ordinary digits */
     if ((lastchar & 0x7f) < '0' || (lastchar & 0x7f) > '9')
 	return 1;
+#endif
 
     if (!(zmod.flags & MOD_TMULT))
 	zmod.tmult = 0;
@@ -546,7 +573,22 @@ universalargument(char **args)
 	zmod.flags |= MOD_MULT;
 	return 0;
     }
-    while ((gotk = getkey(0)) != EOF) {
+    /*
+     * TODO: this is quite tricky to do when trying to maintain
+     * compatibility between the old input system and Unicode.
+     * We don't know what follows the digits, so if we try to
+     * read wide characters we may fail (e.g. we may come across an old
+     * \M-style binding).
+     *
+     * If we assume individual bytes are either explicitly ASCII or
+     * not (a la UTF-8), we get away with it; we can back up individual
+     * bytes and everything will work.  We may want to relax this
+     * assumption later.  ("Much later" - (C) Steven Singer,
+     * CSR BlueCore firmware, ca. 2000.)
+     *
+     * Hence for now this remains byte-by-byte.
+     */
+    while ((gotk = getbyte(0)) != EOF) {
 	if (gotk == '-' && !digcnt) {
 	    minus = -1;
 	    digcnt++;
@@ -554,7 +596,7 @@ universalargument(char **args)
 	    pref = pref * 10 + (gotk & 0xf);
 	    digcnt++;
 	} else {
-	    ungetkey(gotk);
+	    ungetbyte(gotk);
 	    break;
 	}
     }
@@ -765,24 +807,32 @@ executenamedcommand(char *prmt)
 	} else if(cmd == Th(z_viquotedinsert)) {
 	    *ptr = '^';
 	    zrefresh();
-	    lastchar = getkey(0);
-	    if(lastchar == EOF || !lastchar || len == NAMLEN)
+	    getfullchar(0);
+	    if(LASTFULLCHAR == ZLEEOF || !LASTFULLCHAR || len == NAMLEN)
 		feep = 1;
-	    else
+	    else {
+		/* TODO: convert back to multibyte string */
 		*ptr++ = lastchar, len++, curlist = 0;
+	    }
 	} else if(cmd == Th(z_quotedinsert)) {
-	    if((lastchar = getkey(0)) == EOF || !lastchar || len == NAMLEN)
+	    if(getfullchar(0) == ZLEEOF ||
+	       !LASTFULLCHAR || len == NAMLEN)
 		feep = 1;
-	    else
+	    else {
+		/* TODO: convert back to multibyte string */
 		*ptr++ = lastchar, len++, curlist = 0;
+	    }
 	} else if(cmd == Th(z_backwarddeletechar) ||
 	    	cmd == Th(z_vibackwarddeletechar)) {
-	    if (len)
+	    if (len) {
+		/* TODO: backward full character in multibyte string. Yuk. */
 		len--, ptr--, curlist = 0;
+	    }
 	} else if(cmd == Th(z_killregion) || cmd == Th(z_backwardkillword) ||
 		  cmd == Th(z_vibackwardkillword)) {
 	    if (len)
 		curlist = 0;
+	    /* TODO: backward full character in multibyte string. Yuk. */
 	    while (len && (len--, *--ptr != '-'));
 	} else if(cmd == Th(z_killwholeline) || cmd == Th(z_vikillline) ||
 	    	cmd == Th(z_backwardkillline)) {
@@ -812,9 +862,7 @@ executenamedcommand(char *prmt)
 		unrefthingy(r);
 	    }
 	    if(cmd == Th(z_selfinsertunmeta)) {
-		lastchar &= 0x7f;
-		if(lastchar == '\r')
-		    lastchar = '\n';
+		fixunmeta();
 		cmd = Th(z_selfinsert);
 	    }
 	    if (cmd == Th(z_listchoices) || cmd == Th(z_deletecharorlist) ||
@@ -867,11 +915,24 @@ executenamedcommand(char *prmt)
 		    len = cmdambig;
 		}
 	    } else {
-		if (len == NAMLEN || icntrl(lastchar) ||
-		    cmd != Th(z_selfinsert))
+		if (len == NAMLEN || cmd != Th(z_selfinsert))
 		    feep = 1;
-		else
-		    *ptr++ = lastchar, len++, curlist = 0;
+		else {
+#ifdef ZLE_UNICODE_SUPPORT
+		    if (!lastchar_wide_valid)
+			getrestchar(0);
+		    if (iswcntrl(lastchar))
+#else
+		    if (icntrl(lastchar))
+#endif
+		    {
+			feep = 1;
+		    }
+		    else {
+			/* TODO: convert back to multibyte string */
+			*ptr++ = lastchar, len++, curlist = 0;
+		    }
+		}
 	    }
 	}
 	if (feep)
@@ -911,6 +972,9 @@ executenamedcommand(char *prmt)
 /* Length of suffix to remove when inserting each possible character value.  *
  * suffixlen[256] is the length to remove for non-insertion editing actions. */
 
+/*
+ * TODO: Aargh, this is completely broken with wide characters.
+ */
 /**/
 mod_export int suffixlen[257];
 
@@ -1000,7 +1064,7 @@ makesuffixstr(char *f, char *s, int n)
 
 /**/
 mod_export void
-iremovesuffix(int c, int keep)
+iremovesuffix(ZLE_CHAR_T c, int keep)
 {
     if (suffixfunc) {
 	Eprog prog = getshfunc(suffixfunc);
@@ -1024,7 +1088,12 @@ iremovesuffix(int c, int keep)
 	zsfree(suffixfunc);
 	suffixfunc = NULL;
     } else {
+#ifdef ZLE_UNICODE_SUPPORT
+	/* TODO: best I can think of for now... */
+	int sl = (unsigned int)c < 256 ? suffixlen[c] : 0;
+#else
 	int sl = suffixlen[c];
+#endif
 	if(sl) {
 	    backdel(sl);
 	    if (!keep)
diff --git a/Src/Zle/zle_move.c b/Src/Zle/zle_move.c
index 48e9dd64f..b939df06b 100644
--- a/Src/Zle/zle_move.c
+++ b/Src/Zle/zle_move.c
@@ -353,13 +353,14 @@ vibeginningofline(UNUSED(char **args))
     return 0;
 }
 
-static int vfindchar, vfinddir, tailadd;
+static ZLE_INT_T vfindchar;
+static int vfinddir, tailadd;
 
 /**/
 int
 vifindnextchar(char **args)
 {
-    if ((vfindchar = vigetkey()) != -1) {
+    if ((vfindchar = vigetkey()) != ZLEEOF) {
 	vfinddir = 1;
 	tailadd = 0;
 	return virepeatfind(args);
@@ -371,7 +372,7 @@ vifindnextchar(char **args)
 int
 vifindprevchar(char **args)
 {
-    if ((vfindchar = vigetkey()) != -1) {
+    if ((vfindchar = vigetkey()) != ZLEEOF) {
 	vfinddir = -1;
 	tailadd = 0;
 	return virepeatfind(args);
@@ -383,7 +384,7 @@ vifindprevchar(char **args)
 int
 vifindnextcharskip(char **args)
 {
-    if ((vfindchar = vigetkey()) != -1) {
+    if ((vfindchar = vigetkey()) != ZLEEOF) {
 	vfinddir = 1;
 	tailadd = -1;
 	return virepeatfind(args);
@@ -395,7 +396,7 @@ vifindnextcharskip(char **args)
 int
 vifindprevcharskip(char **args)
 {
-    if ((vfindchar = vigetkey()) != -1) {
+    if ((vfindchar = vigetkey()) != ZLEEOF) {
 	vfinddir = -1;
 	tailadd = 1;
 	return virepeatfind(args);
@@ -465,12 +466,12 @@ vifirstnonblank(UNUSED(char **args))
 int
 visetmark(UNUSED(char **args))
 {
-    int ch;
+    ZLE_INT_T ch;
 
-    ch = getkey(0);
-    if (ch < 'a' || ch > 'z')
+    ch = getfullchar(0);
+    if (ch < LETTER_a || ch > LETTER_z)
 	return 1;
-    ch -= 'a';
+    ch -= LETTER_a;
     vimarkcs[ch] = zlecs;
     vimarkline[ch] = histline;
     return 0;
@@ -480,15 +481,15 @@ visetmark(UNUSED(char **args))
 int
 vigotomark(UNUSED(char **args))
 {
-    int ch;
+    ZLE_INT_T ch;
 
-    ch = getkey(0);
-    if (ch == lastchar)
+    ch = getfullchar(0);
+    if (ch == LASTFULLCHAR)
 	ch = 26;
     else {
-	if (ch < 'a' || ch > 'z')
+	if (ch < LETTER_a || ch > LETTER_z)
 	    return 1;
-	ch -= 'a';
+	ch -= LETTER_a;
     }
     if (!vimarkline[ch])
 	return 1;
diff --git a/Src/Zle/zle_thingy.c b/Src/Zle/zle_thingy.c
index 441d85b2c..494e1ade5 100644
--- a/Src/Zle/zle_thingy.c
+++ b/Src/Zle/zle_thingy.c
@@ -473,7 +473,7 @@ bin_zle_unget(char *name, char **args, UNUSED(Options ops), UNUSED(char func))
 	return 1;
     }
     while (p > b)
-	ungetkey((int) *--p);
+	ungetbyte((int) *--p);
     return 0;
 }
 
diff --git a/Src/Zle/zle_tricky.c b/Src/Zle/zle_tricky.c
index a4d87dfea..15c723d7a 100644
--- a/Src/Zle/zle_tricky.c
+++ b/Src/Zle/zle_tricky.c
@@ -2298,12 +2298,27 @@ doexpandhist(void)
 }
 
 /**/
+void
+fixmagicspace(void)
+{
+    lastchar = ' ';
+#ifdef ZLE_UNICODE_SUPPORT
+    /*
+     * This is redundant if the multibyte encoding extends ASCII,
+     * since lastchar is a full character, but it's safer anyway...
+     */
+    lastchar_wide = L' ';
+    lastchar_wide_valid = TRUE;
+#endif
+}
+
+/**/
 int
 magicspace(char **args)
 {
     char *bangq;
     int ret;
-    lastchar = ' ';
+    fixmagicspace();
     for (bangq = (char *)zleline; (bangq = strchr(bangq, bangchar));
 	 bangq += 2)
 	if (bangq[1] == '"' && (bangq == (char *)zleline || bangq[-1] != '\\'))
diff --git a/Src/Zle/zle_utils.c b/Src/Zle/zle_utils.c
index ffd94def8..e6f696935 100644
--- a/Src/Zle/zle_utils.c
+++ b/Src/Zle/zle_utils.c
@@ -510,7 +510,7 @@ hstrnstr(char *haystack, int pos, char *needle, int len, int dir, int sens)
 mod_export int
 getzlequery(int yesno)
 {
-    int c;
+    ZLE_INT_T c;
 #ifdef FIONREAD
     int val;
 
@@ -525,18 +525,18 @@ getzlequery(int yesno)
 #endif
 
     /* get a character from the tty and interpret it */
-    c = getkey(0);
+    c = getfullchar(0);
     if (yesno) {
-	if (c == '\t')
-	    c = 'y';
+	if (c == ZLETAB)
+	    c = LETTER_y;
 	else if (icntrl(c) || c == EOF)
-	    c = 'n';
+	    c = LETTER_n;
 	else
 	    c = tulower(c);
     }
     /* echo response and return */
-    if (c != '\n')
-	putc(c, shout);
+    if (c != ZLENL)
+	putc(c, shout);		/* TODO: convert to multibyte */
     return c;
 }
 
diff --git a/Src/Zle/zle_vi.c b/Src/Zle/zle_vi.c
index 15a2deb49..b45ccf10a 100644
--- a/Src/Zle/zle_vi.c
+++ b/Src/Zle/zle_vi.c
@@ -50,6 +50,11 @@ int vilinerange;
 /**/
 int vichgbufsz, vichgbufptr, vichgflag;
 
+/*
+ * TODO: need consistent handling of vichgbuf: ZLE_STRING_T or
+ * char *?  Consequently, use of lastchar in this file needs fixing
+ * too.
+ */
 /**/
 char *vichgbuf;
 
@@ -95,15 +100,15 @@ startvitext(int im)
 }
 
 /**/
-int
+ZLE_INT_T
 vigetkey(void)
 {
     Keymap mn = openkeymap("main");
     char m[3], *str;
     Thingy cmd;
 
-    if((lastchar = getkey(0)) == EOF)
-	return -1;
+    if(getbyte(0) == EOF)
+	return ZLEEOF;
 
     m[0] = lastchar;
     metafy(m, 1, META_NOALLOC);
@@ -112,23 +117,35 @@ vigetkey(void)
     else
 	cmd = t_undefinedkey;
 
+    /*
+     * TODO: if this was bound to self-insert, we may
+     * be on the first character of a multibyte string
+     * and need to acquire the rest.
+     */
     if (!cmd || cmd == Th(z_sendbreak)) {
-	return -1;
+	return ZLEEOF;
     } else if (cmd == Th(z_quotedinsert)) {
-	if ((lastchar = getkey(0)) == EOF)
-	    return -1;
+	if (getfullchar(0) == ZLEEOF)
+	    return ZLEEOF;
     } else if(cmd == Th(z_viquotedinsert)) {
-	char sav = zleline[zlecs];
+	ZLE_CHAR_T sav = zleline[zlecs];
 
 	zleline[zlecs] = '^';
 	zrefresh();
-	lastchar = getkey(0);
+	getfullchar(0);
 	zleline[zlecs] = sav;
-	if(lastchar == EOF)
-	    return -1;
-    } else if (cmd == Th(z_vicmdmode))
-	return -1;
-    return lastchar;
+	if(LASTFULLCHAR == ZLEEOF)
+	    return ZLEEOF;
+    } else if (cmd == Th(z_vicmdmode)) {
+	return ZLEEOF;
+    }
+#ifdef ZLE_UNICODE_SUPPORT
+    if (!lastchar_wide_valid)
+    {
+	getrestchar(lastchar);
+    }
+#endif
+    return LASTFULLCHAR;
 }
 
 /**/
@@ -489,7 +506,7 @@ vireplacechars(UNUSED(char **args))
 	return 1;
     }
     /* get key */
-    if((ch = vigetkey()) == -1) {
+    if((ch = vigetkey()) == ZLEEOF) {
 	vichgflag = 0;
 	return 1;
     }
@@ -593,7 +610,7 @@ virepeatchange(UNUSED(char **args))
     }
     /* repeat the command */
     inrepeat = 1;
-    ungetkeys(vichgbuf, vichgbufptr);
+    ungetbytes(vichgbuf, vichgbufptr);
     return 0;
 }
 
@@ -817,26 +834,35 @@ vicapslockpanic(UNUSED(char **args))
     statusline = "press a lowercase key to continue";
     statusll = strlen(statusline);
     zrefresh();
-    while (!islower(getkey(0)));
+#ifdef ZLE_UNICODE_SUPPORT
+    while (!iswlower(getfullchar(0)));
+#else
+    while (!islower(getfullchar(0)));
+#endif
     statusline = NULL;
     return 0;
 }
 
+#ifdef ZLE_UNICODE_SUPPORT
+#else
+#endif
+
 /**/
 int
 visetbuffer(UNUSED(char **args))
 {
-    int ch;
+    ZLE_INT_T ch;
 
     if ((zmod.flags & MOD_VIBUF) ||
-	(((ch = getkey(0)) < '1' || ch > '9') &&
-	 (ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z')))
+	(((ch = getfullchar(0)) < DIGIT_1 || ch > DIGIT_9) &&
+	 (ch < LETTER_a || ch > LETTER_z) &&
+	 (ch < LETTER_A || ch > LETTER_Z)))
 	return 1;
-    if (ch >= 'A' && ch <= 'Z')	/* needed in cut() */
+    if (ch >= LETTER_A && ch <= LETTER_Z)	/* needed in cut() */
 	zmod.flags |= MOD_VIAPP;
     else
 	zmod.flags &= ~MOD_VIAPP;
-    zmod.vibuf = tulower(ch) + (idigit(ch) ? -'1' + 26 : -'a');
+    zmod.vibuf = tulower(ch) + (idigit(ch) ? - DIGIT_1 + 26 : -LETTER_a);
     zmod.flags |= MOD_VIBUF;
     prefixflag = 1;
     return 0;
@@ -897,12 +923,12 @@ viquotedinsert(char **args)
     sob.sg_flags = (sob.sg_flags | RAW) & ~ECHO;
     ioctl(SHTTY, TIOCSETN, &sob);
 #endif
-    lastchar = getkey(0);
+    getfullchar(0);
 #ifndef HAS_TIO
     zsetterm();
 #endif
     foredel(1);
-    if(lastchar < 0)
+    if(LASTFULLCHAR == ZLEEOF)
 	return 1;
     else
 	return selfinsert(args);
author	Peter Stephenson <pws@users.sourceforge.net>	2005-02-18 13:57:25 +0000
committer	Peter Stephenson <pws@users.sourceforge.net>	2005-02-18 13:57:25 +0000
commit	d7c13fb2c3b1b014acde9c1cb17a1e34239b9751 (patch)
tree	7b31d7d08233e1cc1b1ab46af1ac44b25ed4f2c1 /Src/Zle
parent	294ef9e87237bf1dc12b17a26bc4b22aa5604282 (diff)
download	zsh-d7c13fb2c3b1b014acde9c1cb17a1e34239b9751.tar.gz zsh-d7c13fb2c3b1b014acde9c1cb17a1e34239b9751.tar.xz zsh-d7c13fb2c3b1b014acde9c1cb17a1e34239b9751.zip