about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog5
-rw-r--r--Doc/Zsh/builtins.yo2
-rw-r--r--Src/utils.c92
-rw-r--r--zshconfig.ac7
4 files changed, 102 insertions, 4 deletions
diff --git a/ChangeLog b/ChangeLog
index dd1b20142..e4e024b18 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2003-03-14  Oliver Kiddle  <opk@zsh.org>
+
+	* 18343, 18348: zshconfig.ac, Doc/Zsh/builtins.yo, Src/utils.c:
+	handle \u and \U escapes for specifying unicode characters
+
 2003-03-15  Doug Kearns  <djkea2@mugca.its.monash.edu.au>
 
 	* 18351: Completion/Unix/Command/_antiword: new completion for
diff --git a/Doc/Zsh/builtins.yo b/Doc/Zsh/builtins.yo
index 899bc96d8..67d2c11e4 100644
--- a/Doc/Zsh/builtins.yo
+++ b/Doc/Zsh/builtins.yo
@@ -278,6 +278,8 @@ sitem(tt(\v))(vertical tab)
 sitem(tt(\\))(backslash)
 sitem(tt(\0)var(NNN))(character code in octal)
 sitem(tt(\x)var(NN))(character code in hexadecimal)
+sitem(tt(\u)var(NNNN))(unicode character code in hexadecimal)
+sitem(tt(\U)var(NNNNNNNN))(unicode character code in hexadecimal)
 endsitem()
 
 pindex(BSD_ECHO, use of)
diff --git a/Src/utils.c b/Src/utils.c
index 64a6a722e..2b0e7faea 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -30,6 +30,15 @@
 #include "zsh.mdh"
 #include "utils.pro"
 
+#if defined(HAVE_WCHAR_H) && defined(HAVE_WCTOMB)
+#include <wchar.h>
+#  ifndef __STDC_ISO_10646__
+#    if defined(HAVE_ICONV) || defined(HAVE_LIBICONV)
+#      include <iconv.h>
+#    endif
+#  endif
+#endif
+
 /* name of script being sourced */
 
 /**/
@@ -3274,7 +3283,8 @@ dquotedzputs(char const *s, FILE *stream)
  *       for no newlines.
  *   3:  As 1, but don't handle \c.
  *   4:  Do $'...' quoting.  Overwrites the existing string instead of
- *       zhalloc'ing 
+ *       zhalloc'ing. If \uNNNN ever generates multi-byte chars longer
+ *       than 6 bytes, will need to adjust this to re-allocate memory.
  *   5:  As 2, but \- is special.  Expects misc to be defined.
  *   6:  As 2, but parses only one character and returns end-pointer
  *       and parsed character in *misc
@@ -3288,11 +3298,28 @@ getkeystring(char *s, int *len, int fromwhere, int *misc)
     char *t, *u = NULL;
     char svchar = '\0';
     int meta = 0, control = 0;
+    int i;
+#if defined(HAVE_WCHAR_H) && defined(HAVE_WCTOMB)
+#  ifdef __STDC_ISO_10646__
+    wint_t wval;
+#  elif defined(HAVE_ICONV) || defined(HAVE_LIBICONV)
+    unsigned int wval;
+    iconv_t cd;
+    char inbuf[4];
+    wchar_t outbuf[1];
+    size_t inbytes, outbytes;
+    char *inptr, *outptr;
+#  endif
+    size_t count;
+    size_t buflen = MB_LEN_MAX * (strlen(s) / 6) + (strlen(s) % 6) + 1;
+#else
+    size_t buflen = strlen(s) + 1;
+#endif
 
     if (fromwhere == 6)
 	t = buf = tmp;
     else if (fromwhere != 4)
-	t = buf = zhalloc(strlen(s) + 1);
+	t = buf = zhalloc(buflen);
     else {
 	t = buf = s;
 	s += 2;
@@ -3363,6 +3390,67 @@ getkeystring(char *s, int *len, int fromwhere, int *misc)
 		    *misc = 1;
 		    break;
 		}
+#if defined(HAVE_WCHAR_H) && defined(HAVE_WCTOMB)
+#if defined(__STDC_ISO_10646__) || defined(HAVE_ICONV) || defined(HAVE_LIBICONV)
+	    case 'u':
+	    case 'U':
+	    	wval = 0;
+		for (i=(*s == 'u' ? 4 : 8); i>0; i--) {
+		    if (*++s && idigit(*s))
+		        wval = wval * 16 + (*s - '0');
+		    else if (*s && (*s >= 'a' && *s <= 'f') ||
+		            (*s >= 'A' && *s <= 'F'))
+		        wval = wval * 16 + (*s & 0x1f) + 9;
+		    else {
+		    	s--;
+		        break;
+		    }
+		}
+    	    	if (fromwhere == 6) {
+		    *misc = wval;
+		    return s+1;
+		}
+#ifdef __STDC_ISO_10646__
+		count = wctomb(t, (wchar_t)wval);
+#elif defined(HAVE_ICONV) || defined(HAVE_LIBICONV)
+    	    	inbytes = outbytes = 4;
+    	    	inptr = inbuf;
+    	    	outptr = (char *)outbuf;
+		/* assume big endian convention for UCS-4 */
+		for (i=3;i>=0;i--) {
+		    inbuf[i] = wval & 0xff;
+		    wval >>= 8;
+		}
+    	    	
+    	    	cd = iconv_open("WCHAR_T", "ISO-10646");
+		if (cd == (iconv_t)-1) {
+		    zerr("cannot do charset conversion", NULL, 0);
+		    if (fromwhere == 4) {
+			for (u = t; (*u++ = *++s););
+			return t;
+		    }
+		    *t = '\0';
+		    *len = t - buf;
+		    return buf;
+		}
+                iconv(cd, &inptr, &inbytes, &outptr, &outbytes);
+		iconv_close(cd);
+		count = wctomb(t, *outbuf);
+#endif
+		if (count == (size_t)-1) {
+		    zerr("character not in range", NULL, 0);
+		    if (fromwhere == 4) {
+			for (u = t; (*u++ = *++s););
+			return t;
+		    }
+		    *t = '\0';
+		    *len = t - buf;
+		    return buf;
+		}
+		t += count;  
+		continue;
+#endif
+#endif
 	    default:
 	    def:
 		if ((idigit(*s) && *s < '8') || *s == 'x') {
diff --git a/zshconfig.ac b/zshconfig.ac
index 33ca83f78..fd578c028 100644
--- a/zshconfig.ac
+++ b/zshconfig.ac
@@ -494,7 +494,7 @@ AC_CHECK_HEADERS(sys/time.h sys/times.h sys/select.h termcap.h termio.h \
 		 limits.h fcntl.h libc.h sys/utsname.h sys/resource.h \
 		 locale.h errno.h stdio.h stdlib.h unistd.h sys/capability.h \
 		 utmp.h utmpx.h sys/types.h pwd.h grp.h poll.h sys/mman.h \
-		 netinet/in_systm.h pcre.h langinfo.h)
+		 netinet/in_systm.h pcre.h langinfo.h wchar.h)
 if test $dynamic = yes; then
   AC_CHECK_HEADERS(dlfcn.h)
   AC_CHECK_HEADERS(dl.h)
@@ -663,6 +663,8 @@ AC_CHECK_LIB(cap, cap_get_proc)
 
 AC_CHECK_LIB(socket, socket)
 
+AC_CHECK_LIB(iconv, iconv)
+
 dnl pcre-config should probably be employed here
 AC_SEARCH_LIBS(pcre_compile, pcre)
 
@@ -959,7 +961,8 @@ AC_CHECK_FUNCS(strftime difftime gettimeofday \
 	       tgetent tigetflag tigetnum tigetstr setupterm \
 	       pcre_compile pcre_study pcre_exec \
 	       nl_langinfo \
-	       erand48 open_memstream)
+	       erand48 open_memstream \
+	       wctomb iconv)
 AC_FUNC_STRCOLL
 
 dnl  Check if tgetent accepts NULL (and will allocate its own termcap buffer)