about summary refs log tree commit diff
path: root/Src/utils.c
diff options
context:
space:
mode:
authorPeter Stephenson <pws@users.sourceforge.net>2006-08-01 21:28:04 +0000
committerPeter Stephenson <pws@users.sourceforge.net>2006-08-01 21:28:04 +0000
commitbb912594b2325334a603893e90e8d9aa7cc534ca (patch)
treebbbd081b8b2191081b260056ce17a4876b74227a /Src/utils.c
parent7d77bc95b2ba7276cf430a989e9b4000c72ba765 (diff)
downloadzsh-bb912594b2325334a603893e90e8d9aa7cc534ca.tar.gz
zsh-bb912594b2325334a603893e90e8d9aa7cc534ca.tar.xz
zsh-bb912594b2325334a603893e90e8d9aa7cc534ca.zip
22575: multibyte fixes for bslashquote(), getzlequery()
Diffstat (limited to 'Src/utils.c')
-rw-r--r--Src/utils.c250
1 files changed, 138 insertions, 112 deletions
diff --git a/Src/utils.c b/Src/utils.c
index 0574431d0..a81c4ed04 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -2835,7 +2835,7 @@ wcsitype(wchar_t c, int itype)
     if (len == 0) {
 	/* NULL is special */
 	return zistype(0, itype);
-    } else if (len == 1 && iascii(*outstr)) {
+    } else if (len == 1 && isascii(*outstr)) {
 	return zistype(*outstr, itype);
     } else {
 	switch (itype) {
@@ -2897,7 +2897,7 @@ itype_end(const char *ptr, int itype, int once)
 		/* in this case non-ASCII characters can't match */
 		if (chr > 127 || !zistype(chr,itype))
 		    break;
-	    } else if (len == 1 && iascii(*ptr)) {
+	    } else if (len == 1 && isascii(*ptr)) {
 		/* ASCII: can't be metafied, use standard test */
 		if (!zistype(*ptr,itype))
 		    break;
@@ -4017,7 +4017,7 @@ hasspecial(char const *s)
  * The last argument should be zero if this is to be used outside a string, *
  * one if it is to be quoted for the inside of a single quoted string,      *
  * two if it is for the inside of a double quoted string, and               *
- * three if it is for the inside of a posix quoted string.                  *
+ * three if it is for the inside of a $'...' quoted string.                 *
  * The string may be metafied and contain tokens.                           */
 
 /**/
@@ -4031,127 +4031,153 @@ bslashquote(const char *s, char **e, int instring)
 
     tt = v = buf;
     u = s;
-    for (; *u; u++) {
-	if (e && *e == u)
-	    *e = v, sf = 1;
-	if (instring == 3) {
-	  int c = *u;
-	  if (c == Meta) {
-	    c = *++u ^ 32;
-	  }
-	  c &= 0xff;
-	  if(isprint(c)) {
-	    switch (c) {
-	    case '\\':
-	    case '\'':
-	      *v++ = '\\';
-	      *v++ = c;
-	      break;
+    if (instring == 3) {
+	/*
+	 * As we test for printability here we need to be able
+	 * to look for multibyte characters.
+	 */
+	convchar_t cc;
+	MB_METACHARINIT();
+	while (*u) {
+	    const char *uend = u + MB_METACHARLENCONV(u, &cc);
+
+	    if (e && !sf && *e <= u) {
+		*e = v;
+		sf = 1;
+	    }
+	    if (
+#ifdef MULTIBYTE_SUPPORT
+		cc != WEOF && 
+#endif
+		MB_ISPRINT(cc)) {
+		switch (cc) {
+		case ZWC('\\'):
+		case ZWC('\''):
+		    *v++ = '\\';
+		    break;
 
-	    default:
-	      if(imeta(c)) {
-		*v++ = Meta;
-		*v++ = c ^ 32;
-	      }
-	      else {
-		if (isset(BANGHIST) && c == bangchar) {
-		  *v++ = '\\';
+		default:
+		    if (isset(BANGHIST) && cc == (wchar_t)bangchar)
+			*v++ = '\\';
+		    break;
 		}
-		*v++ = c;
-	      }
-	      break;
-	    }
-	  }
-	  else {
-	    switch (c) {
-	    case '\0':
-	      *v++ = '\\';
-	      *v++ = '0';
-	      if ('0' <= u[1] && u[1] <= '7') {
-		*v++ = '0';
-		*v++ = '0';
-	      }
-	      break;
-
-	    case '\007': *v++ = '\\'; *v++ = 'a'; break;
-	    case '\b': *v++ = '\\'; *v++ = 'b'; break;
-	    case '\f': *v++ = '\\'; *v++ = 'f'; break;
-	    case '\n': *v++ = '\\'; *v++ = 'n'; break;
-	    case '\r': *v++ = '\\'; *v++ = 'r'; break;
-	    case '\t': *v++ = '\\'; *v++ = 't'; break;
-	    case '\v': *v++ = '\\'; *v++ = 'v'; break;
+		while (u < uend)
+		    *v++ = *u++;
+	    } else {
+		/* Not printable */
+		for (; u < uend; u++) {
+		    /*
+		     * Just do this byte by byte; there's no great
+		     * advantage in being clever with multibyte
+		     * characters if we don't think they're printable.
+		     */
+		    int c;
+		    if (*u == Meta)
+			c = STOUC(*++u ^ 32);
+		    else
+			c = STOUC(*u);
+		    switch (c) {
+		    case '\0':
+			*v++ = '\\';
+			*v++ = '0';
+			if ('0' <= u[1] && u[1] <= '7') {
+			    *v++ = '0';
+			    *v++ = '0';
+			}
+			break;
 
-	    default:
-	      *v++ = '\\';
-	      *v++ = '0' + ((c >> 6) & 7);
-	      *v++ = '0' + ((c >> 3) & 7);
-	      *v++ = '0' + (c & 7);
-	      break;
+		    case '\007': *v++ = '\\'; *v++ = 'a'; break;
+		    case '\b': *v++ = '\\'; *v++ = 'b'; break;
+		    case '\f': *v++ = '\\'; *v++ = 'f'; break;
+		    case '\n': *v++ = '\\'; *v++ = 'n'; break;
+		    case '\r': *v++ = '\\'; *v++ = 'r'; break;
+		    case '\t': *v++ = '\\'; *v++ = 't'; break;
+		    case '\v': *v++ = '\\'; *v++ = 'v'; break;
+
+		    default:
+			*v++ = '\\';
+			*v++ = '0' + ((c >> 6) & 7);
+			*v++ = '0' + ((c >> 3) & 7);
+			*v++ = '0' + (c & 7);
+			break;
+		    }
+		}
 	    }
-	  }
-	  continue;
 	}
-	else if (*u == Tick || *u == Qtick) {
-	    char c = *u++;
+    }
+    else
+    {
+	/*
+	 * Here the only special characters are syntactic, so
+	 * we can go through bytewise.
+	 */
+	for (; *u; u++) {
+	    if (e && *e == u)
+		*e = v, sf = 1;
+	    if (*u == Tick || *u == Qtick) {
+		char c = *u++;
+
+		*v++ = c;
+		while (*u && *u != c)
+		    *v++ = *u++;
+		*v++ = c;
+		if (!*u)
+		    u--;
+		continue;
+	    }
+	    else if ((*u == String || *u == Qstring) &&
+		     (u[1] == Inpar || u[1] == Inbrack || u[1] == Inbrace)) {
+		char c = (u[1] == Inpar ? Outpar : (u[1] == Inbrace ?
+						    Outbrace : Outbrack));
+		char beg = *u;
+		int level = 0;
 
-	    *v++ = c;
-	    while (*u && *u != c)
 		*v++ = *u++;
-	    *v++ = c;
-	    if (!*u)
-		u--;
-	    continue;
-	}
-	else if ((*u == String || *u == Qstring) &&
-		 (u[1] == Inpar || u[1] == Inbrack || u[1] == Inbrace)) {
-	    char c = (u[1] == Inpar ? Outpar : (u[1] == Inbrace ?
-						Outbrace : Outbrack));
-	    char beg = *u;
-	    int level = 0;
-
-	    *v++ = *u++;
-	    *v++ = *u++;
-	    while (*u && (*u != c || level)) {
-		if (*u == beg)
-		    level++;
-		else if (*u == c)
-		    level--;
 		*v++ = *u++;
-	    }
-	    if (*u)
-		*v++ = *u;
-	    else
-		u--;
-	    continue;
-	}
-	else if (ispecial(*u) &&
-		 ((*u != '=' && *u != '~') ||
-		  u == s ||
-		  (isset(MAGICEQUALSUBST) && (u[-1] == '=' || u[-1] == ':')) ||
-		  (*u == '~' && isset(EXTENDEDGLOB))) &&
-	    (!instring ||
-	     (isset(BANGHIST) && *u == (char)bangchar && instring != 1) ||
-	     (instring == 2 &&
-	      (*u == '$' || *u == '`' || *u == '\"' || *u == '\\')) ||
-	     (instring == 1 && *u == '\''))) {
-	    if (*u == '\n' || (instring == 1 && *u == '\'')) {
-		if (unset(RCQUOTES)) {
-		    *v++ = '\'';
-		    if (*u == '\'')
-			*v++ = '\\';
+		while (*u && (*u != c || level)) {
+		    if (*u == beg)
+			level++;
+		    else if (*u == c)
+			level--;
+		    *v++ = *u++;
+		}
+		if (*u)
 		    *v++ = *u;
-		    *v++ = '\'';
-		} else if (*u == '\n')
-		    *v++ = '"', *v++ = '\n', *v++ = '"';
 		else
-		    *v++ = '\'', *v++ = '\'';
+		    u--;
 		continue;
-	    } else
-		*v++ = '\\';
+	    }
+	    else if (ispecial(*u) &&
+		     ((*u != '=' && *u != '~') ||
+		      u == s ||
+		      (isset(MAGICEQUALSUBST) &&
+		       (u[-1] == '=' || u[-1] == ':')) ||
+		      (*u == '~' && isset(EXTENDEDGLOB))) &&
+		     (!instring ||
+		      (isset(BANGHIST) && *u == (char)bangchar &&
+		       instring != 1) ||
+		      (instring == 2 &&
+		       (*u == '$' || *u == '`' || *u == '\"' || *u == '\\')) ||
+		      (instring == 1 && *u == '\''))) {
+		if (*u == '\n' || (instring == 1 && *u == '\'')) {
+		    if (unset(RCQUOTES)) {
+			*v++ = '\'';
+			if (*u == '\'')
+			    *v++ = '\\';
+			*v++ = *u;
+			*v++ = '\'';
+		    } else if (*u == '\n')
+			*v++ = '"', *v++ = '\n', *v++ = '"';
+		    else
+			*v++ = '\'', *v++ = '\'';
+		    continue;
+		} else
+		    *v++ = '\\';
+	    }
+	    if(*u == Meta)
+		*v++ = *u++;
+	    *v++ = *u;
 	}
-	if(*u == Meta)
-	    *v++ = *u++;
-	*v++ = *u;
     }
     *v = '\0';