about summary refs log tree commit diff
path: root/Src
diff options
context:
space:
mode:
authorPeter Stephenson <pws@users.sourceforge.net>2006-06-28 13:12:55 +0000
committerPeter Stephenson <pws@users.sourceforge.net>2006-06-28 13:12:55 +0000
commit6157c14d0602c698aa9ebfac9a2135ef095a76b4 (patch)
treecae7cb80281a1426c06734889fd8f0b83292c6ca /Src
parentf95a6a913c885932827e9c0219221f7de7ccdd79 (diff)
downloadzsh-6157c14d0602c698aa9ebfac9a2135ef095a76b4.tar.gz
zsh-6157c14d0602c698aa9ebfac9a2135ef095a76b4.tar.xz
zsh-6157c14d0602c698aa9ebfac9a2135ef095a76b4.zip
22525: lengths and cases of multibyte strings in parameters and history
Diffstat (limited to 'Src')
-rw-r--r--Src/hist.c174
-rw-r--r--Src/jobs.c2
-rw-r--r--Src/pattern.c19
-rw-r--r--Src/subst.c67
-rw-r--r--Src/utils.c20
-rw-r--r--Src/zsh.h13
6 files changed, 182 insertions, 113 deletions
diff --git a/Src/hist.c b/Src/hist.c
index 0873ccce5..33c4035bf 100644
--- a/Src/hist.c
+++ b/Src/hist.c
@@ -635,10 +635,10 @@ histsubchar(int c)
 		quotebreak(&sline);
 		break;
 	    case 'l':
-		downcase(&sline);
+		sline = casemodify(sline, CASMOD_LOWER);
 		break;
 	    case 'u':
-		upcase(&sline);
+		sline = casemodify(sline, CASMOD_UPPER);
 		break;
 	    default:
 		herrflush();
@@ -1503,42 +1503,130 @@ remlpaths(char **junkptr)
     return 0;
 }
 
-/**/
-int
-makeuppercase(char **junkptr)
-{
-    char *str = *junkptr;
-
-    for (; *str; str++)
-	*str = tuupper(*str);
-    return 1;
-}
+/*
+ * Return modified version of str from the heap with modification
+ * according to one of the CASMOD_* types defined in zsh.h; CASMOD_NONE
+ * is not handled, for obvious reasons.
+ */
 
 /**/
-int
-makelowercase(char **junkptr)
-{
-    char *str = *junkptr;
-
-    for (; *str; str++)
-	*str = tulower(*str);
-    return 1;
-}
+char *
+casemodify(char *str, int how)
+{
+    char *str2 = zhalloc(2 * strlen(str) + 1);
+    char *ptr2 = str2;
+    int nextupper = 1;
+
+#ifdef MULTIBYTE_SUPPORT
+    if (isset(MULTIBYTE)) {
+	VARARR(char, mbstr, MB_CUR_MAX);
+	mbstate_t ps;
+
+	mb_metacharinit();
+	memset(&ps, 0, sizeof(ps));
+	while (*str) {
+	    wint_t wc;
+	    int len = mb_metacharlenconv(str, &wc), mod = 0, len2;
+	    /*
+	     * wc is set to WEOF if the start of str couldn't be
+	     * converted.  Presumably WEOF doesn't match iswlower(), but
+	     * better be safe.
+	     */
+	    if (wc == WEOF) {
+		while (len--)
+		    *ptr2++ = *str++;
+		/* not alphanumeric */
+		nextupper = 1;
+		continue;
+	    }
+	    switch (how) {
+	    case CASMOD_LOWER:
+		if (iswupper(wc)) {
+		    wc = towlower(wc);
+		    mod = 1;
+		}
+		break;
 
-/**/
-int
-makecapitals(char **junkptr)
-{
-    char *str = *junkptr;
+	    case CASMOD_UPPER:
+		if (iswlower(wc)) {
+		    wc = towupper(wc);
+		    mod = 1;
+		}
+		break;
 
-    for (; *str;) {
-	for (; *str && !ialnum(*str); str++);
-	if (*str)
-	    *str = tuupper(*str), str++;
-	for (; *str && ialnum(*str); str++)
-	    *str = tulower(*str);
+	    case CASMOD_CAPS:
+	    default:		/* shuts up compiler */
+		if (!iswalnum(wc))
+		    nextupper = 1;
+		else if (nextupper) {
+		    if (iswlower(wc)) {
+			wc = towupper(wc);
+			mod = 1;
+		    }
+		    nextupper = 0;
+		} else if (iswupper(wc)) {
+		    wc = towlower(wc);
+		    mod = 1;
+		}
+		break;
+	    }
+	    if (mod && (len2 = wcrtomb(mbstr, wc, &ps)) > 0) {
+		char *mbptr;
+
+		for (mbptr = mbstr; mbptr < mbstr + len2; mbptr++) {
+		    if (imeta(STOUC(*mbptr))) {
+			*ptr2++ = Meta;
+			*ptr2++ = *mbptr ^ 32;
+		    } else
+			*ptr2++ = *mbptr;
+		}
+		str += len;
+	    } else {
+		while (len--)
+		    *ptr2++ = *str++;
+	    }
+	}
     }
-    return 1;
+    else
+#endif
+	while (*str) {
+	    int c;
+	    if (*str == Meta) {
+		c = str[1] ^ 32;
+		str += 2;
+	    } else
+		c = *str++;
+	    switch (how) {
+	    case CASMOD_LOWER:
+		if (isupper(c))
+		    c = tolower(c);
+		break;
+
+	    case CASMOD_UPPER:
+		if (islower(c))
+		    c = toupper(c);
+		break;
+
+	    case CASMOD_CAPS:
+	    default:		/* shuts up compiler */
+		if (!ialnum(c))
+		    nextupper = 1;
+		else if (nextupper) {
+		    if (islower(c))
+			c = toupper(c);
+		    nextupper = 0;
+		} else if (isupper(c))
+		    c = tolower(c);
+		break;
+	    }
+	    if (imeta(c)) {
+		*ptr2++ = Meta;
+		*ptr2++ = c ^ 32;
+	    } else
+		*ptr2++ = c;
+	}
+    *ptr2 = '\0';
+    return str2;
 }
 
 /**/
@@ -1645,26 +1733,6 @@ getargs(Histent elist, int arg1, int arg2)
 }
 
 /**/
-void
-upcase(char **x)
-{
-    char *pp = *(char **)x;
-
-    for (; *pp; pp++)
-	*pp = tuupper(*pp);
-}
-
-/**/
-void
-downcase(char **x)
-{
-    char *pp = *(char **)x;
-
-    for (; *pp; pp++)
-	*pp = tulower(*pp);
-}
-
-/**/
 int
 quote(char **tr)
 {
diff --git a/Src/jobs.c b/Src/jobs.c
index cfc733ecf..509b9e843 100644
--- a/Src/jobs.c
+++ b/Src/jobs.c
@@ -2014,7 +2014,7 @@ bin_kill(char *nam, char **argv, UNUSED(Options ops), UNUSED(int func))
 		    return 1;
 		} else
 		    signame = *argv;
-		makeuppercase(&signame);
+		signame = casemodify(signame, CASMOD_UPPER);
 		if (!strncmp(signame, "SIG", 3))
 		    signame+=3;
 
diff --git a/Src/pattern.c b/Src/pattern.c
index a39095c37..bc9afbae3 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -1644,17 +1644,12 @@ charrefinc(char **x, char *y)
 }
 
 
-#ifndef PARAMETER_CODE_HANDLES_MULTIBYTE
 /*
- * TODO: We should use the other branch, but currently
- * the parameter code doesn't handle multibyte input,
- * so this would produce the wrong subscripts,
- * so just use a raw byte difference for now.
+ * Counter the number of characters between two pointers, smaller first
+ *
+ * This is used when setting values in parameters, so we obey
+ * the MULTIBYTE option (even if it's been overridden locally).
  */
-/* Counter the number of characters between two pointers, smaller first */
-# define CHARSUB(x,y)	((y) - (x))
-#else
-/* Counter the number of characters between two pointers, smaller first */
 #define CHARSUB(x,y)	charsub(x, y)
 static ptrdiff_t
 charsub(char *x, char *y)
@@ -1663,6 +1658,9 @@ charsub(char *x, char *y)
     size_t ret;
     wchar_t wc;
 
+    if (!isset(MULTIBYTE))
+	return y - x;
+
     while (x < y) {
 	ret = mbrtowc(&wc, x, y-x, &shiftstate);
 
@@ -1674,13 +1672,12 @@ charsub(char *x, char *y)
 	/* Treat nulls as normal characters */
 	if (!ret)
 	    ret = 1;
-	res += ret;
+	res++;
 	x += ret;
     }
 
     return res;
 }
-#endif
 
 #else /* no MULTIBYTE_SUPPORT */
 
diff --git a/Src/subst.c b/Src/subst.c
index 803f8d99d..d69f34c4b 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -1019,7 +1019,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
     /* (u): straightforward. */
     int unique = 0;
     /* combination of (L), (U) and (C) flags. */
-    int casmod = 0;
+    int casmod = CASMOD_NONE;
     /*
      * quotemod says we are doing either (q) (positive), (Q) (negative)
      * or not (0).  quotetype counts the q's for the first case.
@@ -1211,13 +1211,13 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 		    break;
 
 		case 'L':
-		    casmod = 2;
+		    casmod = CASMOD_LOWER;
 		    break;
 		case 'U':
-		    casmod = 1;
+		    casmod = CASMOD_UPPER;
 		    break;
 		case 'C':
-		    casmod = 3;
+		    casmod = CASMOD_CAPS;
 		    break;
 
 		case 'o':
@@ -1819,17 +1819,13 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 		    break;
 		}
 		switch (v->pm->node.flags & (PM_LOWER | PM_UPPER)) {
-		    char *t;
-
 		case PM_LOWER:
-		    t = val;
-		    for (; (c = *t); t++)
-			*t = tulower(c);
+		    val = casemodify(val, CASMOD_LOWER);
+		    copied = 1;
 		    break;
 		case PM_UPPER:
-		    t = val;
-		    for (; (c = *t); t++)
-			*t = tuupper(c);
+		    val = casemodify(val, CASMOD_UPPER);
+		    copied = 1;
 		    break;
 		}
 	    }
@@ -2316,14 +2312,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 
 	if (isarr) {
 	    char **ctr;
-	    int sl = sep ? ztrlen(sep) : 1;
+	    int sl = sep ? MB_METASTRLEN(sep) : 1;
 
 	    if (getlen == 1)
 		for (ctr = aval; *ctr; ctr++, len++);
 	    else if (getlen == 2) {
 		if (*aval)
 		    for (len = -sl, ctr = aval;
-			 len += sl + ztrlen(*ctr), *++ctr;);
+			 len += sl + MB_METASTRLEN(*ctr), *++ctr;);
 	    }
 	    else
 		for (ctr = aval;
@@ -2331,7 +2327,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 		     len += wordcount(*ctr, spsep, getlen > 3), ctr++);
 	} else {
 	    if (getlen < 3)
-		len = ztrlen(val);
+		len = MB_METASTRLEN(val);
 	    else
 		len = wordcount(val, spsep, getlen > 3);
 	}
@@ -2387,33 +2383,19 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
     /*
      * Perform case modififications.
      */
-    if (casmod) {
+    if (casmod != CASMOD_NONE) {
+	copied = 1;		/* string is always modified by copy */
 	if (isarr) {
-	    char **ap;
+	    char **ap, **ap2;
 
-	    if (!copied)
-		aval = arrdup(aval), copied = 1;
 	    ap = aval;
+	    ap2 = aval = (char **) zhalloc(sizeof(char *) * (arrlen(aval)+1));
 
-	    if (casmod == 1)
-		for (; *ap; ap++)
-		    makeuppercase(ap);
-	    else if (casmod == 2)
-		for (; *ap; ap++)
-		    makelowercase(ap);
-	    else
-		for (; *ap; ap++)
-		    makecapitals(ap);
-
+	    while (*ap)
+		*ap2++ = casemodify(*ap++, casmod);
+	    *ap2++ = NULL;
 	} else {
-	    if (!copied)
-		val = dupstring(val), copied = 1;
-	    if (casmod == 1)
-		makeuppercase(&val);
-	    else if (casmod == 2)
-		makelowercase(&val);
-	    else
-		makecapitals(&val);
+	    val = casemodify(val, casmod);
 	}
     }
     /*
@@ -2975,7 +2957,8 @@ modify(char **str, char **ptr)
 		for (t = e = *str; (tt = findword(&e, sep));) {
 		    tc = *e;
 		    *e = '\0';
-		    copy = dupstring(tt);
+		    if (c != 'l' && c != 'u')
+			copy = dupstring(tt);
 		    *e = tc;
 		    switch (c) {
 		    case 'h':
@@ -2991,10 +2974,10 @@ modify(char **str, char **ptr)
 			remlpaths(&copy);
 			break;
 		    case 'l':
-			downcase(&copy);
+			copy = casemodify(tt, CASMOD_LOWER);
 			break;
 		    case 'u':
-			upcase(&copy);
+			copy = casemodify(tt, CASMOD_UPPER);
 			break;
 		    case 's':
 			if (hsubl && hsubr)
@@ -3050,10 +3033,10 @@ modify(char **str, char **ptr)
 		    remlpaths(str);
 		    break;
 		case 'l':
-		    downcase(str);
+		    *str = casemodify(*str, CASMOD_LOWER);
 		    break;
 		case 'u':
-		    upcase(str);
+		    *str = casemodify(*str, CASMOD_UPPER);
 		    break;
 		case 's':
 		    if (hsubl && hsubr) {
diff --git a/Src/utils.c b/Src/utils.c
index 4b2f07f19..32f6ae336 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -3687,7 +3687,7 @@ static mbstate_t mb_shiftstate;
 
 /*
  * Initialise multibyte state: called before a sequence of
- * mb_metacharlen().
+ * mb_metacharlenconv().
  */
 
 /**/
@@ -3703,18 +3703,24 @@ mb_metacharinit(void)
  * but character is not valid (e.g. possibly incomplete at end of string).
  * Returned value is guaranteed not to reach beyond the end of the
  * string (assuming correct metafication).
+ *
+ * If wcp is not NULL, the converted wide character is stored there.
+ * If no conversion could be done WEOF is used.
  */
 
 /**/
 int
-mb_metacharlen(char *s)
+mb_metacharlenconv(char *s, wint_t *wcp)
 {
     char inchar, *ptr;
     size_t ret;
     wchar_t wc;
 
-    if (!isset(MULTIBYTE))
+    if (!isset(MULTIBYTE)) {
+	if (wcp)
+	    *wcp = WEOF;
 	return 1 + (*s == Meta);
+    }
 
     ret = MB_INVALID;
     for (ptr = s; *ptr; ) {
@@ -3729,14 +3735,18 @@ mb_metacharlen(char *s)
 	    break;
 	if (ret == MB_INCOMPLETE)
 	    continue;
+	if (wcp)
+	    *wcp = wc;
 	return ptr - s;
     }
 
+    if (wcp)
+	*wcp = WEOF;
     /* No valid multibyte sequence */
     memset(&mb_shiftstate, 0, sizeof(mb_shiftstate));
-    if (ptr > s)
+    if (ptr > s) {
 	return 1 + (*s == Meta);	/* Treat as single byte character */
-    else
+    } else
 	return 0;		/* Probably shouldn't happen */
 }
 
diff --git a/Src/zsh.h b/Src/zsh.h
index 31609d3c5..b0962574a 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -1882,6 +1882,17 @@ struct heap {
 #define ZSIG_ALIAS	(1<<3)  /* Trap is stored under an alias */
 #define ZSIG_SHIFT	4
 
+/************************/
+/* Flags to casemodifiy */
+/************************/
+
+enum {
+    CASMOD_NONE,		/* dummy for tests */
+    CASMOD_UPPER,
+    CASMOD_LOWER,
+    CASMOD_CAPS
+};
+
 /**********************************/
 /* Flags to third argument of zle */
 /**********************************/
@@ -1927,7 +1938,7 @@ typedef char *(*ZleGetLineFn) _((int *, int *));
 #ifdef MULTIBYTE_SUPPORT
 #define nicezputs(str, outs)	(void)mb_niceformat((str), (outs), NULL, 0)
 #define MB_METACHARINIT()	mb_metacharinit()
-#define MB_METACHARLEN(str)	mb_metacharlen(str)
+#define MB_METACHARLEN(str)	mb_metacharlenconv(str, NULL)
 #define MB_METASTRLEN(str)	mb_metastrlen(str)
 
 #define MB_INCOMPLETE	((size_t)-2)