summary refs log tree commit diff
diff options
context:
space:
mode:
authorPeter Stephenson <pws@users.sourceforge.net>2006-11-02 18:43:19 +0000
committerPeter Stephenson <pws@users.sourceforge.net>2006-11-02 18:43:19 +0000
commitd8207acddbd1ad5e9339115f7b7bf09820b98c5a (patch)
tree710aa94ec2ee2d06bedd341d0e546d0301af4c03
parentd94e67d6fd182860dca7580edc151315c048f6d7 (diff)
downloadzsh-d8207acddbd1ad5e9339115f7b7bf09820b98c5a.tar.gz
zsh-d8207acddbd1ad5e9339115f7b7bf09820b98c5a.tar.xz
zsh-d8207acddbd1ad5e9339115f7b7bf09820b98c5a.zip
22952: fix some argument delimiters to work with multibyte characters
-rw-r--r--ChangeLog6
-rw-r--r--Src/glob.c17
-rw-r--r--Src/params.c23
-rw-r--r--Src/subst.c197
-rw-r--r--Test/D04parameter.ztst14
-rw-r--r--Test/D07multibyte.ztst14
6 files changed, 196 insertions, 75 deletions
diff --git a/ChangeLog b/ChangeLog
index f703cf0fd..c6ebc0d02 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,11 @@
 2006-11-02  Peter Stephenson  <pws@csr.com>
 
+	* 22952: Src/glob.c, Src/params.c, Src/subst.c,
+	Test/D04parameter.ztst: fix multibyte delimiters for
+	arguments to parameter flags and substitution modifiers
+	in parameters and glob qualifiers (but not yet substitution
+	modifiers in history).
+
 	* 22950: Src/Zle/zle_tricky.c: starting menu completion
 	with reverse-menu-complete used the first match instead
 	of the last.
diff --git a/Src/glob.c b/Src/glob.c
index 201427bdb..394e91d01 100644
--- a/Src/glob.c
+++ b/Src/glob.c
@@ -1243,9 +1243,10 @@ zglob(LinkList list, LinkNode np, int nountok)
 		    else {
 			/* ... or a user name */
 			char sav, *tt;
+			int arglen;
 
 			/* Find matching delimiters */
-			tt = get_strarg(s);
+			tt = get_strarg(s, &arglen);
 			if (!*tt) {
 			    zerr("missing end of name");
 			    data = 0;
@@ -1255,7 +1256,7 @@ zglob(LinkList list, LinkNode np, int nountok)
 			    sav = *tt;
 			    *tt = '\0';
 
-			    if ((pw = getpwnam(s + 1)))
+			    if ((pw = getpwnam(s + arglen)))
 				data = pw->pw_uid;
 			    else {
 				zerr("unknown user");
@@ -1268,7 +1269,7 @@ zglob(LinkList list, LinkNode np, int nountok)
 			    data = 0;
 #endif /* !USE_GETPWNAM */
 			    if (sav)
-				s = tt + 1;
+				s = tt + arglen;
 			    else
 				s = tt;
 			}
@@ -1283,8 +1284,9 @@ zglob(LinkList list, LinkNode np, int nountok)
 		    else {
 			/* ...or a delimited group name. */
 			char sav, *tt;
+			int arglen;
 
-			tt = get_strarg(s);
+			tt = get_strarg(s, &arglen);
 			if (!*tt) {
 			    zerr("missing end of name");
 			    data = 0;
@@ -1294,7 +1296,7 @@ zglob(LinkList list, LinkNode np, int nountok)
 			    sav = *tt;
 			    *tt = '\0';
 
-			    if ((gr = getgrnam(s + 1)))
+			    if ((gr = getgrnam(s + arglen)))
 				data = gr->gr_gid;
 			    else {
 				zerr("unknown group");
@@ -1307,7 +1309,7 @@ zglob(LinkList list, LinkNode np, int nountok)
 			    data = 0;
 #endif /* !USE_GETGRNAM */
 			    if (sav)
-				s = tt + 1;
+				s = tt + arglen;
 			    else
 				s = tt;
 			}
@@ -1438,8 +1440,7 @@ zglob(LinkList list, LinkNode np, int nountok)
 			    tt = NULL;
 			}
 		    } else {
-			plus = 1;
-			tt = get_strarg(s);
+			tt = get_strarg(s, &plus);
 			if (!*tt)
 			{
 			    zerr("missing end of string");
diff --git a/Src/params.c b/Src/params.c
index e60c8c740..7d7f0e8e7 100644
--- a/Src/params.c
+++ b/Src/params.c
@@ -947,7 +947,7 @@ getarg(char **str, int *inv, Value v, int a2, zlong *w,
        int *prevcharlen, int *nextcharlen)
 {
     int hasbeg = 0, word = 0, rev = 0, ind = 0, down = 0, l, i, ishash;
-    int keymatch = 0, needtok = 0;
+    int keymatch = 0, needtok = 0, arglen;
     char *s = *str, *sep = NULL, *t, sav, *d, **ta, **p, *tt, c;
     zlong num = 1, beg = 0, r = 0;
     Patprog pprog = NULL;
@@ -1004,28 +1004,28 @@ getarg(char **str, int *inv, Value v, int a2, zlong *w,
 		 * special interpretation by getindex() of `*' or `@'. */
 		break;
 	    case 'n':
-		t = get_strarg(++s);
+		t = get_strarg(++s, &arglen);
 		if (!*t)
 		    goto flagerr;
 		sav = *t;
 		*t = '\0';
-		num = mathevalarg(s + 1, &d);
+		num = mathevalarg(s + arglen, &d);
 		if (!num)
 		    num = 1;
 		*t = sav;
-		s = t;
+		s = t + arglen - 1;
 		break;
 	    case 'b':
 		hasbeg = 1;
-		t = get_strarg(++s);
+		t = get_strarg(++s, &arglen);
 		if (!*t)
 		    goto flagerr;
 		sav = *t;
 		*t = '\0';
-		if ((beg = mathevalarg(s + 1, &d)) > 0)
+		if ((beg = mathevalarg(s + arglen, &d)) > 0)
 		    beg--;
 		*t = sav;
-		s = t;
+		s = t + arglen - 1;
 		break;
 	    case 'p':
 		escapes = 1;
@@ -1033,15 +1033,16 @@ getarg(char **str, int *inv, Value v, int a2, zlong *w,
 	    case 's':
 		/* This gives the string that separates words *
 		 * (for use with the `w' flag).               */
-		t = get_strarg(++s);
+		t = get_strarg(++s, &arglen);
 		if (!*t)
 		    goto flagerr;
 		sav = *t;
 		*t = '\0';
-		sep = escapes ? getkeystring(s + 1, &waste, GETKEYS_SEP, NULL)
-		    : dupstring(s + 1);
+		s += arglen;
+		sep = escapes ? getkeystring(s, &waste, GETKEYS_SEP, NULL)
+		    : dupstring(s);
 		*t = sav;
-		s = t;
+		s = t + arglen - 1;
 		break;
 	    default:
 	      flagerr:
diff --git a/Src/subst.c b/Src/subst.c
index abc3c82af..3a5b9b353 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -1137,62 +1137,113 @@ dopadding(char *str, int prenum, int postnum, char *preone, char *postone,
     return ret;
 }
 
+
+/*
+ * Look for a delimited portion of a string.  The first (possibly
+ * multibyte) character at s is the delimiter.  Various forms
+ * of brackets are treated separately, as documented.
+ *
+ * Returns a pointer to the final delimiter.  Sets *len to the
+ * length of the final delimiter; a NULL causes *len to be set
+ * to zero since we shouldn't advance past it.  (The string is
+ * tokenized, so a NULL is a real end of string.)
+ */
+
 /**/
 char *
-get_strarg(char *s)
+get_strarg(char *s, int *lenp)
 {
-    char t = *s++;
+    convchar_t del;
+    int len;
+    char tok = 0;
 
-    if (!t)
-	return s - 1;
+    MB_METACHARINIT();
+    len = MB_METACHARLENCONV(s, &del);
+    if (!len) {
+	*lenp = 0;
+	return s;
+    }
 
-    switch (t) {
-    case '(':
-	t = ')';
+#ifdef MULTIBYTE_SUPPORT
+    if (del == WEOF)
+	del = (wint_t)((*s == Meta) ? s[1] ^ 32 : *s);
+#endif
+    s += len;
+    switch (del) {
+    case ZWC('('):
+	del = ZWC(')');
 	break;
     case '[':
-	t = ']';
+	del = ZWC(']');
 	break;
     case '{':
-	t = '}';
+	del = ZWC('}');
 	break;
     case '<':
-	t = '>';
+	del = ZWC('>');
 	break;
     case Inpar:
-	t = Outpar;
+	tok = Outpar;
 	break;
     case Inang:
-	t = Outang;
+	tok = Outang;
 	break;
     case Inbrace:
-	t = Outbrace;
+	tok = Outbrace;
 	break;
     case Inbrack:
-	t = Outbrack;
+	tok = Outbrack;
 	break;
     }
 
-    while (*s && *s != t)
-	s++;
+    if (tok) {
+	/*
+	 * Looking for a matching token; we want the literal byte,
+	 * not a decoded multibyte character, so search specially.
+	 */
+	while (*s && *s != tok)
+	    s++;
+    } else {
+	convchar_t del2;
+	len = 0;
+	while (*s) {
+	    len = MB_METACHARLENCONV(s, &del2);
+#ifdef MULTIBYTE_SUPPORT
+	    if (del2 == WEOF)
+		del2 = (wint_t)((*s == Meta) ? s[1] ^ 32 : *s);
+#endif
+	    if (del == del2)
+		break;
+	    s += len;
+	}
+    }
 
+    *lenp = len;
     return s;
 }
 
+/*
+ * Get an integer argument; update *s to the end of the
+ * final delimiter.  *delmatchp is set to 1 if we have matching
+ * delimiters and there was no error in the evaluation, else 0.
+ */
+
 /**/
 static int
-get_intarg(char **s)
+get_intarg(char **s, int *delmatchp)
 {
-    char *t = get_strarg(*s + 1);
+    int arglen;
+    char *t = get_strarg(*s, &arglen);
     char *p, sav;
     zlong ret;
 
+    *delmatchp = 0;
     if (!*t)
 	return -1;
     sav = *t;
     *t = '\0';
-    p = dupstring(*s + 2);
-    *s = t;
+    p = dupstring(*s + arglen);
+    *s = t + arglen;
     *t = sav;
     if (parsestr(p))
 	return -1;
@@ -1204,6 +1255,7 @@ get_intarg(char **s)
 	return -1;
     if (ret < 0)
 	ret = -ret;
+    *delmatchp = 1;
     return ret < 0 ? -ret : ret;
 }
 
@@ -1540,8 +1592,8 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 	    int escapes = 0;
 	    int klen;
 #define UNTOK(C)  (itok(C) ? ztokens[(C) - Pound] : (C))
-#define UNTOK_AND_ESCAPE(X) {\
-		untokenize(X = dupstring(s + 1));\
+#define UNTOK_AND_ESCAPE(X, S) {\
+		untokenize(X = dupstring(S));\
 		if (escapes) {\
 		    X = getkeystring(X, &klen, GETKEYS_SEP, NULL);\
 		    X = metafy(X, klen, META_HREALLOC);\
@@ -1549,6 +1601,9 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 	    }
 
 	    for (s++; (c = *s) != ')' && c != Outpar; s++, tt = 0) {
+		int arglen;	/* length of modifier argument */
+		int delmatch;	/* integer delimiters matched OK */
+
 		switch (c) {
 		case ')':
 		case Outpar:
@@ -1578,9 +1633,11 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 		    flags |= SUB_SUBSTR;
 		    break;
 		case 'I':
-		    flnum = get_intarg(&s);
+		    s++;
+		    flnum = get_intarg(&s, &delmatch);
 		    if (flnum < 0)
 			goto flagerr;
+		    s--;
 		    break;
 
 		case 'L':
@@ -1658,16 +1715,16 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 		    tt = 1;
 		/* fall through */
 		case 'j':
-		    t = get_strarg(++s);
+		    t = get_strarg(++s, &arglen);
 		    if (*t) {
 			sav = *t;
 			*t = '\0';
 			if (tt)
-			    UNTOK_AND_ESCAPE(spsep)
+			    UNTOK_AND_ESCAPE(spsep, s + arglen)
 			else
-			    UNTOK_AND_ESCAPE(sep)
+			    UNTOK_AND_ESCAPE(sep, s + arglen)
 			*t = sav;
-			s = t;
+			s = t + arglen - 1;
 		    } else
 			goto flagerr;
 		    break;
@@ -1676,43 +1733,43 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 		    tt = 1;
 		/* fall through */
 		case 'r':
-		    sav = s[1];
-		    num = get_intarg(&s);
+		    s++;
+		    num = get_intarg(&s, &delmatch);
 		    if (num < 0)
 			goto flagerr;
 		    if (tt)
 			prenum = num;
 		    else
 			postnum = num;
-		    if (UNTOK(s[1]) != UNTOK(sav))
+		    if (!delmatch)
 			break;
-		    t = get_strarg(++s);
+		    t = get_strarg(s, &arglen);
 		    if (!*t)
 			goto flagerr;
 		    sav = *t;
 		    *t = '\0';
 		    if (tt)
-			UNTOK_AND_ESCAPE(premul)
+			UNTOK_AND_ESCAPE(premul, s + arglen)
 		    else
-			UNTOK_AND_ESCAPE(postmul)
+			UNTOK_AND_ESCAPE(postmul, s + arglen)
 		    *t = sav;
 		    sav = *s;
-		    s = t + 1;
+		    s = t + arglen;
 		    if (UNTOK(*s) != UNTOK(sav)) {
 			s--;
 			break;
 		    }
-		    t = get_strarg(s);
+		    t = get_strarg(s, &arglen);
 		    if (!*t)
 			goto flagerr;
 		    sav = *t;
 		    *t = '\0';
 		    if (tt)
-			UNTOK_AND_ESCAPE(preone)
+			UNTOK_AND_ESCAPE(preone, s + arglen)
 		    else
-			UNTOK_AND_ESCAPE(postone)
+			UNTOK_AND_ESCAPE(postone, s + arglen)
 		    *t = sav;
-		    s = t;
+		    s = t + arglen - 1;
 		    break;
 
 		case 'm':
@@ -3251,9 +3308,10 @@ arithsubst(char *a, char **bptr, char *rest)
 void
 modify(char **str, char **ptr)
 {
-    char *ptr1, *ptr2, *ptr3, del, *lptr, c, *test, *sep, *t, *tt, tc, *e;
-    char *copy, *all, *tmp, sav;
-    int gbal, wall, rec, al, nl;
+    char *ptr1, *ptr2, *ptr3, *lptr, c, *test, *sep, *t, *tt, tc, *e;
+    char *copy, *all, *tmp, sav, sav1, *ptr1end;
+    int gbal, wall, rec, al, nl, charlen, delmatch;
+    convchar_t del;
 
     test = NULL;
 
@@ -3282,20 +3340,48 @@ modify(char **str, char **ptr)
 		break;
 
 	    case 's':
-		/* TODO: multibyte delimiter */
 		c = **ptr;
 		(*ptr)++;
 		ptr1 = *ptr;
-		del = *ptr1++;
-		for (ptr2 = ptr1; *ptr2 != del && *ptr2; ptr2++);
+		MB_METACHARINIT();
+		charlen = MB_METACHARLENCONV(ptr1, &del);
+#ifdef MULTIBYTE_SUPPORT
+		if (del == WEOF)
+		    del = (wint_t)((*ptr1 == Meta) ? ptr1[1] ^ 32 : *ptr1);
+#endif
+		ptr1 += charlen;
+		for (ptr2 = ptr1, charlen = 0; *ptr2; ptr2 += charlen) {
+		    convchar_t del2;
+		    charlen = MB_METACHARLENCONV(ptr2, &del2);
+#ifdef MULTIBYTE_SUPPORT
+		    if (del2 == WEOF)
+			del2 = (wint_t)((*ptr2 == Meta) ?
+					ptr2[1] ^ 32 : *ptr2);
+#endif
+		    if (del2 == del)
+			break;
+		}
 		if (!*ptr2) {
 		    zerr("bad substitution");
 		    return;
 		}
-		*ptr2++ = '\0';
-		for (ptr3 = ptr2; *ptr3 != del && *ptr3; ptr3++);
-		if ((sav = *ptr3))
-		    *ptr3++ = '\0';
+		ptr1end = ptr2;
+		ptr2 += charlen;
+		sav1 = *ptr1end;
+		*ptr1end = '\0';
+		for (ptr3 = ptr2, charlen = 0; *ptr3; ptr3 += charlen) {
+		    convchar_t del3;
+		    charlen = MB_METACHARLENCONV(ptr3, &del3);
+#ifdef MULTIBYTE_SUPPORT
+		    if (del3 == WEOF)
+			del3 = (wint_t)((*ptr3 == Meta) ?
+					ptr3[1] ^ 32 : *ptr3);
+#endif
+		    if (del3 == del)
+			break;
+		}
+		sav = *ptr3;
+		*ptr3 = '\0';
 		if (*ptr1) {
 		    zsfree(hsubl);
 		    hsubl = ztrdup(ptr1);
@@ -3313,10 +3399,9 @@ modify(char **str, char **ptr)
 		for (tt = hsubr = ztrdup(ptr2); *tt; tt++)
 		    if (inull(*tt) && *tt != Bnullkeep)
 			chuck(tt--);
-		ptr2[-1] = del;
-		if (sav)
-		    ptr3[-1] = sav;
-		*ptr = ptr3 - 1;
+		*ptr1end = sav1;
+		*ptr3 = sav;
+		*ptr = ptr3 + charlen - 1;
 		break;
 
 	    case '&':
@@ -3335,13 +3420,13 @@ modify(char **str, char **ptr)
 	    case 'W':
 		wall = 1;
 		(*ptr)++;
-		ptr1 = get_strarg(ptr2 = *ptr);
+		ptr1 = get_strarg(ptr2 = *ptr, &charlen);
 		if ((sav = *ptr1))
 		    *ptr1 = '\0';
-		sep = dupstring(ptr2 + 1);
+		sep = dupstring(ptr2 + charlen);
 		if (sav)
 		    *ptr1 = sav;
-		*ptr = ptr1 + 1;
+		*ptr = ptr1 + charlen;
 		c = '\0';
 		break;
 
@@ -3350,8 +3435,8 @@ modify(char **str, char **ptr)
 		(*ptr)++;
 		break;
 	    case 'F':
-		rec = get_intarg(ptr);
 		(*ptr)++;
+		rec = get_intarg(ptr, &delmatch);
 		break;
 	    default:
 		*ptr = lptr;
diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst
index 57147d53e..ce5898f88 100644
--- a/Test/D04parameter.ztst
+++ b/Test/D04parameter.ztst
@@ -867,3 +867,17 @@
 >andsomekept
 >andsomekept
 
+  file=/one/two/three/four
+  print ${file:fh}
+  print ${file:F.1.h}
+  print ${file:F+2+h}
+  print ${file:F(3)h}
+  print ${file:F<4>h}
+  print ${file:F{5}h}
+0:Modifiers with repetition
+>/
+>/one/two/three
+>/one/two
+>/one
+>/
+>/
diff --git a/Test/D07multibyte.ztst b/Test/D07multibyte.ztst
index 8b17a7294..752013eec 100644
--- a/Test/D07multibyte.ztst
+++ b/Test/D07multibyte.ztst
@@ -297,3 +297,17 @@
 >«κατέβην ¥«χθὲς»£ ¥¥«εἰς»£ «Πειραιᾶ
 >ςκατέβην ηςχθὲςΓλ τηςεἰςΓλ ςΠειραιᾶ
 # er... yeah, that looks right...
+
+  foo=picobarn
+  print ${foo:s£bar£rod£:s¥rod¥stick¥}
+0:Delimiters in modifiers
+>picostickn
+
+# TODO: if we get paired multibyte bracket delimiters to work
+# (as Emacs does, the smug so-and-so), the following should change.
+  foo=bar
+  print ${(r£5£¥X¥)foo}
+  print ${(l«10«»Y»£HI£)foo}
+0:Delimiters in parameter flags
+>barXX
+>YYYYYHIbar