From d8207acddbd1ad5e9339115f7b7bf09820b98c5a Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Thu, 2 Nov 2006 18:43:19 +0000 Subject: 22952: fix some argument delimiters to work with multibyte characters --- ChangeLog | 6 ++ Src/glob.c | 17 +++-- Src/params.c | 23 +++--- Src/subst.c | 197 +++++++++++++++++++++++++++++++++++-------------- Test/D04parameter.ztst | 14 ++++ Test/D07multibyte.ztst | 14 ++++ 6 files changed, 196 insertions(+), 75 deletions(-) diff --git a/ChangeLog b/ChangeLog index f703cf0fd..c6ebc0d02 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,11 @@ 2006-11-02 Peter Stephenson + * 22952: Src/glob.c, Src/params.c, Src/subst.c, + Test/D04parameter.ztst: fix multibyte delimiters for + arguments to parameter flags and substitution modifiers + in parameters and glob qualifiers (but not yet substitution + modifiers in history). + * 22950: Src/Zle/zle_tricky.c: starting menu completion with reverse-menu-complete used the first match instead of the last. diff --git a/Src/glob.c b/Src/glob.c index 201427bdb..394e91d01 100644 --- a/Src/glob.c +++ b/Src/glob.c @@ -1243,9 +1243,10 @@ zglob(LinkList list, LinkNode np, int nountok) else { /* ... or a user name */ char sav, *tt; + int arglen; /* Find matching delimiters */ - tt = get_strarg(s); + tt = get_strarg(s, &arglen); if (!*tt) { zerr("missing end of name"); data = 0; @@ -1255,7 +1256,7 @@ zglob(LinkList list, LinkNode np, int nountok) sav = *tt; *tt = '\0'; - if ((pw = getpwnam(s + 1))) + if ((pw = getpwnam(s + arglen))) data = pw->pw_uid; else { zerr("unknown user"); @@ -1268,7 +1269,7 @@ zglob(LinkList list, LinkNode np, int nountok) data = 0; #endif /* !USE_GETPWNAM */ if (sav) - s = tt + 1; + s = tt + arglen; else s = tt; } @@ -1283,8 +1284,9 @@ zglob(LinkList list, LinkNode np, int nountok) else { /* ...or a delimited group name. */ char sav, *tt; + int arglen; - tt = get_strarg(s); + tt = get_strarg(s, &arglen); if (!*tt) { zerr("missing end of name"); data = 0; @@ -1294,7 +1296,7 @@ zglob(LinkList list, LinkNode np, int nountok) sav = *tt; *tt = '\0'; - if ((gr = getgrnam(s + 1))) + if ((gr = getgrnam(s + arglen))) data = gr->gr_gid; else { zerr("unknown group"); @@ -1307,7 +1309,7 @@ zglob(LinkList list, LinkNode np, int nountok) data = 0; #endif /* !USE_GETGRNAM */ if (sav) - s = tt + 1; + s = tt + arglen; else s = tt; } @@ -1438,8 +1440,7 @@ zglob(LinkList list, LinkNode np, int nountok) tt = NULL; } } else { - plus = 1; - tt = get_strarg(s); + tt = get_strarg(s, &plus); if (!*tt) { zerr("missing end of string"); diff --git a/Src/params.c b/Src/params.c index e60c8c740..7d7f0e8e7 100644 --- a/Src/params.c +++ b/Src/params.c @@ -947,7 +947,7 @@ getarg(char **str, int *inv, Value v, int a2, zlong *w, int *prevcharlen, int *nextcharlen) { int hasbeg = 0, word = 0, rev = 0, ind = 0, down = 0, l, i, ishash; - int keymatch = 0, needtok = 0; + int keymatch = 0, needtok = 0, arglen; char *s = *str, *sep = NULL, *t, sav, *d, **ta, **p, *tt, c; zlong num = 1, beg = 0, r = 0; Patprog pprog = NULL; @@ -1004,28 +1004,28 @@ getarg(char **str, int *inv, Value v, int a2, zlong *w, * special interpretation by getindex() of `*' or `@'. */ break; case 'n': - t = get_strarg(++s); + t = get_strarg(++s, &arglen); if (!*t) goto flagerr; sav = *t; *t = '\0'; - num = mathevalarg(s + 1, &d); + num = mathevalarg(s + arglen, &d); if (!num) num = 1; *t = sav; - s = t; + s = t + arglen - 1; break; case 'b': hasbeg = 1; - t = get_strarg(++s); + t = get_strarg(++s, &arglen); if (!*t) goto flagerr; sav = *t; *t = '\0'; - if ((beg = mathevalarg(s + 1, &d)) > 0) + if ((beg = mathevalarg(s + arglen, &d)) > 0) beg--; *t = sav; - s = t; + s = t + arglen - 1; break; case 'p': escapes = 1; @@ -1033,15 +1033,16 @@ getarg(char **str, int *inv, Value v, int a2, zlong *w, case 's': /* This gives the string that separates words * * (for use with the `w' flag). */ - t = get_strarg(++s); + t = get_strarg(++s, &arglen); if (!*t) goto flagerr; sav = *t; *t = '\0'; - sep = escapes ? getkeystring(s + 1, &waste, GETKEYS_SEP, NULL) - : dupstring(s + 1); + s += arglen; + sep = escapes ? getkeystring(s, &waste, GETKEYS_SEP, NULL) + : dupstring(s); *t = sav; - s = t; + s = t + arglen - 1; break; default: flagerr: diff --git a/Src/subst.c b/Src/subst.c index abc3c82af..3a5b9b353 100644 --- a/Src/subst.c +++ b/Src/subst.c @@ -1137,62 +1137,113 @@ dopadding(char *str, int prenum, int postnum, char *preone, char *postone, return ret; } + +/* + * Look for a delimited portion of a string. The first (possibly + * multibyte) character at s is the delimiter. Various forms + * of brackets are treated separately, as documented. + * + * Returns a pointer to the final delimiter. Sets *len to the + * length of the final delimiter; a NULL causes *len to be set + * to zero since we shouldn't advance past it. (The string is + * tokenized, so a NULL is a real end of string.) + */ + /**/ char * -get_strarg(char *s) +get_strarg(char *s, int *lenp) { - char t = *s++; + convchar_t del; + int len; + char tok = 0; - if (!t) - return s - 1; + MB_METACHARINIT(); + len = MB_METACHARLENCONV(s, &del); + if (!len) { + *lenp = 0; + return s; + } - switch (t) { - case '(': - t = ')'; +#ifdef MULTIBYTE_SUPPORT + if (del == WEOF) + del = (wint_t)((*s == Meta) ? s[1] ^ 32 : *s); +#endif + s += len; + switch (del) { + case ZWC('('): + del = ZWC(')'); break; case '[': - t = ']'; + del = ZWC(']'); break; case '{': - t = '}'; + del = ZWC('}'); break; case '<': - t = '>'; + del = ZWC('>'); break; case Inpar: - t = Outpar; + tok = Outpar; break; case Inang: - t = Outang; + tok = Outang; break; case Inbrace: - t = Outbrace; + tok = Outbrace; break; case Inbrack: - t = Outbrack; + tok = Outbrack; break; } - while (*s && *s != t) - s++; + if (tok) { + /* + * Looking for a matching token; we want the literal byte, + * not a decoded multibyte character, so search specially. + */ + while (*s && *s != tok) + s++; + } else { + convchar_t del2; + len = 0; + while (*s) { + len = MB_METACHARLENCONV(s, &del2); +#ifdef MULTIBYTE_SUPPORT + if (del2 == WEOF) + del2 = (wint_t)((*s == Meta) ? s[1] ^ 32 : *s); +#endif + if (del == del2) + break; + s += len; + } + } + *lenp = len; return s; } +/* + * Get an integer argument; update *s to the end of the + * final delimiter. *delmatchp is set to 1 if we have matching + * delimiters and there was no error in the evaluation, else 0. + */ + /**/ static int -get_intarg(char **s) +get_intarg(char **s, int *delmatchp) { - char *t = get_strarg(*s + 1); + int arglen; + char *t = get_strarg(*s, &arglen); char *p, sav; zlong ret; + *delmatchp = 0; if (!*t) return -1; sav = *t; *t = '\0'; - p = dupstring(*s + 2); - *s = t; + p = dupstring(*s + arglen); + *s = t + arglen; *t = sav; if (parsestr(p)) return -1; @@ -1204,6 +1255,7 @@ get_intarg(char **s) return -1; if (ret < 0) ret = -ret; + *delmatchp = 1; return ret < 0 ? -ret : ret; } @@ -1540,8 +1592,8 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) int escapes = 0; int klen; #define UNTOK(C) (itok(C) ? ztokens[(C) - Pound] : (C)) -#define UNTOK_AND_ESCAPE(X) {\ - untokenize(X = dupstring(s + 1));\ +#define UNTOK_AND_ESCAPE(X, S) {\ + untokenize(X = dupstring(S));\ if (escapes) {\ X = getkeystring(X, &klen, GETKEYS_SEP, NULL);\ X = metafy(X, klen, META_HREALLOC);\ @@ -1549,6 +1601,9 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) } for (s++; (c = *s) != ')' && c != Outpar; s++, tt = 0) { + int arglen; /* length of modifier argument */ + int delmatch; /* integer delimiters matched OK */ + switch (c) { case ')': case Outpar: @@ -1578,9 +1633,11 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) flags |= SUB_SUBSTR; break; case 'I': - flnum = get_intarg(&s); + s++; + flnum = get_intarg(&s, &delmatch); if (flnum < 0) goto flagerr; + s--; break; case 'L': @@ -1658,16 +1715,16 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) tt = 1; /* fall through */ case 'j': - t = get_strarg(++s); + t = get_strarg(++s, &arglen); if (*t) { sav = *t; *t = '\0'; if (tt) - UNTOK_AND_ESCAPE(spsep) + UNTOK_AND_ESCAPE(spsep, s + arglen) else - UNTOK_AND_ESCAPE(sep) + UNTOK_AND_ESCAPE(sep, s + arglen) *t = sav; - s = t; + s = t + arglen - 1; } else goto flagerr; break; @@ -1676,43 +1733,43 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) tt = 1; /* fall through */ case 'r': - sav = s[1]; - num = get_intarg(&s); + s++; + num = get_intarg(&s, &delmatch); if (num < 0) goto flagerr; if (tt) prenum = num; else postnum = num; - if (UNTOK(s[1]) != UNTOK(sav)) + if (!delmatch) break; - t = get_strarg(++s); + t = get_strarg(s, &arglen); if (!*t) goto flagerr; sav = *t; *t = '\0'; if (tt) - UNTOK_AND_ESCAPE(premul) + UNTOK_AND_ESCAPE(premul, s + arglen) else - UNTOK_AND_ESCAPE(postmul) + UNTOK_AND_ESCAPE(postmul, s + arglen) *t = sav; sav = *s; - s = t + 1; + s = t + arglen; if (UNTOK(*s) != UNTOK(sav)) { s--; break; } - t = get_strarg(s); + t = get_strarg(s, &arglen); if (!*t) goto flagerr; sav = *t; *t = '\0'; if (tt) - UNTOK_AND_ESCAPE(preone) + UNTOK_AND_ESCAPE(preone, s + arglen) else - UNTOK_AND_ESCAPE(postone) + UNTOK_AND_ESCAPE(postone, s + arglen) *t = sav; - s = t; + s = t + arglen - 1; break; case 'm': @@ -3251,9 +3308,10 @@ arithsubst(char *a, char **bptr, char *rest) void modify(char **str, char **ptr) { - char *ptr1, *ptr2, *ptr3, del, *lptr, c, *test, *sep, *t, *tt, tc, *e; - char *copy, *all, *tmp, sav; - int gbal, wall, rec, al, nl; + char *ptr1, *ptr2, *ptr3, *lptr, c, *test, *sep, *t, *tt, tc, *e; + char *copy, *all, *tmp, sav, sav1, *ptr1end; + int gbal, wall, rec, al, nl, charlen, delmatch; + convchar_t del; test = NULL; @@ -3282,20 +3340,48 @@ modify(char **str, char **ptr) break; case 's': - /* TODO: multibyte delimiter */ c = **ptr; (*ptr)++; ptr1 = *ptr; - del = *ptr1++; - for (ptr2 = ptr1; *ptr2 != del && *ptr2; ptr2++); + MB_METACHARINIT(); + charlen = MB_METACHARLENCONV(ptr1, &del); +#ifdef MULTIBYTE_SUPPORT + if (del == WEOF) + del = (wint_t)((*ptr1 == Meta) ? ptr1[1] ^ 32 : *ptr1); +#endif + ptr1 += charlen; + for (ptr2 = ptr1, charlen = 0; *ptr2; ptr2 += charlen) { + convchar_t del2; + charlen = MB_METACHARLENCONV(ptr2, &del2); +#ifdef MULTIBYTE_SUPPORT + if (del2 == WEOF) + del2 = (wint_t)((*ptr2 == Meta) ? + ptr2[1] ^ 32 : *ptr2); +#endif + if (del2 == del) + break; + } if (!*ptr2) { zerr("bad substitution"); return; } - *ptr2++ = '\0'; - for (ptr3 = ptr2; *ptr3 != del && *ptr3; ptr3++); - if ((sav = *ptr3)) - *ptr3++ = '\0'; + ptr1end = ptr2; + ptr2 += charlen; + sav1 = *ptr1end; + *ptr1end = '\0'; + for (ptr3 = ptr2, charlen = 0; *ptr3; ptr3 += charlen) { + convchar_t del3; + charlen = MB_METACHARLENCONV(ptr3, &del3); +#ifdef MULTIBYTE_SUPPORT + if (del3 == WEOF) + del3 = (wint_t)((*ptr3 == Meta) ? + ptr3[1] ^ 32 : *ptr3); +#endif + if (del3 == del) + break; + } + sav = *ptr3; + *ptr3 = '\0'; if (*ptr1) { zsfree(hsubl); hsubl = ztrdup(ptr1); @@ -3313,10 +3399,9 @@ modify(char **str, char **ptr) for (tt = hsubr = ztrdup(ptr2); *tt; tt++) if (inull(*tt) && *tt != Bnullkeep) chuck(tt--); - ptr2[-1] = del; - if (sav) - ptr3[-1] = sav; - *ptr = ptr3 - 1; + *ptr1end = sav1; + *ptr3 = sav; + *ptr = ptr3 + charlen - 1; break; case '&': @@ -3335,13 +3420,13 @@ modify(char **str, char **ptr) case 'W': wall = 1; (*ptr)++; - ptr1 = get_strarg(ptr2 = *ptr); + ptr1 = get_strarg(ptr2 = *ptr, &charlen); if ((sav = *ptr1)) *ptr1 = '\0'; - sep = dupstring(ptr2 + 1); + sep = dupstring(ptr2 + charlen); if (sav) *ptr1 = sav; - *ptr = ptr1 + 1; + *ptr = ptr1 + charlen; c = '\0'; break; @@ -3350,8 +3435,8 @@ modify(char **str, char **ptr) (*ptr)++; break; case 'F': - rec = get_intarg(ptr); (*ptr)++; + rec = get_intarg(ptr, &delmatch); break; default: *ptr = lptr; diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst index 57147d53e..ce5898f88 100644 --- a/Test/D04parameter.ztst +++ b/Test/D04parameter.ztst @@ -867,3 +867,17 @@ >andsomekept >andsomekept + file=/one/two/three/four + print ${file:fh} + print ${file:F.1.h} + print ${file:F+2+h} + print ${file:F(3)h} + print ${file:F<4>h} + print ${file:F{5}h} +0:Modifiers with repetition +>/ +>/one/two/three +>/one/two +>/one +>/ +>/ diff --git a/Test/D07multibyte.ztst b/Test/D07multibyte.ztst index 8b17a7294..752013eec 100644 --- a/Test/D07multibyte.ztst +++ b/Test/D07multibyte.ztst @@ -297,3 +297,17 @@ >«κατέβην ¥«χθὲς»£ ¥¥«εἰς»£ «Πειραιᾶ >ςκατέβην ηςχθὲςΓλ τηςεἰςΓλ ςΠειραιᾶ # er... yeah, that looks right... + + foo=picobarn + print ${foo:s£bar£rod£:s¥rod¥stick¥} +0:Delimiters in modifiers +>picostickn + +# TODO: if we get paired multibyte bracket delimiters to work +# (as Emacs does, the smug so-and-so), the following should change. + foo=bar + print ${(r£5£¥X¥)foo} + print ${(l«10«»Y»£HI£)foo} +0:Delimiters in parameter flags +>barXX +>YYYYYHIbar -- cgit 1.4.1