From 56e1904e45cc474a9858dc1205055008f8c63752 Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Tue, 25 Mar 2008 17:47:10 +0000 Subject: fix optimisation of string matching with multibyte mode 24732: attempt to provide adequate space for sched ztrftime string with multibyte characters --- ChangeLog | 10 ++++++++++ Doc/Zsh/params.yo | 30 ++++++++++++++++-------------- Src/Builtins/sched.c | 4 ++-- Src/params.c | 10 ++++++---- Src/pattern.c | 2 +- Test/D04parameter.ztst | 37 +++++++++++++++++++++++++++++++++++++ 6 files changed, 72 insertions(+), 21 deletions(-) diff --git a/ChangeLog b/ChangeLog index 211a5b42b..6f2e0d04e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +2008-03-25 Peter Stephenson + + * 24732: Src/Builtins/sched.c: not enough space for ztrftime + string with multibyte characters. + + * 24731: Doc/Zsh/params.yo, Src/params.c, Src/pattern.c, + Test/D04parameter.ztst: use [(e)...] parameter flag to quote + pattern characters, also fix bug that optimization of matches + with no pattern characters didn't work for multibyte matching. + 2008-03-23 Clint Adams * 24730: Completion/Unix/Command/_git: use --no-color when diff --git a/Doc/Zsh/params.yo b/Doc/Zsh/params.yo index 797fc3e0e..48e1838bf 100644 --- a/Doc/Zsh/params.yo +++ b/Doc/Zsh/params.yo @@ -234,16 +234,14 @@ subscript 0; this is empty unless one of the options tt(KSH_ARRAYS) or tt(KSH_ZERO_SUBSCRIPT) is in effect. Note that in subscripts with both `tt(r)' and `tt(R)' pattern characters -are active even if they were substituted for a parameter (regardless -of the setting of tt(GLOB_SUBST) which controls this feature in normal -pattern matching). It is therefore necessary to quote pattern characters -for an exact string match. 
Given a string in tt($key), and assuming -the tt(EXTENDED_GLOB) option is set, the following is sufficient to -match an element of an array tt($array) containing exactly the value of -tt($key): +are active even if they were substituted for a parameter (regardless of the +setting of tt(GLOB_SUBST) which controls this feature in normal pattern +matching). The flag `tt(e)' can be added to inhibit pattern matching. As +this flag does not inhibit other forms of substitution, care is still +required; using a parameter to hold the key has the desired effect: -example(key2=${key//(#m)[\][+LPAR()+RPAR()\\*?#<>~^]/\\$MATCH} -print ${array[(R)$key2]}) +example(key2='original key' +print ${array[(Re)$key2]}) ) ) item(tt(i))( @@ -286,11 +284,15 @@ evaluates to var(n)). This flag is ignored when the array is associative. The delimiter character tt(:) is arbitrary; see above. ) item(tt(e))( -This flag has no effect and for ordinary arrays is retained for backward -compatibility only. For associative arrays, this flag can be used to -force tt(*) or tt(@) to be interpreted as a single key rather than as a -reference to all values. This flag may be used on the left side of an -assignment. +This flag causes any pattern matching that would be performed on the +subscript to use plain string matching instead. Hence +`tt(${array[(re)*]})' matches only the array element whose value is tt(*). +Note that other forms of substitution such as parameter substitution are +not inhibited. + +This flag can also be used to force tt(*) or tt(@) to be interpreted as +a single key rather than as a reference to all values. It may be used +for either purpose on the left side of an assignment. 
) enditem() diff --git a/Src/Builtins/sched.c b/Src/Builtins/sched.c index df4b9ecf9..3ddde6558 100644 --- a/Src/Builtins/sched.c +++ b/Src/Builtins/sched.c @@ -205,13 +205,13 @@ bin_sched(char *nam, char **argv, UNUSED(Options ops), UNUSED(int func)) /* given no arguments, display the schedule list */ if (!*argptr) { for (sn = 1, sch = schedcmds; sch; sch = sch->next, sn++) { - char tbuf[40], *flagstr, *endstr; + char tbuf[60], *flagstr, *endstr; time_t t; struct tm *tmp; t = sch->time; tmp = localtime(&t); - ztrftime(tbuf, 20, "%a %b %e %k:%M:%S", tmp); + ztrftime(tbuf, 40, "%a %b %e %k:%M:%S", tmp); if (sch->flags & SCHEDFLAG_TRASH_ZLE) flagstr = "-o "; else diff --git a/Src/params.c b/Src/params.c index d7135c438..bbacb5476 100644 --- a/Src/params.c +++ b/Src/params.c @@ -1007,7 +1007,7 @@ getarg(char **str, int *inv, Value v, int a2, zlong *w, int hasbeg = 0, word = 0, rev = 0, ind = 0, down = 0, l, i, ishash; int keymatch = 0, needtok = 0, arglen, len; char *s = *str, *sep = NULL, *t, sav, *d, **ta, **p, *tt, c; - zlong num = 1, beg = 0, r = 0; + zlong num = 1, beg = 0, r = 0, quote_arg = 0; Patprog pprog = NULL; ishash = (v->pm && PM_TYPE(v->pm->node.flags) == PM_HASHED); @@ -1058,8 +1058,7 @@ getarg(char **str, int *inv, Value v, int a2, zlong *w, sep = "\n"; break; case 'e': - /* Compatibility flag with no effect except to prevent * - * special interpretation by getindex() of `*' or `@'. */ + quote_arg = 1; break; case 'n': t = get_strarg(++s, &arglen); @@ -1286,7 +1285,10 @@ getarg(char **str, int *inv, Value v, int a2, zlong *w, } } if (!keymatch) { - tokenize(s); + if (quote_arg) + untokenize(s); + else + tokenize(s); remnulargs(s); pprog = patcompile(s, 0, NULL); } else diff --git a/Src/pattern.c b/Src/pattern.c index 26b06513e..c9a93cd0e 100644 --- a/Src/pattern.c +++ b/Src/pattern.c @@ -511,7 +511,7 @@ patcompile(char *exp, int inflags, char **endexp) if (!(patflags & PAT_ANY)) { /* Look for a really pure string, with no tokens at all. 
*/ - if (!patglobflags + if (!(patglobflags & ~GF_MULTIBYTE) #ifdef __CYGWIN__ /* * If the OS treats files case-insensitively and we diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst index 79aecb6c3..433f7743b 100644 --- a/Test/D04parameter.ztst +++ b/Test/D04parameter.ztst @@ -282,6 +282,7 @@ print ${(P)bar} 0:${(P)...} >I'm nearly out of my mind with tedium +#' deconfuse emacs foo=(I could be watching that programme I recorded) print ${(o)foo} @@ -375,6 +376,7 @@ print ${(QX)foo} 1:${(QX)...} ?(eval):2: unmatched " +# " deconfuse emacs array=(characters in an array) print ${(c)#array} @@ -411,6 +413,7 @@ print ${(pl.10..\x22..X.)foo} 0:${(pl...)...} >Xresulting """"Xwords roariously """Xpadded +#" deconfuse emacs print ${(l.5..X.r.5..Y.)foo} print ${(l.6..X.r.4..Y.)foo} @@ -870,6 +873,7 @@ 0:Parameters associated with backreferences >match 12 16 match >1 1 1 +#' deconfuse emacs string='and look for a MATCH in here' if [[ ${(S)string%%(#m)M*H} = "and look for a in here" ]]; then @@ -1010,3 +1014,36 @@ >fields >in >it + + array=('%' '$' 'j' '*' '$foo') + print ${array[(i)*]} "${array[(i)*]}" + print ${array[(ie)*]} "${array[(ie)*]}" + key='$foo' + print ${array[(ie)$key]} "${array[(ie)$key]}" + key='*' + print ${array[(ie)$key]} "${array[(ie)$key]}" +0:Matching array indices with and without quoting +>1 1 +>4 4 +>5 5 +>4 4 + +# Ordering of associative arrays is arbitrary, so we need to use +# patterns that only match one element. + typeset -A assoc_r + assoc_r=(star '*' of '*this*' and '!that!' or '(the|other)') + print ${(kv)assoc_r[(re)*]} + print ${(kv)assoc_r[(re)*this*]} + print ${(kv)assoc_r[(re)!that!]} + print ${(kv)assoc_r[(re)(the|other)]} + print ${(kv)assoc_r[(r)*at*]} + print ${(kv)assoc_r[(r)*(ywis|bliss|kiss|miss|this)*]} + print ${(kv)assoc_r[(r)(this|that|\(the\|other\))]} +0:Reverse subscripting associative arrays with literal matching +>star * +>of *this* +>and !that! +>or (the|other) +>and !that! 
+>of *this* +>or (the|other) -- cgit 1.4.1