about summary refs log tree commit diff
diff options
context:
space:
mode:
author Peter Stephenson <pws@users.sourceforge.net> 2008-03-25 17:47:10 +0000
committer Peter Stephenson <pws@users.sourceforge.net> 2008-03-25 17:47:10 +0000
commit 56e1904e45cc474a9858dc1205055008f8c63752 (patch)
tree 075e089f92f9916e36feee06c38321c58dd1a97d
parent 238df34af407d755a9c6fc94cade1272c7ea5b62 (diff)
download zsh-56e1904e45cc474a9858dc1205055008f8c63752.tar.gz
zsh-56e1904e45cc474a9858dc1205055008f8c63752.tar.xz
zsh-56e1904e45cc474a9858dc1205055008f8c63752.zip
fix optimisation of string matching with multibyte mode
24732: attempt to provide adequate space for sched ztrftime string with
multibyte characters
-rw-r--r-- ChangeLog 10
-rw-r--r-- Doc/Zsh/params.yo 30
-rw-r--r-- Src/Builtins/sched.c 4
-rw-r--r-- Src/params.c 10
-rw-r--r-- Src/pattern.c 2
-rw-r--r-- Test/D04parameter.ztst 37
6 files changed, 72 insertions, 21 deletions
diff --git a/ChangeLog b/ChangeLog
index 211a5b42b..6f2e0d04e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2008-03-25  Peter Stephenson  <p.w.stephenson@ntlworld.com>
+
+	* 24732: Src/Builtins/sched.c: not enough space for ztrftime
+	string with multibyte characters.
+
+	* 24731: Doc/Zsh/params.yo, Src/params.c, Src/pattern.c,
+	Test/D04parameter.ztst: use [(e)...] parameter flag to quote
+	pattern characters, also fix bug that optimization of matches
+	with no pattern characters didn't work for multibyte matching.
+
 2008-03-23  Clint Adams  <clint@zsh.org>
 
 	* 24730: Completion/Unix/Command/_git: use --no-color when
diff --git a/Doc/Zsh/params.yo b/Doc/Zsh/params.yo
index 797fc3e0e..48e1838bf 100644
--- a/Doc/Zsh/params.yo
+++ b/Doc/Zsh/params.yo
@@ -234,16 +234,14 @@ subscript 0; this is empty unless one of the options tt(KSH_ARRAYS) or
 tt(KSH_ZERO_SUBSCRIPT) is in effect.
 
 Note that in subscripts with both `tt(r)' and `tt(R)' pattern characters
-are active even if they were substituted for a parameter (regardless
-of the setting of tt(GLOB_SUBST) which controls this feature in normal
-pattern matching).  It is therefore necessary to quote pattern characters
-for an exact string match.  Given a string in tt($key), and assuming
-the tt(EXTENDED_GLOB) option is set, the following is sufficient to
-match an element of an array tt($array) containing exactly the value of
-tt($key):
+are active even if they were substituted for a parameter (regardless of the
+setting of tt(GLOB_SUBST) which controls this feature in normal pattern
+matching).  The flag `tt(e)' can be added to inhibit pattern matching.  As
+this flag does not inhibit other forms of substitution, care is still
+required; using a parameter to hold the key has the desired effect:
 
-example(key2=${key//(#m)[\][+LPAR()+RPAR()\\*?#<>~^]/\\$MATCH}
-print ${array[(R)$key2]})
+example(key2='original key'
+print ${array[(Re)$key2]})
 )
 )
 item(tt(i))(
@@ -286,11 +284,15 @@ evaluates to var(n)).  This flag is ignored when the array is associative.
 The delimiter character tt(:) is arbitrary; see above.
 )
 item(tt(e))(
-This flag has no effect and for ordinary arrays is retained for backward
-compatibility only.  For associative arrays, this flag can be used to
-force tt(*) or tt(@) to be interpreted as a single key rather than as a
-reference to all values.  This flag may be used on the left side of an
-assignment.
+This flag causes any pattern matching that would be performed on the
+subscript to use plain string matching instead.  Hence
+`tt(${array[(re)*]})' matches only the array element whose value is tt(*).
+Note that other forms of substitution such as parameter substitution are
+not inhibited.
+
+This flag can also be used to force tt(*) or tt(@) to be interpreted as
+a single key rather than as a reference to all values.  It may be used
+for either purpose on the left side of an assignment.
 )
 enditem()
 
diff --git a/Src/Builtins/sched.c b/Src/Builtins/sched.c
index df4b9ecf9..3ddde6558 100644
--- a/Src/Builtins/sched.c
+++ b/Src/Builtins/sched.c
@@ -205,13 +205,13 @@ bin_sched(char *nam, char **argv, UNUSED(Options ops), UNUSED(int func))
     /* given no arguments, display the schedule list */
     if (!*argptr) {
 	for (sn = 1, sch = schedcmds; sch; sch = sch->next, sn++) {
-	    char tbuf[40], *flagstr, *endstr;
+	    char tbuf[60], *flagstr, *endstr;
 	    time_t t;
 	    struct tm *tmp;
 
 	    t = sch->time;
 	    tmp = localtime(&t);
-	    ztrftime(tbuf, 20, "%a %b %e %k:%M:%S", tmp);
+	    ztrftime(tbuf, 40, "%a %b %e %k:%M:%S", tmp);
 	    if (sch->flags & SCHEDFLAG_TRASH_ZLE)
 		flagstr = "-o ";
 	    else
diff --git a/Src/params.c b/Src/params.c
index d7135c438..bbacb5476 100644
--- a/Src/params.c
+++ b/Src/params.c
@@ -1007,7 +1007,7 @@ getarg(char **str, int *inv, Value v, int a2, zlong *w,
     int hasbeg = 0, word = 0, rev = 0, ind = 0, down = 0, l, i, ishash;
     int keymatch = 0, needtok = 0, arglen, len;
     char *s = *str, *sep = NULL, *t, sav, *d, **ta, **p, *tt, c;
-    zlong num = 1, beg = 0, r = 0;
+    zlong num = 1, beg = 0, r = 0, quote_arg = 0;
     Patprog pprog = NULL;
 
     ishash = (v->pm && PM_TYPE(v->pm->node.flags) == PM_HASHED);
@@ -1058,8 +1058,7 @@ getarg(char **str, int *inv, Value v, int a2, zlong *w,
 		sep = "\n";
 		break;
 	    case 'e':
-		/* Compatibility flag with no effect except to prevent *
-		 * special interpretation by getindex() of `*' or `@'. */
+		quote_arg = 1;
 		break;
 	    case 'n':
 		t = get_strarg(++s, &arglen);
@@ -1286,7 +1285,10 @@ getarg(char **str, int *inv, Value v, int a2, zlong *w,
 	    }
 	}
 	if (!keymatch) {
-	    tokenize(s);
+	    if (quote_arg)
+		untokenize(s);
+	    else
+		tokenize(s);
 	    remnulargs(s);
 	    pprog = patcompile(s, 0, NULL);
 	} else
diff --git a/Src/pattern.c b/Src/pattern.c
index 26b06513e..c9a93cd0e 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -511,7 +511,7 @@ patcompile(char *exp, int inflags, char **endexp)
 
     if (!(patflags & PAT_ANY)) {
 	/* Look for a really pure string, with no tokens at all. */
-	if (!patglobflags
+	if (!(patglobflags & ~GF_MULTIBYTE)
 #ifdef __CYGWIN__
 	    /*
 	     * If the OS treats files case-insensitively and we
diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst
index 79aecb6c3..433f7743b 100644
--- a/Test/D04parameter.ztst
+++ b/Test/D04parameter.ztst
@@ -282,6 +282,7 @@
   print ${(P)bar}
 0:${(P)...}
 >I'm nearly out of my mind with tedium
+#' deconfuse emacs
 
   foo=(I could be watching that programme I recorded)
   print ${(o)foo}
@@ -375,6 +376,7 @@
   print ${(QX)foo}
 1:${(QX)...}
 ?(eval):2: unmatched "
+# " deconfuse emacs
 
   array=(characters in an array)
   print ${(c)#array}
@@ -411,6 +413,7 @@
   print ${(pl.10..\x22..X.)foo}
 0:${(pl...)...}
 >Xresulting """"Xwords roariously """Xpadded
+#" deconfuse emacs
 
   print ${(l.5..X.r.5..Y.)foo}
   print ${(l.6..X.r.4..Y.)foo}
@@ -870,6 +873,7 @@
 0:Parameters associated with backreferences
 >match 12 16 match
 >1 1 1
+#' deconfuse emacs
 
   string='and look for a MATCH in here'
   if [[ ${(S)string%%(#m)M*H} = "and look for a  in here" ]]; then
@@ -1010,3 +1014,36 @@
 >fields
 >in
 >it
+
+  array=('%' '$' 'j' '*' '$foo')
+  print ${array[(i)*]} "${array[(i)*]}"
+  print ${array[(ie)*]} "${array[(ie)*]}"
+  key='$foo'
+  print ${array[(ie)$key]} "${array[(ie)$key]}"
+  key='*'
+  print ${array[(ie)$key]} "${array[(ie)$key]}"
+0:Matching array indices with and without quoting
+>1 1
+>4 4
+>5 5
+>4 4
+
+# Ordering of associative arrays is arbitrary, so we need to use
+# patterns that only match one element.
+  typeset -A assoc_r
+  assoc_r=(star '*' of '*this*' and '!that!' or '(the|other)')
+  print ${(kv)assoc_r[(re)*]}
+  print ${(kv)assoc_r[(re)*this*]}
+  print ${(kv)assoc_r[(re)!that!]}
+  print ${(kv)assoc_r[(re)(the|other)]}
+  print ${(kv)assoc_r[(r)*at*]}
+  print ${(kv)assoc_r[(r)*(ywis|bliss|kiss|miss|this)*]}
+  print ${(kv)assoc_r[(r)(this|that|\(the\|other\))]}
+0:Reverse subscripting associative arrays with literal matching
+>star *
+>of *this*
+>and !that!
+>or (the|other)
+>and !that!
+>of *this*
+>or (the|other)