summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog8
-rw-r--r--Completion/compinit17
-rw-r--r--Doc/Zsh/expn.yo51
-rw-r--r--Doc/Zsh/options.yo9
-rw-r--r--INSTALL2
-rw-r--r--README7
-rw-r--r--Src/glob.c27
-rw-r--r--Src/hist.c109
-rw-r--r--Src/options.c1
-rw-r--r--Src/subst.c34
-rw-r--r--Src/zsh.h4
-rw-r--r--Test/E01options.ztst14
12 files changed, 222 insertions, 61 deletions
diff --git a/ChangeLog b/ChangeLog
index 322c96862..f0709eda9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2006-11-01  Peter Stephenson  <pws@csr.com>
+
+	* 22934, with modifications (c.f. 22937): INSTALL, README,
+	Completion/compinit, Doc/Zsh/expn.yo, Doc/Zsh/options.yo,
+	Src/glob.c, Src/hist.c, Src/options.c, Src/subst.c, Src/zsh.h,
+	Test/E01options.ztst: add HIST_SUBST_PATTERN option, make
+	${.../#%...} anchor at both ends.
+
 2006-11-01  Clint Adams  <clint@zsh.org>
 
 	* 22940: R. Ramkumar: Completion/Unix/Command/_mkdir:
diff --git a/Completion/compinit b/Completion/compinit
index a78fd1ce2..ad24ac7a5 100644
--- a/Completion/compinit
+++ b/Completion/compinit
@@ -128,25 +128,26 @@ fi
 # The standard options set in completion functions.
 
 _comp_options=(
-       extendedglob
        bareglobqual
+       extendedglob
        glob
        multibyte
        nullglob
        rcexpandparam
        unset
-    NO_markdirs
+    NO_allexport
+    NO_aliases
+    NO_cshnullglob
+    NO_errexit
     NO_globsubst
-    NO_shwordsplit
-    NO_shglob
+    NO_histsubstpattern
     NO_kshglob
     NO_ksharrays
     NO_kshtypeset
-    NO_cshnullglob
-    NO_allexport
-    NO_aliases
-    NO_errexit
+    NO_markdirs
     NO_octalzeroes
+    NO_shwordsplit
+    NO_shglob
     NO_warncreateglobal
 )
 
diff --git a/Doc/Zsh/expn.yo b/Doc/Zsh/expn.yo
index 6aa31c76a..584977c18 100644
--- a/Doc/Zsh/expn.yo
+++ b/Doc/Zsh/expn.yo
@@ -258,7 +258,8 @@ item(tt(s/)var(l)tt(/)var(r)[tt(/)])(
 Substitute var(r) for var(l) as described below.
 The substitution is done only for the
 first string that matches var(l).  For arrays and for filename
-generation, this applies to each word of the expanded text.
+generation, this applies to each word of the expanded text.  See
+below for further notes on substitutions.
 
 The forms `tt(gs/)var(l)tt(/)var(r)' and `tt(s/)var(l)tt(/)var(r)tt(/:G)'
 perform global substitution, i.e. substitute every occurrence of var(r)
@@ -273,8 +274,8 @@ backslash.
 )
 enditem()
 
-The tt(s/l/r/) substitution works as follows.  The left-hand side of
-substitutions are not regular expressions, but character strings.  Any
+The tt(s/l/r/) substitution works as follows.  By default the left-hand
+side of a substitution is not a pattern, but a character string.  Any
 character can be used as the delimiter in place of `tt(/)'.  A
 backslash quotes the delimiter character.  The character `tt(&)', in
 the right-hand-side var(r), is replaced by the text from the
@@ -286,6 +287,41 @@ the rightmost `tt(?)' in a context scan can similarly be omitted.
 Note the same record of the last var(l) and var(r) is maintained
 across all forms of expansion.
 
+If the option tt(HIST_SUBST_PATTERN) is set, var(l) is treated as
+a pattern of the usual form described in
+ifzman(the section FILENAME GENERATION below)\
+ifnzman(noderef(Filename Generation)).  This can be used in
+all the places where modifiers are available; note, however, that
+in globbing qualifiers parameter substitution has already taken place,
+so parameters in the replacement string should be quoted to ensure
+they are replaced at the correct time.
+Note also that complicated patterns used in globbing qualifiers may
+need the extended glob qualifier notation
+tt(LPAR()#q:s/)var(...)tt(/)var(...)tt(/RPAR()) in order for the
+shell to recognize the expression as a glob qualifier.  Further,
+note that bad patterns in the substitution are not subject to
+the tt(NO_BAD_PATTERN) option so will cause an error.
+
+When tt(HIST_SUBST_PATTERN) is set, var(l) may start with a tt(#)
+to indicate that the pattern must match at the start of the string
+to be substituted, and a tt(%) may appear at the start or after a tt(#)
+to indicate that the pattern must match at the end of the string
+to be substituted.  The tt(%) or tt(#) may be quoted with two
+backslashes.
+
+For example, the following piece of filename generation code
+with the tt(EXTENDED_GLOB) option:
+
+example(print *.c+LPAR()#q:s/#%+LPAR()#b+RPAR()s+LPAR()*+RPAR().c/'S${match[1]}.C'/+RPAR())
+
+takes the expansion of tt(*.c) and applies the glob qualifiers in the
+tt(LPAR()#q)var(...)tt(RPAR()) expression, which consists of a substitution
+modifier anchored to the start and end of each word (tt(#%)).  This
+turns on backreferences (tt(LPAR()#b+RPAR())), so that the parenthesised
+subexpression is available in the replacement string as tt(${match[1]}).
+The replacement string is quoted so that the parameter is not substituted
+before the start of filename generation.
+
 The following tt(f), tt(F), tt(w) and tt(W) modifiers work only with
 parameter expansion and filename generation.  They are listed here to
 provide a single point of reference for all modifiers.
@@ -530,13 +566,14 @@ substituted as tt(${~opat}).
 
 The var(pattern) may begin with a `tt(#)', in which case the
 var(pattern) must match at the start of the string, or `tt(%)', in
-which case it must match at the end of the string.  The var(repl) may
+which case it must match at the end of the string, or `tt(#%)' in which
+case the var(pattern) must match the entire string.  The var(repl) may
 be an empty string, in which case the final `tt(/)' may also be omitted.
 To quote the final `tt(/)' in other cases it should be preceded by a
 single backslash; this is not necessary if the
-`tt(/)' occurs inside a substituted parameter.  Note also that the `tt(#)'
-and `tt(%)' are not active if they occur inside a substituted parameter,
-even at the start.
+`tt(/)' occurs inside a substituted parameter.  Note also that the `tt(#)',
+`tt(%)' and `tt(#%)' are not active if they occur inside a substituted
+parameter, even at the start.
 
 The first `tt(/)' may be preceded by a `tt(:)', in which case the match
 will only succeed if it matches the entire word.  Note also the
diff --git a/Doc/Zsh/options.yo b/Doc/Zsh/options.yo
index 02d8fa046..d4e1deeef 100644
--- a/Doc/Zsh/options.yo
+++ b/Doc/Zsh/options.yo
@@ -376,6 +376,15 @@ characters resulting from command substitution as being eligible for
 filename generation.  Braces (and commas in between) do not become eligible
 for expansion.
 )
+pindex(HIST_SUBST_PATTERN)
+item(tt(HIST_SUBST_PATTERN))(
+Substitutions using the tt(:s) and tt(:&) history modifiers are performed
+with pattern matching instead of string matching.  This occurs wherever
+history modifiers are valid, including glob qualifiers and parameters.
+See
+ifzman(the section Modifiers in zmanref(zshexp))\
+ifnzman(noderef(Modifiers)).
+)
 pindex(IGNORE_BRACES)
 cindex(disabling brace expansion)
 cindex(brace expansion, disabling)
diff --git a/INSTALL b/INSTALL
index 1c76107e0..7f604a27a 100644
--- a/INSTALL
+++ b/INSTALL
@@ -270,7 +270,7 @@ handled properly (some assistance with this problem would be appreciated).
 The configuration script should turn on multibyte support on all systems
 where it can be compiled successfully.
 
-The support can be explicitly enabled or disable with --enable-multibyte or
+The support can be explicitly enabled or disabled with --enable-multibyte or
 --disable-multibyte.  The developers are not aware of any need to use
 --disable-multibyte and this should be reported as a bug.  Currently
 multibyte mode is believed to work on at least the following:
diff --git a/README b/README
index 78fcf5489..d4ad770a3 100644
--- a/README
+++ b/README
@@ -49,6 +49,13 @@ The variable HOME is no longer set by the shell if zsh is emulating any
 other shell at startup; it must be present in the environment or set
 subsequently by the user.  It is valid for the variable to be unset.
 
+Parameter substitutions in the form ${param//#%search/replace} match
+against "search" anchored at both ends of the parameter value.  Previously
+this syntax would have matched against "%search", anchored only at the head
+of the value.  The form ${param//#$search/replace} where the value
+$search starts with "%" considers the "%" to be part of the search
+string as before.
+
 The MULTIBYTE option is on by default where it is available; this
 causes many operations to recognise characters as in the current locale.
 Older versions of the shell always assumed a character was one byte.
diff --git a/Src/glob.c b/Src/glob.c
index 130f8e0c2..201427bdb 100644
--- a/Src/glob.c
+++ b/Src/glob.c
@@ -2294,6 +2294,21 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
 	return 1;
     }
     if (matched) {
+	/*
+	 * The default behaviour is to match at the start; this
+	 * is modified by SUB_END and SUB_SUBSTR.  SUB_END matches
+	 * at the end of the string instead of the start.  SUB_SUBSTR
+	 * without SUB_END matches substrings searching from the start;
+	 * with SUB_END it matches substrings searching from the end.
+	 *
+	 * The possibilities are further modified by whether we want the
+	 * longest (SUB_LONG) or shortest possible match.
+	 *
+	 * SUB_START is only used in the case where we are also
+	 * forcing a match at the end (SUB_END with no SUB_SUBSTR,
+	 * with or without SUB_LONG), to indicate we should match
+	 * the entire string.
+	 */
 	switch (fl & (SUB_END|SUB_LONG|SUB_SUBSTR)) {
 	case 0:
 	case SUB_LONG:
@@ -2341,13 +2356,15 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
 		set_pat_start(p, t-s);
 		if (pattrylen(p, t, s + l - t, umlen, ioff))
 		    tmatch = t;
+		if (fl & SUB_START)
+		    break;
 		umlen -= iincchar(&t);
 	    }
 	    if (tmatch) {
 		*sp = get_match_ret(*sp, tmatch - s, l, fl, replstr, repllist);
 		return 1;
 	    }
-	    if (pattrylen(p, s + l, 0, 0, ioff)) {
+	    if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, ioff)) {
 		*sp = get_match_ret(*sp, l, l, fl, replstr, repllist);
 		return 1;
 	    }
@@ -2364,8 +2381,14 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
 		    *sp = get_match_ret(*sp, t-s, l, fl, replstr, repllist);
 		    return 1;
 		}
+		if (fl & SUB_START)
+		    break;
 		umlen -= iincchar(&t);
 	    }
+	    if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, ioff)) {
+		*sp = get_match_ret(*sp, l, l, fl, replstr, repllist);
+		return 1;
+	    }
 	    break;
 
 	case SUB_SUBSTR:
@@ -2566,7 +2589,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
 
     /* munge the whole string: no match, so no replstr */
     *sp = get_match_ret(*sp, 0, 0, fl, 0, 0);
-    return 1;
+    return (fl & SUB_RETFAIL) ? 0 : 1;
 }
 
 /**/
diff --git a/Src/hist.c b/Src/hist.c
index 33c4035bf..68163181e 100644
--- a/Src/hist.c
+++ b/Src/hist.c
@@ -323,7 +323,8 @@ getsubsargs(char *subline, int *gbalp, int *cflagp)
     if (strlen(ptr1)) {
 	zsfree(hsubl);
 	hsubl = ptr1;
-    }
+    } else if (!hsubl)		/* fail silently on this */
+	return 0;
     zsfree(hsubr);
     hsubr = ptr2;
     follow = ingetc();
@@ -337,11 +338,6 @@ getsubsargs(char *subline, int *gbalp, int *cflagp)
 	}
     } else
 	inungetc(follow);
-    if (hsubl && !strstr(subline, hsubl)) {
-	herrflush();
-	zerr("substitution failed");
-	return 1;
-    }
     return 0;
 }
 
@@ -354,6 +350,15 @@ getargc(Histent ehist)
     return ehist->nwords ? ehist->nwords-1 : 0;
 }
 
+/**/
+static int
+substfailed(void)
+{
+    herrflush();
+    zerr("substitution failed");
+    return -1;
+}
+
 /* Perform history substitution, returning the next character afterwards. */
 
 /**/
@@ -376,10 +381,15 @@ histsubchar(int c)
 	isfirstch = 0;
 	inungetc(hatchar);
 	if (!(ehist = gethist(defev))
-	    || !(sline = getargs(ehist, 0, getargc(ehist)))
-	    || getsubsargs(sline, &gbal, &cflag) || !hsubl)
+	    || !(sline = getargs(ehist, 0, getargc(ehist))))
 	    return -1;
-	subst(&sline, hsubl, hsubr, gbal);
+
+	if (getsubsargs(sline, &gbal, &cflag))
+	    return substfailed();
+	if (!hsubl)
+	    return -1;
+	if (subst(&sline, hsubl, hsubr, gbal))
+	    return substfailed();
     } else {
 	/* Line doesn't begin ^foo^bar */
 	if (c != ' ')
@@ -608,9 +618,10 @@ histsubchar(int c)
 		if (getsubsargs(sline, &gbal, &cflag))
 		    return -1; /* fall through */
 	    case '&':
-		if (hsubl && hsubr)
-		    subst(&sline, hsubl, hsubr, gbal);
-		else {
+		if (hsubl && hsubr) {
+		    if (subst(&sline, hsubl, hsubr, gbal))
+			return substfailed();
+		} else {
 		    herrflush();
 		    zerr("no previous substitution");
 		    return -1;
@@ -1629,30 +1640,72 @@ casemodify(char *str, int how)
     return str2;
 }
 
+
+/*
+ * Substitute "out" for "in" in "*strptr" and update "*strptr".
+ * If "gbal", do global substitution.
+ *
+ * This returns a result from the heap.  There seems to have
+ * been some confusion on this point.
+ */
+
 /**/
-void
+int
 subst(char **strptr, char *in, char *out, int gbal)
 {
-    char *str = *strptr, *instr = *strptr, *substcut, *sptr, *oldstr;
+    char *str = *strptr, *substcut, *sptr;
     int off, inlen, outlen;
 
     if (!*in)
 	in = str, gbal = 0;
-    if (!(substcut = (char *)strstr(str, in)))
-	return;
-    inlen = strlen(in);
-    sptr = convamps(out, in, inlen);
-    outlen = strlen(sptr);
 
-    do {
-	*substcut = '\0';
-	off = substcut - *strptr + outlen;
-	substcut += inlen;
-	*strptr = tricat(oldstr = *strptr, sptr, substcut);
-	if (oldstr != instr)
-	    zsfree(oldstr);
-	str = (char *)*strptr + off;
-    } while (gbal && (substcut = (char *)strstr(str, in)));
+    if (isset(HISTSUBSTPATTERN)) {
+	int fl = SUB_LONG|SUB_REST|SUB_RETFAIL;
+	char *oldin = in;
+	if (gbal)
+	    fl |= SUB_GLOBAL;
+	if (*in == '#' || *in == Pound) {
+	    /* anchor at head, flag needed if SUB_END is also set */
+	    fl |= SUB_START;
+	    in++;
+	}
+	if (*in == '%') {
+	    /* anchor at tail */
+	    in++;
+	    fl |= SUB_END;
+	}
+	if (in == oldin) {
+	    /* no anchor, substring match */
+	    fl |= SUB_SUBSTR;
+	}
+	if (in == str)
+	    in = dupstring(in);
+	if (parse_subst_string(in) || errflag)
+	    return 1;
+	if (parse_subst_string(out) || errflag)
+	    return 1;
+	singsub(&in);
+	if (getmatch(strptr, in, fl, 1, out))
+	    return 0;
+    } else {
+	if ((substcut = (char *)strstr(str, in))) {
+	    inlen = strlen(in);
+	    sptr = convamps(out, in, inlen);
+	    outlen = strlen(sptr);
+
+	    do {
+		*substcut = '\0';
+		off = substcut - *strptr + outlen;
+		substcut += inlen;
+		*strptr = zhtricat(*strptr, sptr, substcut);
+		str = (char *)*strptr + off;
+	    } while (gbal && (substcut = (char *)strstr(str, in)));
+
+	    return 0;
+	}
+    }
+
+    return 1;
 }
 
 /**/
diff --git a/Src/options.c b/Src/options.c
index d30553050..821773d2f 100644
--- a/Src/options.c
+++ b/Src/options.c
@@ -137,6 +137,7 @@ static struct optname optns[] = {
 {{NULL, "histignorespace",    0},			 HISTIGNORESPACE},
 {{NULL, "histnofunctions",    0},			 HISTNOFUNCTIONS},
 {{NULL, "histnostore",	      0},			 HISTNOSTORE},
+{{NULL, "histsubstpattern",   OPT_EMULATE},              HISTSUBSTPATTERN},
 {{NULL, "histreduceblanks",   0},			 HISTREDUCEBLANKS},
 {{NULL, "histsavebycopy",     OPT_ALL},			 HISTSAVEBYCOPY},
 {{NULL, "histsavenodups",     0},			 HISTSAVENODUPS},
diff --git a/Src/subst.c b/Src/subst.c
index 8ef8d446e..abc3c82af 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -2355,15 +2355,24 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 		c = *++s;
 	    }
 	    /* Check for anchored substitution */
-	    if (c == '%') {
+	    if (c == '#' || c == Pound) {
+		/*
+		 * anchor at head: this is the `normal' case in
+		 * getmatch and we only require the flag if SUB_END
+		 * is also present.
+		 */
+		flags |= SUB_START;
+		s++;
+	    }
+	    if (*s == '%') {
 		/* anchor at tail */
 		flags |= SUB_END;
 		s++;
-	    } else if (c == '#' || c == Pound) {
-		/* anchor at head: this is the `normal' case in getmatch */
-		s++;
-	    } else
+	    }
+	    if (!(flags & (SUB_START|SUB_END))) {
+		/* No anchor, so substring */
 		flags |= SUB_SUBSTR;
+	    }
 	    /*
 	     * Find the / marking the end of the search pattern.
 	     * If there isn't one, we're just going to delete that,
@@ -2526,7 +2535,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
             /* This once was executed only `if (qt) ...'. But with that
              * patterns in a expansion resulting from a ${(e)...} aren't
              * tokenized even though this function thinks they are (it thinks
-             * they are because subst_parse_str() turns Qstring tokens
+             * they are because parse_subst_str() turns Qstring tokens
              * into String tokens and for unquoted parameter expansions the
              * lexer normally does tokenize patterns inside parameter
              * expansions). */
@@ -3273,6 +3282,7 @@ modify(char **str, char **ptr)
 		break;
 
 	    case 's':
+		/* TODO: multibyte delimiter */
 		c = **ptr;
 		(*ptr)++;
 		ptr1 = *ptr;
@@ -3298,7 +3308,8 @@ modify(char **str, char **ptr)
 		for (tt = hsubl; *tt; tt++)
 		    if (inull(*tt) && *tt != Bnullkeep)
 			chuck(tt--);
-		untokenize(hsubl);
+		if (!isset(HISTSUBSTPATTERN))
+		    untokenize(hsubl);
 		for (tt = hsubr = ztrdup(ptr2); *tt; tt++)
 		    if (inull(*tt) && *tt != Bnullkeep)
 			chuck(tt--);
@@ -3444,15 +3455,8 @@ modify(char **str, char **ptr)
 		    *str = casemodify(*str, CASMOD_UPPER);
 		    break;
 		case 's':
-		    if (hsubl && hsubr) {
-			char *oldstr = *str;
-
+		    if (hsubl && hsubr)
 			subst(str, hsubl, hsubr, gbal);
-			if (*str != oldstr) {
-			    *str = dupstring(oldstr = *str);
-			    zsfree(oldstr);
-			}
-		    }
 		    break;
 		case 'q':
 		    *str = quotestring(*str, NULL, QT_BACKSLASH);
diff --git a/Src/zsh.h b/Src/zsh.h
index c73ae3b9a..1c693fef4 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -1405,6 +1405,9 @@ struct tieddata {
 #define SUB_ALL		0x0100	/* match complete string */
 #define SUB_GLOBAL	0x0200	/* global substitution ${..//all/these} */
 #define SUB_DOSUBST	0x0400	/* replacement string needs substituting */
+#define SUB_RETFAIL	0x0800  /* return status 0 if no match */
+#define SUB_START	0x1000  /* force match at start with SUB_END
+				 * and no SUB_SUBSTR */
 
 /* Flags as the second argument to prefork */
 #define PF_TYPESET	0x01	/* argument handled like typeset foo=bar */
@@ -1631,6 +1634,7 @@ enum {
     HISTREDUCEBLANKS,
     HISTSAVEBYCOPY,
     HISTSAVENODUPS,
+    HISTSUBSTPATTERN,
     HISTVERIFY,
     HUP,
     IGNOREBRACES,
diff --git a/Test/E01options.ztst b/Test/E01options.ztst
index da4020c15..1fbe0cc93 100644
--- a/Test/E01options.ztst
+++ b/Test/E01options.ztst
@@ -487,6 +487,20 @@
 >tmpcd tmpfile1 tmpfile2
 >tmp*
 
+  setopt histsubstpattern
+  print *(:s/t??/TING/)
+  foo=(tmp*)
+  print ${foo:s/??p/THUMP/}
+  foo=(one.c two.c three.c)
+  print ${foo:s/#%(#b)t(*).c/T${match[1]}.X/}
+  print *(#q:s/#(#b)tmp(*e)/'scrunchy${match[1]}'/)
+  unsetopt histsubstpattern
+0:HIST_SUBST_PATTERN option
+>TINGcd TINGfile1 TINGfile2
+>THUMPcd THUMPfile1 THUMPfile2
+>one.c Two.X Three.X
+>scrunchyfile1 scrunchyfile2 tmpcd
+
   setopt ignorebraces
   echo X{a,b}Y
   unsetopt ignorebraces