about summary refs log tree commit diff
diff options
context:
space:
mode:
author Tanaka Akira <akr@users.sourceforge.net> 2000-01-22 23:58:38 +0000
committer Tanaka Akira <akr@users.sourceforge.net> 2000-01-22 23:58:38 +0000
commit 5b0c4cae62f351a24260cb35a3a51c94ff556aa1 (patch)
tree 2aae597eaff4ba9a85d694752317c9e0d688d2c7
parent 7100a47961b04b8f5b7f025721787cd3b78f0cd6 (diff)
download zsh-5b0c4cae62f351a24260cb35a3a51c94ff556aa1.tar.gz
zsh-5b0c4cae62f351a24260cb35a3a51c94ff556aa1.tar.xz
zsh-5b0c4cae62f351a24260cb35a3a51c94ff556aa1.zip
zsh-workers/9408
-rw-r--r-- Doc/Zsh/expn.yo 16
-rw-r--r-- Src/pattern.c 66
2 files changed, 56 insertions, 26 deletions
diff --git a/Doc/Zsh/expn.yo b/Doc/Zsh/expn.yo
index 2a0655679..ee554e6c1 100644
--- a/Doc/Zsh/expn.yo
+++ b/Doc/Zsh/expn.yo
@@ -1246,8 +1246,22 @@ as tt(${)var(param)tt(//)var(pat)tt(/)var(repl)tt(}), only the data for the
 last match remains available.  In the case of global replacements this may
 still be useful.  See the example for the tt(m) flag below.
 
+The numbering of backreferences strictly follows the order of the opening
+parentheses from left to right in the pattern string, although sets of
+parentheses may be nested.  There are special rules for parentheses followed
+by `tt(#)' or `tt(##)'.  Only the last match of the parenthesis is
+remembered: for example, in `tt([[ abab = (#b)([ab])# ]])', only the final
+`tt(b)' is stored in tt(match[1]).  Thus extra parentheses may be necessary
+to match the complete segment: for example, use `tt(X((ab|cd)#)Y)' to match
+a whole string of either `tt(ab)' or `tt(cd)' between `tt(X)' and `tt(Y)',
+using the value of tt($match[1]) rather than tt($match[2]).
+
 If the match fails none of the parameters is altered, so in some cases it
-may be necessary to initialise them beforehand.
+may be necessary to initialise them beforehand.  If some of the
+backreferences fail to match --- which happens if they are in an alternate
+branch which fails to match, or if they are followed by tt(#) and matched
+zero times --- then the matched string is set to the empty string, and the
+start and end indices are set to -1.
 
 Pattern matching with backreferences is slightly slower than without.
 )
diff --git a/Src/pattern.c b/Src/pattern.c
index 27fcd0a7e..1c4abbfb4 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -1376,12 +1376,17 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp)
 		ep = patendp;
 
 		for (i = 0; i < prog->patnpar && i < maxnpos; i++) {
-		    DPUTS(!*sp || !*ep, "BUG: backrefs not set.");
-
-		    if (begp)
-			*begp++ = ztrsub(*sp, patinstart) + patoffset;
-		    if (endp)
-			*endp++ = ztrsub(*ep, patinstart) + patoffset - 1;
+		    if (parsfound & (1 << i)) {
+			if (begp)
+			    *begp++ = ztrsub(*sp, patinstart) + patoffset;
+			if (endp)
+			    *endp++ = ztrsub(*ep, patinstart) + patoffset - 1;
+		    } else {
+			if (begp)
+			    *begp++ = -1;
+			if (endp)
+			    *endp++ = -1;
+		    }
 
 		    sp++;
 		    ep++;
@@ -1403,25 +1408,36 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp)
 
 		PERMALLOC {
 		    for (i = 0; i < prog->patnpar; i++) {
-			DPUTS(!*sp || !*ep, "BUG: backrefs not set.");
-			matcharr[i] = dupstrpfx(*sp, *ep - *sp);
-			/*
-			 * mbegin and mend give indexes into the string
-			 * in the standard notation, i.e. respecting
-			 * KSHARRAYS, and with the end index giving
-			 * the last character, not one beyond.
-			 * For example, foo=foo; [[ $foo = (f)oo ]] gives
-			 * (without KSHARRAYS) indexes 1 and 1, which
-			 * corresponds to indexing as ${foo[1,1]}.
-			 */
-			sprintf(numbuf, "%ld",
-				(long)(ztrsub(*sp, patinstart) + patoffset +
-				       !isset(KSHARRAYS)));
-			mbeginarr[i] = ztrdup(numbuf);
-			sprintf(numbuf, "%ld",
-				(long)(ztrsub(*ep, patinstart) + patoffset +
-				       !isset(KSHARRAYS) - 1));
-			mendarr[i] = ztrdup(numbuf);
+			if (parsfound & (1 << i)) {
+			    matcharr[i] = dupstrpfx(*sp, *ep - *sp);
+			    /*
+			     * mbegin and mend give indexes into the string
+			     * in the standard notation, i.e. respecting
+			     * KSHARRAYS, and with the end index giving
+			     * the last character, not one beyond.
+			     * For example, foo=foo; [[ $foo = (f)oo ]] gives
+			     * (without KSHARRAYS) indexes 1 and 1, which
+			     * corresponds to indexing as ${foo[1,1]}.
+			     */
+			    sprintf(numbuf, "%ld",
+				    (long)(ztrsub(*sp, patinstart) + 
+					   patoffset +
+					   !isset(KSHARRAYS)));
+			    mbeginarr[i] = ztrdup(numbuf);
+			    sprintf(numbuf, "%ld",
+				    (long)(ztrsub(*ep, patinstart) + 
+					   patoffset +
+					   !isset(KSHARRAYS) - 1));
+			    mendarr[i] = ztrdup(numbuf);
+			} else {
+			    /* Pattern wasn't set: either it was in an
+			     * unmatched branch, or a hashed parenthesis
+			     * that didn't match at all.
+			     */
+			    matcharr[i] = ztrdup("");
+			    mbeginarr[i] = ztrdup("-1");
+			    mendarr[i] = ztrdup("-1");
+			}
 			sp++;
 			ep++;
 		    }