From 5b0c4cae62f351a24260cb35a3a51c94ff556aa1 Mon Sep 17 00:00:00 2001 From: Tanaka Akira Date: Sat, 22 Jan 2000 23:58:38 +0000 Subject: zsh-workers/9408 --- Doc/Zsh/expn.yo | 16 +++++++++++++- Src/pattern.c | 66 +++++++++++++++++++++++++++++++++++---------------------- 2 files changed, 56 insertions(+), 26 deletions(-) diff --git a/Doc/Zsh/expn.yo b/Doc/Zsh/expn.yo index 2a0655679..ee554e6c1 100644 --- a/Doc/Zsh/expn.yo +++ b/Doc/Zsh/expn.yo @@ -1246,8 +1246,22 @@ as tt(${)var(param)tt(//)var(pat)tt(/)var(repl)tt(}), only the data for the last match remains available. In the case of global replacements this may still be useful. See the example for the tt(m) flag below. +The numbering of backreferences strictly follows the order of the opening +parentheses from left to right in the pattern string, although sets of +parentheses may be nested. There are special rules for parentheses followed +by `tt(#)' or `tt(##)'. Only the last match of the parenthesis is +remembered: for example, in `tt([[ abab = (#b)([ab])# ]])', only the final +`tt(b)' is stored in tt(match[1]). Thus extra parentheses may be necessary +to match the complete segment: for example, use `tt(X((ab|cd)#)Y)' to match +a whole string of either `tt(ab)' or `tt(cd)' between `tt(X)' and `tt(Y)', +using the value of tt($match[1]) rather than tt($match[2]). + If the match fails none of the parameters is altered, so in some cases it -may be necessary to initialise them beforehand. +may be necessary to initialise them beforehand. If some of the +backreferences fail to match --- which happens if they are in an alternate +branch which fails to match, or if they are followed by tt(#) and matched +zero times --- then the matched string is set to the empty string, and the +start and end indices are set to -1. Pattern matching with backreferences is slightly slower than without. ) diff --git a/Src/pattern.c b/Src/pattern.c index 27fcd0a7e..1c4abbfb4 100644 --- a/Src/pattern.c +++ b/Src/pattern.c @@ -1376,12 +1376,17 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp) ep = patendp; for (i = 0; i < prog->patnpar && i < maxnpos; i++) { - DPUTS(!*sp || !*ep, "BUG: backrefs not set."); - - if (begp) - *begp++ = ztrsub(*sp, patinstart) + patoffset; - if (endp) - *endp++ = ztrsub(*ep, patinstart) + patoffset - 1; + if (parsfound & (1 << i)) { + if (begp) + *begp++ = ztrsub(*sp, patinstart) + patoffset; + if (endp) + *endp++ = ztrsub(*ep, patinstart) + patoffset - 1; + } else { + if (begp) + *begp++ = -1; + if (endp) + *endp++ = -1; + } sp++; ep++; @@ -1403,25 +1408,36 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp) PERMALLOC { for (i = 0; i < prog->patnpar; i++) { - DPUTS(!*sp || !*ep, "BUG: backrefs not set."); - matcharr[i] = dupstrpfx(*sp, *ep - *sp); - /* - * mbegin and mend give indexes into the string - * in the standard notation, i.e. respecting - * KSHARRAYS, and with the end index giving - * the last character, not one beyond. - * For example, foo=foo; [[ $foo = (f)oo ]] gives - * (without KSHARRAYS) indexes 1 and 1, which - * corresponds to indexing as ${foo[1,1]}. - */ - sprintf(numbuf, "%ld", - (long)(ztrsub(*sp, patinstart) + patoffset + - !isset(KSHARRAYS))); - mbeginarr[i] = ztrdup(numbuf); - sprintf(numbuf, "%ld", - (long)(ztrsub(*ep, patinstart) + patoffset + - !isset(KSHARRAYS) - 1)); - mendarr[i] = ztrdup(numbuf); + if (parsfound & (1 << i)) { + matcharr[i] = dupstrpfx(*sp, *ep - *sp); + /* + * mbegin and mend give indexes into the string + * in the standard notation, i.e. respecting + * KSHARRAYS, and with the end index giving + * the last character, not one beyond. + * For example, foo=foo; [[ $foo = (f)oo ]] gives + * (without KSHARRAYS) indexes 1 and 1, which + * corresponds to indexing as ${foo[1,1]}. + */ + sprintf(numbuf, "%ld", + (long)(ztrsub(*sp, patinstart) + + patoffset + + !isset(KSHARRAYS))); + mbeginarr[i] = ztrdup(numbuf); + sprintf(numbuf, "%ld", + (long)(ztrsub(*ep, patinstart) + + patoffset + + !isset(KSHARRAYS) - 1)); + mendarr[i] = ztrdup(numbuf); + } else { + /* Pattern wasn't set: either it was in an + * unmatched branch, or a hashed parenthesis + * that didn't match at all. + */ + matcharr[i] = ztrdup(""); + mbeginarr[i] = ztrdup("-1"); + mendarr[i] = ztrdup("-1"); + } sp++; ep++; } -- cgit 1.4.1