From a81e5a609e6d73a83c5a55a3f64f760c6fb45b92 Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Fri, 27 Jul 2007 21:51:32 +0000 Subject: add (#cN,M) globbing flag --- ChangeLog | 6 +++ Doc/Zsh/expn.yo | 12 +++++ Misc/globtests | 10 ++++ Src/pattern.c | 144 +++++++++++++++++++++++++++++++++++++++++++++++++++--- Test/D02glob.ztst | 14 ++++++ 5 files changed, 178 insertions(+), 8 deletions(-) diff --git a/ChangeLog b/ChangeLog index b1c588e8b..c07f8c061 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2007-07-27 Peter Stephenson + + * 23713: Doc/Zsh/expn.yo, Misc/globtests, Src/pattern.c, + Test/D02glob.ztst: add (#cN,M) globbing flag to work like + {N,M} in regular expressions. + 2007-07-27 Clint Adams * 23712: Completion/Unix/Command/_dvi: handle dvips -j. diff --git a/Doc/Zsh/expn.yo b/Doc/Zsh/expn.yo index c3c6d5baf..2460f148e 100644 --- a/Doc/Zsh/expn.yo +++ b/Doc/Zsh/expn.yo @@ -1636,6 +1636,18 @@ item(tt(B))( Deactivate backreferences, negating the effect of the tt(b) flag from that point on. ) +item(tt(c)var(N)tt(,)var(M))( +The flag tt(LPAR()#c)var(N)tt(,)var(M)tt(RPAR()) can be used anywhere +that the tt(#) or tt(##) operators can be used; it cannot be combined +with other globbing flags and a bad pattern error occurs if it is +misplaced. It is equivalent to the form tt({)var(N)tt(,)var(M)tt(}) in +regular expressions. The previous character or group is required to +match between var(N) and var(M) times, inclusive. The form +tt(LPAR()#c)var(N)tt(RPAR()) requires exactly tt(N) matches; +tt(LPAR()#c,)var(M)tt(RPAR()) is equivalent to specifying var(N) as 0; +tt(LPAR()#c)var(N)tt(,RPAR()) specifies that there is no maximum limit +on the number of matches. +) item(tt(m))( Set references to the match data for the entire string matched; this is similar to backreferencing and does not work in filename generation. The diff --git a/Misc/globtests b/Misc/globtests index a5f7c4a00..65fdbdca2 100755 --- a/Misc/globtests +++ b/Misc/globtests @@ -182,5 +182,15 @@ f atest/path *((#s)|/)test((#e)|/)* f path/testy *((#s)|/)test((#e)|/)* f path/testy/ohyes *((#s)|/)test((#e)|/)* f path/atest/ohyes *((#s)|/)test((#e)|/)* +t XabcdabcY X(ab|c|d)(#c5)Y +t XabcdabcY X(ab|c|d)(#c1,5)Y +t XabcdabcY X(ab|c|d)(#c5,8)Y +t XabcdabcY X(ab|c|d)(#c4,)Y +f XabcdabcY X(ab|c|d)(#c6,)Y +f XabcdabcY X(ab|c|d)(#c1,4)Y +t ZX Z(|)(#c1)X +t froofroo (fro(#c2))(#c2) +f froofroofroo (fro(#c2))(#c2) +f froofro (fro(#c2))(#c2) EOT print "$failed tests failed." diff --git a/Src/pattern.c b/Src/pattern.c index 7d9729c0d..9e2df499c 100644 --- a/Src/pattern.c +++ b/Src/pattern.c @@ -105,6 +105,8 @@ typedef union upat *Upat; #define P_GFLAGS 0x08 /* long Match nothing and set globbing flags */ #define P_ISSTART 0x09 /* no Match start of string. */ #define P_ISEND 0x0a /* no Match end of string. */ +#define P_COUNTSTART 0x0b /* no Initialise P_COUNT */ +#define P_COUNT 0x0c /* 3*long uc* node Match a number of repetitions */ /* numbered so we can test bit 5 for a branch */ #define P_BRANCH 0x20 /* node Match this alternative, or the next... */ #define P_WBRANCH 0x21 /* uc* node P_BRANCH, but match at least 1 char */ @@ -179,6 +181,17 @@ typedef union upat *Upat; * for top level file globs, e.g. ** / *.c~*foo.c * ^ I had to leave this space * P_NUM*: zl is a zlong if that is 64-bit, else an unsigned long. + * + * P_COUNTSTART, P_COUNT: a P_COUNTSTART flags the start of a quantified + * closure (#cN,M) and is used to initialise the count. Executing + * the pattern leads back to the P_COUNT, while the next links of the + * P_COUNTSTART and P_COUNT lead to the tail of the pattern: + * + * ---------------- + * v ^ + * pattern tail + * v v ^ + * ------------------------ */ #define PP_ALPHA 1 #define PP_ALNUM 2 @@ -211,6 +224,13 @@ typedef union upat *Upat; #define P_LS_LEN(p) ((p)[1].l) /* can be used as lvalue */ #define P_LS_STR(p) ((char *)((p) + 2)) +/* Specific to P_COUNT: arguments as offset in nodes from operator */ +#define P_CT_CURRENT (1) /* Current count */ +#define P_CT_MIN (2) /* Minimum count */ +#define P_CT_MAX (3) /* Maximum count, -1 for none */ +#define P_CT_PTR (4) /* Pointer to last match start */ +#define P_CT_OPERAND (5) /* Operand of P_COUNT */ + /* Flags needed when pattern is executed */ #define P_SIMPLE 0x01 /* Simple enough to be #/## operand. */ #define P_HSTART 0x02 /* Starts with # or ##'d pattern. */ @@ -766,7 +786,7 @@ patcompswitch(int paren, int *flagp) * no top-level |'s. * * No gfchanged, as nothing to follow branch at top - * level. + * level. */ union upat up; gfnode = patnode(P_GFLAGS); @@ -948,7 +968,7 @@ patgetglobflags(char **strp, long *assertp, int *ignore) *ignore = 1; /* (#X): assumes we are still positioned on the first X */ for (; *ptr && *ptr != Outpar; ptr++) { - if (*ptr == 'q') { + if (*ptr == 'q') { /* Glob qualifiers, ignored in pattern code */ while (*ptr && *ptr != Outpar) ptr++; @@ -1044,8 +1064,9 @@ patgetglobflags(char **strp, long *assertp, int *ignore) static long patcomppiece(int *flagp) { - long starter = 0, next, pound, op; + long starter = 0, next, op, opnd; int flags, flags2, kshchar, len, ch, patch, nmeta; + int pound, count; union upat up; char *nptr, *str0, *ptr, *patprev; zrange_t from, to; @@ -1098,10 +1119,13 @@ patcomppiece(int *flagp) /* more than one character matched? */ morelen = (patprev > str0); /* - * If we have more than one character, a following hash only - * applies to the last, so backtrack one character. + * If we have more than one character, a following hash + * or (#c...) only applies to the last, so backtrack one character. */ - if (isset(EXTENDEDGLOB) && *patparse == Pound && morelen) + if (isset(EXTENDEDGLOB) && + (*patparse == Pound || + (*patparse == Inpar && patparse[1] == Pound && + patparse[2] == 'c')) && morelen) patparse = patprev; /* * If len is 1, we can't have an active # following, so doesn't @@ -1345,7 +1369,10 @@ patcomppiece(int *flagp) } } + count = 0; if (!(pound = (*patparse == Pound && isset(EXTENDEDGLOB))) && + !(count = (isset(EXTENDEDGLOB) && *patparse == Inpar && + patparse[1] == Pound && patparse[2] == 'c')) && (kshchar <= 0 || kshchar == '@' || kshchar == '!')) { *flagp = flags; return starter; @@ -1364,6 +1391,10 @@ patcomppiece(int *flagp) } else if (kshchar == '?') { op = 0; *flagp = 0; + } else if (count) { + op = P_COUNT; + patparse += 3; + *flagp = P_HSTART; } else if (*++patparse == Pound) { op = P_TWOHASH; patparse++; @@ -1383,7 +1414,56 @@ patcomppiece(int *flagp) * each time we fail on a non-empty branch, we try the empty branch, * which is equivalent to backtracking. */ - if ((flags & P_SIMPLE) && (op == P_ONEHASH || op == P_TWOHASH) && + if (op == P_COUNT) { + /* (#cN,M) */ + union upat countargs[P_CT_OPERAND]; + char *opp = patparse; + + countargs[0].l = P_COUNT; + countargs[P_CT_CURRENT].l = 0L; + countargs[P_CT_MIN].l = (long)zstrtol(patparse, &patparse, 10); + if (patparse == opp) { + /* missing number treated as zero */ + countargs[P_CT_MIN].l = 0L; + } + if (*patparse != ',' && *patparse != Comma) { + /* either max = min or error */ + if (*patparse != Outpar) + return 0; + countargs[P_CT_MAX].l = countargs[P_CT_MIN].l; + } else { + opp = ++patparse; + countargs[P_CT_MAX].l = (long)zstrtol(patparse, &patparse, 10); + if (*patparse != Outpar) + return 0; + if (patparse == opp) { + /* missing number treated as infinity: record as -1 */ + countargs[P_CT_MAX].l = -1L; + } + } + patparse++; + countargs[P_CT_PTR].p = NULL; + /* Mark this chain as a min/max count... */ + patinsert(P_COUNTSTART, starter, (char *)countargs, sizeof(countargs)); + /* + * The next of the operand is a loop back to the P_COUNT. This is + * how we get recursion for the count. We don't loop back to + * the P_COUNTSTART; that's used for initialising the count + * and saving and restoring the count for any enclosing use + * of the match. + */ + opnd = P_OPERAND(starter) + P_CT_OPERAND; + pattail(opnd, patnode(P_BACK)); + pattail(opnd, P_OPERAND(starter)); + /* + * The next of the counter operators is what follows the + * closure. + * This handles matching of the tail. + */ + next = patnode(P_NOTHING); + pattail(starter, next); + pattail(P_OPERAND(starter), next); + } else if ((flags & P_SIMPLE) && (op == P_ONEHASH || op == P_TWOHASH) && P_OP((Upat)patout+starter) == P_ANY) { /* Optimize ?# to *. Silly thing to do, since who would use * use ?# ? But it makes the later code shorter. @@ -1397,7 +1477,7 @@ patcomppiece(int *flagp) uptr->l = (uptr->l & ~0xff) | P_STAR; } } else if ((flags & P_SIMPLE) && op && !(patglobflags & 0xff)) { - /* Don't simplify if we need to look for approximations. */ + /* Simplify, but not if we need to look for approximations. */ patinsert(op, starter, NULL, 0); } else if (op == P_ONEHASH) { /* Emit x# as (x&|), where & means "self". */ @@ -2830,6 +2910,54 @@ patmatch(Upat prog) if (patinput < patinend || (patflags & PAT_NOTEND)) fail = 1; break; + case P_COUNTSTART: + { + /* + * Save and restore the current count and the + * start pointer in case the pattern has been + * executed by a previous repetition of a + * closure. + */ + long *curptr = &P_OPERAND(scan)[P_CT_CURRENT].l; + long savecount = *curptr; + unsigned char *saveptr = scan[P_CT_PTR].p; + int ret; + + *curptr = 0L; + ret = patmatch(P_OPERAND(scan)); + *curptr = savecount; + scan[P_CT_PTR].p = saveptr; + return ret; + } + case P_COUNT: + { + /* (#cN,M): execution is relatively straightforward */ + long cur = scan[P_CT_CURRENT].l; + long min = scan[P_CT_MIN].l; + long max = scan[P_CT_MAX].l; + + if (cur && cur >= min && + (unsigned char *)patinput == scan[P_CT_PTR].p) { + /* + * Not at the first attempt to match so + * the previous attempt managed zero length. + * We can do this indefinitely so there's + * no point in going on. Simply try to + * match the remainder of the pattern. + */ + return patmatch(next); + } + scan[P_CT_PTR].p = (unsigned char *)patinput; + + if (max < 0 || cur < max) { + scan[P_CT_CURRENT].l = cur + 1; + if (patmatch(scan + P_CT_OPERAND)) + return 1; + } + if (cur < min) + return 0; + return patmatch(next); + } case P_END: if (!(fail = (patinput < patinend && !(patflags & PAT_NOANCH)))) return 1; diff --git a/Test/D02glob.ztst b/Test/D02glob.ztst index 7c76414f0..59fc92d1c 100644 --- a/Test/D02glob.ztst +++ b/Test/D02glob.ztst @@ -177,6 +177,16 @@ >1: [[ path/testy = *((#s)|/)test((#e)|/)* ]] >1: [[ path/testy/ohyes = *((#s)|/)test((#e)|/)* ]] >1: [[ path/atest/ohyes = *((#s)|/)test((#e)|/)* ]] +>0: [[ XabcdabcY = X(ab|c|d)(#c5)Y ]] +>0: [[ XabcdabcY = X(ab|c|d)(#c1,5)Y ]] +>0: [[ XabcdabcY = X(ab|c|d)(#c5,8)Y ]] +>0: [[ XabcdabcY = X(ab|c|d)(#c4,)Y ]] +>1: [[ XabcdabcY = X(ab|c|d)(#c6,)Y ]] +>1: [[ XabcdabcY = X(ab|c|d)(#c1,4)Y ]] +>0: [[ ZX = Z(|)(#c1)X ]] +>0: [[ froofroo = (fro(#c2))(#c2) ]] +>1: [[ froofroofroo = (fro(#c2))(#c2) ]] +>1: [[ froofro = (fro(#c2))(#c2) ]] >0 tests failed. globtest globtests.ksh @@ -357,3 +367,7 @@ > [[:IFSSPACE:]] yes >/ [[:WORD:]] no >/ [[:WORD:]] yes + + [[ foo = (#c0)foo ]] +1:Misplaced (#c...) flag +?(eval):1: bad pattern: (#c0)foo -- cgit 1.4.1