From f9d7651c2554bb5db0373f63185ff358f795ab3c Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Mon, 28 Sep 2015 20:31:51 +0100 Subject: 36682: expand pattern interface to optimise unmetafication --- ChangeLog | 6 + Src/Zle/complist.c | 5 +- Src/Zle/zle_hist.c | 4 +- Src/glob.c | 56 +++++---- Src/pattern.c | 337 +++++++++++++++++++++++++++++++++++++---------------- Src/zsh.h | 10 ++ 6 files changed, 288 insertions(+), 130 deletions(-) diff --git a/ChangeLog b/ChangeLog index 546620d6b..38e0e4627 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2015-09-28 Peter Stephenson + + * 36682: Src/glob.c, Src/pattern.c, Src/zsh.h, + Src/Zle/complist.c, Src/Zle/zle_hist.c: expand pattern interface + to allow unmetafying trial string once for reuse. + 2015-09-28 Daniel Shahaf * unposted: Test/D04parameter.ztst: Test for 36669 diff --git a/Src/Zle/complist.c b/Src/Zle/complist.c index 433701514..986ad31ea 100644 --- a/Src/Zle/complist.c +++ b/Src/Zle/complist.c @@ -868,7 +868,7 @@ putmatchcol(char *group, char *n) nrefs = MAX_POS - 1; if ((!pc->prog || !group || pattry(pc->prog, group)) && - pattryrefs(pc->pat, n, -1, -1, 0, &nrefs, begpos, endpos)) { + pattryrefs(pc->pat, n, -1, -1, NULL, 0, &nrefs, begpos, endpos)) { if (pc->cols[1]) { patcols = pc->cols; @@ -900,7 +900,8 @@ putfilecol(char *group, char *filename, mode_t m, int special) nrefs = MAX_POS - 1; if ((!pc->prog || !group || pattry(pc->prog, group)) && - pattryrefs(pc->pat, filename, -1, -1, 0, &nrefs, begpos, endpos)) { + pattryrefs(pc->pat, filename, -1, -1, NULL, + 0, &nrefs, begpos, endpos)) { if (pc->cols[1]) { patcols = pc->cols; diff --git a/Src/Zle/zle_hist.c b/Src/Zle/zle_hist.c index 0cff0391a..95d96c95c 100644 --- a/Src/Zle/zle_hist.c +++ b/Src/Zle/zle_hist.c @@ -1306,8 +1306,8 @@ doisearch(char **args, int dir, int pattern) * this mode. 
*/ if (!skip_pos && - pattryrefs(patprog, zt, -1, -1, 0, NULL, NULL, - &end_pos)) + pattryrefs(patprog, zt, -1, -1, NULL, 0, + NULL, NULL, &end_pos)) t = zt; } else { if (!matchlist && !skip_pos) { diff --git a/Src/glob.c b/Src/glob.c index fa3ce25f4..8bf73520f 100644 --- a/Src/glob.c +++ b/Src/glob.c @@ -2780,7 +2780,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, p->flags &= ~(PAT_NOTSTART|PAT_NOTEND); if (fl & SUB_ALL) { - int i = matched && pattry(p, s); + int i = matched && pattrylen(p, s, -1, -1, NULL, 0); *sp = get_match_ret(*sp, 0, i ? l : 0, fl, i ? replstr : 0, NULL); if (! **sp && (((fl & SUB_MATCH) && !i) || ((fl & SUB_REST) && i))) return 0; @@ -2809,7 +2809,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, * Largest/smallest possible match at head of string. * First get the longest match... */ - if (pattry(p, s)) { + if (pattrylen(p, s, -1, -1, NULL, 0)) { /* patmatchlen returns metafied length, as we need */ int mlen = patmatchlen(); if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) { @@ -2820,7 +2820,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, mb_charinit(); for (t = s, umlen = 0; t < s + mlen; ) { set_pat_end(p, *t); - if (pattrylen(p, s, t - s, umlen, 0)) { + if (pattrylen(p, s, t - s, umlen, NULL, 0)) { mlen = patmatchlen(); break; } @@ -2847,7 +2847,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, tmatch = NULL; for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) { set_pat_start(p, t-s); - if (pattrylen(p, t, s + l - t, umlen, ioff)) + if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) tmatch = t; if (fl & SUB_START) break; @@ -2857,7 +2857,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, *sp = get_match_ret(*sp, tmatch - s, l, fl, replstr, NULL); return 1; } - if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, ioff)) { + if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, NULL, ioff)) { *sp = get_match_ret(*sp, l, l, fl, replstr, NULL); return 1; } 
@@ -2870,7 +2870,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, mb_charinit(); for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) { set_pat_start(p, t-s); - if (pattrylen(p, t, s + l - t, umlen, ioff)) { + if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) { *sp = get_match_ret(*sp, t-s, l, fl, replstr, NULL); return 1; } @@ -2878,7 +2878,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, break; umlen -= iincchar(&t); } - if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, ioff)) { + if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, NULL, ioff)) { *sp = get_match_ret(*sp, l, l, fl, replstr, NULL); return 1; } @@ -2887,7 +2887,8 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, case SUB_SUBSTR: /* Smallest at start, but matching substrings. */ set_pat_start(p, l); - if (!(fl & SUB_GLOBAL) && pattry(p, s + l) && !--n) { + if (!(fl & SUB_GLOBAL) && pattrylen(p, s + l, -1, -1, NULL, 0) && + !--n) { *sp = get_match_ret(*sp, 0, 0, fl, replstr, NULL); return 1; } /* fall through */ @@ -2908,7 +2909,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, for (; t < s + l; ioff++) { /* Find the longest match from this position. 
*/ set_pat_start(p, t-s); - if (pattrylen(p, t, s + l - t, umlen, ioff)) { + if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) { char *mpos = t + patmatchlen(); if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) { char *ptr; @@ -2922,7 +2923,8 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, */ for (ptr = t, umlen2 = 0; ptr < mpos;) { set_pat_end(p, *ptr); - if (pattrylen(p, t, ptr - t, umlen2, ioff)) { + if (pattrylen(p, t, ptr - t, umlen2, + NULL, ioff)) { mpos = t + patmatchlen(); break; } @@ -2970,7 +2972,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, */ set_pat_start(p, l); if ((fl & (SUB_LONG|SUB_GLOBAL)) == SUB_LONG && - pattry(p, s + l) && !--n) { + pattrylen(p, s + l, -1, -1, NULL, 0) && !--n) { *sp = get_match_ret(*sp, 0, 0, fl, replstr, repllist); return 1; } @@ -2981,7 +2983,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, /* Longest/shortest at end, matching substrings. */ if (!(fl & SUB_LONG)) { set_pat_start(p, l); - if (pattrylen(p, s + l, 0, 0, umltot) && !--n) { + if (pattrylen(p, s + l, 0, 0, NULL, umltot) && !--n) { *sp = get_match_ret(*sp, l, l, fl, replstr, NULL); return 1; } @@ -3001,7 +3003,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, mb_charinit(); for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) { set_pat_start(p, t-s); - if (pattrylen(p, t, s + l - t, umlen, ioff)) { + if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) { nmatches++; tmatch = t; } @@ -3017,7 +3019,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, mb_charinit(); for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) { set_pat_start(p, t-s); - if (pattrylen(p, t, s + l - t, umlen, ioff) && + if (pattrylen(p, t, s + l - t, umlen, NULL, ioff) && !n--) { tmatch = t; break; @@ -3030,7 +3032,8 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) { for (t = tmatch, umlen = 0; t < mpos; ) { set_pat_end(p, *t); - if 
(pattrylen(p, tmatch, t - tmatch, umlen, ioff)) { + if (pattrylen(p, tmatch, t - tmatch, umlen, + NULL, ioff)) { mpos = tmatch + patmatchlen(); break; } @@ -3042,7 +3045,8 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, return 1; } set_pat_start(p, l); - if ((fl & SUB_LONG) && pattrylen(p, s + l, 0, 0, umltot) && !--n) { + if ((fl & SUB_LONG) && pattrylen(p, s + l, 0, 0, NULL, umltot) && + !--n) { *sp = get_match_ret(*sp, l, l, fl, replstr, NULL); return 1; } @@ -3167,7 +3171,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, */ for (t = s, umlen = 0; t < s + mlen; METAINC(t), umlen++) { set_pat_end(p, *t); - if (pattrylen(p, s, t - s, umlen, 0)) { + if (pattrylen(p, s, t - s, umlen, NULL, 0)) { mlen = patmatchlen(); break; } @@ -3187,7 +3191,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, if (t > s && t[-1] == Meta) t--; set_pat_start(p, t-s); - if (pattrylen(p, t, s + l - t, umlen, ioff)) { + if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) { *sp = get_match_ret(*sp, t - s, l, fl, replstr, NULL); return 1; } @@ -3203,7 +3207,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, for (ioff = 0, t = s, umlen = uml; t < s + l; ioff++, METAINC(t), umlen--) { set_pat_start(p, t-s); - if (pattrylen(p, t, s + l - t, umlen, ioff)) { + if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) { *sp = get_match_ret(*sp, t-s, l, fl, replstr, NULL); return 1; } @@ -3235,7 +3239,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, for (; t < s + l; METAINC(t), ioff++, umlen--) { /* Find the longest match from this position. 
*/ set_pat_start(p, t-s); - if (pattrylen(p, t, s + l - t, umlen, ioff)) { + if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) { char *mpos = t + patmatchlen(); if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) { char *ptr; @@ -3243,7 +3247,8 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, for (ptr = t, umlen2 = 0; ptr < mpos; METAINC(ptr), umlen2++) { set_pat_end(p, *ptr); - if (pattrylen(p, t, ptr - t, umlen2, ioff)) { + if (pattrylen(p, t, ptr - t, umlen2, + NULL, ioff)) { mpos = t + patmatchlen(); break; } @@ -3300,7 +3305,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, /* Longest/shortest at end, matching substrings. */ if (!(fl & SUB_LONG)) { set_pat_start(p, l); - if (pattrylen(p, s + l, 0, 0, uml) && !--n) { + if (pattrylen(p, s + l, 0, 0, NULL, uml) && !--n) { *sp = get_match_ret(*sp, l, l, fl, replstr, NULL); return 1; } @@ -3310,7 +3315,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, if (t > s && t[-1] == Meta) t--; set_pat_start(p, t-s); - if (pattrylen(p, t, s + l - t, umlen, ioff) && !--n) { + if (pattrylen(p, t, s + l - t, umlen, NULL, ioff) && !--n) { /* Found the longest match */ char *mpos = t + patmatchlen(); if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) { @@ -3319,7 +3324,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, for (ptr = t, umlen2 = 0; ptr < mpos; METAINC(ptr), umlen2++) { set_pat_end(p, *ptr); - if (pattrylen(p, t, ptr - t, umlen2, ioff)) { + if (pattrylen(p, t, ptr - t, umlen2, NULL, ioff)) { mpos = t + patmatchlen(); break; } @@ -3331,7 +3336,8 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, } } set_pat_start(p, l); - if ((fl & SUB_LONG) && pattrylen(p, s + l, 0, 0, uml) && !--n) { + if ((fl & SUB_LONG) && pattrylen(p, s + l, 0, 0, NULL, uml) && + !--n) { *sp = get_match_ret(*sp, l, l, fl, replstr, NULL); return 1; } diff --git a/Src/pattern.c b/Src/pattern.c index af56bd9cc..03ba37d92 100644 --- a/Src/pattern.c +++ b/Src/pattern.c @@ 
-2022,6 +2022,131 @@ pattrystart(void) errsfound = 0; } +/* + * Allocate memory for pattern match. Note this is specific to use + * of pattern *and* trial string. + * + * Unmetafy a trial string for use in pattern matching, if needed. + * + * If it is needed, returns a zalloc()'d string; if not needed, returns + * NULL. + * + * prog is the pattern to be executed. + * string is the metafied trial string. + * stringlen is its length; it will be calculated if it's negative + * (this is a simple strlen()). + * unmetalen is the unmetafied length of the string, may be -1. + * force is 1 if we always unmetafy: this is useful if we are going + * to try again with different versions of the string. If this is + * called from pattryrefs() we don't force unmetafication as it won't + * be optimal. + * In patstralloc (supplied by caller, must last until last pattry is done) + * unmetalen is the unmetafied length of the string; it will be + * calculated if the input value is negative. + * unmetalenp is the unmetafied length of a path segment preceding + * the trial string needed for file management; it is calculated as + * needed so does not need to be initialised. + * alloced is the memory allocated --- same as return value from + * function. + */ +/**/ +mod_export +char *patallocstr(Patprog prog, char *string, int stringlen, int unmetalen, + int force, Patstralloc patstralloc) +{ + int needfullpath; + + /* + * For a top-level ~-exclusion, we will need the full + * path to exclude, so copy the path so far and append the + * current test string. + */ + needfullpath = (prog->flags & PAT_HAS_EXCLUDP) && pathpos; + + /* Get the length of the full string when unmetafied. 
*/ + if (unmetalen < 0) + patstralloc->unmetalen = ztrsub(string + stringlen, string); + else + patstralloc->unmetalen = unmetalen; + if (needfullpath) { + patstralloc->unmetalenp = ztrsub(pathbuf + pathpos, pathbuf); + if (!patstralloc->unmetalenp) + needfullpath = 0; + } else + patstralloc->unmetalenp = 0; + /* Initialise cache area */ + patstralloc->progstrunmeta = NULL; + patstralloc->progstrunmetalen = 0; + + DPUTS(needfullpath && (prog->flags & (PAT_PURES|PAT_ANY)), + "rum sort of file exclusion"); + /* + * Partly for efficiency, and partly for the convenience of + * globbing, we don't unmetafy pure string patterns, and + * there's no reason to if the pattern is just a *. + */ + if (force || + (!(prog->flags & (PAT_PURES|PAT_ANY)) + && (needfullpath || patstralloc->unmetalen != stringlen))) { + /* + * We need to copy if we need to prepend the path so far + * (in which case we copy both chunks), or if we have + * Meta characters. + */ + char *dst, *ptr; + int i, icopy, ncopy; + + dst = patstralloc->alloced = + zalloc(patstralloc->unmetalen + patstralloc->unmetalenp); + + if (needfullpath) { + /* loop twice, copy path buffer first time */ + ptr = pathbuf; + ncopy = patstralloc->unmetalenp; + } else { + /* just loop once, copy string with unmetafication */ + ptr = string; + ncopy = patstralloc->unmetalen; + } + for (icopy = 0; icopy < 2; icopy++) { + for (i = 0; i < ncopy; i++) { + if (*ptr == Meta) { + ptr++; + *dst++ = *ptr++ ^ 32; + } else { + *dst++ = *ptr++; + } + } + if (!needfullpath) + break; + /* next time append test string to path so far */ + ptr = string; + ncopy = patstralloc->unmetalen; + } + } + else + { + patstralloc->alloced = NULL; + } + + return patstralloc->alloced; +} + + +/* + * Free memory allocated by patallocstr(). 
+ */ + +/**/ +mod_export +void patfreestr(Patstralloc patstralloc) +{ + if (patstralloc->alloced) + zfree(patstralloc->alloced, + patstralloc->unmetalen + patstralloc->unmetalenp); +} + + /* * Test prog against null-terminated, metafied string. */ @@ -2030,7 +2155,7 @@ pattrystart(void) mod_export int pattry(Patprog prog, char *string) { - return pattryrefs(prog, string, -1, -1, 0, NULL, NULL, NULL); + return pattryrefs(prog, string, -1, -1, NULL, 0, NULL, NULL, NULL); } /* @@ -2041,9 +2166,11 @@ pattry(Patprog prog, char *string) /**/ mod_export int -pattrylen(Patprog prog, char *string, int len, int unmetalen, int offset) +pattrylen(Patprog prog, char *string, int len, int unmetalen, + Patstralloc patstralloc, int offset) { - return pattryrefs(prog, string, len, unmetalen, offset, NULL, NULL, NULL); + return pattryrefs(prog, string, len, unmetalen, patstralloc, offset, + NULL, NULL, NULL); } /* @@ -2055,14 +2182,32 @@ pattrylen(Patprog prog, char *string, int len, int unmetalen, int offset) * there may be a severe penalty for this if a lot of matching is done * on one string. * - * offset is the position in the original string (not seen by + * If patstralloc is not NULL it is used to optimise unmetafication + * of a trial string that may be passed (or any substring may be passed) to + * pattryrefs multiple times for the same pattern (N.B. so patstralloc + * depends on both prog *and* the trial string). This should only be + * done if there is no path prefix (pathpos == 0) as otherwise the path + * buffer and unmetafied string may not match. To do this, + * patallocstr() is called (use force = 1 to ensure it is always + * unmetafied); patstralloc points to existing storage. When all + * pattern matching is done, patfreestr() is called. + * patstralloc->alloced and patstralloc->unmetalen contain the + * unmetafied string and its length. 
In that case, the rules for the + * earlier arguments change: + * - string is an unmetafied string + * - stringlen is its unmetafied (i.e. actual) length + * - unmetalenin is not used. + * string and stringlen may refer to arbitrary substrings of + * patstralloc->alloced without any internal modification to patstralloc. + * + * patoffset is the position in the original string (not seen by * the pattern module) at which we are trying to match. * This is added in to the positions recorded in patbeginp and patendp * when we are looking for substrings. Currently this only happens * in the parameter substitution code. * - * Note this is a character offset, i.e. a metafied character - * counts as 1. + * Note this is a character offset, i.e. a single possibly metafied and + * possibly multibyte character counts as 1. * * The last three arguments are used to report the positions for the * backreferences. On entry, *nump should contain the maximum number @@ -2075,14 +2220,15 @@ pattrylen(Patprog prog, char *string, int len, int unmetalen, int offset) /**/ mod_export int -pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, - int patoffset, +pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin, + Patstralloc patstralloc, int patoffset, int *nump, int *begp, int *endp) { - int i, maxnpos = 0, ret, needfullpath, unmetalenp; + int i, maxnpos = 0, ret; int origlen; - char **sp, **ep, *tryalloced, *ptr; + char **sp, **ep, *ptr; char *progstr = (char *)prog + prog->startoff; + struct patstralloc patstralloc_struct; if (nump) { maxnpos = *nump; @@ -2091,86 +2237,38 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, /* inherited from domatch, but why, exactly? 
*/ if (*string == Nularg) { string++; - unmetalen--; + if (unmetalenin > 0) + unmetalenin--; + if (stringlen > 0) + stringlen--; } if (stringlen < 0) stringlen = strlen(string); origlen = stringlen; - patflags = prog->flags; - /* - * For a top-level ~-exclusion, we will need the full - * path to exclude, so copy the path so far and append the - * current test string. - */ - needfullpath = (patflags & PAT_HAS_EXCLUDP) && pathpos; - - /* Get the length of the full string when unmetafied. */ - if (unmetalen < 0) - unmetalen = ztrsub(string + stringlen, string); - if (needfullpath) - unmetalenp = ztrsub(pathbuf + pathpos, pathbuf); - else - unmetalenp = 0; - - DPUTS(needfullpath && (patflags & (PAT_PURES|PAT_ANY)), - "rum sort of file exclusion"); - /* - * Partly for efficiency, and partly for the convenience of - * globbing, we don't unmetafy pure string patterns, and - * there's no reason to if the pattern is just a *. - */ - if (!(patflags & (PAT_PURES|PAT_ANY)) - && (needfullpath || unmetalen != stringlen)) { - /* - * We need to copy if we need to prepend the path so far - * (in which case we copy both chunks), or if we have - * Meta characters. 
- */ - char *dst; - int icopy, ncopy; - - dst = tryalloced = zalloc(unmetalen + unmetalenp); - - if (needfullpath) { - /* loop twice, copy path buffer first time */ - ptr = pathbuf; - ncopy = unmetalenp; - } else { - /* just loop once, copy string with unmetafication */ - ptr = string; - ncopy = unmetalen; - } - for (icopy = 0; icopy < 2; icopy++) { - for (i = 0; i < ncopy; i++) { - if (*ptr == Meta) { - ptr++; - *dst++ = *ptr++ ^ 32; - } else { - *dst++ = *ptr++; - } - } - if (!needfullpath) - break; - /* next time append test string to path so far */ - ptr = string; - ncopy = unmetalen; - } - - if (needfullpath) { - patinstart = tryalloced + unmetalenp; - patinpath = tryalloced; - } else { - patinstart = tryalloced; - patinpath = NULL; - } - stringlen = unmetalen; - } else { + if (patstralloc) { + DPUTS(!patstralloc->alloced, + "External unmetafy didn't actually unmetafy."); + DPUTS(patstralloc->unmetalenp, + "Ooh-err: pathpos with external unmetafy. I have bad vibes."); + patinpath = NULL; patinstart = string; - tryalloced = patinpath = NULL; + /* stringlen is unmetafied length; unmetalenin is ignored */ + } else { + patstralloc = &patstralloc_struct; + if (patallocstr(prog, string, stringlen, unmetalenin, 0, patstralloc)) { + patinstart = patstralloc->alloced + patstralloc->unmetalenp; + stringlen = patstralloc->unmetalen; + } else + patinstart = string; + if (patstralloc->unmetalenp) + patinpath = patstralloc->alloced; + else + patinpath = NULL; } + patflags = prog->flags; patinend = patinstart + stringlen; /* * From now on we do not require NULL termination of @@ -2183,7 +2281,30 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, * Either we are testing against a pure string, * or we can match anything at all. */ - int ret; + int ret, pstrlen; + char *pstr; + if (patstralloc->alloced) + { + /* + * Unmetafied; we need pattern string that's also unmetafied. + * We'll cache it in the patstralloc structure. + * Note it's on the heap. 
+ */ + if (!patstralloc->progstrunmeta) + { + patstralloc->progstrunmeta = dupstring(progstr); + unmetafy(patstralloc->progstrunmeta, + &patstralloc->progstrunmetalen); + } + pstr = patstralloc->progstrunmeta; + pstrlen = patstralloc->progstrunmetalen; + } + else + { + /* Metafied. */ + pstr = progstr; + pstrlen = (int)prog->patmlen; + } if (prog->flags & PAT_ANY) { /* * Optimisation for a single "*": always matches @@ -2195,11 +2316,11 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, * Testing a pure string. See if initial * components match. */ - int lendiff = stringlen - prog->patmlen; + int lendiff = stringlen - pstrlen; if (lendiff < 0) { /* No, the pattern string is too long. */ ret = 0; - } else if (!memcmp(progstr, patinstart, prog->patmlen)) { + } else if (!memcmp(pstr, patinstart, pstrlen)) { /* * Initial component matches. Matches either * if lengths are the same or we are not anchored @@ -2221,7 +2342,9 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, } else { /* * Remember the length in case used for ${..#..} etc. - * In this case, we didn't unmetafy the string. + * In this case, we didn't unmetafy the pattern string + * in the original structure, but it might be unmetafied + * for use with an unmetafied test string. */ patinlen = (int)prog->patmlen; /* if matching files, must update globbing flags */ @@ -2229,16 +2352,26 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, if ((patglobflags & GF_MATCHREF) && !(patflags & PAT_FILE)) { - char *str = ztrduppfx(patinstart, patinlen); + char *str; int mlen; - /* - * Count the characters. We're not using CHARSUB() - * because the string is still metafied. - */ - MB_METACHARINIT(); - mlen = MB_METASTRLEN2END(patinstart, 0, - patinstart + patinlen); + if (patstralloc->alloced) { + /* + * Unmetafied: pstrlen contains unmetafied + * length in bytes. 
+ */ + str = metafy(patinstart, pstrlen, META_ALLOC); + mlen = CHARSUB(patinstart, patinstart + pstrlen); + } else { + str = ztrduppfx(patinstart, patinlen); + /* + * Count the characters. We're not using CHARSUB() + * because the string is still metafied. + */ + MB_METACHARINIT(); + mlen = MB_METASTRLEN2END(patinstart, 0, + patinstart + patinlen); + } setsparam("MATCH", str); setiparam("MBEGIN", @@ -2250,9 +2383,8 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, } } - if (tryalloced) - zfree(tryalloced, unmetalen + unmetalenp); - + if (patstralloc == &patstralloc_struct) + patfreestr(patstralloc); return ret; } else { int q = queue_signal_level(); @@ -2289,8 +2421,8 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, } } if (!ret) { - if (tryalloced) - zfree(tryalloced, unmetalen + unmetalenp); + if (patstralloc == &patstralloc_struct) + patfreestr(patstralloc); return 0; } @@ -2322,8 +2454,11 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, /* * Optimization: if we didn't find any Meta characters * to begin with, we don't need to look for them now. + * Only do this if we did the unmetafication internally, + * since otherwise it's too hard to work out. 
*/ - if (unmetalen != origlen) { + if (patstralloc == &patstralloc_struct && + patstralloc->unmetalen != origlen) { for (ptr = patinstart; ptr < patinput; ptr++) if (imeta(*ptr)) patinlen++; @@ -2444,8 +2579,8 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, restore_queue_signals(q); - if (tryalloced) - zfree(tryalloced, unmetalen + unmetalenp); + if (patstralloc == &patstralloc_struct) + patfreestr(patstralloc); return ret; } diff --git a/Src/zsh.h b/Src/zsh.h index dd0596116..32f2e0cb2 100644 --- a/Src/zsh.h +++ b/Src/zsh.h @@ -491,6 +491,7 @@ typedef struct options *Options; typedef struct optname *Optname; typedef struct param *Param; typedef struct paramdef *Paramdef; +typedef struct patstralloc *Patstralloc; typedef struct patprog *Patprog; typedef struct prepromptfn *Prepromptfn; typedef struct process *Process; @@ -1470,6 +1471,15 @@ struct patprog { char patstartch; }; +struct patstralloc { + int unmetalen; /* Unmetafied length of trial string */ + int unmetalenp; /* Unmetafied length of path prefix. + If 0, no path prefix. */ + char *alloced; /* Allocated string, may be NULL */ + char *progstrunmeta; /* Unmetafied pure string in pattern, cached */ + int progstrunmetalen; /* Length of the foregoing */ +}; + /* Flags used in pattern matchers (Patprog) and passed down to patcompile */ #define PAT_FILE 0x0001 /* Pattern is a file name */ -- cgit 1.4.1