From 9471bbcfa06a87dcb6803a36d1208c214fa56003 Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Wed, 1 Nov 2006 12:25:18 +0000 Subject: 22934, modified, see 22937: add HIST_SUBST_PATTERN option make ${.../#%...} anchor at both ends --- Src/glob.c | 27 +++++++++++++-- Src/hist.c | 109 +++++++++++++++++++++++++++++++++++++++++++--------------- Src/options.c | 1 + Src/subst.c | 34 ++++++++++-------- Src/zsh.h | 4 +++ 5 files changed, 130 insertions(+), 45 deletions(-) (limited to 'Src') diff --git a/Src/glob.c b/Src/glob.c index 130f8e0c2..201427bdb 100644 --- a/Src/glob.c +++ b/Src/glob.c @@ -2294,6 +2294,21 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr) return 1; } if (matched) { + /* + * The default behaviour is to match at the start; this + * is modified by SUB_END and SUB_SUBSTR. SUB_END matches + * at the end of the string instead of the start. SUB_SUBSTR + * without SUB_END matches substrings searching from the start; + * with SUB_END it matches substrings searching from the end. + * + * The possibilities are further modified by whether we want the + * longest (SUB_LONG) or shortest possible match. + * + * SUB_START is only used in the case where we are also + * forcing a match at the end (SUB_END with no SUB_SUBSTR, + * with or without SUB_LONG), to indicate we should match + * the entire string. + */ switch (fl & (SUB_END|SUB_LONG|SUB_SUBSTR)) { case 0: case SUB_LONG: @@ -2341,13 +2356,15 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr) set_pat_start(p, t-s); if (pattrylen(p, t, s + l - t, umlen, ioff)) tmatch = t; + if (fl & SUB_START) + break; umlen -= iincchar(&t); } if (tmatch) { *sp = get_match_ret(*sp, tmatch - s, l, fl, replstr, repllist); return 1; } - if (pattrylen(p, s + l, 0, 0, ioff)) { + if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, ioff)) { *sp = get_match_ret(*sp, l, l, fl, replstr, repllist); return 1; } @@ -2364,8 +2381,14 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr) *sp = get_match_ret(*sp, t-s, l, fl, replstr, repllist); return 1; } + if (fl & SUB_START) + break; umlen -= iincchar(&t); } + if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, ioff)) { + *sp = get_match_ret(*sp, l, l, fl, replstr, repllist); + return 1; + } break; case SUB_SUBSTR: @@ -2566,7 +2589,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr) /* munge the whole string: no match, so no replstr */ *sp = get_match_ret(*sp, 0, 0, fl, 0, 0); - return 1; + return (fl & SUB_RETFAIL) ? 0 : 1; } /**/ diff --git a/Src/hist.c b/Src/hist.c index 33c4035bf..68163181e 100644 --- a/Src/hist.c +++ b/Src/hist.c @@ -323,7 +323,8 @@ getsubsargs(char *subline, int *gbalp, int *cflagp) if (strlen(ptr1)) { zsfree(hsubl); hsubl = ptr1; - } + } else if (!hsubl) /* fail silently on this */ + return 0; zsfree(hsubr); hsubr = ptr2; follow = ingetc(); @@ -337,11 +338,6 @@ getsubsargs(char *subline, int *gbalp, int *cflagp) } } else inungetc(follow); - if (hsubl && !strstr(subline, hsubl)) { - herrflush(); - zerr("substitution failed"); - return 1; - } return 0; } @@ -354,6 +350,15 @@ getargc(Histent ehist) return ehist->nwords ? ehist->nwords-1 : 0; } +/**/ +static int +substfailed(void) +{ + herrflush(); + zerr("substitution failed"); + return -1; +} + /* Perform history substitution, returning the next character afterwards. */ /**/ @@ -376,10 +381,15 @@ histsubchar(int c) isfirstch = 0; inungetc(hatchar); if (!(ehist = gethist(defev)) - || !(sline = getargs(ehist, 0, getargc(ehist))) - || getsubsargs(sline, &gbal, &cflag) || !hsubl) + || !(sline = getargs(ehist, 0, getargc(ehist)))) return -1; - subst(&sline, hsubl, hsubr, gbal); + + if (getsubsargs(sline, &gbal, &cflag)) + return substfailed(); + if (!hsubl) + return -1; + if (subst(&sline, hsubl, hsubr, gbal)) + return substfailed(); } else { /* Line doesn't begin ^foo^bar */ if (c != ' ') @@ -608,9 +618,10 @@ histsubchar(int c) if (getsubsargs(sline, &gbal, &cflag)) return -1; /* fall through */ case '&': - if (hsubl && hsubr) - subst(&sline, hsubl, hsubr, gbal); - else { + if (hsubl && hsubr) { + if (subst(&sline, hsubl, hsubr, gbal)) + return substfailed(); + } else { herrflush(); zerr("no previous substitution"); return -1; @@ -1629,30 +1640,72 @@ casemodify(char *str, int how) return str2; } + +/* + * Substitute "in" for "out" in "*strptr" and update "*strptr". + * If "gbal", do global substitution. + * + * This returns a result from the heap. There seems to have + * been some confusion on this point. + */ + /**/ -void +int subst(char **strptr, char *in, char *out, int gbal) { - char *str = *strptr, *instr = *strptr, *substcut, *sptr, *oldstr; + char *str = *strptr, *substcut, *sptr; int off, inlen, outlen; if (!*in) in = str, gbal = 0; - if (!(substcut = (char *)strstr(str, in))) - return; - inlen = strlen(in); - sptr = convamps(out, in, inlen); - outlen = strlen(sptr); - do { - *substcut = '\0'; - off = substcut - *strptr + outlen; - substcut += inlen; - *strptr = tricat(oldstr = *strptr, sptr, substcut); - if (oldstr != instr) - zsfree(oldstr); - str = (char *)*strptr + off; - } while (gbal && (substcut = (char *)strstr(str, in))); + if (isset(HISTSUBSTPATTERN)) { + int fl = SUB_LONG|SUB_REST|SUB_RETFAIL; + char *oldin = in; + if (gbal) + fl |= SUB_GLOBAL; + if (*in == '#' || *in == Pound) { + /* anchor at head, flag needed if SUB_END is also set */ + fl |= SUB_START; + in++; + } + if (*in == '%') { + /* anchor at tail */ + in++; + fl |= SUB_END; + } + if (in == oldin) { + /* no anchor, substring match */ + fl |= SUB_SUBSTR; + } + if (in == str) + in = dupstring(in); + if (parse_subst_string(in) || errflag) + return 1; + if (parse_subst_string(out) || errflag) + return 1; + singsub(&in); + if (getmatch(strptr, in, fl, 1, out)) + return 0; + } else { + if ((substcut = (char *)strstr(str, in))) { + inlen = strlen(in); + sptr = convamps(out, in, inlen); + outlen = strlen(sptr); + + do { + *substcut = '\0'; + off = substcut - *strptr + outlen; + substcut += inlen; + *strptr = zhtricat(*strptr, sptr, substcut); + str = (char *)*strptr + off; + } while (gbal && (substcut = (char *)strstr(str, in))); + + return 0; + } + } + + return 1; } /**/ diff --git a/Src/options.c b/Src/options.c index d30553050..821773d2f 100644 --- a/Src/options.c +++ b/Src/options.c @@ -137,6 +137,7 @@ static struct optname optns[] = { {{NULL, "histignorespace", 0}, HISTIGNORESPACE}, {{NULL, "histnofunctions", 0}, HISTNOFUNCTIONS}, {{NULL, "histnostore", 0}, HISTNOSTORE}, +{{NULL, "histsubstpattern", OPT_EMULATE}, HISTSUBSTPATTERN}, {{NULL, "histreduceblanks", 0}, HISTREDUCEBLANKS}, {{NULL, "histsavebycopy", OPT_ALL}, HISTSAVEBYCOPY}, {{NULL, "histsavenodups", 0}, HISTSAVENODUPS}, diff --git a/Src/subst.c b/Src/subst.c index 8ef8d446e..abc3c82af 100644 --- a/Src/subst.c +++ b/Src/subst.c @@ -2355,15 +2355,24 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) c = *++s; } /* Check for anchored substitution */ - if (c == '%') { + if (c == '#' || c == Pound) { + /* + * anchor at head: this is the `normal' case in + * getmatch and we only require the flag if SUB_END + * is also present. + */ + flags |= SUB_START; + s++; + } + if (*s == '%') { /* anchor at tail */ flags |= SUB_END; s++; - } else if (c == '#' || c == Pound) { - /* anchor at head: this is the `normal' case in getmatch */ - s++; - } else + } + if (!(flags & (SUB_START|SUB_END))) { + /* No anchor, so substring */ flags |= SUB_SUBSTR; + } /* * Find the / marking the end of the search pattern. * If there isn't one, we're just going to delete that, @@ -2526,7 +2535,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) /* This once was executed only `if (qt) ...'. But with that * patterns in a expansion resulting from a ${(e)...} aren't * tokenized even though this function thinks they are (it thinks - * they are because subst_parse_str() turns Qstring tokens + * they are because parse_subst_str() turns Qstring tokens * into String tokens and for unquoted parameter expansions the * lexer normally does tokenize patterns inside parameter * expansions). */ @@ -3273,6 +3282,7 @@ modify(char **str, char **ptr) break; case 's': + /* TODO: multibyte delimiter */ c = **ptr; (*ptr)++; ptr1 = *ptr; @@ -3298,7 +3308,8 @@ modify(char **str, char **ptr) for (tt = hsubl; *tt; tt++) if (inull(*tt) && *tt != Bnullkeep) chuck(tt--); - untokenize(hsubl); + if (!isset(HISTSUBSTPATTERN)) + untokenize(hsubl); for (tt = hsubr = ztrdup(ptr2); *tt; tt++) if (inull(*tt) && *tt != Bnullkeep) chuck(tt--); @@ -3444,15 +3455,8 @@ modify(char **str, char **ptr) *str = casemodify(*str, CASMOD_UPPER); break; case 's': - if (hsubl && hsubr) { - char *oldstr = *str; - + if (hsubl && hsubr) subst(str, hsubl, hsubr, gbal); - if (*str != oldstr) { - *str = dupstring(oldstr = *str); - zsfree(oldstr); - } - } break; case 'q': *str = quotestring(*str, NULL, QT_BACKSLASH); diff --git a/Src/zsh.h b/Src/zsh.h index c73ae3b9a..1c693fef4 100644 --- a/Src/zsh.h +++ b/Src/zsh.h @@ -1405,6 +1405,9 @@ struct tieddata { #define SUB_ALL 0x0100 /* match complete string */ #define SUB_GLOBAL 0x0200 /* global substitution ${..//all/these} */ #define SUB_DOSUBST 0x0400 /* replacement string needs substituting */ +#define SUB_RETFAIL 0x0800 /* return status 0 if no match */ +#define SUB_START 0x1000 /* force match at start with SUB_END + * and no SUB_SUBSTR */ /* Flags as the second argument to prefork */ #define PF_TYPESET 0x01 /* argument handled like typeset foo=bar */ @@ -1631,6 +1634,7 @@ enum { HISTREDUCEBLANKS, HISTSAVEBYCOPY, HISTSAVENODUPS, + HISTSUBSTPATTERN, HISTVERIFY, HUP, IGNOREBRACES, -- cgit 1.4.1