diff options
author | Peter Stephenson <pws@users.sourceforge.net> | 2006-11-01 12:25:18 +0000 |
---|---|---|
committer | Peter Stephenson <pws@users.sourceforge.net> | 2006-11-01 12:25:18 +0000 |
commit | 9471bbcfa06a87dcb6803a36d1208c214fa56003 (patch) | |
tree | f5edcdab07ad4b718037fed3b15a7bbb3e4961f2 | |
parent | e8d1ef3613e4c43e0b0c08ac2a124f4cc99ab652 (diff) | |
download | zsh-9471bbcfa06a87dcb6803a36d1208c214fa56003.tar.gz zsh-9471bbcfa06a87dcb6803a36d1208c214fa56003.tar.xz zsh-9471bbcfa06a87dcb6803a36d1208c214fa56003.zip |
22934, modified, see 22937: add HIST_SUBST_PATTERN option
make ${.../#%...} anchor at both ends
-rw-r--r-- | ChangeLog | 8 | ||||
-rw-r--r-- | Completion/compinit | 17 | ||||
-rw-r--r-- | Doc/Zsh/expn.yo | 51 | ||||
-rw-r--r-- | Doc/Zsh/options.yo | 9 | ||||
-rw-r--r-- | INSTALL | 2 | ||||
-rw-r--r-- | README | 7 | ||||
-rw-r--r-- | Src/glob.c | 27 | ||||
-rw-r--r-- | Src/hist.c | 109 | ||||
-rw-r--r-- | Src/options.c | 1 | ||||
-rw-r--r-- | Src/subst.c | 34 | ||||
-rw-r--r-- | Src/zsh.h | 4 | ||||
-rw-r--r-- | Test/E01options.ztst | 14 |
12 files changed, 222 insertions, 61 deletions
diff --git a/ChangeLog b/ChangeLog index 322c96862..f0709eda9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2006-11-01 Peter Stephenson <pws@csr.com> + + * 22934, with modifications (c.f. 22937): INSTALL, README, + Completion/compinit, Doc/Zsh/expn.yo, Doc/Zsh/options.yo, + Src/glob.c, Src/hist.c, Src/options.c, Src/subst.c, Src/zsh.h, + Test/E01options.ztst: add HIST_SUBST_PATTERN option, make + ${.../#%...} anchor at both ends. + 2006-11-01 Clint Adams <clint@zsh.org> * 22940: R. Ramkumar: Completion/Unix/Command/_mkdir: diff --git a/Completion/compinit b/Completion/compinit index a78fd1ce2..ad24ac7a5 100644 --- a/Completion/compinit +++ b/Completion/compinit @@ -128,25 +128,26 @@ fi # The standard options set in completion functions. _comp_options=( - extendedglob bareglobqual + extendedglob glob multibyte nullglob rcexpandparam unset - NO_markdirs + NO_allexport + NO_aliases + NO_cshnullglob + NO_errexit NO_globsubst - NO_shwordsplit - NO_shglob + NO_histsubstpattern NO_kshglob NO_ksharrays NO_kshtypeset - NO_cshnullglob - NO_allexport - NO_aliases - NO_errexit + NO_markdirs NO_octalzeroes + NO_shwordsplit + NO_shglob NO_warncreateglobal ) diff --git a/Doc/Zsh/expn.yo b/Doc/Zsh/expn.yo index 6aa31c76a..584977c18 100644 --- a/Doc/Zsh/expn.yo +++ b/Doc/Zsh/expn.yo @@ -258,7 +258,8 @@ item(tt(s/)var(l)tt(/)var(r)[tt(/)])( Substitute var(r) for var(l) as described below. The substitution is done only for the first string that matches var(l). For arrays and for filename -generation, this applies to each word of the expanded text. +generation, this applies to each word of the expanded text. See +below for further notes on substitutions. The forms `tt(gs/)var(l)tt(/)var(r)' and `tt(s/)var(l)tt(/)var(r)tt(/:G)' perform global substitution, i.e. substitute every occurrence of var(r) @@ -273,8 +274,8 @@ backslash. ) enditem() -The tt(s/l/r/) substitution works as follows. The left-hand side of -substitutions are not regular expressions, but character strings. 
Any +The tt(s/l/r/) substitution works as follows. By default the left-hand +side of substitutions are not patterns, but character strings. Any character can be used as the delimiter in place of `tt(/)'. A backslash quotes the delimiter character. The character `tt(&)', in the right-hand-side var(r), is replaced by the text from the @@ -286,6 +287,41 @@ the rightmost `tt(?)' in a context scan can similarly be omitted. Note the same record of the last var(l) and var(r) is maintained across all forms of expansion. +If the option tt(HIST_SUBST_PATTERN) is set, var(l) is treated as +a pattern of the usual form described in +ifzman(the section FILENAME GENERATION below)\ +ifnzman(noderef(Filename Generation)). This can be used in +all the places where modifiers are available; note, however, that +in globbing qualifiers parameter substitution has already taken place, +so parameters in the replacement string should be quoted to ensure +they are replaced at the correct time. +Note also that complicated patterns used in globbing qualifiers may +need the extended glob qualifier notation +tt(LPAR()#q:s/)var(...)tt(/)var(...)tt(/RPAR()) in order for the +shell to recognize the expression as a glob qualifier. Further, +note that bad patterns in the substitution are not subject to +the tt(NO_BAD_PATTERN) option so will cause an error. + +When tt(HIST_SUBST_PATTERN) is set, var(l) may start with a tt(#) +to indicate that the pattern must match at the start of the string +to be substituted, and a tt(%) may appear at the start or after a tt(#) +to indicate that the pattern must match at the end of the string +to be substituted. The tt(%) or tt(#) may be quoted with two +backslashes.
+ +For example, the following piece of filename generation code +with the tt(EXTENDED_GLOB) option: + +example(print *.c+LPAR()#q:s/#%+LPAR()#b+RPAR()s+LPAR()*+RPAR().c/'S${match[1]}.C'/+RPAR()) + +takes the expansion of tt(*.c) and applies the glob qualifiers in the +tt(LPAR()#q)var(...)tt(RPAR()) expression, which consists of a substitution +modifier anchored to the start and end of each word (tt(#%)). This +turns on backreferences (tt(LPAR()#b+RPAR())), so that the parenthesised +subexpression is available in the replacement string as tt(${match[1]}). +The replacement string is quoted so that the parameter is not substituted +before the start of filename generation. + The following tt(f), tt(F), tt(w) and tt(W) modifiers work only with parameter expansion and filename generation. They are listed here to provide a single point of reference for all modifiers. @@ -530,13 +566,14 @@ substituted as tt(${~opat}). The var(pattern) may begin with a `tt(#)', in which case the var(pattern) must match at the start of the string, or `tt(%)', in -which case it must match at the end of the string. The var(repl) may +which case it must match at the end of the string, or `tt(#%)' in which +case the var(pattern) must match the entire string. The var(repl) may be an empty string, in which case the final `tt(/)' may also be omitted. To quote the final `tt(/)' in other cases it should be preceded by a single backslash; this is not necessary if the -`tt(/)' occurs inside a substituted parameter. Note also that the `tt(#)' -and `tt(%)' are not active if they occur inside a substituted parameter, -even at the start. +`tt(/)' occurs inside a substituted parameter. Note also that the `tt(#)', +`tt(%)' and `tt(#%)' are not active if they occur inside a substituted +parameter, even at the start. The first `tt(/)' may be preceded by a `tt(:)', in which case the match will only succeed if it matches the entire word.
Note also the diff --git a/Doc/Zsh/options.yo b/Doc/Zsh/options.yo index 02d8fa046..d4e1deeef 100644 --- a/Doc/Zsh/options.yo +++ b/Doc/Zsh/options.yo @@ -376,6 +376,15 @@ characters resulting from command substitution as being eligible for filename generation. Braces (and commas in between) do not become eligible for expansion. ) +pindex(HIST_SUBST_PATTERN) +item(tt(HIST_SUBST_PATTERN))( +Substitutions using the tt(:s) and tt(:&) history modifiers are performed +with pattern matching instead of string matching. This occurs wherever +history modifiers are valid, including glob qualifiers and parameters. +See +ifzman(the section Modifiers in zmanref(zshexp))\ +ifnzman(noderef(Modifiers)). +) pindex(IGNORE_BRACES) cindex(disabling brace expansion) cindex(brace expansion, disabling) diff --git a/INSTALL b/INSTALL index 1c76107e0..7f604a27a 100644 --- a/INSTALL +++ b/INSTALL @@ -270,7 +270,7 @@ handled properly (some assistance with this problem would be appreciated). The configuration script should turn on multibyte support on all systems where it can be compiled successfully. -The support can be explicitly enabled or disable with --enable-multibyte or +The support can be explicitly enabled or disabled with --enable-multibyte or --disable-multibyte. The developers are not aware of any need to use --disable-multibyte and this should be reported as a bug. Currently multibyte mode is believed to work on at least the following: diff --git a/README b/README index 78fcf5489..d4ad770a3 100644 --- a/README +++ b/README @@ -49,6 +49,13 @@ The variable HOME is no longer set by the shell if zsh is emulating any other shell at startup; it must be present in the environment or set subsequently by the user. It is valid for the variable to be unset. +Parameter substitutions in the form ${param//#%search/replace} match +against "search" anchored at both ends of the parameter value. Previously +this syntax would have matched against "%search", anchored only at the head +of the value. 
The form ${param//#$search/replace} where the value +$search starts with "%" considers the "%" to be part of the search +string as before. + The MULTIBYTE option is on by default where it is available; this causes many operations to recognise characters as in the current locale. Older versions of the shell always assumed a character was one byte. diff --git a/Src/glob.c b/Src/glob.c index 130f8e0c2..201427bdb 100644 --- a/Src/glob.c +++ b/Src/glob.c @@ -2294,6 +2294,21 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr) return 1; } if (matched) { + /* + * The default behaviour is to match at the start; this + * is modified by SUB_END and SUB_SUBSTR. SUB_END matches + * at the end of the string instead of the start. SUB_SUBSTR + * without SUB_END matches substrings searching from the start; + * with SUB_END it matches substrings searching from the end. + * + * The possibilities are further modified by whether we want the + * longest (SUB_LONG) or shortest possible match. + * + * SUB_START is only used in the case where we are also + * forcing a match at the end (SUB_END with no SUB_SUBSTR, + * with or without SUB_LONG), to indicate we should match + * the entire string. 
+ */ switch (fl & (SUB_END|SUB_LONG|SUB_SUBSTR)) { case 0: case SUB_LONG: @@ -2341,13 +2356,15 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr) set_pat_start(p, t-s); if (pattrylen(p, t, s + l - t, umlen, ioff)) tmatch = t; + if (fl & SUB_START) + break; umlen -= iincchar(&t); } if (tmatch) { *sp = get_match_ret(*sp, tmatch - s, l, fl, replstr, repllist); return 1; } - if (pattrylen(p, s + l, 0, 0, ioff)) { + if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, ioff)) { *sp = get_match_ret(*sp, l, l, fl, replstr, repllist); return 1; } @@ -2364,8 +2381,14 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr) *sp = get_match_ret(*sp, t-s, l, fl, replstr, repllist); return 1; } + if (fl & SUB_START) + break; umlen -= iincchar(&t); } + if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, ioff)) { + *sp = get_match_ret(*sp, l, l, fl, replstr, repllist); + return 1; + } break; case SUB_SUBSTR: @@ -2566,7 +2589,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr) /* munge the whole string: no match, so no replstr */ *sp = get_match_ret(*sp, 0, 0, fl, 0, 0); - return 1; + return (fl & SUB_RETFAIL) ? 0 : 1; } /**/ diff --git a/Src/hist.c b/Src/hist.c index 33c4035bf..68163181e 100644 --- a/Src/hist.c +++ b/Src/hist.c @@ -323,7 +323,8 @@ getsubsargs(char *subline, int *gbalp, int *cflagp) if (strlen(ptr1)) { zsfree(hsubl); hsubl = ptr1; - } + } else if (!hsubl) /* fail silently on this */ + return 0; zsfree(hsubr); hsubr = ptr2; follow = ingetc(); @@ -337,11 +338,6 @@ getsubsargs(char *subline, int *gbalp, int *cflagp) } } else inungetc(follow); - if (hsubl && !strstr(subline, hsubl)) { - herrflush(); - zerr("substitution failed"); - return 1; - } return 0; } @@ -354,6 +350,15 @@ getargc(Histent ehist) return ehist->nwords ? ehist->nwords-1 : 0; } +/**/ +static int +substfailed(void) +{ + herrflush(); + zerr("substitution failed"); + return -1; +} + /* Perform history substitution, returning the next character afterwards. 
*/ /**/ @@ -376,10 +381,15 @@ histsubchar(int c) isfirstch = 0; inungetc(hatchar); if (!(ehist = gethist(defev)) - || !(sline = getargs(ehist, 0, getargc(ehist))) - || getsubsargs(sline, &gbal, &cflag) || !hsubl) + || !(sline = getargs(ehist, 0, getargc(ehist)))) return -1; - subst(&sline, hsubl, hsubr, gbal); + + if (getsubsargs(sline, &gbal, &cflag)) + return substfailed(); + if (!hsubl) + return -1; + if (subst(&sline, hsubl, hsubr, gbal)) + return substfailed(); } else { /* Line doesn't begin ^foo^bar */ if (c != ' ') @@ -608,9 +618,10 @@ histsubchar(int c) if (getsubsargs(sline, &gbal, &cflag)) return -1; /* fall through */ case '&': - if (hsubl && hsubr) - subst(&sline, hsubl, hsubr, gbal); - else { + if (hsubl && hsubr) { + if (subst(&sline, hsubl, hsubr, gbal)) + return substfailed(); + } else { herrflush(); zerr("no previous substitution"); return -1; @@ -1629,30 +1640,72 @@ casemodify(char *str, int how) return str2; } + +/* + * Substitute "out" for "in" in "*strptr" and update "*strptr". + * If "gbal", do global substitution. + * + * This returns a result from the heap. There seems to have + * been some confusion on this point.
+ */ + /**/ -void +int subst(char **strptr, char *in, char *out, int gbal) { - char *str = *strptr, *instr = *strptr, *substcut, *sptr, *oldstr; + char *str = *strptr, *substcut, *sptr; int off, inlen, outlen; if (!*in) in = str, gbal = 0; - if (!(substcut = (char *)strstr(str, in))) - return; - inlen = strlen(in); - sptr = convamps(out, in, inlen); - outlen = strlen(sptr); - do { - *substcut = '\0'; - off = substcut - *strptr + outlen; - substcut += inlen; - *strptr = tricat(oldstr = *strptr, sptr, substcut); - if (oldstr != instr) - zsfree(oldstr); - str = (char *)*strptr + off; - } while (gbal && (substcut = (char *)strstr(str, in))); + if (isset(HISTSUBSTPATTERN)) { + int fl = SUB_LONG|SUB_REST|SUB_RETFAIL; + char *oldin = in; + if (gbal) + fl |= SUB_GLOBAL; + if (*in == '#' || *in == Pound) { + /* anchor at head, flag needed if SUB_END is also set */ + fl |= SUB_START; + in++; + } + if (*in == '%') { + /* anchor at tail */ + in++; + fl |= SUB_END; + } + if (in == oldin) { + /* no anchor, substring match */ + fl |= SUB_SUBSTR; + } + if (in == str) + in = dupstring(in); + if (parse_subst_string(in) || errflag) + return 1; + if (parse_subst_string(out) || errflag) + return 1; + singsub(&in); + if (getmatch(strptr, in, fl, 1, out)) + return 0; + } else { + if ((substcut = (char *)strstr(str, in))) { + inlen = strlen(in); + sptr = convamps(out, in, inlen); + outlen = strlen(sptr); + + do { + *substcut = '\0'; + off = substcut - *strptr + outlen; + substcut += inlen; + *strptr = zhtricat(*strptr, sptr, substcut); + str = (char *)*strptr + off; + } while (gbal && (substcut = (char *)strstr(str, in))); + + return 0; + } + } + + return 1; } /**/ diff --git a/Src/options.c b/Src/options.c index d30553050..821773d2f 100644 --- a/Src/options.c +++ b/Src/options.c @@ -137,6 +137,7 @@ static struct optname optns[] = { {{NULL, "histignorespace", 0}, HISTIGNORESPACE}, {{NULL, "histnofunctions", 0}, HISTNOFUNCTIONS}, {{NULL, "histnostore", 0}, HISTNOSTORE}, +{{NULL, 
"histsubstpattern", OPT_EMULATE}, HISTSUBSTPATTERN}, {{NULL, "histreduceblanks", 0}, HISTREDUCEBLANKS}, {{NULL, "histsavebycopy", OPT_ALL}, HISTSAVEBYCOPY}, {{NULL, "histsavenodups", 0}, HISTSAVENODUPS}, diff --git a/Src/subst.c b/Src/subst.c index 8ef8d446e..abc3c82af 100644 --- a/Src/subst.c +++ b/Src/subst.c @@ -2355,15 +2355,24 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) c = *++s; } /* Check for anchored substitution */ - if (c == '%') { + if (c == '#' || c == Pound) { + /* + * anchor at head: this is the `normal' case in + * getmatch and we only require the flag if SUB_END + * is also present. + */ + flags |= SUB_START; + s++; + } + if (*s == '%') { /* anchor at tail */ flags |= SUB_END; s++; - } else if (c == '#' || c == Pound) { - /* anchor at head: this is the `normal' case in getmatch */ - s++; - } else + } + if (!(flags & (SUB_START|SUB_END))) { + /* No anchor, so substring */ flags |= SUB_SUBSTR; + } /* * Find the / marking the end of the search pattern. * If there isn't one, we're just going to delete that, @@ -2526,7 +2535,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) /* This once was executed only `if (qt) ...'. But with that * patterns in a expansion resulting from a ${(e)...} aren't * tokenized even though this function thinks they are (it thinks - * they are because subst_parse_str() turns Qstring tokens + * they are because parse_subst_str() turns Qstring tokens * into String tokens and for unquoted parameter expansions the * lexer normally does tokenize patterns inside parameter * expansions). 
*/ @@ -3273,6 +3282,7 @@ modify(char **str, char **ptr) break; case 's': + /* TODO: multibyte delimiter */ c = **ptr; (*ptr)++; ptr1 = *ptr; @@ -3298,7 +3308,8 @@ modify(char **str, char **ptr) for (tt = hsubl; *tt; tt++) if (inull(*tt) && *tt != Bnullkeep) chuck(tt--); - untokenize(hsubl); + if (!isset(HISTSUBSTPATTERN)) + untokenize(hsubl); for (tt = hsubr = ztrdup(ptr2); *tt; tt++) if (inull(*tt) && *tt != Bnullkeep) chuck(tt--); @@ -3444,15 +3455,8 @@ modify(char **str, char **ptr) *str = casemodify(*str, CASMOD_UPPER); break; case 's': - if (hsubl && hsubr) { - char *oldstr = *str; - + if (hsubl && hsubr) subst(str, hsubl, hsubr, gbal); - if (*str != oldstr) { - *str = dupstring(oldstr = *str); - zsfree(oldstr); - } - } break; case 'q': *str = quotestring(*str, NULL, QT_BACKSLASH); diff --git a/Src/zsh.h b/Src/zsh.h index c73ae3b9a..1c693fef4 100644 --- a/Src/zsh.h +++ b/Src/zsh.h @@ -1405,6 +1405,9 @@ struct tieddata { #define SUB_ALL 0x0100 /* match complete string */ #define SUB_GLOBAL 0x0200 /* global substitution ${..//all/these} */ #define SUB_DOSUBST 0x0400 /* replacement string needs substituting */ +#define SUB_RETFAIL 0x0800 /* return status 0 if no match */ +#define SUB_START 0x1000 /* force match at start with SUB_END + * and no SUB_SUBSTR */ /* Flags as the second argument to prefork */ #define PF_TYPESET 0x01 /* argument handled like typeset foo=bar */ @@ -1631,6 +1634,7 @@ enum { HISTREDUCEBLANKS, HISTSAVEBYCOPY, HISTSAVENODUPS, + HISTSUBSTPATTERN, HISTVERIFY, HUP, IGNOREBRACES, diff --git a/Test/E01options.ztst b/Test/E01options.ztst index da4020c15..1fbe0cc93 100644 --- a/Test/E01options.ztst +++ b/Test/E01options.ztst @@ -487,6 +487,20 @@ >tmpcd tmpfile1 tmpfile2 >tmp* + setopt histsubstpattern + print *(:s/t??/TING/) + foo=(tmp*) + print ${foo:s/??p/THUMP/} + foo=(one.c two.c three.c) + print ${foo:s/#%(#b)t(*).c/T${match[1]}.X/} + print *(#q:s/#(#b)tmp(*e)/'scrunchy${match[1]}'/) + unsetopt histsubstpattern +0:HIST_SUBST_PATTERN option 
+>TINGcd TINGfile1 TINGfile2 +>THUMPcd THUMPfile1 THUMPfile2 +>one.c Two.X Three.X +>scrunchyfile1 scrunchyfile2 tmpcd + setopt ignorebraces echo X{a,b}Y unsetopt ignorebraces |