From c508c6e368ac7faef55a16d9c85a2fbdd21232fc Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Mon, 18 Jan 2010 12:47:09 +0000 Subject: 27604: substitution using regular expressions --- Doc/Zsh/contrib.yo | 41 ++++++++++++++++++++++++++++++-------- Functions/Misc/.distfiles | 1 + Functions/Misc/regexp-replace | 35 ++++++++++++++++++++++++++++++++ Functions/Zle/replace-string-again | 12 ++++++++--- 4 files changed, 78 insertions(+), 11 deletions(-) create mode 100644 Functions/Misc/regexp-replace diff --git a/Doc/Zsh/contrib.yo b/Doc/Zsh/contrib.yo index 4ca85fc66..9df1770f2 100644 --- a/Doc/Zsh/contrib.yo +++ b/Doc/Zsh/contrib.yo @@ -1570,14 +1570,14 @@ tindex(replace-string-again) tindex(replace-pattern) xitem(tt(replace-string), tt(replace-pattern)) item(tt(replace-string-again), tt(replace-pattern-again))( -The function tt(replace-string) implements two widgets. +The function tt(replace-string) implements three widgets. If defined under the same name as the function, it prompts for two strings; the first (source) string will be replaced by the second everywhere it occurs in the line editing buffer. If the widget name contains the word `tt(pattern)', for example by defining the widget using the command `tt(zle -N replace-pattern -replace-string)', then the replacement is done by pattern matching. All +replace-string)', then the matching is performed using zsh patterns. All zsh extended globbing patterns can be used in the source string; note that unlike filename generation the pattern does not need to match an entire word, nor do glob qualifiers have any effect. In addition, the @@ -1588,6 +1588,12 @@ replaced by the var(N)th parenthesised expression matched. The form `tt(\{)var(N)tt(})' may be used to protect the digit from following digits. 
+If the widget name instead contains the word `tt(regex)' (or `tt(regexp)'), +then the matching is performed using regular expressions, respecting +the setting of the option tt(RE_MATCH_PCRE) (see the description of the +function tt(regexp-replace) below). The facilities described +for pattern matching are also available. + By default the previous source or replacement string will not be offered for editing. However, this feature can be activated by setting the style tt(edit-previous) in the context tt(:zle:)var(widget) (for example, @@ -1595,12 +1601,12 @@ tt(:zle:replace-string)) to tt(true). In addition, a positive numeric argument forces the previous values to be offered, a negative or zero argument forces them not to be. -The function tt(replace-string-again) can be used to repeat the -previous replacement; no prompting is done. As with tt(replace-string), if -the name of the widget contains the word `tt(pattern)', pattern matching -is performed, else a literal string replacement. Note that the -previous source and replacement text are the same whether pattern or string -matching is used. +The function tt(replace-string-again) can be used to repeat the previous +replacement; no prompting is done. As with tt(replace-string), if the name +of the widget contains the word `tt(pattern)' or `tt(regex)', pattern or +regular expression matching is performed, else a literal string +replacement. Note that the previous source and replacement text are the +same whether pattern, regular expression or string matching is used. For example, starting from the line: @@ -2574,6 +2580,25 @@ the context prefix `tt(:completion:nslookup)'. See also the tt(pager), tt(prompt) and tt(rprompt) styles below. ) +findex(regexp-replace) +item(tt(regexp-replace) var(var) var(regexp) var(replace))( +Use regular expressions to perform a global search and replace operation +on a variable. 
If the option tt(RE_MATCH_PCRE) is not set, POSIX +extended regular expressions are used, else Perl-compatible regular +expressions (this requires the shell to be linked against the tt(pcre) +library). + +var(var) is the name of the variable containing the string to be matched. +The variable will be modified directly by the function. The +names tt(MATCH), tt(MBEGIN), tt(MEND), tt(match), tt(mbegin), tt(mend) +should be avoided for var(var) as these are used by the regular expression code. + +var(regexp) is the regular expression to match against the string. + +var(replace) is the replacement text. This can contain parameter, command +and arithmetic expressions which will be replaced: in particular, a +reference to tt($MATCH) will be replaced by the text matched by the pattern. +) findex(run-help) item(tt(run-help) var(cmd))( This function is designed to be invoked by the tt(run-help) ZLE widget, diff --git a/Functions/Misc/.distfiles b/Functions/Misc/.distfiles index b39d09130..d340d93d7 100644 --- a/Functions/Misc/.distfiles +++ b/Functions/Misc/.distfiles @@ -10,6 +10,7 @@ is-at-least mere nslookup promptnl +regexp-replace relative run-help run-help-git diff --git a/Functions/Misc/regexp-replace b/Functions/Misc/regexp-replace new file mode 100644 index 000000000..5aaedafba --- /dev/null +++ b/Functions/Misc/regexp-replace @@ -0,0 +1,35 @@ +# Replace all occurrences of a regular expression in a variable. The +# variable is modified directly. Respects the setting of the +# option RE_MATCH_PCRE. +# +# First argument: *name* (not contents) of variable. +# Second argument: regular expression +# Third argument: replacement string. This can contain all forms of +# $ and backtick substitutions; in particular, $MATCH will be replaced +# by the portion of the string matched by the regular expression. 
+
+integer pcre
+[[ -o re_match_pcre ]] && pcre=1
+emulate -L zsh
+(( pcre )) && setopt re_match_pcre
+
+# $4 is the unprocessed tail of the string, $5 accumulates the result.
+4=${(P)1}
+5=
+local MATCH MBEGIN MEND
+local -a match mbegin mend
+
+while [[ -n $4 && $4 =~ $2 ]]; do
+  5+=${4[1,MBEGIN-1]}${(e)3}
+  if (( MEND < MBEGIN )); then
+    # Empty match: copy one character so the loop always makes progress.
+    5+=${4[MBEGIN]}
+    4=${4[MBEGIN+1,-1]}
+  else
+    4=${4[MEND+1,-1]}
+  fi
+done
+5+=$4
+
+# Assign the result back to the variable named by $1.
+eval ${1}=${(q)5}
diff --git a/Functions/Zle/replace-string-again b/Functions/Zle/replace-string-again
index 3d3486437..8f4d23854 100644
--- a/Functions/Zle/replace-string-again
+++ b/Functions/Zle/replace-string-again
@@ -13,7 +13,7 @@ if [[ -z $_replace_string_src ]]; then
   return 1
 fi
 
-if [[ $curwidget = *pattern* ]]; then
+if [[ $curwidget = *(pattern|regex)* ]]; then
     local rep2
     # The following horror is so that an & preceded by an even
     # number of backslashes is active, without stripping backslashes,
@@ -38,8 +38,14 @@ if [[ $curwidget = *pattern* ]]; then
         rep=${match[5]}
     done
     rep2+=$rep
-    LBUFFER=${LBUFFER//(#bm)$~_replace_string_src/${(e)rep2}}
-    RBUFFER=${RBUFFER//(#bm)$~_replace_string_src/${(e)rep2}}
+    if [[ $curwidget = *regex* ]]; then
+      autoload -U regexp-replace
+      regexp-replace LBUFFER $_replace_string_src $rep2
+      regexp-replace RBUFFER $_replace_string_src $rep2
+    else
+      LBUFFER=${LBUFFER//(#bm)$~_replace_string_src/${(e)rep2}}
+      RBUFFER=${RBUFFER//(#bm)$~_replace_string_src/${(e)rep2}}
+    fi
 else
     LBUFFER=${LBUFFER//$_replace_string_src/$_replace_string_rep}
     RBUFFER=${RBUFFER//$_replace_string_src/$_replace_string_rep}
-- 
cgit 1.4.1