From 2e89ebbdc712b27e462613e3f1c6bcbca843065a Mon Sep 17 00:00:00 2001 From: Paul Ackersviller Date: Mon, 29 Oct 2007 03:06:09 +0000 Subject: Merge of 22606: word-context style for word matching. --- Doc/Zsh/contrib.yo | 36 ++++++- Functions/Zle/match-word-context | 48 +++++++++ Functions/Zle/match-words-by-style | 203 +++++++++++++++++++++++++++++++++++++ 3 files changed, 284 insertions(+), 3 deletions(-) create mode 100644 Functions/Zle/match-word-context create mode 100644 Functions/Zle/match-words-by-style diff --git a/Doc/Zsh/contrib.yo b/Doc/Zsh/contrib.yo index 4086e30bf..d05328651 100644 --- a/Doc/Zsh/contrib.yo +++ b/Doc/Zsh/contrib.yo @@ -383,12 +383,13 @@ tindex(capitalize-word-match) tindex(up-case-word-match) tindex(down-case-word-match) tindex(select-word-style) +tindex(match-word-context) tindex(match-words-by-style) xitem(tt(forward-word-match), tt(backward-word-match)) xitem(tt(kill-word-match), tt(backward-kill-word-match)) xitem(tt(transpose-words-match), tt(capitalize-word-match)) xitem(tt(up-case-word-match), tt(down-case-word-match)) -item(tt(select-word-style), tt(match-words-by-style))( +item(tt(select-word-style), tt(match-word-context), tt(match-words-by-style))( The eight `tt(-match)' functions are drop-in replacements for the builtin widgets without the suffix. By default they behave in a similar way. However, by the use of styles and the function tt(select-word-style), @@ -462,7 +463,7 @@ Words are whitespace-delimited strings of characters. ) enditem() -The first three of those styles usually use tt($WORDCHARS), but the value +The first three of those rules usually use tt($WORDCHARS), but the value in the parameter can be overridden by the style tt(word-chars), which works in exactly the same way as tt($WORDCHARS). In addition, the style tt(word-class) uses character class syntax to group characters and takes @@ -473,7 +474,7 @@ alphanumerics plus the characters `tt(-)' and `tt(:)'. 
Be careful including `tt(])', `tt(^)' and `tt(-)' as these are special inside character classes. -The final style is tt(skip-chars). This is mostly useful for +The style tt(skip-chars) is mostly useful for tt(transpose-words) and similar functions. If set, it gives a count of characters starting at the cursor position which will not be considered part of the word and are treated as space, regardless of what they actually @@ -485,6 +486,16 @@ has been set, and tt(transpose-words-match) is called with the cursor on the var(X) of tt(foo)var(X)tt(bar), where var(X) can be any character, then the resulting expression is tt(bar)var(X)tt(foo). +Finer grained control can be obtained by setting the style tt(word-context) +to an array of pairs of entries. Each pair of entries consists of a +var(pattern) and a var(subcontext). The shell argument the cursor is on is +matched against each var(pattern) in turn until one matches; if it does, +the context is extended by a colon and the corresponding var(subcontext). +Note that the test is made against the original word on the line, with no +stripping of quotes. If the cursor is at the end of the line the test is +performed against an empty string; if it is on whitespace between words the +test is made against a single space. Some examples are given below. + Here are some examples of use of the styles, actually taken from the simplified interface in tt(select-word-style): @@ -500,6 +511,21 @@ example(style ':zle:*kill*' word-style space) Uses space-delimited words for widgets with the word `kill' in the name. Neither of the styles tt(word-chars) nor tt(word-class) is used in this case. +Here are some examples of use of the tt(word-context) style to extend +the context. 
+ +example(zstyle ':zle:*' word-context "*/*" filename "[[:space:]]" whitespace +zstyle ':zle:transpose-words:whitespace' word-style shell +zstyle ':zle:transpose-words:filename' word-style normal +zstyle ':zle:transpose-words:filename' word-chars '') + +This provides two different ways of using tt(transpose-words) depending on +whether the cursor is on whitespace between words or on a filename, here +any word containing a tt(/). On whitespace, complete arguments as defined +by standard shell rules will be transposed. In a filename, only +alphanumerics will be transposed. Elsewhere, words will be transposed +using the default style for tt(:zle:transpose-words). + The word matching and all the handling of tt(zstyle) settings is actually implemented by the function tt(match-words-by-style). This can be used to create new user-defined widgets. The calling function should set the local @@ -526,6 +552,10 @@ endsitem() For example, tt(match-words-by-style -w shell -c 0) may be used to extract the command argument around the cursor. + +The tt(word-context) style is implemented by the function +tt(match-word-context). This should not usually need to be called +directly. ) tindex(delete-whole-word-match) item(tt(delete-whole-word-match))( diff --git a/Functions/Zle/match-word-context b/Functions/Zle/match-word-context new file mode 100644 index 000000000..da68b6c75 --- /dev/null +++ b/Functions/Zle/match-word-context @@ -0,0 +1,48 @@ +# See if we can extend the word context to something more specific. +# curcontext must be set to the base context by this point; it +# will be appended to directly. 
+ +emulate -L zsh +setopt extendedglob + +local -a worcon bufwords +local pat tag lastword word +integer iword + +zstyle -a $curcontext word-context worcon || return 0 + +if (( ${#worcon} % 2 )); then + zle -M "Bad word-context style in context $curcontext" + return +fi + +bufwords=(${(z)LBUFFER}) +iword=${#bufwords} +lastword=${bufwords[-1]} +bufwords=(${(z)BUFFER}) + +if [[ $lastword = ${bufwords[iword]} ]]; then + # If the word immediately left of the cursor is complete, + # we're not on it. Either we're on unquoted whitespace, or + # the start of a new word. Test the latter. + if [[ -z $RBUFFER ]]; then + # Nothing there, so not in a word. + word='' + elif [[ $RBUFFER[1] = [[:space:]] ]]; then + # Whitespace, so not in a word. + word=' ' + else + # We want the next word along. + word=${bufwords[iword+1]} + fi +else + # We're on a word. + word=${bufwords[iword]} +fi + +for pat tag in "${worcon[@]}"; do + if [[ $word = ${~pat} ]]; then + curcontext+=":$tag" + return + fi +done diff --git a/Functions/Zle/match-words-by-style b/Functions/Zle/match-words-by-style new file mode 100644 index 000000000..ad74a984f --- /dev/null +++ b/Functions/Zle/match-words-by-style @@ -0,0 +1,203 @@ +# Match words by the style given below. The matching depends on the +# cursor position. The matched_words array is set to the matched portions +# separately. These look like: +# <stuff-at-start> <word-before-cursor> <whitespace-before-cursor> +# <whitespace-after-cursor> <word-after-cursor> <whitespace-after-word> +# <stuff-at-end> +# where the cursor position is always after the third item and `after' +# is to be interpreted as `after or on'. Some +# of the array elements will be empty; this depends on the style. +# For example +# foo bar rod stick +# ^ +# with the cursor where indicated will with typical settings produce the +# elements `foo ', `bar', ` ', ` ', `rod', ` ' and `stick'. +# +# The style word-style can be set to indicate what a word is. +# The possibilities are: +# +# shell Words are shell words, i.e. elements of a command line. 
+# whitespace Words are space delimited words; only space or tab characters +# are considered to terminate a word. +# normal (the default): the usual zle logic is applied, with all +# alphanumeric characters plus any characters in $WORDCHARS +# considered parts of a word. The style word-chars overrides +# the parameter. (Any currently undefined value will be +# treated as `normal', but this should not be relied upon.) +# specified Similar to normal, except that only the characters given +# in the string (and not also alphanumeric characters) +# are to be considered parts of words. +# unspecified The negation of `specified': the characters given +# are those that aren't to be considered parts of a word. +# They should probably include white space. +# +# In the case of the `normal' or `(un)specified', more control on the +# behaviour can be obtained by setting the style `word-chars' for the +# current context. The value is used to override $WORDCHARS locally. +# Hence, +# zstyle ':zle:transpose-words*' word-style normal +# zstyle ':zle:transpose-words*' word-chars '' +# will force bash-style word recognition, i.e. only alphanumeric characters +# are considered parts of a word. It is up to the function which calls +# match-words-by-style to set the context in the variable curcontext, +# else a default context will be used (not recommended). +# +# You can override the use of word-chars with the style word-class. +# This specifies the same information, but as a character class. +# The surrounding square brackets shouldn't be given, but anything +# which can appear inside is allowed. For example, +# zstyle ':zle:*' word-class '-:[:alnum:]' +# is valid. Note the usual care with `]', `^' and `-' must be taken if +# they need to appear as individual characters rather than for grouping. +# +# The final style is `skip-chars'. 
This is an integer; that many +# characters counting the one under the cursor will be treated as +# whitespace regardless and added to the front of the fourth element of +# matched_words. The default is zero, i.e. the character under the cursor +# will appear in <whitespace-after-cursor> if it is whitespace, else in +# <word-after-cursor>. This style is mostly useful for forcing +# transposition to ignore the current character. +# +# The values of the styles can be overridden by options to the function: +# -w <word-style> +# -s <skip-chars> +# -c <word-class> +# -C <word-chars> + +emulate -L zsh +setopt extendedglob + +local wordstyle spacepat wordpat1 wordpat2 opt charskip wordchars wordclass +local match mbegin mend pat1 pat2 word1 word2 ws1 ws2 ws3 skip +local nwords MATCH MBEGIN MEND + +local curcontext=${curcontext:-:zle:match-words-by-style} + +autoload -U match-word-context +match-word-context + +while getopts "w:s:c:C:" opt; do + case $opt in + (w) + wordstyle=$OPTARG + ;; + + (s) + skip=$OPTARG + ;; + + (c) + wordclass=$OPTARG + ;; + + (C) + wordchars=$OPTARG + ;; + + (*) + return 1 + ;; + esac +done + +[[ -z $wordstyle ]] && zstyle -s $curcontext word-style wordstyle +[[ -z $skip ]] && zstyle -s $curcontext skip-chars skip +[[ -z $skip ]] && skip=0 + +case $wordstyle in + (shell) local bufwords + # This splits the line into words as the shell understands them. + bufwords=(${(z)LBUFFER}) + nwords=${#bufwords} + wordpat1="${(q)bufwords[-1]}" + + # Take substring of RBUFFER to skip over $skip characters + # from the cursor position. + bufwords=(${(z)RBUFFER[1+$skip,-1]}) + wordpat2="${(q)bufwords[1]}" + spacepat='[[:space:]]#' + + # Assume the words are at the top level, i.e. if we are inside + # 'something with spaces' then we need to ignore the embedded + # spaces and consider the whole word. + bufwords=(${(z)BUFFER}) + if (( ${#bufwords[$nwords]} > ${#wordpat1} )); then + # Yes, we're in the middle of a shell word. + # Find out what's in front. 
+ eval pat1='${LBUFFER%%(#b)('${wordpat1}')('${spacepat}')}' + # Now everything from ${#pat1}+1 is wordy + wordpat1=${LBUFFER[${#pat1}+1,-1]} + wordpat2=${RBUFFER[1,${#bufwords[$nwords]}-${#wordpat1}+1]} + + wordpat1=${(q)wordpat1} + wordpat2=${(q)wordpat2} + fi + ;; + (*space) spacepat='[[:space:]]#' + wordpat1='[^[:space:]]##' + wordpat2=$wordpat1 + ;; + (*) local wc + # See if there is a character class. + wc=$wordclass + if [[ -n $wc ]] || zstyle -s $curcontext word-class wc; then + # Treat as a character class: do minimal quoting. + wc=${wc//(#m)[\'\"\`\$\(\)\^]/\\$MATCH} + else + # See if there is a local version of $WORDCHARS. + wc=$wordchars + if [[ -z $wc ]]; then + zstyle -s $curcontext word-chars wc || + wc=$WORDCHARS + fi + if [[ $wc = (#b)(?*)-(*) ]]; then + # We need to bring any `-' to the front to avoid confusing + # character classes... we get away with `]' since in zsh + # this isn't a pattern character if it's quoted. + wc=-$match[1]$match[2] + fi + wc="${(q)wc}" + fi + # Quote $wc where necessary, because we don't want those + # characters to be considered as pattern characters later on. + if [[ $wordstyle = *specified ]]; then + if [[ $wordstyle != un* ]]; then + # The given set of characters are the word characters, nothing else + wordpat1="[${wc}]##" + # anything else is a space. + spacepat="[^${wc}]#" + else + # The other way round. + wordpat1="[^${wc}]##" + spacepat="[${wc}]#" + fi + else + # Normal: similar, but add alphanumerics. + wordpat1="[${wc}[:alnum:]]##" + spacepat="[^${wc}[:alnum:]]#" + fi + wordpat2=$wordpat1 + ;; +esac + +# The eval makes any special characters in the parameters active. +# In particular, we need the surrounding `['s to be `real'. +# This is why we quoted the wordpats in the `shell' option, where +# they have to be treated as literal strings at this point. +match=() +eval pat1='${LBUFFER%%(#b)('${wordpat1}')('${spacepat}')}' +word1=$match[1] +ws1=$match[2] + +match=() +charskip= +repeat $skip charskip+=\? 
+ +eval pat2='${RBUFFER##(#b)('${charskip}${spacepat}')('\ +${wordpat2}')('${spacepat}')}' + +ws2=$match[1] +word2=$match[2] +ws3=$match[3] + +matched_words=("$pat1" "$word1" "$ws1" "$ws2" "$word2" "$ws3" "$pat2") -- cgit 1.4.1