1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
|
# Match words by the style given below. The matching depends on the
# cursor position. The matched_words array is set to the matched portions
# separately. These look like:
# <stuff-at-start> <word-before-cursor> <whitespace-before-cursor>
# <whitespace-after-cursor> <word-after-cursor> <whitespace-after-word>
# <stuff-at-end>
# where the cursor position is always after the third item and `after'
# is to be interpreted as `after or on'. Some
# of the array elements will be empty; this depends on the style.
# For example
#    foo bar  rod stick
#             ^
# with the cursor where indicated will with typical settings produce the
# elements `foo ', `bar', ` ', ` ', `rod', ` ' and `stick'.
#
# The style word-style can be set to indicate what a word is.
# The possibilities are:
#
# shell Words are shell words, i.e. elements of a command line.
# whitespace Words are space delimited words; only space or tab characters
# are considered to terminate a word.
# normal (the default): the usual zle logic is applied, with all
# alphanumeric characters plus any characters in $WORDCHARS
# considered parts of a word. The style word-chars overrides
# the parameter. (Any currently undefined value will be
# treated as `normal', but this should not be relied upon.)
# specified Similar to normal, except that only the characters given
# in the string (and not also alphanumeric characters)
# are to be considered parts of words.
# unspecified The negation of `specified': the characters given
# are those that aren't to be considered parts of a word.
# They should probably include white space.
#
# In the case of the `normal' or `(un)specified', more control on the
# behaviour can be obtained by setting the style `word-chars' for the
# current context. The value is used to override $WORDCHARS locally.
# Hence,
# zstyle ':zle:transpose-words*' word-style normal
# zstyle ':zle:transpose-words*' word-chars ''
# will force bash-style word recognition, i.e. only alphanumeric characters
# are considered parts of a word. It is up to the function which calls
# match-words-by-style to set the context in the variable curcontext,
# else a default context will be used (not recommended).
#
# You can override the use of word-chars with the style word-class.
# This specifies the same information, but as a character class.
# The surrounding square brackets shouldn't be given, but anything
# which can appear inside is allowed. For example,
# zstyle ':zle:*' word-class '-:[:alnum:]'
# is valid. Note the usual care with `]', `^' and `-' must be taken if
# they need to appear as individual characters rather than for grouping.
#
# The final style is `skip-chars'. This is an integer; that many
# characters counting the one under the cursor will be treated as
# whitespace regardless and added to the front of the fourth element of
# matched_words. The default is zero, i.e. the character under the cursor
# will appear in <whitespace-after-cursor> if it is whitespace, else in
# <word-after-cursor>. This style is mostly useful for forcing
# transposition to ignore the current character.
#
# The values of the styles can be overridden by options to the function:
# -w <word-style>
# -s <skip-chars>
# -c <word-class>
# -C <word-chars>
# -r <subword-range> (only consulted when the word style contains `subword')
# Run with native zsh behaviour for the duration of this function, and turn
# on the extended pattern operators (#, ##, ^, (#b), (#m)) used further down.
emulate -L zsh
setopt extendedglob

# Style settings and the option letter currently being parsed.
local opt wordstyle wordchars wordclass subwordrange skip
# Pattern fragments and the word count used while building the match.
local spacepat wordpat1 wordpat2 charskip nwords
# The seven pieces that are finally stored in matched_words.
local pat1 word1 ws1 ws2 word2 ws3 pat2
# Scratch parameters written by pattern matching; made local so that the
# caller's versions are not overwritten.
local match mbegin mend MATCH MBEGIN MEND
# Use a generic context if the caller did not supply one.
local curcontext=${curcontext:-:zle:match-words-by-style}

# NOTE(review): match-word-context is defined elsewhere; presumably it
# refines $curcontext before the styles are looked up -- confirm.
autoload -Uz match-word-context
match-word-context

# Command-line options take precedence over the corresponding styles.
# Anything getopts does not recognise makes the function fail.
while getopts "w:s:c:C:r:" opt; do
  case $opt in
    (w) wordstyle=$OPTARG ;;
    (s) skip=$OPTARG ;;
    (c) wordclass=$OPTARG ;;
    (C) wordchars=$OPTARG ;;
    (r) subwordrange=$OPTARG ;;
    (*) return 1 ;;
  esac
done

# Fill in from the styles whatever was not given as an option.
if [[ -z $wordstyle ]]; then
  zstyle -s $curcontext word-style wordstyle
fi
if [[ -z $skip ]]; then
  zstyle -s $curcontext skip-chars skip
  # Neither option nor style: skip no characters.
  [[ -n $skip ]] || skip=0
fi
# Pick the three patterns the matching code further down works with:
#   wordpat1  the word ending immediately before the cursor
#   wordpat2  the word starting at or after the cursor
#   spacepat  what separates words (may match the empty string)
# The style name is tested by substring and the branches are tried in
# order, so any value containing `shell' selects the first branch and
# anything not containing `shell' or `space' falls through to the last.
case $wordstyle in
(*shell*) local bufwords
# This splits the line into words as the shell understands them.
bufwords=(${(Z:n:)LBUFFER})
nwords=${#bufwords}
# The last shell word left of the cursor, quoted with (q) so that it is
# still a literal string after the eval further down.
wordpat1="${(q)bufwords[-1]}"
# Take substring of RBUFFER to skip over $skip characters
# from the cursor position.
bufwords=(${(Z:n:)RBUFFER[1+$skip,-1]})
wordpat2="${(q)bufwords[1]}"
spacepat='[[:space:]]#'
# Assume the words are at the top level, i.e. if we are inside
# 'something with spaces' then we need to ignore the embedded
# spaces and consider the whole word.
# To detect that, split the whole line and compare the length of its
# word number $nwords with the length of the (quoted) piece that was
# found by splitting LBUFFER alone.
bufwords=(${(Z:n:)BUFFER})
if (( ${#bufwords[$nwords]} > ${#wordpat1} )); then
# Yes, we're in the middle of a shell word.
# Find out what's in front.
eval pat1='${LBUFFER%%(#b)('${wordpat1}')('${spacepat}')}'
# Now everything from ${#pat1}+1 is wordy
wordpat1=${LBUFFER[${#pat1}+1,-1]}
# The remainder of that word is taken from the start of RBUFFER.
# NOTE(review): the end index is the length difference plus one, i.e. one
# character more than the remainder of the word by length alone --
# confirm this is intended.
wordpat2=${RBUFFER[1,${#bufwords[$nwords]}-${#wordpat1}+1]}
# Both halves are plain text again at this point, so re-quote them for
# the eval.
wordpat1=${(q)wordpat1}
wordpat2=${(q)wordpat2}
fi
;;
# Whitespace-delimited words: a word is one or more non-space characters,
# a separator is zero or more space characters.
(*space*) spacepat='[[:space:]]#'
wordpat1='[^[:space:]]##'
wordpat2=$wordpat1
;;
# Everything else (normal, specified, unspecified, or an unrecognised
# value): words are defined by a set of characters, collected in $wc.
(*) local wc
# See if there is a character class.
# The -c option wins over the word-class style.
wc=$wordclass
if [[ -n $wc ]] || zstyle -s $curcontext word-class wc; then
# Treat as a character class: do minimal quoting.
# Only quotes, backquote, `$', parentheses and `^' get a backslash, so
# bracket syntax such as [:alnum:] inside the class is left intact.
wc=${wc//(#m)[\'\"\`\$\(\)\^]/\\$MATCH}
else
# See if there is a local version of $WORDCHARS.
# Precedence: the -C option, then the word-chars style, then $WORDCHARS.
# An empty -C value is indistinguishable from no -C at all here.
wc=$wordchars
if [[ -z $wc ]]; then
zstyle -s $curcontext word-chars wc ||
wc=$WORDCHARS
fi
# The pattern needs at least one character before the `-', so a `-' that
# is already first is left alone.
if [[ $wc = (#b)(?*)-(*) ]]; then
# We need to bring any `-' to the front to avoid confusing
# character classes... we get away with `]' since in zsh
# this isn't a pattern character if it's quoted.
wc=-$match[1]$match[2]
fi
wc="${(q)wc}"
fi
# Quote $wc where necessary, because we don't want those
# characters to be considered as pattern characters later on.
# `unspecified' also contains the string `specified', hence the nested
# test to tell the two apart.
if [[ $wordstyle = *specified* ]]; then
if [[ $wordstyle != *unspecified* ]]; then
# The given set of characters are the word characters, nothing else
wordpat1="[${wc}]##"
# anything else is a space.
spacepat="[^${wc}]#"
else
# The other way round.
wordpat1="[^${wc}]##"
spacepat="[${wc}]#"
fi
else
# Normal: similar, but add alphanumerics.
wordpat1="[${wc}[:alnum:]]##"
spacepat="[^${wc}[:alnum:]]#"
fi
# The same definition of a word applies on both sides of the cursor.
wordpat2=$wordpat1
;;
esac
# The eval makes any special characters in the parameters active.
# In particular, we need the surrounding `[' s to be `real'.
# This is why we quoted the wordpats in the `shell' option, where
# they have to be treated as literal strings at this point.
#
# Left of the cursor: strip the longest trailing <word><separator> from
# LBUFFER.  (#b) turns on backreferences, so the two parenthesised groups
# end up in $match[1] and $match[2]; whatever is left becomes pat1.
# match is cleared first so that a failed match yields empty strings.
match=()
eval pat1='${LBUFFER%%(#b)('${wordpat1}')('${spacepat}')}'
word1=$match[1]
ws1=$match[2]
if [[ $wordstyle = *subword* ]]; then
# Characters that start a subword: the -r option, then the subword-range
# style, then upper case letters.  The value is used inside [...] below.
if [[ -z $subwordrange ]] &&
! zstyle -s $curcontext subword-range subwordrange; then
subwordrange='[:upper:]'
fi
# The rule here is that a word boundary may be an upper case letter
# followed by a lower case letter, or an upper case letter at
# the start of a group of upper case letters. To make
# it easier to be consistent, we just use anything that
# isn't an upper case character instead of a lower case
# character.
# Here the initial "*" will match greedily, so we get the
# last such match, as we want.
# epos is the number of characters of word1 that precede the chosen
# boundary; it is declared as an integer and so starts at zero.
integer epos
# Boundary type 1: a range character followed by a non-range character.
if [[ $word1 = (#b)(*)([${~subwordrange}][^${~subwordrange}]*) ]]; then
(( epos = ${#match[1]} ))
fi
# Boundary type 2: a non-range character followed by a range character.
# Keep whichever of the two boundaries lies further to the right.
if [[ $word1 = (#b)(*[^${~subwordrange}])([${~subwordrange}]*) ]]; then
(( ${#match[1]} > epos )) && (( epos = ${#match[1]} ))
fi
# Move everything before the boundary out of the word and onto the end of
# the leading text.
if (( epos > 0 )); then
pat1+=$word1[1,epos]
word1=$word1[epos+1,-1]
fi
fi
# Right of the cursor: $charskip is $skip copies of `?', i.e. a pattern
# matching exactly that many arbitrary characters, which are thereby
# counted as part of the whitespace after the cursor.  Then strip the
# longest leading <skip+separator><word><separator> from RBUFFER; the
# rest becomes pat2.
match=()
charskip=${(l:skip::?:)}
eval pat2='${RBUFFER##(#b)('${charskip}${spacepat}')('\
${wordpat2}')('${spacepat}')}'
ws2=$match[1]
word2=$match[2]
ws3=$match[3]
if [[ $wordstyle = *subword* ]]; then
# Do we have a group of upper case characters at the start
# of word2 (that don't form the entire word)?
# Again, rely on greedy matching of first pattern.
if [[ $word2 = (#b)([${~subwordrange}][${~subwordrange}]##)(*) &&
-n $match[2] ]]; then
# Yes, so the last one is new word boundary.
(( epos = ${#match[1]} - 1 ))
# Otherwise, do we have upper followed by non-upper not
# at the start? Ignore the initial character, we already
# know it's a word boundary so it can be an upper case character
# if it wants.
elif [[ $word2 = (#b)(?[^${~subwordrange}]##)[${~subwordrange}]* ]]; then
(( epos = ${#match[1]} ))
else
(( epos = 0 ))
fi
# Here epos is the number of characters of word2 to keep as the subword.
if (( epos )); then
# Careful: if we matched a subword there's no whitespace immediately
# after the matched word, so ws3 should be empty and any existing
# value tacked onto pat2.
pat2="${word2[epos+1,-1]}$ws3$pat2"
ws3=
word2=$word2[1,epos]
fi
fi
# Hand the seven pieces back in the order described in the header comment.
# matched_words is not declared in this function, so the assignment goes
# to whatever scope the caller has provided for it.
matched_words=("$pat1" "$word1" "$ws1" "$ws2" "$word2" "$ws3" "$pat2")
|