about summary refs log tree commit diff
path: root/Functions/Misc/zmv
blob: b4f9b94ba962ec312e4d0376810b6e465e776a39 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
# function zmv {
# zmv, zcp, zln:
#
# This is a multiple move based on zsh pattern matching.  To get the full
# power of it, you need a postgraduate degree in zsh.  However, simple
# tasks work OK, so if that's all you need, here are some basic examples:
#   zmv '(*).txt' '$1.lis'
# Rename foo.txt to foo.lis, etc.  The parenthesis is the thing that
# gets replaced by the $1 (not the `*', as happens in mmv, and note the
# `$', not `=', so that you need to quote both words).
#   zmv '(**/)(*).txt' '$1$2.lis'
# The same, but scanning through subdirectories.  The $1 becomes the full
# path.  Note that you need to write it like this; you can't get away with
# '(**/*).txt'.
#   zmv -w '**/*.txt' '$1$2.lis'
# This is the lazy version of the one above; zsh picks out the patterns
# for you.  The catch here is that you don't need the / in the replacement
# pattern.  (It's not really a catch, since $1 can be empty.)
#   zmv -C '**/(*).txt' ~/save/'$1'.lis
# Copy, instead of move, all .txt files in subdirectories to .lis files
# in the single directory `~/save'.  Note that the ~ was not quoted.
# You can test things safely by using the `-n' (no, not now) option.
# Clashes, where multiple files are renamed or copied to the same one, are
# picked up.
#
# Here's a more detailed description.
#
# Use zsh pattern matching to move, copy or link files, depending on
# the last two characters of the function name.  The general syntax is
#   zmv '<inpat>' '<outstring>'
# <inpat> is a globbing pattern, so it should be quoted to prevent it from
# immediate expansion, while <outstring> is a string that will be
# re-evaluated and hence may contain parameter substitutions, which should
# also be quoted.  Each set of parentheses in <inpat> (apart from those
# around glob qualifiers, if you use the -Q option, and globbing flags) may
# be referred to by a positional parameter in <outstring>, i.e. the first
# (...) matched is given by $1, and so on.  For example,
#   zmv '([a-z])(*).txt' '${(C)1}$2.txt'
# renames algernon.txt to Algernon.txt, boris.txt to Boris.txt and so on.
# The original file matched can be referred to as $f in the second
# argument; accidental or deliberate use of other parameters is at owner's
# risk and is not covered by the (non-existent) guarantee.
#
# As usual in zsh, /'s don't work inside parentheses.  There is a special
# case for (**/) and (***/):  these have the expected effect that the
# entire relevant path will be substituted by the appropriate positional
# parameter.
#
# There is a shortcut avoiding the use of parentheses with the option -w
# (with wildcards), which picks out any expressions `*', `?', `<range>'
# (<->, <1-10>, etc.), `[...]', possibly followed by `#'s, `**/', `***/', and
# automatically parenthesises them. (You should quote any ['s or ]'s which
# appear inside [...] and which do not come from ranges of the form
# `[:alpha:]'.)  So for example, in
#    zmv -w '[[:upper:]]*' '${(L)1}$2'
# the $1 refers to the expression `[[:upper:]]' and the $2 refers to
# `*'. Thus this finds any file with an upper case first character and
# renames it to one with a lowercase first character.  Note that any
# existing parentheses are active, too, so you must count accordingly.
# Furthermore, an expression like '(?)' will be rewritten as '((?))' --- in
# other words, parenthesising of wildcards is independent of any existing
# parentheses.
#
# Any file whose name is not changed by the substitution is simply ignored.
# Any error --- a substitution resulted in an empty string, two
# substitutions gave the same result, the destination was an existing
# regular file and -f was not given --- causes the entire function to abort
# without doing anything.
#
# Options:
#  -f  force overwriting of destination files.  Not currently passed
#      down to the mv/cp/ln command due to vagaries of implementations
#      (but you can use -o-f to do that).
#  -i  interactive: show each line to be executed and ask the user whether
#      to execute it.  Y or y will execute it, anything else will skip it.
#      Note that you just need to type one character.
#  -n  no execution: print what would happen, but don't do it.
#  -q  Turn bare glob qualifiers off:  now assumed by default, so this
#      has no effect.
#  -Q  Force bare glob qualifiers on.  Don't turn this on unless you are
#      actually using glob qualifiers in a pattern (see below).
#  -s  symbolic, passed down to ln; only works with zln or z?? -L.
#  -v  verbose: print line as it's being executed.
#  -o <optstring>
#      <optstring> will be split into words and passed down verbatim
#      to the cp, ln or mv called to perform the work.  It will probably
#      begin with a `-'.
#  -p <program>
#      Call <program> instead of cp, ln or mv.  Whatever it does, it should
#      at least understand the form '<program> -- <oldname> <newname>',
#      where <oldname> and <newname> are filenames generated.
#  -w  Pick out wildcard parts of the pattern, as described above, and
#      implicitly add parentheses for referring to them.
#  -C
#  -L
#  -M  Force cp, ln or mv, respectively, regardless of the name of the
#      function.
#
# Bugs:
#   Parenthesised expressions can be confused with glob qualifiers, for
#   example a trailing '(*)' would be treated as a glob qualifier in
#   ordinary globbing.  This has proved so annoying that glob qualifiers
#   are now turned off by default.  To force the use of glob qualifiers,
#   give the flag -Q.
#
#   The second argument is re-evaluated in order to expand the parameters,
#   so quoting may be a bit haphazard.  In particular, a double quote
#   will need an extra level of quoting.
#
#   The pattern is always treated as an extendedglob pattern.  This
#   can also be interpreted as a feature.
#
# Unbugs:
#   You don't need braces around the 1 in expressions like '$1t' as
#   non-positional parameters may not start with a number, although
#   paranoiacs like the author will probably put them there anyway.

# Run with native zsh behaviour, with option changes kept local to this
# function.  The patterns used below rely on extendedglob.
emulate -RL zsh
setopt extendedglob

local f g args match mbegin mend files action myname tmpf opt exec
local opt_f opt_i opt_n opt_q opt_Q opt_s opt_M opt_C opt_L
local opt_o opt_p opt_v opt_w MATCH MBEGIN MEND
local pat repl errstr fpat hasglobqual opat
# from: new name -> old name that produces it; to: old name -> new name.
typeset -A from to
# Exit status of the function; becomes 1 if any invocation of the action fails.
integer stat

# Parse the options.  Every option X that is seen is recorded in the
# parameter opt_X: options taking an argument (-o, -p) store the argument,
# plain flags store the flag itself (e.g. opt_s is set to `-s').
while getopts ":o:p:MCLfinqQsvw" opt; do
  if [[ $opt = "?" ]]; then
    print -P "%N: unrecognized option: -$OPTARG" >&2
    return 1
  elif [[ $opt = ":" ]]; then
    # With the leading `:' in the option string, a missing argument is
    # reported by setting opt to `:' and OPTARG to the option letter.
    print -P "%N: option requires an argument: -$OPTARG" >&2
    return 1
  fi
  # The `$'s are escaped so that the expansion is performed by eval itself
  # as part of a single assignment.  Expanding $OPTARG before the eval
  # would word-split an argument such as `-a -b' and try to run part of
  # it as a command.
  eval "opt_$opt=\${OPTARG:--\$opt}"
done
(( OPTIND > 1 )) && shift $(( OPTIND - 1 ))

# Unless -Q was given, a trailing (...) in the pattern must not be taken
# as a set of glob qualifiers.
[[ -z $opt_Q ]] && setopt nobareglobqual
# An explicit -M, -C, -L or -p decides the action; if several are given
# the one tested last here wins.
[[ -n $opt_M ]] && action=mv
[[ -n $opt_C ]] && action=cp
[[ -n $opt_L ]] && action=ln
[[ -n $opt_p ]] && action=$opt_p

# Exactly two arguments must remain: the input pattern and the replacement.
if (( $# != 2 )); then
  # The `$'s are escaped: inside double quotes an unescaped $1 or $2 would
  # be replaced by whatever arguments the caller happened to pass.
  print -P "Usage:
  %N oldpattern newpattern
where oldpattern contains parenthesis surrounding patterns which will
be replaced in turn by \$1, \$2, ... in newpattern.  For example,
  %N '(*).lis' '\$1.txt'
renames 'foo.lis' to 'foo.txt', 'my.old.stuff.lis' to 'my.old.stuff.txt',
and so on." >&2
  return 1
fi

pat=$1
repl=$2

if [[ -z $action ]]; then
  # No -M, -C, -L or -p: the action is given by the last two characters of
  # the name this function was called under (zmv, zcp, zln).
  #
  # We can't necessarily get the name of the function directly, because
  # of no_function_argzero stupidity.  The prompt escape %N does give it,
  # and the (%) parameter flag performs prompt expansion in-process, so
  # no temporary file with a predictable name is needed.
  myname=${(%):-%N}

  action=$myname[-2,-1]

  if [[ $action != (cp|mv|ln) ]]; then
    print "Action $action not recognised: must be cp, mv or ln." >&2
    return 1
  fi
fi


# -s (symbolic) only makes sense when the action is ln.
if [[ -n $opt_s && $action != ln ]]; then
  print -P "%N: invalid option: -s" >&2
  return 1
fi

if [[ -n $opt_w ]]; then
  # Parenthesise all wildcards.
  # With -w every wildcard expression in the pattern is wrapped in
  # parentheses, so that it can be referred to by a positional parameter
  # in the replacement.
  local newpat
  # Well, this seems to work.
  # The tricky bit is getting all forms of [...] correct, but as long
  # as we require inactive bits to be backslashed it's not so bad.
  #
  # The (#m) flag makes the text of each match available in $MATCH, and
  # every match is replaced by ($MATCH).  The alternatives are, in order:
  #   - one or more `*' followed by `/' (this covers `**/' and `***/');
  #   - a single `*' or `?';
  #   - a numeric range such as <-> or <1-10>;
  #   - a bracket expression [...], whose contents may be [:class:] items,
  #     backslashed brackets, or runs of characters other than brackets;
  # each of them optionally followed by any number of `#'s.
  newpat="${pat//\
(#m)(\*\*#\/|[*?]|\<[0-9]#-[0-9]#\>|\[(\[:[a-z]##:\]|\\\[|\\\]|[^\[\]]##)##\])\##\
/($MATCH)}"
  # An unchanged pattern means nothing was recognised as a wildcard; this is
  # only worth a warning, and the original pattern is used as it stands.
  if [[ $newpat = $pat ]]; then
    print -P "%N: warning: no wildcards were found" >&2
  else
    pat=$newpat
  fi
fi

# With -Q, a trailing parenthesised group containing no `)', `|' or `~' is
# taken to be a list of glob qualifiers.  Note that fact in hasglobqual and
# keep the part of the pattern before the qualifiers in opat.
if [[ -n $opt_Q && $pat = (#b)(*)\([^\)\|\~]##\) ]]; then
  hasglobqual=q
  # strip off qualifiers for use as ordinary pattern
  opat=$match[1]
fi

# fpat is the pattern actually used for filename generation.  A
# parenthesised recursive component, (**/) or (***/), has its parentheses
# removed here; otherwise the pattern is used unchanged.
if [[ $pat = (#b)(*)\((\*\*##/)\)(*) ]]; then
  fpat="$match[1]$match[2]$match[3]"
  # Now make sure we do depth-first searching.
  # This is so that the names of any files are altered before the
  # names of the directories they are in.
  if [[ -n $opt_Q && -n $hasglobqual ]]; then
    # The pattern already ends in a qualifier list: replace its closing
    # `)' so that the od and on sort qualifiers are added to that list.
    fpat[-1]="odon)"
  else
    # No qualifier list yet: turn bare glob qualifiers back on so that
    # the appended (odon) is treated as qualifiers, not as a pattern.
    setopt bareglobqual
    fpat="${fpat}(odon)"
  fi
else
  fpat=$pat
fi
# ${~fpat} makes the value of fpat be treated as a glob pattern; the
# resulting list of names is stored in files.
files=(${~fpat})

# From here on pat is only used to match strings, so drop any glob qualifiers.
[[ -n $hasglobqual ]] && pat=$opat

# Error messages collected while working out the new names.  Declared local
# so that the array neither leaks into nor clobbers the caller's scope.
local -a errs
errs=()

# First pass: work out the new name of every file and collect anything that
# would make the operation unsafe.  No file is touched in this loop.
for f in $files; do
  if [[ $pat = (#b)(*)\(\*\*##/\)(*) ]]; then
    # This looks like a recursive glob.  This isn't good enough,
    # because we should really enforce that $match[1] and $match[2]
    # don't match slashes unless they were explicitly given.  But
    # it's a start.  It's fine for the classic case where (**/) is
    # at the start of the pattern.
    pat="$match[1](*/|)$match[2]"
  fi
  # Skip names that do not exist or do not match the pattern; (#b) stores
  # the text matched by each set of parentheses in $match.
  [[ -e $f && $f = (#b)${~pat} ]] || continue
  # Make the parenthesised matches available to the replacement as $1, $2, ...
  set -- "$match[@]"
  # NOTE: the replacement is deliberately re-evaluated so that parameter
  # substitutions in it are expanded.  It is therefore executed as shell
  # code and must not come from an untrusted source.
  eval g=\"$repl\"
  if [[ -z $g ]]; then
    errs=($errs "$f expanded to empty string")
  elif [[ $f = $g ]]; then
    # don't cause error: more useful just to skip
    #   errs=($errs "$f not altered by substitution")
    # -r and -- stop print from interpreting backslashes in the file name
    # or taking a leading `-' as an option.
    [[ -n $opt_v ]] && print -r -- "$f not altered, ignored"
    continue
  elif [[ -n $from[$g] && ! -d $g ]]; then
    # Two sources mapping to the same name is only acceptable when that
    # name is an existing directory.
    errs=($errs "$f and $from[$g] both map to $g")
  elif [[ -f $g && -z $opt_f ]]; then
    errs=($errs "file exists: $g")
  fi
  from[$g]=$f
  to[$f]=$g
done

# Any error aborts the whole operation before a single file is touched.
if (( $#errs )); then
  print -P "%N: error(s) in substitution:" >&2
  # The messages contain file names: print them raw, one per line.
  print -lr -- $errs >&2
  return 1
fi

# Second pass: perform (or, with -n, merely show) the action for every file
# that was given a new name in the first pass.
for f in $files; do
  [[ -z $to[$f] ]] && continue
  # Command line: the action, the words of the -o option string, `-s' if
  # it was given, then `--' and the old and new names.
  exec=($action ${=opt_o} $opt_s -- $f $to[$f])
  # -r keeps print from interpreting backslash sequences that may occur
  # in file names, so the command is shown exactly as it will be run.
  [[ -n $opt_i$opt_n$opt_v ]] && print -r -- $exec
  if [[ -n $opt_i ]]; then
    # Read a single character; anything other than y or Y skips this file.
    read -q 'opt?Execute? ' || continue
  fi
  if [[ -z $opt_n ]]; then
    # Carry on after a failure, but remember it for the exit status.
    $exec || stat=1
  fi
done

return $stat
# }