#autoload ## usage: _regex_arguments funcname regex ## configuration key used: # regex_arguments_path # The path to a directory for caching. (default: ~/.zsh/regex_arguments) ## # _regex_arguments compiles `regex' and emit the result of the state # machine into the function `funcname'. `funcname' parses a command line # according to `regex' and evaluate appropriate actions in `regex'. Before # parsing the command line string is genereted by concatinating `words' # (before `PREFIX') and `PREFIX' with a separator NUL ($'\0'). # The `regex' is defined as follows. ## regex word definition: # pattern = "/" ( glob | "[]" ) "/" [ "+" | "-" ] # lookahead = "%" glob "%" # guard = "-" zsh-code-to-eval # action = ":" zsh-code-to-eval ## regex word sequence definition: # element = pattern [ lookahead ] [ guard ] [ action ] # # regex = element # | "(" regex ")" # | regex "#" # | regex regex # | regex "|" regex # example: # compdef _tst tst # _regex_arguments _tst /$'[^\0]#\0'/ /$'[^\0]#\0'/ :'compadd aaa' # _tst complete `aaa' for first argument. # First $'[^\0]#\0' is required to match with command name. # _regex_arguments _tst /$'[^\0]#\0'/ \( /$'[^\0]#\0'/ :'compadd aaa' /$'[^\0]#\0'/ :'compadd bbb' \) \# # _tst complete `aaa' for (2i+1)th argument and `bbb' for (2i)th argument. # _regex_arguments _tst /$'[^\0]#\0'/ \( /$'[^\0]#\0'/ :'compadd aaa' \| /$'[^\0]#\0'/ :'compadd bbb' \) \# # _tst complete `aaa' or `bbb'. ## Recursive decent regex parser # return status of parser functions: # 0 : success # 1 : parse error # 2 : fatal parse error _ra_parse_elt () { local state act if (( $#regex < index )); then return 1 else case "$regex[index]" in /*/([-+]|)) state=$index first=($state) last=($state) nullable= case "$regex[index]" in */+) cutoff[$state]=+;; */) cutoff[$state]=/;; */-) cutoff[$state]=-;; esac pattern[$state]="${${regex[index++]#/}%/([-+]|)}" if [[ $pattern[$state] != "[]" ]]; then pattern[$state]="(#b)((#B)$pattern[$state])" fi if [[ $index -le $#regex && $regex[index] = %*% ]]; then lookahead[$state]="(#B)${regex[index++][2,-2]}" else lookahead[$state]="" fi if [[ $index -le $#regex && $regex[index] = -* ]]; then guard[$state]="${regex[index++][2,-1]}" else guard[$state]="" fi if [[ $index -le $#regex && $regex[index] = :* ]]; then act="${regex[index++][2,-1]}" action[$state]="$act" : ${actions[$act]::="${actions[$act]} $state"} else action[$state]="" fi ;; \() (( index++ )) _ra_parse_alt || return $? [[ $index -le $#regex && "$regex[$index]" = \) ]] || return 2 (( index++ )) ;; *) return 1 ;; esac fi return 0 } _ra_parse_clo () { _ra_parse_elt || return $? if (( index <= $#regex )) && [[ "$regex[$index]" = \# ]]; then (( index++ )) nullable=yes for i in $last; do tbl[$i]="$tbl[$i] $first"; done fi return 0 } _ra_parse_seq () { local last_seq local first_seq nullable_seq first_seq=() nullable_seq=yes _ra_parse_clo || { if (( $? == 2 )); then return 2 else first=() last=() nullable=yes return 0 fi } first_seq=($first) last_seq=($last) [[ -n "$nullable" ]] || nullable_seq= while :; do _ra_parse_clo || { if (( $? == 2 )); then return 2 else break fi } for i in $last_seq; do tbl[$i]="${tbl[$i]} $first"; done [[ -n "$nullable_seq" ]] && first_seq=($first_seq $first) [[ -n "$nullable" ]] || { nullable_seq= last_seq=() } last_seq=($last_seq $last) done first=($first_seq) nullable=$nullable_seq last=($last_seq) return 0 } _ra_parse_alt () { local last_alt local first_alt nullable_alt first_alt=() nullable_alt= _ra_parse_seq || return $? first_alt=($first_alt $first) last_alt=($last_alt $last) [[ -n "$nullable" ]] && nullable_alt=yes while :; do (( index <= $#regex )) || break [[ "$regex[$index]" = \| ]] || break (( index++ )) _ra_parse_seq || { if (( $? == 2 )); then return 2 else break fi } first_alt=($first_alt $first) last_alt=($last_alt $last) [[ -n "$nullable" ]] && nullable_alt=yes done first=($first_alt) last=($last_alt) nullable=$nullable_alt return 0 } ## function generator _ra_gen_func () { local old new local state index local test tmp local start="0" old=() new=($start) print -lr - \ "$funcname () {" \ 'setopt localoptions extendedglob' \ 'local _ra_state _ra_left _ra_right _ra_actions' \ "_ra_state=$start" \ '_ra_left=' \ '_ra_right="${(pj:\0:)${(@)words[1,CURRENT - 1]:Q}}"$'\''\0'\''"$PREFIX"' \ '_ra_actions=()' \ 'while :; do' \ 'case "$_ra_state" in' while (( $#new )); do state="$new[1]" shift new old=("$old[@]" "$state") print -lr - \ "$state)" _ra_gen_parse_state print -lr - \ ';;' done print -lr - \ 'esac' \ 'done' \ 'while (( $#_ra_actions )); do' \ 'case "$_ra_actions[1]" in' for tmp in "${(@k)actions}"; do if [[ "$tmp" != '' ]]; then print -lr - "${(j:);&:)${=actions[$tmp]}})" $tmp ';;' fi done print -lr - \ 'esac' \ 'shift _ra_actions' \ 'done' \ '}' } _ra_gen_parse_state () { local actions i p test='if' for index in $=tbl[$state]; do if [[ "$pattern[$index]" != "[]" ]]; then p="$pattern[$index]$lookahead[$index]*" if [[ -z "$guard[$index]" ]]; then print -lr - \ "$test [[ \$_ra_right = \${~:-${(qqqq)p}} ]]" else print -lr - \ "$test [[ \$_ra_right = \${~:-${(qqqq)p}} ]] && {" \ "$guard[$index]" \ "}" fi test='elif' (( $old[(I)$index] || $new[(I)$index] )) || new=($index "$new[@]") print -lr - \ "then" \ "_ra_state=$index" \ '_ra_right="${_ra_right[mend[1] + 1, -1]}"' actions=() for i in $=tbl[$index]; do if [[ -n $action[$i] ]]; then actions=($actions $i) fi done case "$cutoff[$index]" in +) print -lr - \ '_ra_left="$_ra_left$match[1]"' if (( $#actions )); then print -lr - \ "_ra_actions=($actions \$_ra_actions)" fi ;; /) print -lr - \ '_ra_left=' print -lr - \ 'if (( mend[1] )); then' \ "_ra_actions=($actions)" if (( $#actions )); then print -lr - \ 'else' \ "_ra_actions=($actions \$_ra_actions)" fi print -lr - \ 'fi' ;; -) print -lr - \ '_ra_left=' \ "_ra_actions=($actions)" ;; esac fi done if [[ $test != 'if' ]]; then # Some branchs are exists. But all of them are failed. print -lr - \ 'else' \ 'if [[ "$_ra_left$_ra_right" = *$'\''\0'\''* ]]; then' \ '_message "parse failed before current word"' \ '_ra_actions=()' \ 'else' \ 'compset -p $(( $#PREFIX - $#_ra_right - $#_ra_left ))' \ 'fi' \ 'break' \ 'fi' else # There are no branch. print -lr - \ '_message "no more arguments"' \ '_ra_actions=()' \ 'break' fi } _regex_arguments () { setopt localoptions extendedglob local funcname="_regex_arguments_tmp" local funcdef typeset -A tbl cutoff pattern lookahead guard action actions local regex index first last nullable local i state next local cache_dir="${compconfig[regex_arguments_path]:-$HOME/.zsh/regex_arguments}" local cache_file="$cache_dir/$1" local cache_test if ! [[ -f "$cache_file" ]] || ! source "$cache_file" "$@"; then funcname="$1" regex=("${(@)argv[2,-1]}") index=1 tbl=() pattern=() lookahead=() guard=() action=() actions=() _ra_parse_alt if (( $? == 2 || index != $#regex + 1 )); then if (( index != $#regex + 1 )); then print "regex parse error at $index: $regex[index]" >&2 else print "regex parse error at $index (end)" >&2 fi return 1 fi tbl[0]=" $first" unfunction "$funcname" 2>/dev/null funcdef="$(_ra_gen_func)" if [[ -d "$cache_dir" && -w "$cache_dir" ]]; then print -lr - \ 'if [[ $# -eq '$#' && "$*" = '"${(qqqq)*}"' ]]; then' \ "$funcdef" \ 'true; else false; fi' > "${cache_file}.$HOST.$$" source "${cache_file}.$HOST.$$" "$@" mv "${cache_file}.$HOST.$$" "${cache_file}" else source =(print -lr - "$funcdef") fi fi } _regex_arguments "$@"