From 17c6a826380765b822805364e2261f4580b95a5f Mon Sep 17 00:00:00 2001
From: Leah Neukirchen
Date: Mon, 9 Oct 2017 15:53:20 +0200
Subject: WIP implement percent matching with regexp

---
 xe.c | 248 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 221 insertions(+), 27 deletions(-)

diff --git a/xe.c b/xe.c
index 88c7544..f9d3b23 100644
--- a/xe.c
+++ b/xe.c
@@ -14,6 +14,7 @@
 #include <errno.h>
 #include <fcntl.h>
 #include <limits.h>
+#include <regex.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -359,50 +360,230 @@ parse_jobs(char *s)
 	return n;
 }
 
+// perc is a variant of fnmatch, implemented here using regexec:
+//   start of string -> ^
+//   * -> [^/]*
+//   ** -> .*
+//   ? -> [^/]
+//   % -> ([^/][^/]*)
+//   [!abc] -> [^abc]
+//   [] -> X^ (never matches)
+//   {a,b,c} -> (a|b|c)
+//   / (possibly multiple) -> //*
+//   ( ) . ^ $ + | \ -> escaped with \
+//   end of string -> $
+//
+// differences to fnmatch:
+// - no special prefix . handling
+// - / in [a/b] always ignored
+// - [] matches nothing
+// - no collations
+
+struct perc_pattern {
+	int init;		// nonzero once re has been compiled
+	regex_t re;		// the compiled pattern
+	int just_base;		// match the part after the last / only
+	int perc_group;		// regex group of the last %, 0 if none
+	char *match;		// text of that group in the last match
+	int matchlen;		// length of match
+};
+
+// Translate the pattern s into an anchored POSIX extended regex and
+// compile it into opat.  Exits with an error message on a malformed
+// pattern or when out of memory.
+void
+perc_compile(char *s, struct perc_pattern *opat)
+{
+#define ANY do { *r++ = '['; *r++ = '^'; *r++ = '/'; *r++ = ']'; } while (0)
+
+	int braces = 0;
+	int bracelvl = 0;
+	int nthmatch = 0;
+	int just_base = 1;
+
+	// Worst case is a pattern made only of '%', each one expanding to
+	// the 11 bytes of "([^/][^/]*)"; add room for '^', '$' and the NUL.
+	char *buf = malloc(strlen(s) * 11 + 3);
+	if (!buf) {
+		fprintf(stderr, "xe: out of memory\n");
+		exit(1);
+	}
+	char *r = buf;
+
+	*r++ = '^';
+
+	while (*s) {
+		switch (*s) {
+		case '*':
+			if (s[1] == '*') {
+				just_base = 0;
+				s++;
+				*r++ = '.';
+			} else {
+				ANY;
+			}
+			*r++ = '*';
+			break;
+		case '?':
+			ANY;
+			break;
+		case '%':
+			if (bracelvl > 0) {
+				fprintf(stderr, "xe: %% not supported inside {}\n");
+				exit(1);
+			}
+			braces++;
+			nthmatch = braces;
+			*r++ = '(';
+			ANY;
+			ANY;
+			*r++ = '*';
+			*r++ = ')';
+			break;
+		case '/':
+			just_base = 0;
+			*r++ = '/';
+			*r++ = '/';
+			*r++ = '*';
+			while (s[1] == '/')
+				s++;
+			break;
+		case '{':
+			braces++;
+			bracelvl++;
+			*r++ = '(';
+			break;
+		case '}':
+			if (bracelvl > 0) {
+				bracelvl--;
+				*r++ = ')';
+			} else {
+				*r++ = '}';
+			}
+			break;
+		case ',':
+			*r++ = bracelvl > 0 ? '|' : ',';
+			break;
+		case '[':
+			s++;
+			if (*s == ']') {  // [] -> X^, matches never
+				*r++ = 'X';
+				*r++ = '^';
+				break;
+			}
+			*r++ = '[';
+			if (*s == '!') {
+				s++;
+				*r++ = '^';
+			}
+			while (*s && *s != ']') {
+				// also disables [[:foo:]] and [/]
+				if (*s == '[' || *s == '/' || *s == '\\')
+					*r++ = '\\';
+				*r++ = *s++;
+			}
+			if (!*s) {
+				fprintf(stderr, "xe: unmatched [\n");
+				exit(1);
+			}
+			*r++ = ']';
+			break;
+		case '(':
+		case ')':
+		case '.':
+		case '^':
+		case '$':
+		case '+':
+		case '|':
+		case '\\':
+			*r++ = '\\';
+			/* FALL THROUGH */
+		default:
+			*r++ = *s;
+		}
+		s++;
+	}
+
+	*r++ = '$';
+	*r = 0;
+
+	if (bracelvl > 0) {
+		fprintf(stderr, "xe: unmatched {\n");
+		exit(1);
+	}
+
+//	fprintf(stderr, "regex %s (match %d, base %d)\n", buf, nthmatch, just_base);
+
+	opat->init = 1;
+	opat->perc_group = nthmatch;
+	opat->just_base = just_base;
+
+	int v;
+	if ((v = regcomp(&opat->re, buf, REG_EXTENDED))) {
+		fprintf(stderr,
+		    "xe: internal regex error %d, please report a bug.\n", v);
+		exit(1);
+	}
+	free(buf);
+
+#undef ANY
+}
+
+// Match arg against the compiled pattern pat.  On success, remember the
+// text matched by the last % (or the whole match when there is no %) in
+// pat->match/pat->matchlen and return 1; otherwise clear both and return 0.
+// Patterns containing neither / nor ** only see the part after the last /.
 int
-perc_match(char *pat, char *arg)
+perc_match(struct perc_pattern *pat, char *arg)
 {
-	if (!strchr(pat, '/')) {
+	regmatch_t matches[64];
+
+	if (pat->perc_group >= 64) {
+		fprintf(stderr, "xe: too many matching groups.\n");
+		exit(1);
+	}
+
+	if (pat->just_base) {
 		char *d = strrchr(arg, '/');
 		if (d)
 			arg = d + 1;
 	}
 
-	char *s = strchr(pat, '%');
-
-	if (!s)
-		return strcmp(arg, pat) == 0;
-
-	size_t la = strlen(arg);
-	size_t lp = strlen(pat);
+	if (regexec(&pat->re, arg,
+	    sizeof matches / sizeof matches[0], matches, 0) == 0) {
+		pat->match = arg + matches[pat->perc_group].rm_so;
+		pat->matchlen = (int)(matches[pat->perc_group].rm_eo -
+		    matches[pat->perc_group].rm_so);
+		return 1;
+	}
 
-	return la >= lp &&
-	    strncmp(arg, pat, s - pat) == 0 &&
-	    strncmp(arg + la - (pat + lp - (s + 1)),
-		s + 1,
-		pat + lp - (s + 1)) == 0;
+	pat->match = 0;
+	pat->matchlen = 0;
+	return 0;
 }
 
 char *
-perc_subst(char *pat, char *base, char *arg)
+perc_subst(struct perc_pattern *pat, char *base, char *arg)
 {
 	static char buf[2048];
 	size_t l;
 	char *dir = base;
 
-	if (!strchr(pat, '/')) {
+	if (arg[0] == '@' &&
+	    arg[1] == 0)
+		return base;
+
+	if (pat->just_base) {
 		char *d = strrchr(base, '/');
 		if (d)
 			base = d + 1;
 	}
 
-	char *s = strchr(pat, '%');
 	char *t = strchr(arg, '%');
-
-	if (!t)
+	if (!t || pat->matchlen == 0)
 		return arg;
 
-	if (s)
+	if (pat->perc_group)
 		l = snprintf(buf, sizeof buf, "%.*s%.*s%.*s%.*s",
 		    (int)(base - dir),
 		    dir,
@@ -410,8 +591,8 @@ perc_subst(char *pat, char *base, char *arg)
 		    (int)(t - arg),
 		    arg,
 
-		    (int)(strlen(base) - (pat + strlen(pat) - (s + 1))),
-		    base + (s - pat),
+		    pat->matchlen,
+		    pat->match,
 
 		    (int)(arg + strlen(arg) - t),
 		    t+1);
@@ -544,15 +725,28 @@ main(int argc, char *argv[], char *envp[])
 		exit(1);
 	}
 
+	int n;
+	int cases = 0;
+	for (n = optind; n < cmdend; n++)
+		if (argv[n][0] == '+' &&
+		    argv[n][1] == '\0')
+			cases++;
+
+	struct perc_pattern *patterns =
+	    calloc(cases + 1, sizeof patterns[0]);
+	if (!patterns)
+		exit(1);
+
 	while ((arg = getarg())) {
 		buflen = 0;
 		argslen = 0;
 
-		int n;
-		for (n = optind, i = n + 1; n < cmdend; n = i + 1) {
-			char *pat = argv[n];
+		cases = 0;
+		for (n = optind, i = n + 1; n < cmdend; cases++, n = i + 1) {
+			if (!patterns[cases].init)
+				perc_compile(argv[n], patterns + cases);
 
-			int matched = perc_match(pat, arg);
+			int matched = perc_match(patterns + cases, arg);
 
 			for (i = n + 1; i < cmdend; i++) {
 				if (argv[i][0] == '+' &&
@@ -567,7 +761,7 @@ main(int argc, char *argv[], char *envp[])
 				}
 
 				if (matched &&
-				    !pusharg(perc_subst(pat, arg, argv[i])))
+				    !pusharg(perc_subst(patterns + cases, arg, argv[i])))
 					toolong();
 			}
 
-- 
cgit 1.4.1