about summary refs log tree commit diff
diff options
context:
space:
mode:
authorLeah Neukirchen <leah@vuxu.org>2017-10-09 15:53:20 +0200
committerLeah Neukirchen <leah@vuxu.org>2017-10-09 15:53:20 +0200
commit17c6a826380765b822805364e2261f4580b95a5f (patch)
treed331d2bd73ab818d3b924d419e529bdb45caf42d
parent3d50e222776f5468c129a639d3892c9521136e1b (diff)
downloadxe-perc-regex.tar.gz
xe-perc-regex.tar.xz
xe-perc-regex.zip
WIP implement percent matching with regexp perc-regex
-rw-r--r--xe.c231
1 files changed, 204 insertions, 27 deletions
diff --git a/xe.c b/xe.c
index 88c7544..f9d3b23 100644
--- a/xe.c
+++ b/xe.c
@@ -14,6 +14,7 @@
 #include <errno.h>
 #include <fcntl.h>
 #include <limits.h>
+#include <regex.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -359,50 +360,213 @@ parse_jobs(char *s)
 	return n;
 }
 
+// perc is a variant of fnmatch, implemented here using regexec:
+//   start of string -> ^
+//   * -> [^/]*
+//   ** -> .*
+//   ? -> [^/]
+//   % -> ([^/][^/]*)
+//   [!abc] -> [^abc]
+//   [] -> X^ (never matches)
+//   {a,b,c} -> (a|b|c)
+//   / (possibly multiple) -> //*
+//   end of string -> $
+//
+// differences to fnmatch:
+//    - no special prefix . handling
+//    - / in [a/b] always ignored
+//    - [] matches nothing
+//    - no collations
+
+struct perc_pattern {  // one compiled percent pattern plus the result of its most recent match
+	int init;  // set to 1 by perc_compile; main checks it so each pattern is compiled only once
+	regex_t re;  // regex produced by regcomp() in perc_compile
+	int just_base;  // 1 unless the pattern contained '/' or '**'; when set, only the part after the last '/' is matched
+	int perc_group;  // capture group index of the last '%' in the pattern, 0 if there was none
+	char *match;  // set by perc_match: start of the captured text inside the matched argument, 0 on no match
+	int matchlen;  // set by perc_match: length of the captured text, 0 on no match
+};
+
+void
+perc_compile(char *s, struct perc_pattern *opat)  // translate pattern s into an anchored regex, compile it into opat->re; exits(1) on malformed patterns
+{
+#define ANY  do { *r++ = '['; *r++ = '^'; *r++ = '/'; *r++ = ']'; } while (0)  // appends "[^/]": any one character except '/'
+
+	int braces = 0;  // capture groups opened so far (each '%' and each '{' opens one)
+	int bracelvl = 0;  // current nesting depth of {...}
+	int nthmatch = 0;  // group index of the most recent '%', stays 0 when the pattern has none
+	int just_base = 1;  // cleared as soon as the pattern contains '/' or '**'
+
+	char buf[1024];  // NOTE(review): writes through r below are never checked against this size; a long pattern looks able to overflow buf -- confirm
+	char *r = buf;  // write cursor into buf
+
+	*r++ = '^';  // anchor at start of string
+
+	while (*s) {
+		switch (*s) {
+		case '*':  // '**' -> ".*", single '*' -> "[^/]*"
+			if (s[1] == '*') {
+				just_base = 0;
+				s++;  // consume the second '*'
+				*r++ = '.';
+			} else {
+				ANY;
+			}
+			*r++ = '*';
+			break;
+		case '?':  // -> "[^/]"
+			ANY;
+			break;
+		case '%':  // -> "([^/][^/]*)": non-empty capture that contains no '/'
+			if (bracelvl > 0) {
+				fprintf(stderr, "xe: %% not supported inside {}\n");
+				exit(1);
+			}
+			braces++;
+			nthmatch = braces;  // a later '%' replaces an earlier one as the reported group
+			*r++ = '(';
+			ANY;
+			ANY;
+			*r++ = '*';
+			*r++ = ')';
+			break;
+		case '/':  // a run of one or more slashes -> "//*"
+			just_base = 0;
+			*r++ = '/';
+			*r++ = '/';
+			*r++ = '*';
+			while (s[1] == '/')  // skip the rest of the run
+				s++;
+			break;
+		case '{':  // opens an alternation; counted in braces because '(' is also a capture group
+			braces++;
+			bracelvl++;
+			*r++ = '(';
+			break;
+		case '}':  // closes an open '{', otherwise copied through as '}'
+			if (bracelvl > 0) {
+				bracelvl--;
+				*r++ = ')';
+			} else {
+				*r++ = '}';
+			}
+			break;
+		case ',':  // alternative separator inside {...}, plain comma elsewhere
+			*r++ = bracelvl > 0 ? '|' : ',';
+			break;
+		case '[':  // bracket expression; s is advanced past the '[' first
+			s++;
+			if (*s == ']') {   // [] -> X^, matches never
+				*r++ = 'X';
+				*r++ = '^';
+				break;
+			}
+			*r++ = '[';
+			if (*s == '!') {  // [!...] -> [^...]
+				s++;
+				*r++ = '^';
+			}
+			while (*s && *s != ']') {  // copy members up to the closing ']'
+				// also disables [[:foo:]] and [/]
+				if (*s == '[' || *s == '/' || *s == '\\')
+					*r++ = '\\';
+				*r++ = *s++;
+			}
+			if (!*s) {  // ran off the end of the pattern without seeing ']'
+				fprintf(stderr, "xe: unmatched [\n");
+				exit(1);
+			}
+			*r++ = ']';
+			break;
+		case '(':  // characters below get a backslash so they match literally
+		case ')':
+		case '.':
+		case '^':
+		case '$':
+		case '\\':  // NOTE(review): '+' and '|' are also ERE operators but reach default unescaped -- confirm intended
+			*r++ = '\\';
+			/* FALL THROUGH */
+		default:
+			*r++ = *s;
+		}
+		s++;
+	}
+
+	*r++ = '$';  // anchor at end of string
+	*r = 0;
+
+	if (bracelvl > 0) {  // a '{' was never closed
+		fprintf(stderr, "unmatched {\n");
+		exit(1);
+	}
+
+//	fprintf(stderr, "regex %s (match %d, base %d)\n", buf, nthmatch, just_base);
+
+	opat->init = 1;  // marks the pattern as compiled
+	opat->perc_group = nthmatch;
+	opat->just_base = just_base;
+
+	int v;
+	if ((v = regcomp(&opat->re, buf, REG_EXTENDED))) {  // non-zero return means the generated regex was rejected
+		fprintf(stderr,
+		    "xe: internal regex error %d, please report a bug.\n", v);
+		exit(1);
+	}
+
+#undef ANY
+}
+
 int
-perc_match(char *pat, char *arg)
+perc_match(struct perc_pattern *pat, char *arg)  // match arg against compiled pat; returns 1 and records the '%' capture in pat, else returns 0
 {
-	if (!strchr(pat, '/')) {
+	regmatch_t matches[64];  // filled by regexec: entry 0 is the whole match, then one entry per capture group
+
+	if (pat->perc_group >= 64) {  // the '%' group index must be a valid index into matches[]
+		fprintf(stderr, "xe: too many matching groups.\n");
+		exit(1);
+	}
+
+	if (pat->just_base) {  // pattern had no '/' or '**': compare only the part of arg after its last '/'
 		char *d = strrchr(arg, '/');
 		if (d)
 			arg = d + 1;
 	}
 
-	char *s = strchr(pat, '%');
-
-	if (!s)
-		return strcmp(arg, pat) == 0;
-
-	size_t la = strlen(arg);
-	size_t lp = strlen(pat);
+	if (regexec(&pat->re, arg,
+	    sizeof matches / sizeof matches[0], matches, 0) == 0) {
+		pat->match = arg + matches[pat->perc_group].rm_so;  // points into arg (no copy); group 0 is used when the pattern had no '%'
+		pat->matchlen = (int)(matches[pat->perc_group].rm_eo -
+		    matches[pat->perc_group].rm_so);
+		return 1;
+	}
 
-	return la >= lp &&
-	    strncmp(arg, pat, s - pat) == 0 &&
-	    strncmp(arg + la - (pat + lp - (s + 1)),
-		s + 1,
-		pat + lp - (s + 1)) == 0;
+	pat->match = 0;  // no match: clear any capture left from a previous call
+	pat->matchlen = 0;
+	return 0;
 }
 
 char *
-perc_subst(char *pat, char *base, char *arg)
+perc_subst(struct perc_pattern *pat, char *base, char *arg)
 {
 	static char buf[2048];
 	size_t l;
 	char *dir = base;
 
-	if (!strchr(pat, '/')) {
+	if (arg[0] == '@' &&
+	    arg[1] == 0)
+		return base;
+
+	if (pat->just_base) {
 		char *d = strrchr(base, '/');
 		if (d)
 			base = d + 1;
 	}
 
-	char *s = strchr(pat, '%');
 	char *t = strchr(arg, '%');
-
-	if (!t)
+	if (!t || pat->matchlen == 0)
 		return arg;
 
-	if (s)
+	if (pat->perc_group)
 		l = snprintf(buf, sizeof buf, "%.*s%.*s%.*s%.*s",
 		    (int)(base - dir),
 		    dir,
@@ -410,8 +574,8 @@ perc_subst(char *pat, char *base, char *arg)
 		    (int)(t - arg),
 		    arg,
 
-		    (int)(strlen(base) - (pat + strlen(pat) - (s + 1))),
-		    base + (s - pat),
+		    pat->matchlen,
+		    pat->match,
 
 		    (int)(arg + strlen(arg) - t),
 		    t+1);
@@ -544,15 +708,28 @@ main(int argc, char *argv[], char *envp[])
 			exit(1);
 		}
 
+		int n;
+		int cases = 0;
+		for (n = optind; n < cmdend; n++)
+			if (argv[n][0] == '+' &&
+			    argv[n][1] == '\0')
+				cases++;
+
+		struct perc_pattern *patterns =
+		    calloc(cases + 1, sizeof patterns[0]);
+		if (!patterns)
+			exit(1);
+
 		while ((arg = getarg())) {
 			buflen = 0;
 			argslen = 0;
 
-			int n;
-			for (n = optind, i = n + 1; n < cmdend; n = i + 1) {
-				char *pat = argv[n];
+			cases = 0;
+			for (n = optind, i = n + 1; n < cmdend; cases++, n = i + 1) {
+				if (!patterns[cases].init)
+					perc_compile(argv[n], patterns + cases);
 
-				int matched = perc_match(pat, arg);
+				int matched = perc_match(patterns + cases, arg);
 
 				for (i = n + 1; i < cmdend; i++) {
 					if (argv[i][0] == '+' &&
@@ -567,7 +744,7 @@ main(int argc, char *argv[], char *envp[])
 					}
 
 					if (matched &&
-					    !pusharg(perc_subst(pat, arg, argv[i])))
+					    !pusharg(perc_subst(patterns + cases, arg, argv[i])))
 						toolong();
 				}