about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Szabolcs Nagy <nsz@port70.net> 2015-04-18 16:47:17 +0000
committer Rich Felker <dalias@aerifal.cx> 2016-01-30 20:53:17 -0500
commit da4cc13b9705e7d3a02216959b9711b3b30828c1 (patch)
tree 40b725270f05a2673495e6c72ce78acd85d68ada /src
parent 7eaa76fc2e7993582989d3838b1ac32dd8abac09 (diff)
download musl-da4cc13b9705e7d3a02216959b9711b3b30828c1.tar.gz
download musl-da4cc13b9705e7d3a02216959b9711b3b30828c1.tar.xz
download musl-da4cc13b9705e7d3a02216959b9711b3b30828c1.zip
regex: treat \| in BRE as alternation
The standard does not define semantics for \| in BRE, but some code
depends on it meaning alternation. An empty alternative expression is
allowed, for consistency with ERE.

Based on a patch by Rob Landley.
Diffstat (limited to 'src')
-rw-r--r-- src/regex/regcomp.c 19
1 files changed, 17 insertions, 2 deletions
diff --git a/src/regex/regcomp.c b/src/regex/regcomp.c
index 078f657c..f1f06afe 100644
--- a/src/regex/regcomp.c
+++ b/src/regex/regcomp.c
@@ -841,6 +841,14 @@ static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
 			/* reject repetitions after empty expression in BRE */
 			if (!ere)
 				return REG_BADRPT;
+		case '|':
+			/* extension: treat \| as alternation in BRE */
+			if (!ere) {
+				node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
+				s--;
+				goto end;
+			}
+			/* fallthrough */
 		default:
 			if (!ere && (unsigned)*s-'1' < 9) {
 				/* back reference */
@@ -918,6 +926,7 @@ parse_literal:
 		s += len;
 		break;
 	}
+end:
 	if (!node)
 		return REG_ESPACE;
 	ctx->n = node;
@@ -1016,13 +1025,20 @@ static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
 		if ((ere && *s == '|') ||
 		    (ere && *s == ')' && depth) ||
 		    (!ere && *s == '\\' && s[1] == ')') ||
+		    /* extension: treat \| as alternation in BRE */
+		    (!ere && *s == '\\' && s[1] == '|') ||
 		    !*s) {
 			/* extension: empty branch is unspecified (), (|a), (a|)
 			   here they are not rejected but match on empty string */
 			int c = *s;
 			nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);
 			nbranch = 0;
-			if (c != '|') {
+
+			if (c == '\\' && s[1] == '|') {
+				s+=2;
+			} else if (c == '|') {
+				s++;
+			} else {
 				if (c == '\\') {
 					if (!depth) return REG_EPAREN;
 					s+=2;
@@ -1042,7 +1058,6 @@ static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
 				nunion = tre_stack_pop_voidptr(stack);
 				goto parse_iter;
 			}
-			s++;
 		}
 	}
 }