From 20dc2f79f7626201b14bb706688bb2171c7cb59c Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Thu, 25 Nov 2004 22:32:18 +0000 Subject: Update. 2004-11-23 Paolo Bonzini * posix/regcomp.c (analyze_tree): Always call calc_epsdest. (calc_inveclosure): Use re_node_set_insert_last. (parse_dup_op): Lower X{1,5} to (X(X(X(XX?)?)?)?)? rather than X?X?X?X?X?. * posix/regex_internal.h (re_node_set_insert_last): New declaration. * posix/regex_internal.c (re_node_set_insert_last): New function. * posix/PCRE.tests: Add testcases. --- posix/PCRE.tests | 19 ++++++++++++ posix/regcomp.c | 79 ++++++++++++++++++++++---------------------------- posix/regex_internal.c | 25 ++++++++++++++++ posix/regex_internal.h | 2 ++ 4 files changed, 81 insertions(+), 44 deletions(-) (limited to 'posix') diff --git a/posix/PCRE.tests b/posix/PCRE.tests index 7ea5b9e70c..0fb9cadafc 100644 --- a/posix/PCRE.tests +++ b/posix/PCRE.tests @@ -2365,3 +2365,22 @@ No match 0: bc123bc 1: bc 2: bc + +/^a{2,5}$/ + aa + 0: aa + aaa + 0: aaa + aaaa + 0: aaaa + aaaaa + 0: aaaaa + *** Failers +No match + a +No match + b +No match + aaaaab +No match + aaaaaa diff --git a/posix/regcomp.c b/posix/regcomp.c index dafad9bd0c..675f816f60 100644 --- a/posix/regcomp.c +++ b/posix/regcomp.c @@ -1269,8 +1269,8 @@ analyze_tree (dfa, node) calc_first (dfa, node); if (node->next == -1) calc_next (dfa, node); - if (node->eclosure.nelem == 0) - calc_epsdest (dfa, node); + calc_epsdest (dfa, node); + /* Calculate "first" etc. for the left child. */ if (node->left != NULL) { @@ -1626,7 +1626,7 @@ calc_inveclosure (dfa) for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx) { dest = dfa->eclosures[src].elems[idx]; - re_node_set_insert (dfa->inveclosures + dest, src); + re_node_set_insert_last (dfa->inveclosures + dest, src); } } } @@ -2538,7 +2538,7 @@ parse_dup_op (elem, regexp, dfa, token, syntax, err) reg_errcode_t *err; { re_token_t dup_token; - bin_tree_t *tree = NULL; + bin_tree_t *tree = NULL, *old_tree = NULL; int i, start, end, start_idx = re_string_cur_idx (regexp); re_token_t start_token = *token; @@ -2598,12 +2598,14 @@ parse_dup_op (elem, regexp, dfa, token, syntax, err) end = (token->type == OP_DUP_QUESTION) ? 1 : -1; } + fetch_token (token, regexp, syntax); + /* Treat "{0}*" etc. as "{0}". */ - if (BE (elem == NULL, 0)) - start = end = 0; + if (BE (elem == NULL || (start == 0 && end == 0), 0)) + return NULL; /* Extract "{n,m}" to "...{0,}". */ - else if (BE (start > 0, 0)) + if (BE (start > 0, 0)) { tree = elem; for (i = 2; i <= start; ++i) @@ -2613,52 +2615,41 @@ parse_dup_op (elem, regexp, dfa, token, syntax, err) if (BE (elem == NULL || tree == NULL, 0)) goto parse_dup_op_espace; } - } - if (BE (end != start, 1)) - { - dup_token.type = (end == -1 ? OP_DUP_ASTERISK : OP_DUP_QUESTION); - if (BE (start > 0, 0)) - { - elem = duplicate_tree (elem, dfa); - if (BE (elem == NULL, 0)) - goto parse_dup_op_espace; + if (start == end) + return tree; - /* This subexpression will be marked as optional, so that - empty matches do not touch the registers. */ - mark_opt_subexp (elem, dfa); + /* Duplicate ELEM before it is marked optional. */ + elem = duplicate_tree (elem, dfa); + old_tree = tree; + } + else + old_tree = NULL; - /* Prepare the tree with the modifier. */ - elem = re_dfa_add_tree_node (dfa, elem, NULL, &dup_token); - tree = create_tree (dfa, tree, elem, CONCAT, 0); - } - else - { - /* We do not need to duplicate the tree because we have not - created it yet. */ - mark_opt_subexp (elem, dfa); - tree = elem = re_dfa_add_tree_node (dfa, elem, NULL, &dup_token); - } + mark_opt_subexp (elem, dfa); + dup_token.type = (end == -1 ? OP_DUP_ASTERISK : OP_DUP_QUESTION); + tree = re_dfa_add_tree_node (dfa, elem, NULL, &dup_token); + if (BE (tree == NULL, 0)) + goto parse_dup_op_espace; + /* This loop is actually executed only when end != -1, + to rewrite {0,n} as ((...?)?)?... We have + already created the start+1-th copy. */ + for (i = start + 2; i <= end; ++i) + { + elem = duplicate_tree (elem, dfa); + tree = create_tree (dfa, tree, elem, CONCAT, 0); if (BE (elem == NULL || tree == NULL, 0)) goto parse_dup_op_espace; - /* This loop is actually executed only when end != -1, - to rewrite {0,n} as ???... We have - already created the start+1-th copy. */ - for (i = start + 2; i <= end; ++i) - { - elem = duplicate_tree (elem, dfa); - tree = create_tree (dfa, tree, elem, CONCAT, 0); - if (BE (elem == NULL || tree == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - } + tree = re_dfa_add_tree_node (dfa, tree, NULL, &dup_token); + if (BE (tree == NULL, 0)) + goto parse_dup_op_espace; } - fetch_token (token, regexp, syntax); + if (old_tree) + tree = create_tree (dfa, old_tree, tree, CONCAT, 0); + return tree; parse_dup_op_espace: diff --git a/posix/regex_internal.c b/posix/regex_internal.c index bb1d73d9a0..cb439e5d7c 100644 --- a/posix/regex_internal.c +++ b/posix/regex_internal.c @@ -1250,6 +1250,31 @@ re_node_set_insert (set, elem) return 1; } +/* Insert the new element ELEM to the re_node_set* SET. + SET should not already have any element greater than or equal to ELEM. + Return -1 if an error is occured, return 1 otherwise. */ + +static int +re_node_set_insert_last (set, elem) + re_node_set *set; + int elem; +{ + /* Realloc if we need. */ + if (set->alloc == set->nelem) + { + int *new_array; + set->alloc = (set->alloc + 1) * 2; + new_array = re_realloc (set->elems, int, set->alloc); + if (BE (new_array == NULL, 0)) + return -1; + set->elems = new_array; + } + + /* Insert the new element. */ + set->elems[set->nelem++] = elem; + return 1; +} + /* Compare two node sets SET1 and SET2. return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */ diff --git a/posix/regex_internal.h b/posix/regex_internal.h index a778032d77..703d409eb8 100644 --- a/posix/regex_internal.h +++ b/posix/regex_internal.h @@ -668,6 +668,8 @@ static reg_errcode_t re_node_set_init_union (re_node_set *dest, static reg_errcode_t re_node_set_merge (re_node_set *dest, const re_node_set *src) internal_function; static int re_node_set_insert (re_node_set *set, int elem) internal_function; +static int re_node_set_insert_last (re_node_set *set, + int elem) internal_function; static int re_node_set_compare (const re_node_set *set1, const re_node_set *set2) internal_function; static int re_node_set_contains (const re_node_set *set, int elem) internal_function; -- cgit 1.4.1