diff options
author | Ulrich Drepper <drepper@redhat.com> | 2004-12-10 04:37:58 +0000 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 2004-12-10 04:37:58 +0000 |
commit | 5cf1ec52565b19d68c3c0fbd853c0b5de26b27b2 (patch) | |
tree | c93f9b9778da05c5d67f29870f1e263963e64ef5 /posix/regexec.c | |
parent | dc165f7b0bfa73ebd64584331d0cb7c2ead66147 (diff) | |
download | glibc-5cf1ec52565b19d68c3c0fbd853c0b5de26b27b2.tar.gz glibc-5cf1ec52565b19d68c3c0fbd853c0b5de26b27b2.tar.xz glibc-5cf1ec52565b19d68c3c0fbd853c0b5de26b27b2.zip |
Update.
2004-12-07 Paolo Bonzini <bonzini@gnu.org> * posix/regexec.c (proceed_next_node): Simplify treatment of epsilon nodes. Pass the pushed node to push_fail_stack. (push_fail_stack): Accept a single node rather than an array of two epsilon destinations. (build_sifted_states): Only walk non-epsilon nodes. (check_arrival): Don't pass epsilon nodes to check_arrival_add_next_nodes. (check_arrival_add_next_nodes) [DEBUG]: Abort if an epsilon node is found. (check_node_accept): Do expensive checks later. (add_epsilon_src_nodes): Cache result of merging the inveclosures. * posix/regex_internal.h (re_dfastate_t): Add non_eps_nodes and inveclosure. (re_string_elem_size_at, re_string_char_size_at, re_string_wchar_at, re_string_context_at, re_string_peek_byte_case, re_string_fetch_byte_case, re_node_set_compare, re_node_set_contains): Declare as pure. * posix/regex_internal.c (create_newstate_common): Remove. (register_state): Move part of it here. Initialize non_eps_nodes. (free_state): Free inveclosure and non_eps_nodes. (create_cd_newstate, create_ci_newstate): Allocate the new re_dfastate_t here.
Diffstat (limited to 'posix/regexec.c')
-rw-r--r-- | posix/regexec.c | 138 |
1 files changed, 81 insertions, 57 deletions
diff --git a/posix/regexec.c b/posix/regexec.c index 22b806439b..91b48dd4a2 100644 --- a/posix/regexec.c +++ b/posix/regexec.c @@ -73,7 +73,7 @@ static int proceed_next_node (const re_match_context_t *mctx, int *pidx, int node, re_node_set *eps_via_nodes, struct re_fail_stack_t *fs) internal_function; static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs, - int str_idx, int *dests, int nregs, + int str_idx, int dest_node, int nregs, regmatch_t *regs, re_node_set *eps_via_nodes) internal_function; static int pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs, @@ -1223,30 +1223,38 @@ proceed_next_node (mctx, nregs, regs, pidx, node, eps_via_nodes, fs) if (IS_EPSILON_NODE (dfa->nodes[node].type)) { re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes; - int ndest, dest_nodes[2]; + re_node_set *edests = &dfa->edests[node]; + int dest_node; err = re_node_set_insert (eps_via_nodes, node); if (BE (err < 0, 0)) return -2; - /* Pick up valid destinations. */ - for (ndest = 0, i = 0; i < dfa->edests[node].nelem; ++i) + /* Pick up a valid destination, or return -1 if none is found. */ + for (dest_node = -1, i = 0; i < edests->nelem; ++i) { - int candidate = dfa->edests[node].elems[i]; + int candidate = edests->elems[i]; if (!re_node_set_contains (cur_nodes, candidate)) continue; - dest_nodes[0] = (ndest == 0) ? candidate : dest_nodes[0]; - dest_nodes[1] = (ndest == 1) ? candidate : dest_nodes[1]; - ++ndest; + if (dest_node == -1) + dest_node = candidate; + + else + { + /* In order to avoid infinite loop like "(a*)*", return the second + epsilon-transition if the first was already considered. */ + if (re_node_set_contains (eps_via_nodes, dest_node)) + return candidate; + + /* Otherwise, push the second epsilon-transition on the fail stack. */ + else if (fs != NULL + && push_fail_stack (fs, *pidx, candidate, nregs, regs, + eps_via_nodes)) + return -2; + + /* We know we are going to exit. */ + break; + } } - if (ndest <= 1) - return ndest == 0 ? -1 : (ndest == 1 ? dest_nodes[0] : 0); - /* In order to avoid infinite loop like "(a*)*". */ - if (re_node_set_contains (eps_via_nodes, dest_nodes[0])) - return dest_nodes[1]; - if (fs != NULL - && push_fail_stack (fs, *pidx, dest_nodes, nregs, regs, - eps_via_nodes)) - return -2; - return dest_nodes[0]; + return dest_node; } else { @@ -1304,9 +1312,9 @@ proceed_next_node (mctx, nregs, regs, pidx, node, eps_via_nodes, fs) } static reg_errcode_t -push_fail_stack (fs, str_idx, dests, nregs, regs, eps_via_nodes) +push_fail_stack (fs, str_idx, dest_node, nregs, regs, eps_via_nodes) struct re_fail_stack_t *fs; - int str_idx, *dests, nregs; + int str_idx, dest_node, nregs; regmatch_t *regs; re_node_set *eps_via_nodes; { @@ -1323,7 +1331,7 @@ push_fail_stack (fs, str_idx, dests, nregs, regs, eps_via_nodes) fs->stack = new_array; } fs->stack[num].idx = str_idx; - fs->stack[num].node = dests[1]; + fs->stack[num].node = dest_node; fs->stack[num].regs = re_malloc (regmatch_t, nregs); if (fs->stack[num].regs == NULL) return REG_ESPACE; @@ -1600,7 +1608,7 @@ build_sifted_states (mctx, sctx, str_idx, cur_dest) re_node_set *cur_dest; { re_dfa_t *const dfa = mctx->dfa; - re_node_set *cur_src = &mctx->state_log[str_idx]->nodes; + re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes; int i; /* Then build the next sifted state. @@ -1608,16 +1616,20 @@ build_sifted_states (mctx, sctx, str_idx, cur_dest) `sifted_states[str_idx]' with `cur_dest'. Note: `cur_dest' is the sifted state from `state_log[str_idx + 1]'. - `cur_src' points the node_set of the old `state_log[str_idx]'. */ + `cur_src' points the node_set of the old `state_log[str_idx]' + (with the epsilon nodes pre-filtered out). */ for (i = 0; i < cur_src->nelem; i++) { int prev_node = cur_src->elems[i]; int naccepted = 0; - re_token_type_t type = dfa->nodes[prev_node].type; int ret; - if (IS_EPSILON_NODE (type)) - continue; +#if defined DEBUG || defined RE_ENABLE_I18N + re_token_type_t type = dfa->nodes[prev_node].type; +#endif +#ifdef DEBUG + assert (!IS_EPSILON_NODE (type)); +#endif #ifdef RE_ENABLE_I18N /* If the node may accept `multi byte'. */ if (ACCEPT_MB_NODE (type)) @@ -1764,26 +1776,24 @@ add_epsilon_src_nodes (dfa, dest_nodes, candidates) re_node_set *dest_nodes; const re_node_set *candidates; { - reg_errcode_t err; - int src_idx; - re_node_set src_copy; + reg_errcode_t err = REG_NOERROR; + int i; - err = re_node_set_init_copy (&src_copy, dest_nodes); + re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes); if (BE (err != REG_NOERROR, 0)) return err; - for (src_idx = 0; src_idx < src_copy.nelem; ++src_idx) + + if (!state->inveclosure.alloc) { - err = re_node_set_add_intersect (dest_nodes, candidates, - dfa->inveclosures - + src_copy.elems[src_idx]); + err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem); if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&src_copy); - return err; - } + return REG_ESPACE; + for (i = 0; i < dest_nodes->nelem; i++) + re_node_set_merge (&state->inveclosure, + dfa->inveclosures + dest_nodes->elems[i]); } - re_node_set_free (&src_copy); - return REG_NOERROR; + return re_node_set_add_intersect (dest_nodes, candidates, + &state->inveclosure); } static reg_errcode_t @@ -2935,7 +2945,7 @@ check_arrival (mctx, path, top_node, top_str, last_node, last_str, if (cur_state) { err = check_arrival_add_next_nodes (mctx, str_idx, - &cur_state->nodes, &next_nodes); + &cur_state->non_eps_nodes, &next_nodes); if (BE (err != REG_NOERROR, 0)) { re_node_set_free (&next_nodes); @@ -3009,9 +3019,12 @@ check_arrival_add_next_nodes (mctx, str_idx, cur_nodes, next_nodes) { int naccepted = 0; int cur_node = cur_nodes->elems[cur_idx]; +#if defined DEBUG || defined RE_ENABLE_I18N re_token_type_t type = dfa->nodes[cur_node].type; - if (IS_EPSILON_NODE (type)) - continue; +#endif +#ifdef DEBUG + assert (!IS_EPSILON_NODE (type)); +#endif #ifdef RE_ENABLE_I18N /* If the node may accept `multi byte'. */ if (ACCEPT_MB_NODE (type)) @@ -3988,24 +4001,20 @@ check_node_accept (mctx, node, idx) const re_token_t *node; int idx; { - re_dfa_t *const dfa = mctx->dfa; unsigned char ch; - if (node->constraint) - { - /* The node has constraints. Check whether the current context - satisfies the constraints. */ - unsigned int context = re_string_context_at (&mctx->input, idx, - mctx->eflags); - if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) - return 0; - } ch = re_string_byte_at (&mctx->input, idx); switch (node->type) { case CHARACTER: - return node->opr.c == ch; + if (node->opr.c != ch) + return 0; + break; + case SIMPLE_BRACKET: - return bitset_contain (node->opr.sbcset, ch); + if (!bitset_contain (node->opr.sbcset, ch)) + return 0; + break; + #ifdef RE_ENABLE_I18N case OP_UTF8_PERIOD: if (ch >= 0x80) @@ -4013,11 +4022,26 @@ check_node_accept (mctx, node, idx) /* FALLTHROUGH */ #endif case OP_PERIOD: - return !((ch == '\n' && !(dfa->syntax & RE_DOT_NEWLINE)) - || (ch == '\0' && (dfa->syntax & RE_DOT_NOT_NULL))); + if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE)) + || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL))) + return 0; + break; + default: return 0; } + + if (node->constraint) + { + /* The node has constraints. Check whether the current context + satisfies the constraints. */ + unsigned int context = re_string_context_at (&mctx->input, idx, + mctx->eflags); + if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) + return 0; + } + + return 1; } /* Extend the buffers, if the buffers have run out. */ |