diff options
author | Ulrich Drepper <drepper@redhat.com> | 2002-10-12 08:34:26 +0000 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 2002-10-12 08:34:26 +0000 |
commit | 485d775dd578f2c2528d10d7618f09c45ffe6840 (patch) | |
tree | dde239879b15fbec3536cede5e8019211495f84b | |
parent | cc12f2a442d1d971d6ec0c21bdfa578299601fc7 (diff) | |
download | glibc-485d775dd578f2c2528d10d7618f09c45ffe6840.tar.gz glibc-485d775dd578f2c2528d10d7618f09c45ffe6840.tar.xz glibc-485d775dd578f2c2528d10d7618f09c45ffe6840.zip |
Update.
2002-10-11 Isamu Hasegawa <isamu@yamato.ibm.com> * posix/regcomp.c (re_compile_fastmap_iter): Remove the handling OP_CONTEXT_NODE. (regfree): Likewise. (create_initial_state): Likewise. (analyze): Remove the substitutions which became useless. (calc_first): Likewise. (calc_epsdest): Use edests of OP_BACK_REF in case that it has epsilon destination. (duplicate_node_closure): New function. (duplicate_node): Remove the handling OP_CONTEXT_NODE. (calc_inveclosure): Likewise. (calc_eclosure): Likewise. (calc_eclosure_iter): Invoke duplicate_node_closure instead of direct invocation of duplicate_node. (parse): Don't use comma operator in the return to avoid compiler warning. (parse_reg_exp): Likewise. (parse_branch): Likewise. (parse_expression): Likewise. (parse_sub_exp): Likewise. (parse_dup_op): Likewise. * posix/regex_internal.c (re_dfa_add_node): Remove the substitutions which became useless. (create_ci_newstate): Remove the handling OP_CONTEXT_NODE. (create_cd_newstate): Likewise. * posix/regex_internal.h (re_token_type_t): Remove the obsolete type. (re_token_t): Likewise. (re_dfa_t): Likewise. (re_node_set_remove): New macro. * posix/regexec.c (check_matching): Remove the handling OP_CONTEXT_NODE. (check_halt_node_context): Likewise. (proceed_next_node): Likewise. (pop_fail_stack): Fix the memory leak. (set_regs): Likewise. (free_fail_stack_return): New function. (sift_states_backward): Fix the memory leak. Remove the handling OP_CONTEXT_NODE. (update_cur_sifted_state): Append some if clause to avoid redundant call. (sub_epsilon_src_nodes): Use IS_EPSILON_NODE since it might be a back reference. (check_dst_limits): Remove the handling OP_CONTEXT_NODE. (check_subexp_limits): Likewise. (search_subexp): Likewise. (sift_states_bkref): Likewise. (transit_state_mb): Likewise. (transit_state_bkref_loop): Likewise. (transit_state_bkref_loop): Likewise. (group_nodes_into_DFAstates): Likewise. (check_node_accept): Likewise. (sift_ctx_init): Add initializing. 2002-10-12 Ulrich Drepper <drepper@redhat.com> * sysdeps/unix/sysv/linux/i386/sysdep.h (INLINE_SYSCALL): Use __builtin_expect.
-rw-r--r-- | ChangeLog | 60 | ||||
-rw-r--r-- | posix/regcomp.c | 365 | ||||
-rw-r--r-- | posix/regex_internal.c | 38 | ||||
-rw-r--r-- | posix/regex_internal.h | 9 | ||||
-rw-r--r-- | posix/regexec.c | 236 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/i386/sysdep.h | 4 |
6 files changed, 365 insertions, 347 deletions
diff --git a/ChangeLog b/ChangeLog index 1842411d1d..14e6dddb7c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,63 @@ +2002-10-11 Isamu Hasegawa <isamu@yamato.ibm.com> + + * posix/regcomp.c (re_compile_fastmap_iter): Remove the handling + OP_CONTEXT_NODE. + (regfree): Likewise. + (create_initial_state): Likewise. + (analyze): Remove the substitutions which became useless. + (calc_first): Likewise. + (calc_epsdest): Use edests of OP_BACK_REF in case that it has + epsilon destination. + (duplicate_node_closure): New function. + (duplicate_node): Remove the handling OP_CONTEXT_NODE. + (calc_inveclosure): Likewise. + (calc_eclosure): Likewise. + (calc_eclosure_iter): Invoke duplicate_node_closure instead of + direct invocation of duplicate_node. + (parse): Don't use comma operator in the return to avoid compiler + warning. + (parse_reg_exp): Likewise. + (parse_branch): Likewise. + (parse_expression): Likewise. + (parse_sub_exp): Likewise. + (parse_dup_op): Likewise. + * posix/regex_internal.c (re_dfa_add_node): Remove the substitutions + which became useless. + (create_ci_newstate): Remove the handling OP_CONTEXT_NODE. + (create_cd_newstate): Likewise. + * posix/regex_internal.h (re_token_type_t): Remove the obsolete type. + (re_token_t): Likewise. + (re_dfa_t): Likewise. + (re_node_set_remove): New macro. + * posix/regexec.c (check_matching): Remove the handling + OP_CONTEXT_NODE. + (check_halt_node_context): Likewise. + (proceed_next_node): Likewise. + (pop_fail_stack): Fix the memory leak. + (set_regs): Likewise. + (free_fail_stack_return): New function. + (sift_states_backward): Fix the memory leak. Remove the handling + OP_CONTEXT_NODE. + (update_cur_sifted_state): Append some if clause to avoid redundant + call. + (sub_epsilon_src_nodes): Use IS_EPSILON_NODE since it might be a + back reference. + (check_dst_limits): Remove the handling OP_CONTEXT_NODE. + (check_subexp_limits): Likewise. + (search_subexp): Likewise. + (sift_states_bkref): Likewise. + (transit_state_mb): Likewise. + (transit_state_bkref_loop): Likewise. + (transit_state_bkref_loop): Likewise. + (group_nodes_into_DFAstates): Likewise. + (check_node_accept): Likewise. + (sift_ctx_init): Add initializing. + +2002-10-12 Ulrich Drepper <drepper@redhat.com> + + * sysdeps/unix/sysv/linux/i386/sysdep.h (INLINE_SYSCALL): Use + __builtin_expect. + 2002-10-11 Ulrich Drepper <drepper@redhat.com> * elf/dl-load.c (_dl_map_object_from_fd): Remove unnecessarily diff --git a/posix/regcomp.c b/posix/regcomp.c index 7917c64727..715dd0f9a1 100644 --- a/posix/regcomp.c +++ b/posix/regcomp.c @@ -85,6 +85,9 @@ static reg_errcode_t analyze_tree (re_dfa_t *dfa, bin_tree_t *node); static void calc_first (re_dfa_t *dfa, bin_tree_t *node); static void calc_next (re_dfa_t *dfa, bin_tree_t *node); static void calc_epsdest (re_dfa_t *dfa, bin_tree_t *node); +static reg_errcode_t duplicate_node_closure (re_dfa_t *dfa, int top_org_node, + int top_clone_node, int root_node, + unsigned int constraint); static reg_errcode_t duplicate_node (int *new_idx, re_dfa_t *dfa, int org_idx, unsigned int constraint); static reg_errcode_t calc_eclosure (re_dfa_t *dfa); @@ -351,11 +354,6 @@ re_compile_fastmap_iter (bufp, init_state, fastmap) { int node = init_state->nodes.elems[node_cnt]; re_token_type_t type = dfa->nodes[node].type; - if (type == OP_CONTEXT_NODE) - { - node = dfa->nodes[node].opr.ctx_info->entity; - type = dfa->nodes[node].type; - } if (type == CHARACTER) fastmap[dfa->nodes[node].opr.c] = 1; @@ -587,18 +585,7 @@ regfree (preg) #endif /* RE_ENABLE_I18N */ if (node->type == SIMPLE_BRACKET && node->duplicated == 0) re_free (node->opr.sbcset); - else if (node->type == OP_CONTEXT_NODE) - { - if (dfa->nodes[node->opr.ctx_info->entity].type == OP_BACK_REF) - { - if (node->opr.ctx_info->bkref_eclosure != NULL) - re_node_set_free (node->opr.ctx_info->bkref_eclosure); - re_free (node->opr.ctx_info->bkref_eclosure); - } - re_free (node->opr.ctx_info); - } } - re_free (dfa->firsts); re_free (dfa->nexts); for (i = 0; i < dfa->nodes_len; ++i) { @@ -883,39 +870,25 @@ create_initial_state (dfa) re_token_type_t type = dfa->nodes[node_idx].type; int clexp_idx; - int entity = (type != OP_CONTEXT_NODE ? node_idx - : dfa->nodes[node_idx].opr.ctx_info->entity); - if ((type != OP_CONTEXT_NODE - || (dfa->nodes[entity].type != OP_BACK_REF)) - && (type != OP_BACK_REF)) + if (type != OP_BACK_REF) continue; for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx) { re_token_t *clexp_node; clexp_node = dfa->nodes + init_nodes.elems[clexp_idx]; if (clexp_node->type == OP_CLOSE_SUBEXP - && clexp_node->opr.idx + 1 == dfa->nodes[entity].opr.idx) + && clexp_node->opr.idx + 1 == dfa->nodes[node_idx].opr.idx) break; } if (clexp_idx == init_nodes.nelem) continue; - if (type == OP_CONTEXT_NODE - && (dfa->nodes[dfa->nodes[node_idx].opr.ctx_info->entity].type - == OP_BACK_REF)) - { - int prev_nelem = init_nodes.nelem; - re_node_set_merge (&init_nodes, - dfa->nodes[node_idx].opr.ctx_info->bkref_eclosure); - if (prev_nelem < init_nodes.nelem) - i = 0; - } - else if (type == OP_BACK_REF) + if (type == OP_BACK_REF) { - int next_idx = dfa->nexts[node_idx]; - if (!re_node_set_contains (&init_nodes, next_idx)) + int dest_idx = dfa->edests[node_idx].elems[0]; + if (!re_node_set_contains (&init_nodes, dest_idx)) { - re_node_set_merge (&init_nodes, dfa->eclosures + next_idx); + re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx); i = 0; } } @@ -959,18 +932,16 @@ analyze (dfa) reg_errcode_t ret; /* Allocate arrays. */ - dfa->firsts = re_malloc (int, dfa->nodes_alloc); dfa->nexts = re_malloc (int, dfa->nodes_alloc); dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc); dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc); dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_alloc); - if (BE (dfa->firsts == NULL || dfa->nexts == NULL || dfa->edests == NULL + if (BE (dfa->nexts == NULL || dfa->edests == NULL || dfa->eclosures == NULL || dfa->inveclosures == NULL, 0)) return REG_ESPACE; /* Initialize them. */ for (i = 0; i < dfa->nodes_len; ++i) { - dfa->firsts[i] = -1; dfa->nexts[i] = -1; re_node_set_init_empty (dfa->edests + i); re_node_set_init_empty (dfa->eclosures + i); @@ -1083,8 +1054,6 @@ calc_first (dfa, node) node->first = node->left->first; break; } - if (node->type == 0) - dfa->firsts[idx] = node->first; } /* Calculate "next" for the node NODE. */ @@ -1187,11 +1156,114 @@ calc_epsdest (dfa, node) } else if (dfa->nodes[idx].type == ANCHOR || dfa->nodes[idx].type == OP_OPEN_SUBEXP - || dfa->nodes[idx].type == OP_CLOSE_SUBEXP) + || dfa->nodes[idx].type == OP_CLOSE_SUBEXP + || dfa->nodes[idx].type == OP_BACK_REF) re_node_set_init_1 (dfa->edests + idx, node->next); } } +/* Duplicate the epsilon closure of the node ROOT_NODE. + Note that duplicated nodes have constraint INIT_CONSTRAINT in addition + to their own constraint. */ + +static reg_errcode_t +duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node, + init_constraint) + re_dfa_t *dfa; + int top_org_node, top_clone_node, root_node; + unsigned int init_constraint; +{ + reg_errcode_t err; + int org_node, clone_node, ret; + unsigned int constraint = init_constraint; + for (org_node = top_org_node, clone_node = top_clone_node;;) + { + int org_dest, clone_dest; + if (dfa->nodes[org_node].type == OP_BACK_REF) + { + /* If the back reference epsilon-transit, its destination must + also have the constraint. Then duplicate the epsilon closure + of the destination of the back reference, and store it in + edests of the back reference. */ + org_dest = dfa->nexts[org_node]; + re_node_set_empty (dfa->edests + clone_node); + err = duplicate_node (&clone_dest, dfa, org_dest, constraint); + if (BE (err != REG_NOERROR, 0)) + return err; + dfa->nexts[clone_node] = dfa->nexts[org_node]; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + else if (dfa->edests[org_node].nelem == 0) + { + /* In case of the node can't epsilon-transit, don't duplicate the + destination and store the original destination as the + destination of the node. */ + dfa->nexts[clone_node] = dfa->nexts[org_node]; + break; + } + else if (dfa->edests[org_node].nelem == 1) + { + /* In case of the node can epsilon-transit, and it has only one + destination. */ + org_dest = dfa->edests[org_node].elems[0]; + re_node_set_empty (dfa->edests + clone_node); + if (dfa->nodes[org_node].type == ANCHOR) + { + /* In case of the node has another constraint, append it. */ + if (org_node == root_node && clone_node != org_node) + { + /* ...but if the node is root_node itself, it means the + epsilon closure have a loop, then tie it to the + destination of the root_node. */ + ret = re_node_set_insert (dfa->edests + clone_node, + org_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + break; + } + constraint |= dfa->nodes[org_node].opr.ctx_type; + } + err = duplicate_node (&clone_dest, dfa, org_dest, constraint); + if (BE (err != REG_NOERROR, 0)) + return err; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + else /* dfa->edests[org_node].nelem == 2 */ + { + /* In case of the node can epsilon-transit, and it has two + destinations. */ + org_dest = dfa->edests[org_node].elems[0]; + re_node_set_empty (dfa->edests + clone_node); + err = duplicate_node (&clone_dest, dfa, org_dest, constraint); + if (BE (err != REG_NOERROR, 0)) + return err; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + + err = duplicate_node_closure (dfa, org_dest, clone_dest, root_node, + constraint); + if (BE (err != REG_NOERROR, 0)) + return err; + + org_dest = dfa->edests[org_node].elems[1]; + err = duplicate_node (&clone_dest, dfa, org_dest, constraint); + if (BE (err != REG_NOERROR, 0)) + return err; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + org_node = org_dest; + clone_node = clone_dest; + } + return REG_NOERROR; +} + /* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT. The new index will be stored in NEW_IDX and return REG_NOERROR if succeeded, otherwise return the error code. */ @@ -1204,50 +1276,18 @@ duplicate_node (new_idx, dfa, org_idx, constraint) { re_token_t dup; int dup_idx; - reg_errcode_t err; - - dup.type = OP_CONTEXT_NODE; - if (dfa->nodes[org_idx].type == OP_CONTEXT_NODE) - { - /* If the node whose index is ORG_IDX is the same as the intended - node, use it. */ - if (dfa->nodes[org_idx].constraint == constraint) - { - *new_idx = org_idx; - return REG_NOERROR; - } - dup.constraint = constraint | - dfa->nodes[org_idx].constraint; - } - else - dup.constraint = constraint; - /* In case that `entity' points OP_CONTEXT_NODE, - we correct `entity' to real entity in calc_inveclosures(). */ - dup.opr.ctx_info = malloc (sizeof (*dup.opr.ctx_info)); + dup = dfa->nodes[org_idx]; dup_idx = re_dfa_add_node (dfa, dup, 1); - if (BE (dup.opr.ctx_info == NULL || dup_idx == -1, 0)) + if (BE (dup_idx == -1, 0)) return REG_ESPACE; - dup.opr.ctx_info->entity = org_idx; - dup.opr.ctx_info->bkref_eclosure = NULL; - + dfa->nodes[dup_idx].constraint = constraint; + if (dfa->nodes[org_idx].type == ANCHOR) + dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type; dfa->nodes[dup_idx].duplicated = 1; - dfa->firsts[dup_idx] = dfa->firsts[org_idx]; - dfa->nexts[dup_idx] = dfa->nexts[org_idx]; - err = re_node_set_init_copy (dfa->edests + dup_idx, dfa->edests + org_idx); - if (BE (err != REG_NOERROR, 0)) - return err; - /* Since we don't duplicate epsilon nodes, epsilon closure have - only itself. */ - err = re_node_set_init_1 (dfa->eclosures + dup_idx, dup_idx); - if (BE (err != REG_NOERROR, 0)) - return err; - err = re_node_set_init_1 (dfa->inveclosures + dup_idx, dup_idx); - if (BE (err != REG_NOERROR, 0)) - return err; - /* Then we must update inveclosure for this node. - We process them at last part of calc_eclosure(), - since we don't complete to calculate them here. */ + re_node_set_init_empty (dfa->edests + dup_idx); + re_node_set_init_empty (dfa->eclosures + dup_idx); + re_node_set_init_empty (dfa->inveclosures + dup_idx); *new_idx = dup_idx; return REG_NOERROR; @@ -1257,7 +1297,7 @@ static void calc_inveclosure (dfa) re_dfa_t *dfa; { - int src, idx, dest, entity; + int src, idx, dest; for (src = 0; src < dfa->nodes_len; ++src) { for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx) @@ -1265,15 +1305,6 @@ calc_inveclosure (dfa) dest = dfa->eclosures[src].elems[idx]; re_node_set_insert (dfa->inveclosures + dest, src); } - - entity = src; - while (dfa->nodes[entity].type == OP_CONTEXT_NODE) - { - entity = dfa->nodes[entity].opr.ctx_info->entity; - re_node_set_merge (dfa->inveclosures + src, - dfa->inveclosures + entity); - dfa->nodes[src].opr.ctx_info->entity = entity; - } } } @@ -1283,16 +1314,17 @@ static reg_errcode_t calc_eclosure (dfa) re_dfa_t *dfa; { - int idx, node_idx, max, incomplete = 0; + int node_idx, incomplete; #ifdef DEBUG assert (dfa->nodes_len > 0); #endif + incomplete = 0; /* For each nodes, calculate epsilon closure. */ - for (node_idx = 0, max = dfa->nodes_len; ; ++node_idx) + for (node_idx = 0; ; ++node_idx) { reg_errcode_t err; re_node_set eclosure_elem; - if (node_idx == max) + if (node_idx == dfa->nodes_len) { if (!incomplete) break; @@ -1301,7 +1333,6 @@ calc_eclosure (dfa) } #ifdef DEBUG - assert (dfa->nodes[node_idx].type != OP_CONTEXT_NODE); assert (dfa->eclosures[node_idx].nelem != -1); #endif /* If we have already calculated, skip it. */ @@ -1318,41 +1349,6 @@ calc_eclosure (dfa) re_node_set_free (&eclosure_elem); } } - - /* for duplicated nodes. */ - for (idx = max; idx < dfa->nodes_len; ++idx) - { - int entity, i, constraint; - re_node_set *bkref_eclosure; - entity = dfa->nodes[idx].opr.ctx_info->entity; - re_node_set_merge (dfa->inveclosures + idx, dfa->inveclosures + entity); - if (dfa->nodes[entity].type != OP_BACK_REF) - continue; - - /* If the node is backreference, duplicate the epsilon closure of - the next node. Since it may epsilon transit. */ - /* Note: duplicate_node() may realloc dfa->eclosures, etc. */ - bkref_eclosure = re_malloc (re_node_set, 1); - if (BE (bkref_eclosure == NULL, 0)) - return REG_ESPACE; - re_node_set_init_empty (bkref_eclosure); - constraint = dfa->nodes[idx].constraint; - for (i = 0; i < dfa->eclosures[dfa->nexts[idx]].nelem; ++i) - { - int dest_node_idx = dfa->eclosures[dfa->nexts[idx]].elems[i]; - if (!IS_EPSILON_NODE (dfa->nodes[dest_node_idx].type)) - { - reg_errcode_t err; - err = duplicate_node (&dest_node_idx, dfa, dest_node_idx, - constraint); - if (BE (err != REG_NOERROR, 0)) - return err; - } - re_node_set_insert (bkref_eclosure, dest_node_idx); - } - dfa->nodes[idx].opr.ctx_info->bkref_eclosure = bkref_eclosure; - } - return REG_NOERROR; } @@ -1366,8 +1362,9 @@ calc_eclosure_iter (new_set, dfa, node, root) { reg_errcode_t err; unsigned int constraint; - int i, max, incomplete = 0; + int i, incomplete; re_node_set eclosure; + incomplete = 0; err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1); if (BE (err != REG_NOERROR, 0)) return err; @@ -1378,9 +1375,19 @@ calc_eclosure_iter (new_set, dfa, node, root) constraint = ((dfa->nodes[node].type == ANCHOR) ? dfa->nodes[node].opr.ctx_type : 0); + /* If the current node has constraints, duplicate all nodes. + Since they must inherit the constraints. */ + if (constraint && !dfa->nodes[dfa->edests[node].elems[0]].duplicated) + { + int org_node, cur_node; + org_node = cur_node = node; + err = duplicate_node_closure (dfa, node, node, node, constraint); + if (BE (err != REG_NOERROR, 0)) + return err; + } /* Expand each epsilon destination nodes. */ - if (dfa->edests[node].nelem != 0) + if (IS_EPSILON_NODE(dfa->nodes[node].type)) for (i = 0; i < dfa->edests[node].nelem; ++i) { re_node_set eclosure_elem; @@ -1413,28 +1420,6 @@ calc_eclosure_iter (new_set, dfa, node, root) } } - /* If the current node has constraints, duplicate all non-epsilon nodes. - Since they must inherit the constraints. */ - if (constraint) - for (i = 0, max = eclosure.nelem; i < max; ++i) - { - int dest = eclosure.elems[i]; - if (!IS_EPSILON_NODE (dfa->nodes[dest].type)) - { - int dup_dest; - reg_errcode_t err; - err = duplicate_node (&dup_dest, dfa, dest, constraint); - if (BE (err != REG_NOERROR, 0)) - return err; - if (dest != dup_dest) - { - re_node_set_remove_at (&eclosure, i--); - re_node_set_insert (&eclosure, dup_dest); - --max; - } - } - } - /* Epsilon closures include itself. */ re_node_set_insert (&eclosure, node); if (incomplete && !root) @@ -1793,7 +1778,10 @@ parse (regexp, preg, syntax, err) else root = eor; if (BE (new_idx == -1 || eor == NULL || root == NULL, 0)) - return *err = REG_ESPACE, NULL; + { + *err = REG_ESPACE; + return NULL; + } return root; } @@ -1841,7 +1829,10 @@ parse_reg_exp (regexp, preg, token, syntax, nest, err) branch = NULL; tree = create_tree (tree, branch, 0, new_idx); if (BE (new_idx == -1 || tree == NULL, 0)) - return *err = REG_ESPACE, NULL; + { + *err = REG_ESPACE; + return NULL; + } dfa->has_plural_match = 1; } return tree; @@ -1883,7 +1874,10 @@ parse_branch (regexp, preg, token, syntax, nest, err) { tree = create_tree (tree, exp, CONCAT, 0); if (tree == NULL) - return *err = REG_ESPACE, NULL; + { + *err = REG_ESPACE; + return NULL; + } } else if (tree == NULL) tree = exp; @@ -1916,7 +1910,10 @@ parse_expression (regexp, preg, token, syntax, nest, err) new_idx = re_dfa_add_node (dfa, *token, 0); tree = create_tree (NULL, NULL, 0, new_idx); if (BE (new_idx == -1 || tree == NULL, 0)) - return *err = REG_ESPACE, NULL; + { + *err = REG_ESPACE; + return NULL; + } #ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) { @@ -1954,7 +1951,10 @@ parse_expression (regexp, preg, token, syntax, nest, err) new_idx = re_dfa_add_node (dfa, *token, 0); tree = create_tree (NULL, NULL, 0, new_idx); if (BE (new_idx == -1 || tree == NULL, 0)) - return *err = REG_ESPACE, NULL; + { + *err = REG_ESPACE; + return NULL; + } ++dfa->nbackref; dfa->has_mb_node = 1; break; @@ -1963,7 +1963,10 @@ parse_expression (regexp, preg, token, syntax, nest, err) case OP_DUP_QUESTION: case OP_OPEN_DUP_NUM: if (syntax & RE_CONTEXT_INVALID_OPS) - return *err = REG_BADRPT, NULL; + { + *err = REG_BADRPT; + return NULL; + } else if (syntax & RE_CONTEXT_INDEP_OPS) { *token = fetch_token (regexp, syntax); @@ -1973,7 +1976,10 @@ parse_expression (regexp, preg, token, syntax, nest, err) case OP_CLOSE_SUBEXP: if ((token->type == OP_CLOSE_SUBEXP) && !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)) - return *err = REG_ERPAREN, NULL; + { + *err = REG_ERPAREN; + return NULL; + } /* else fall through */ case OP_CLOSE_DUP_NUM: /* We treat it as a normal character. */ @@ -1983,7 +1989,10 @@ parse_expression (regexp, preg, token, syntax, nest, err) new_idx = re_dfa_add_node (dfa, *token, 0); tree = create_tree (NULL, NULL, 0, new_idx); if (BE (new_idx == -1 || tree == NULL, 0)) - return *err = REG_ESPACE, NULL; + { + *err = REG_ESPACE; + return NULL; + } break; case ANCHOR: if (dfa->word_char == NULL) @@ -2008,7 +2017,10 @@ parse_expression (regexp, preg, token, syntax, nest, err) if (BE (idx_first == -1 || idx_last == -1 || new_idx == -1 || tree_first == NULL || tree_last == NULL || tree == NULL, 0)) - return *err = REG_ESPACE, NULL; + { + *err = REG_ESPACE; + return NULL; + } } else { @@ -2027,7 +2039,10 @@ parse_expression (regexp, preg, token, syntax, nest, err) new_idx = re_dfa_add_node (dfa, *token, 0); tree = create_tree (NULL, NULL, 0, new_idx); if (BE (new_idx == -1 || tree == NULL, 0)) - return *err = REG_ESPACE, NULL; + { + *err = REG_ESPACE; + return NULL; + } if (MB_CUR_MAX > 1) dfa->has_mb_node = 1; break; @@ -2108,7 +2123,10 @@ parse_sub_exp (regexp, preg, token, syntax, nest, err) new_idx = re_dfa_add_node (dfa, *token, 0); left_par = create_tree (NULL, NULL, 0, new_idx); if (BE (new_idx == -1 || left_par == NULL, 0)) - return *err = REG_ESPACE, NULL; + { + *err = REG_ESPACE; + return NULL; + } dfa->nodes[new_idx].opr.idx = cur_nsub; *token = fetch_token (regexp, syntax); @@ -2134,7 +2152,10 @@ parse_sub_exp (regexp, preg, token, syntax, nest, err) : create_tree (tree, right_par, CONCAT, 0)); tree = create_tree (left_par, tree, CONCAT, 0); if (BE (new_idx == -1 || right_par == NULL || tree == NULL, 0)) - return *err = REG_ESPACE, NULL; + { + *err = REG_ESPACE; + return NULL; + } dfa->nodes[new_idx].opr.idx = cur_nsub; return tree; @@ -2252,7 +2273,10 @@ parse_dup_op (dup_elem, regexp, dfa, token, syntax, err) work_tree = duplicate_tree (elem, dfa); tree = create_tree (tree, work_tree, CONCAT, 0); if (BE (work_tree == NULL || tree == NULL, 0)) - return *err = REG_ESPACE, NULL; + { + *err = REG_ESPACE; + return NULL; + } } } } @@ -2261,7 +2285,10 @@ parse_dup_op (dup_elem, regexp, dfa, token, syntax, err) new_idx = re_dfa_add_node (dfa, *token, 0); tree = create_tree (tree, NULL, 0, new_idx); if (BE (new_idx == -1 || tree == NULL, 0)) - return *err = REG_ESPACE, NULL; + { + *err = REG_ESPACE; + return NULL; + } } *token = fetch_token (regexp, syntax); return tree; diff --git a/posix/regex_internal.c b/posix/regex_internal.c index 92f59b4c9d..69fb9a65f5 100644 --- a/posix/regex_internal.c +++ b/posix/regex_internal.c @@ -929,20 +929,18 @@ re_dfa_add_node (dfa, token, mode) dfa->nodes = new_array; if (mode) { - int *new_firsts, *new_nexts; + int *new_nexts; re_node_set *new_edests, *new_eclosures, *new_inveclosures; - new_firsts = re_realloc (dfa->firsts, int, dfa->nodes_alloc); new_nexts = re_realloc (dfa->nexts, int, dfa->nodes_alloc); new_edests = re_realloc (dfa->edests, re_node_set, dfa->nodes_alloc); new_eclosures = re_realloc (dfa->eclosures, re_node_set, dfa->nodes_alloc); new_inveclosures = re_realloc (dfa->inveclosures, re_node_set, dfa->nodes_alloc); - if (BE (new_firsts == NULL || new_nexts == NULL || new_edests == NULL + if (BE (new_nexts == NULL || new_edests == NULL || new_eclosures == NULL || new_inveclosures == NULL, 0)) return -1; - dfa->firsts = new_firsts; dfa->nexts = new_nexts; dfa->edests = new_edests; dfa->eclosures = new_eclosures; @@ -951,6 +949,7 @@ re_dfa_add_node (dfa, token, mode) } dfa->nodes[dfa->nodes_len] = token; dfa->nodes[dfa->nodes_len].duplicated = 0; + dfa->nodes[dfa->nodes_len].constraint = 0; return dfa->nodes_len++; } @@ -1126,7 +1125,7 @@ create_ci_newstate (dfa, nodes, hash) { re_token_t *node = dfa->nodes + nodes->elems[i]; re_token_type_t type = node->type; - if (type == CHARACTER) + if (type == CHARACTER && !node->constraint) continue; /* If the state has the halt node, the state is a halt state. */ @@ -1139,13 +1138,8 @@ create_ci_newstate (dfa, nodes, hash) #endif /* RE_ENABLE_I18N */ else if (type == OP_BACK_REF) newstate->has_backref = 1; - else if (type == ANCHOR || OP_CONTEXT_NODE) - { - newstate->has_constraint = 1; - if (type == OP_CONTEXT_NODE - && dfa->nodes[node->opr.ctx_info->entity].type == END_OF_RE) - newstate->halt = 1; - } + else if (type == ANCHOR || node->constraint) + newstate->has_constraint = 1; } err = register_state (dfa, newstate, hash); return (err != REG_NOERROR) ? NULL : newstate; @@ -1175,9 +1169,11 @@ create_cd_newstate (dfa, nodes, context, hash) unsigned int constraint = 0; re_token_t *node = dfa->nodes + nodes->elems[i]; re_token_type_t type = node->type; - if (type == CHARACTER) - continue; + if (node->constraint) + constraint = node->constraint; + if (type == CHARACTER && !constraint) + continue; /* If the state has the halt node, the state is a halt state. */ else if (type == END_OF_RE) newstate->halt = 1; @@ -1190,20 +1186,6 @@ create_cd_newstate (dfa, nodes, context, hash) newstate->has_backref = 1; else if (type == ANCHOR) constraint = node->opr.ctx_type; - else if (type == OP_CONTEXT_NODE) - { - re_token_type_t ctype = dfa->nodes[node->opr.ctx_info->entity].type; - constraint = node->constraint; - if (ctype == END_OF_RE) - newstate->halt = 1; - else if (ctype == OP_BACK_REF) - newstate->has_backref = 1; -#ifdef RE_ENABLE_I18N - else if (ctype == COMPLEX_BRACKET - || (type == OP_PERIOD && MB_CUR_MAX > 1)) - newstate->accept_mb = 1; -#endif /* RE_ENABLE_I18N */ - } if (constraint) { diff --git a/posix/regex_internal.h b/posix/regex_internal.h index 5aef684acc..7193ea0315 100644 --- a/posix/regex_internal.h +++ b/posix/regex_internal.h @@ -133,7 +133,6 @@ typedef enum OP_DUP_QUESTION, OP_BACK_REF, ANCHOR, - OP_CONTEXT_NODE, /* Dummy marker. */ END_OF_RE_TOKEN_T @@ -198,11 +197,6 @@ typedef struct #endif /* RE_ENABLE_I18N */ int idx; /* for BACK_REF */ re_context_type ctx_type; /* for ANCHOR */ - struct - { - int entity; /* for OP_CONTEXT_NODE, index of the entity */ - re_node_set *bkref_eclosure; - } *ctx_info; } opr; #if __GNUC__ >= 2 re_token_type_t type : 8; @@ -474,7 +468,6 @@ struct re_dfa_t int nodes_alloc; int nodes_len; bin_tree_t *str_tree; - int *firsts; int *nexts; re_node_set *edests; re_node_set *eclosures; @@ -519,6 +512,8 @@ static int re_node_set_compare (const re_node_set *set1, const re_node_set *set2); static int re_node_set_contains (const re_node_set *set, int elem); static void re_node_set_remove_at (re_node_set *set, int idx); +#define re_node_set_remove(set,id) \ + (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1)) #define re_node_set_empty(p) ((p)->nelem = 0) #define re_node_set_free(set) re_free ((set)->elems) static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token, int mode); diff --git a/posix/regexec.c b/posix/regexec.c index 7e3a8320b1..5c80f19f4e 100644 --- a/posix/regexec.c +++ b/posix/regexec.c @@ -95,6 +95,8 @@ static reg_errcode_t set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, regmatch_t *pmatch, int fl_backtrack); +static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs); + #ifdef RE_ENABLE_I18N static int sift_states_iter_mb (const regex_t *preg, const re_match_context_t *mctx, @@ -845,11 +847,8 @@ check_matching (preg, mctx, fl_search, fl_longest_match) re_dfa_t *dfa = (re_dfa_t *) preg->buffer; for (i = 0; i < cur_state->nodes.nelem; ++i) { - re_token_type_t type; int node = cur_state->nodes.elems[i]; - int entity = (dfa->nodes[node].type != OP_CONTEXT_NODE ? node - : dfa->nodes[node].opr.ctx_info->entity); - type = dfa->nodes[entity].type; + re_token_type_t type = dfa->nodes[node].type; if (type == OP_BACK_REF) { int clexp_idx; @@ -859,7 +858,7 @@ check_matching (preg, mctx, fl_search, fl_longest_match) re_token_t *clexp_node; clexp_node = dfa->nodes + cur_state->nodes.elems[clexp_idx]; if (clexp_node->type == OP_CLOSE_SUBEXP - && clexp_node->opr.idx + 1== dfa->nodes[entity].opr.idx) + && clexp_node->opr.idx + 1== dfa->nodes[node].opr.idx) { err = match_ctx_add_entry (mctx, node, 0, 0, 0); if (BE (err != REG_NOERROR, 0)) @@ -955,15 +954,13 @@ static int check_halt_node_context (dfa, node, context) int node; unsigned int context; { - int entity; re_token_type_t type = dfa->nodes[node].type; - if (type == END_OF_RE) - return 1; - if (type != OP_CONTEXT_NODE) + unsigned int constraint = dfa->nodes[node].constraint; + if (type != END_OF_RE) return 0; - entity = dfa->nodes[node].opr.ctx_info->entity; - if (dfa->nodes[entity].type != END_OF_RE - || NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[node].constraint, context)) + if (!constraint) + return 1; + if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context)) return 0; return 1; } @@ -1008,39 +1005,27 @@ proceed_next_node (preg, nregs, regs, mctx, pidx, node, eps_via_nodes, fs) struct re_fail_stack_t *fs; { re_dfa_t *dfa = (re_dfa_t *)preg->buffer; - int i, err, dest_node, cur_entity; + int i, err, dest_node; dest_node = -1; - cur_entity = ((dfa->nodes[node].type == OP_CONTEXT_NODE) - ? dfa->nodes[node].opr.ctx_info->entity : node); if (IS_EPSILON_NODE (dfa->nodes[node].type)) { - int ndest, dest_nodes[2], dest_entities[2]; + re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes; + int ndest, dest_nodes[2]; err = re_node_set_insert (eps_via_nodes, node); if (BE (err < 0, 0)) return -1; /* Pick up valid destinations. */ - for (ndest = 0, i = 0; i < mctx->state_log[*pidx]->nodes.nelem; ++i) + for (ndest = 0, i = 0; i < dfa->edests[node].nelem; ++i) { - int candidate = mctx->state_log[*pidx]->nodes.elems[i]; - int entity; - entity = ((dfa->nodes[candidate].type == OP_CONTEXT_NODE) - ? dfa->nodes[candidate].opr.ctx_info->entity : candidate); - if (!re_node_set_contains (dfa->edests + node, entity)) + int candidate = dfa->edests[node].elems[i]; + if (!re_node_set_contains (cur_nodes, candidate)) continue; dest_nodes[0] = (ndest == 0) ? candidate : dest_nodes[0]; - dest_entities[0] = (ndest == 0) ? entity : dest_entities[0]; dest_nodes[1] = (ndest == 1) ? candidate : dest_nodes[1]; - dest_entities[1] = (ndest == 1) ? entity : dest_entities[1]; ++ndest; } if (ndest <= 1) return ndest == 0 ? -1 : (ndest == 1 ? dest_nodes[0] : 0); - if (dest_entities[0] > dest_entities[1]) - { - int swap_work = dest_nodes[0]; - dest_nodes[0] = dest_nodes[1]; - dest_nodes[1] = swap_work; - } /* In order to avoid infinite loop like "(a*)*". */ if (re_node_set_contains (eps_via_nodes, dest_nodes[0])) return dest_nodes[1]; @@ -1050,22 +1035,17 @@ proceed_next_node (preg, nregs, regs, mctx, pidx, node, eps_via_nodes, fs) } else { - int naccepted = 0, entity = node; + int naccepted = 0; re_token_type_t type = dfa->nodes[node].type; - if (type == OP_CONTEXT_NODE) - { - entity = dfa->nodes[node].opr.ctx_info->entity; - type = dfa->nodes[entity].type; - } #ifdef RE_ENABLE_I18N if (ACCEPT_MB_NODE (type)) - naccepted = check_node_accept_bytes (preg, entity, mctx->input, *pidx); + naccepted = check_node_accept_bytes (preg, node, mctx->input, *pidx); else #endif /* RE_ENABLE_I18N */ if (type == OP_BACK_REF) { - int subexp_idx = dfa->nodes[entity].opr.idx; + int subexp_idx = dfa->nodes[node].opr.idx; naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so; if (fs != NULL) { @@ -1085,18 +1065,10 @@ proceed_next_node (preg, nregs, regs, mctx, pidx, node, eps_via_nodes, fs) err = re_node_set_insert (eps_via_nodes, node); if (BE (err < 0, 0)) return -2; - dest_node = dfa->nexts[node]; + dest_node = dfa->edests[node].elems[0]; if (re_node_set_contains (&mctx->state_log[*pidx]->nodes, dest_node)) return dest_node; - for (i = 0; i < mctx->state_log[*pidx]->nodes.nelem; ++i) - { - dest_node = mctx->state_log[*pidx]->nodes.elems[i]; - if ((dfa->nodes[dest_node].type == OP_CONTEXT_NODE - && (dfa->nexts[node] - == dfa->nodes[dest_node].opr.ctx_info->entity))) - return dest_node; - } } } @@ -1153,6 +1125,7 @@ pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes) *pidx = fs->stack[num].idx; memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs); re_node_set_free (eps_via_nodes); + re_free (fs->stack[num].regs); *eps_via_nodes = fs->stack[num].eps_via_nodes; return fs->stack[num].node; } @@ -1201,12 +1174,18 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack) if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1) break; if (reg_idx == nmatch) - return REG_NOERROR; + { + re_node_set_free (&eps_via_nodes); + return free_fail_stack_return (fs); + } cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, &eps_via_nodes); } else - return REG_NOERROR; + { + re_node_set_free (&eps_via_nodes); + return REG_NOERROR; + } } /* Proceed to next node. */ @@ -1221,10 +1200,30 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack) cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, &eps_via_nodes); else - return REG_NOMATCH; + { + re_node_set_free (&eps_via_nodes); + return REG_NOMATCH; + } } } re_node_set_free (&eps_via_nodes); + return free_fail_stack_return (fs); +} + +static reg_errcode_t +free_fail_stack_return (fs) + struct re_fail_stack_t *fs; +{ + if (fs) + { + int fs_idx; + for (fs_idx = 0; fs_idx < fs->num; ++fs_idx) + { + re_node_set_free (&fs->stack[fs_idx].eps_via_nodes); + re_free (fs->stack[fs_idx].regs); + } + re_free (fs->stack); + } return REG_NOERROR; } @@ -1314,6 +1313,7 @@ sift_states_backward (preg, mctx, sctx) { memset (sctx->sifted_states, '\0', sizeof (re_dfastate_t *) * str_idx); + re_node_set_free (&cur_dest); return REG_NOERROR; } re_node_set_empty (&cur_dest); @@ -1330,22 +1330,16 @@ sift_states_backward (preg, mctx, sctx) for (i = 0; i < cur_src->nelem; i++) { int prev_node = cur_src->elems[i]; - int entity = prev_node; int naccepted = 0; re_token_type_t type = dfa->nodes[prev_node].type; if (IS_EPSILON_NODE(type)) continue; - if (type == OP_CONTEXT_NODE) - { - entity = dfa->nodes[prev_node].opr.ctx_info->entity; - type = dfa->nodes[entity].type; - } #ifdef RE_ENABLE_I18N /* If the node may accept `multi byte'. */ if (ACCEPT_MB_NODE (type)) - naccepted = sift_states_iter_mb (preg, mctx, sctx, entity, str_idx, - sctx->last_str_idx); + naccepted = sift_states_iter_mb (preg, mctx, sctx, prev_node, + str_idx, sctx->last_str_idx); #endif /* RE_ENABLE_I18N */ /* We don't check backreferences here. @@ -1459,12 +1453,15 @@ update_cur_sifted_state (preg, mctx, sctx, str_idx, dest_nodes) /* At first, add the nodes which can epsilon transit to a node in DEST_NODE. */ - err = add_epsilon_src_nodes (dfa, dest_nodes, candidates); - if (BE (err != REG_NOERROR, 0)) - return err; + if (dest_nodes->nelem) + { + err = add_epsilon_src_nodes (dfa, dest_nodes, candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + } /* Then, check the limitations in the current sift_context. */ - if (sctx->limits.nelem) + if (dest_nodes->nelem && sctx->limits.nelem) { err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits, mctx->bkref_ents, str_idx); @@ -1479,7 +1476,7 @@ update_cur_sifted_state (preg, mctx, sctx, str_idx, dest_nodes) /* If we are searching for the subexpression candidates. Note that we were from transit_state_bkref_loop() in this case. */ - if (sctx->check_subexp) + if (dest_nodes->nelem && sctx->check_subexp) { err = search_subexp (preg, mctx, sctx, str_idx, dest_nodes); if (BE (err != REG_NOERROR, 0)) @@ -1538,7 +1535,7 @@ sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates) int cur_node = inv_eclosure->elems[ecl_idx]; if (cur_node == node) continue; - if (dfa->edests[cur_node].nelem) + if (IS_EPSILON_NODE (dfa->nodes[cur_node].type)) { int edst1 = dfa->edests[cur_node].elems[0]; int edst2 = ((dfa->edests[cur_node].nelem > 1) @@ -1580,12 +1577,10 @@ check_dst_limits (dfa, limits, mctx, dst_node, dst_idx, src_node, src_idx) for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) { - int bkref, subexp_idx/*, node_idx, cls_node*/; + int subexp_idx; struct re_backref_cache_entry *ent; ent = mctx->bkref_ents + limits->elems[lim_idx]; - bkref = (dfa->nodes[ent->node].type == OP_CONTEXT_NODE - ? dfa->nodes[ent->node].opr.ctx_info->entity : ent->node); - subexp_idx = dfa->nodes[bkref].opr.idx - 1; + subexp_idx = dfa->nodes[ent->node].opr.idx - 1; dst_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx], dfa->eclosures + dst_node, @@ -1624,13 +1619,7 @@ check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, node, for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx) { int node = eclosures->elems[node_idx]; - int entity = node; re_token_type_t type= dfa->nodes[node].type; - if (type == OP_CONTEXT_NODE) - { - entity = dfa->nodes[node].opr.ctx_info->entity; - type = dfa->nodes[entity].type; - } if (type == OP_BACK_REF) { int bi; @@ -1641,7 +1630,7 @@ check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, node, && ent->str_idx == str_idx) { int cpos, dst; - dst = dfa->nexts[node]; + dst = dfa->edests[node].elems[0]; cpos = check_dst_limits_calc_pos (dfa, mctx, limit, dfa->eclosures + dst, subexp_idx, dst, @@ -1685,17 +1674,14 @@ check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx) for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) { - int bkref, subexp_idx; + int subexp_idx; struct re_backref_cache_entry *ent; ent = bkref_ents + limits->elems[lim_idx]; if (str_idx <= ent->subexp_from || ent->str_idx < str_idx) continue; /* This is unrelated limitation. */ - bkref = (dfa->nodes[ent->node].type == OP_CONTEXT_NODE - ? dfa->nodes[ent->node].opr.ctx_info->entity : ent->node); - subexp_idx = dfa->nodes[bkref].opr.idx - 1; - + subexp_idx = dfa->nodes[ent->node].opr.idx - 1; if (ent->subexp_to == str_idx) { int ops_node = -1; @@ -1790,12 +1776,8 @@ search_subexp (preg, mctx, sctx, str_idx, dest_nodes) for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) { re_token_type_t type; - int entity; node = dest_nodes->elems[node_idx]; type = dfa->nodes[node].type; - entity = (type != OP_CONTEXT_NODE ? node - : dfa->nodes[node].opr.ctx_info->entity); - type = (type != OP_CONTEXT_NODE ? type : dfa->nodes[entity].type); if (type == OP_CLOSE_SUBEXP && sctx->check_subexp == dfa->nodes[node].opr.idx + 1) @@ -1933,14 +1915,10 @@ sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes) for (node_idx = 0; node_idx < candidates->nelem; ++node_idx) { - int entity; int cur_bkref_idx = re_string_cur_idx (mctx->input); re_token_type_t type; node = candidates->elems[node_idx]; type = dfa->nodes[node].type; - entity = (type != OP_CONTEXT_NODE ? node - : dfa->nodes[node].opr.ctx_info->entity); - type = (type != OP_CONTEXT_NODE ? type : dfa->nodes[entity].type); if (node == sctx->cur_bkref && str_idx == cur_bkref_idx) continue; /* Avoid infinite loop for the REs like "()\1+". */ @@ -1951,37 +1929,25 @@ sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes) int enabled_idx; for (enabled_idx = 0; enabled_idx < mctx->nbkref_ents; ++enabled_idx) { - int disabled_idx, subexp_len, to_idx; + int disabled_idx, subexp_len, to_idx, dst_node; struct re_backref_cache_entry *entry; entry = mctx->bkref_ents + enabled_idx; subexp_len = entry->subexp_to - entry->subexp_from; to_idx = str_idx + subexp_len; + dst_node = (subexp_len ? dfa->nexts[node] + : dfa->edests[node].elems[0]); if (entry->node != node || entry->str_idx != str_idx || to_idx > sctx->last_str_idx || sctx->sifted_states[to_idx] == NULL) continue; - if (!STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], - dfa->nexts[node])) - { - int dst_idx; - re_node_set *dsts = &sctx->sifted_states[to_idx]->nodes; - for (dst_idx = 0; dst_idx < dsts->nelem; ++dst_idx) - { - int dst_node = dsts->elems[dst_idx]; - if (dfa->nodes[dst_node].type == OP_CONTEXT_NODE - && (dfa->nodes[dst_node].opr.ctx_info->entity - == dfa->nexts[node])) - break; - } - if (dst_idx == dsts->nelem) - continue; - } + if (!STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)) + continue; if (check_dst_limits (dfa, &sctx->limits, mctx, node, str_idx, dfa->nexts[node], to_idx)) continue; - if (sctx->check_subexp == dfa->nodes[entity].opr.idx) + if (sctx->check_subexp == dfa->nodes[node].opr.idx) { char *buf; buf = (char *) re_string_get_buffer (mctx->input); @@ -2038,9 +2004,10 @@ sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes) return err; } local_sctx.sifted_states[str_idx] = cur_state; - re_node_set_remove_at (&local_sctx.limits, - local_sctx.limits.nelem - 1); - entry->flag = 1; + re_node_set_remove (&local_sctx.limits, enabled_idx); + /* We must not use the variable entry here, since + mctx->bkref_ents might be realloced. */ + mctx->bkref_ents[enabled_idx].flag = 1; } } for (enabled_idx = 0; enabled_idx < mctx->nbkref_ents; ++enabled_idx) @@ -2301,15 +2268,14 @@ transit_state_mb (preg, pstate, mctx) unsigned int context; re_dfastate_t *dest_state; - if (dfa->nodes[cur_node_idx].type == OP_CONTEXT_NODE) + if (dfa->nodes[cur_node_idx].constraint) { context = re_string_context_at (mctx->input, re_string_cur_idx (mctx->input), mctx->eflags, preg->newline_anchor); if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint, - context)) + context)) continue; - cur_node_idx = dfa->nodes[cur_node_idx].opr.ctx_info->entity; } /* How many bytes the node can accepts? */ @@ -2404,17 +2370,16 @@ transit_state_bkref_loop (preg, nodes, work_state_log, mctx) /* Check whether `node' is a backreference or not. */ if (node->type == OP_BACK_REF) subexp_idx = node->opr.idx; - else if (node->type == OP_CONTEXT_NODE && - dfa->nodes[node->opr.ctx_info->entity].type == OP_BACK_REF) + else + continue; + + if (node->constraint) { context = re_string_context_at (mctx->input, cur_str_idx, mctx->eflags, preg->newline_anchor); if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) continue; - subexp_idx = dfa->nodes[node->opr.ctx_info->entity].opr.idx; } - else - continue; /* `node' is a backreference. Check the substring which the substring matched. */ @@ -2440,8 +2405,8 @@ transit_state_bkref_loop (preg, nodes, work_state_log, mctx) if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx) continue; subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from; - new_dest_nodes = ((node->type == OP_CONTEXT_NODE && subexp_len == 0) - ? dfa->nodes[node_idx].opr.ctx_info->bkref_eclosure + new_dest_nodes = (subexp_len == 0 + ? dfa->eclosures + dfa->edests[node_idx].elems[0] : dfa->eclosures + dfa->nexts[node_idx]); dest_str_idx = (cur_str_idx + bkref_ent->subexp_to - bkref_ent->subexp_from); @@ -2688,16 +2653,9 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch) /* For all the nodes belonging to `state', */ for (i = 0; i < cur_nodes->nelem; ++i) { - unsigned int constraint = 0; re_token_t *node = &dfa->nodes[cur_nodes->elems[i]]; re_token_type_t type = node->type; - - if (type == OP_CONTEXT_NODE) - { - constraint = node->constraint; - node = dfa->nodes + node->opr.ctx_info->entity; - type = node->type; - } + unsigned int constraint = node->constraint; /* Enumerate all single byte character this node can accept. */ if (type == CHARACTER) @@ -2724,10 +2682,10 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch) if (constraint & NEXT_WORD_CONSTRAINT) for (j = 0; j < BITSET_UINTS; ++j) accepts[j] &= dfa->word_char[j]; - else if (constraint & NEXT_NOTWORD_CONSTRAINT) + if (constraint & NEXT_NOTWORD_CONSTRAINT) for (j = 0; j < BITSET_UINTS; ++j) accepts[j] &= ~dfa->word_char[j]; - else if (constraint & NEXT_NEWLINE_CONSTRAINT) + if (constraint & NEXT_NEWLINE_CONSTRAINT) { int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR); bitset_empty (accepts); @@ -3058,10 +3016,8 @@ check_node_accept (preg, node, mctx, idx) const re_match_context_t *mctx; int idx; { - const re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - const re_token_t *cur_node; unsigned char ch; - if (node->type == OP_CONTEXT_NODE) + if (node->constraint) { /* The node has constraints. Check whether the current context satisfies the constraints. */ @@ -3070,17 +3026,13 @@ check_node_accept (preg, node, mctx, idx) preg->newline_anchor); if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) return 0; - cur_node = dfa->nodes + node->opr.ctx_info->entity; } - else - cur_node = node; - ch = re_string_byte_at (mctx->input, idx); - if (cur_node->type == CHARACTER) - return cur_node->opr.c == ch; - else if (cur_node->type == SIMPLE_BRACKET) - return bitset_contain (cur_node->opr.sbcset, ch); - else if (cur_node->type == OP_PERIOD) + if (node->type == CHARACTER) + return node->opr.c == ch; + else if (node->type == SIMPLE_BRACKET) + return bitset_contain (node->opr.sbcset, ch); + else if (node->type == OP_PERIOD) return !((ch == '\n' && !(preg->syntax & RE_DOT_NEWLINE)) || (ch == '\0' && (preg->syntax & RE_DOT_NOT_NULL))); else @@ -3221,5 +3173,7 @@ sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx, sctx->last_node = last_node; sctx->last_str_idx = last_str_idx; sctx->check_subexp = check_subexp; + sctx->cur_bkref = -1; + sctx->cls_subexp_idx = -1; re_node_set_init_empty (&sctx->limits); } diff --git a/sysdeps/unix/sysv/linux/i386/sysdep.h b/sysdeps/unix/sysv/linux/i386/sysdep.h index 851d91b69a..f6763b14f2 100644 --- a/sysdeps/unix/sysv/linux/i386/sysdep.h +++ b/sysdeps/unix/sysv/linux/i386/sysdep.h @@ -290,8 +290,8 @@ asm (".L__X'%ebx = 1\n\t" #undef INLINE_SYSCALL #define INLINE_SYSCALL(name, nr, args...) \ ({ \ - unsigned int resultvar = INTERNAL_SYSCALL(name, nr, args); \ - if (INTERNAL_SYSCALL_ERROR_P (resultvar)) \ + unsigned int resultvar = INTERNAL_SYSCALL (name, nr, args); \ + if (__builtin_expect (INTERNAL_SYSCALL_ERROR_P (resultvar), 0)) \ { \ __set_errno (INTERNAL_SYSCALL_ERRNO (resultvar)); \ resultvar = 0xffffffff; \ |