about summary refs log tree commit diff
path: root/posix/regex_internal.c
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2005-01-26 22:42:49 +0000
committerUlrich Drepper <drepper@redhat.com>2005-01-26 22:42:49 +0000
commit02f3550c8bf47ecff6b548bc8ba3219d234a41a3 (patch)
tree668b767c8ad6842abd668203e35858a13225f3c6 /posix/regex_internal.c
parent629311b74a9f4f2c9a6d91ff50f76d0ee8fa21c0 (diff)
downloadglibc-02f3550c8bf47ecff6b548bc8ba3219d234a41a3.tar.gz
glibc-02f3550c8bf47ecff6b548bc8ba3219d234a41a3.tar.xz
glibc-02f3550c8bf47ecff6b548bc8ba3219d234a41a3.zip
[BZ #605, BZ #611]
Update.
2004-12-13  Paolo Bonzini  <bonzini@gnu.org>

	Separate parsing and creation of the NFA.  Avoided recursion on
	the (very unbalanced) parse tree.
	[BZ #611]
	* posix/regcomp.c (struct subexp_optimize, analyze_tree, calc_epsdest,
	re_dfa_add_tree_node, mark_opt_subexp_iter): Removed.
	(optimize_subexps, duplicate_tree, calc_first, calc_next,
	mark_opt_subexp): Rewritten.
	(preorder, postorder, lower_subexps, lower_subexp, link_nfa_nodes,
	create_token_tree, free_tree, free_token): New.
	(analyze): Accept a regex_t *.  Invoke the passes via the preorder and
	postorder generic visitors.  Do not initialize the fields in the
	re_dfa_t that represent the transitions.
	(free_dfa_content): Use free_token.
	(re_compile_internal): Analyze before UTF-8 optimizations.  Do not
	include optimization of subexpressions.
	(create_initial_state): Fetch the DFA node index from the first node's
	bin_tree_t *.
	(optimize_utf8): Abort on unexpected nodes, including OP_DUP_QUESTION.
	Return on COMPLEX_BRACKET.
	(duplicate_node_closure): Fix comment.
	(duplicate_node): Do not initialize the fields in the
	re_dfa_t that represent the transitions.
	(calc_eclosure, calc_inveclosure): Do not handle OP_DELETED_SUBEXP.
	(create_tree): Remove final argument.  All callers adjusted.  Rewritten
	to use create_token_tree.
	(parse_reg_exp, parse_branch, parse_expression, parse_bracket_exp,
	build_charclass_op): Use create_tree or create_token_tree instead
	of re_dfa_add_tree_node.
	(parse_dup_op): Likewise.  Also free the tree using free_tree for
	"<re>{0}", and lower OP_DUP_QUESTION to OP_ALT: "a?" is equivalent
	to "a|".  Adjust invocation of mark_opt_subexp.
	(parse_sub_exp): Create a single SUBEXP node.
	* posix/regex_internal.c (re_dfa_add_node): Remove last parameter,
	always perform as if it was 1.  Do not initialize OPT_SUBEXP and
	DUPLICATED, and initialize the DFA fields representing the transitions.
	* posix/regex_internal.h (re_dfa_add_node): Adjust prototype.
	(re_token_type_t): Move OP_DUP_PLUS and OP_DUP_QUESTION to the tokens
	section.  Add a tree-only code SUBEXP.  Remove OP_DELETED_SUBEXP.
	(bin_tree_t): Include a full re_token_t for TOKEN.  Turn FIRST and
	NEXT into pointers to trees.  Remove ECLOSURE.

2004-12-28  Paolo Bonzini  <bonzini@gnu.org >

	[BZ #605]
	* posix/regcomp.c (parse_bracket_exp): Do not modify DFA nodes
	that were already created.
	* posix/regex_internal.c (re_dfa_add_node): Set accept_mb field
	in the token if needed.
	(create_ci_newstate, create_cd_newstate): Set accept_mb field
	from the tokens' field.
	* posix/regex_internal.h (re_token_t): Add accept_mb field.
	(ACCEPT_MB_NODE): Removed.
	* posix/regexec.c (proceed_next_node, transit_states_mb,
	build_sifted_states, check_arrival_add_next_nodes): Use
	accept_mb instead of ACCEPT_MB_NODE.
Diffstat (limited to 'posix/regex_internal.c')
-rw-r--r--posix/regex_internal.c77
1 files changed, 37 insertions, 40 deletions
diff --git a/posix/regex_internal.c b/posix/regex_internal.c
index f15cb575eb..dbd3f24ca3 100644
--- a/posix/regex_internal.c
+++ b/posix/regex_internal.c
@@ -1330,47 +1330,49 @@ re_node_set_remove_at (set, idx)
    Or return -1, if an error will be occured.  */
 
 static int
-re_dfa_add_node (dfa, token, mode)
+re_dfa_add_node (dfa, token)
      re_dfa_t *dfa;
      re_token_t token;
-     int mode;
 {
+  int type = token.type;
   if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
     {
       int new_nodes_alloc = dfa->nodes_alloc * 2;
+      int *new_nexts, *new_indices;
+      re_node_set *new_edests, *new_eclosures, *new_inveclosures;
+
       re_token_t *new_array = re_realloc (dfa->nodes, re_token_t,
 					  new_nodes_alloc);
       if (BE (new_array == NULL, 0))
 	return -1;
       dfa->nodes = new_array;
-      if (mode)
-	{
-	  int *new_nexts, *new_indices;
-	  re_node_set *new_edests, *new_eclosures, *new_inveclosures;
-
-	  new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc);
-	  new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
-	  new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
-	  new_eclosures = re_realloc (dfa->eclosures, re_node_set,
-				      new_nodes_alloc);
-	  new_inveclosures = re_realloc (dfa->inveclosures, re_node_set,
-					 new_nodes_alloc);
-	  if (BE (new_nexts == NULL || new_indices == NULL
-		  || new_edests == NULL || new_eclosures == NULL
-		  || new_inveclosures == NULL, 0))
-	    return -1;
-	  dfa->nexts = new_nexts;
-	  dfa->org_indices = new_indices;
-	  dfa->edests = new_edests;
-	  dfa->eclosures = new_eclosures;
-	  dfa->inveclosures = new_inveclosures;
-	}
+      new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc);
+      new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
+      new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
+      new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
+      new_inveclosures = re_realloc (dfa->inveclosures, re_node_set,
+				     new_nodes_alloc);
+      if (BE (new_nexts == NULL || new_indices == NULL
+	      || new_edests == NULL || new_eclosures == NULL
+	      || new_inveclosures == NULL, 0))
+	return -1;
+      dfa->nexts = new_nexts;
+      dfa->org_indices = new_indices;
+      dfa->edests = new_edests;
+      dfa->eclosures = new_eclosures;
+      dfa->inveclosures = new_inveclosures;
       dfa->nodes_alloc = new_nodes_alloc;
     }
   dfa->nodes[dfa->nodes_len] = token;
-  dfa->nodes[dfa->nodes_len].opt_subexp = 0;
-  dfa->nodes[dfa->nodes_len].duplicated = 0;
   dfa->nodes[dfa->nodes_len].constraint = 0;
+#ifdef RE_ENABLE_I18N
+  dfa->nodes[dfa->nodes_len].accept_mb =
+    (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
+#endif
+  dfa->nexts[dfa->nodes_len] = -1;
+  re_node_set_init_empty (dfa->edests + dfa->nodes_len);
+  re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
+  re_node_set_init_empty (dfa->inveclosures + dfa->nodes_len);
   return dfa->nodes_len++;
 }
 
@@ -1551,16 +1553,13 @@ create_ci_newstate (dfa, nodes, hash)
       re_token_type_t type = node->type;
       if (type == CHARACTER && !node->constraint)
 	continue;
+#ifdef RE_ENABLE_I18N
+      newstate->accept_mb |= node->accept_mb;
+#endif /* RE_ENABLE_I18N */
 
       /* If the state has the halt node, the state is a halt state.  */
-      else if (type == END_OF_RE)
+      if (type == END_OF_RE)
 	newstate->halt = 1;
-#ifdef RE_ENABLE_I18N
-      else if (type == COMPLEX_BRACKET
-	       || type == OP_UTF8_PERIOD
-	       || (type == OP_PERIOD && dfa->mb_cur_max > 1))
-	newstate->accept_mb = 1;
-#endif /* RE_ENABLE_I18N */
       else if (type == OP_BACK_REF)
 	newstate->has_backref = 1;
       else if (type == ANCHOR || node->constraint)
@@ -1611,15 +1610,13 @@ create_cd_newstate (dfa, nodes, context, hash)
 
       if (type == CHARACTER && !constraint)
 	continue;
-      /* If the state has the halt node, the state is a halt state.  */
-      else if (type == END_OF_RE)
-	newstate->halt = 1;
 #ifdef RE_ENABLE_I18N
-      else if (type == COMPLEX_BRACKET
-	       || type == OP_UTF8_PERIOD
-	       || (type == OP_PERIOD && dfa->mb_cur_max > 1))
-	newstate->accept_mb = 1;
+      newstate->accept_mb |= node->accept_mb;
 #endif /* RE_ENABLE_I18N */
+
+      /* If the state has the halt node, the state is a halt state.  */
+      if (type == END_OF_RE)
+	newstate->halt = 1;
       else if (type == OP_BACK_REF)
 	newstate->has_backref = 1;
       else if (type == ANCHOR)