summary refs log tree commit diff
path: root/posix/regex_internal.h
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2005-01-26 22:42:49 +0000
committerUlrich Drepper <drepper@redhat.com>2005-01-26 22:42:49 +0000
commit02f3550c8bf47ecff6b548bc8ba3219d234a41a3 (patch)
tree668b767c8ad6842abd668203e35858a13225f3c6 /posix/regex_internal.h
parent629311b74a9f4f2c9a6d91ff50f76d0ee8fa21c0 (diff)
downloadglibc-02f3550c8bf47ecff6b548bc8ba3219d234a41a3.tar.gz
glibc-02f3550c8bf47ecff6b548bc8ba3219d234a41a3.tar.xz
glibc-02f3550c8bf47ecff6b548bc8ba3219d234a41a3.zip
[BZ #605, BZ #611]
Update.
2004-12-13  Paolo Bonzini  <bonzini@gnu.org>

	Separate parsing and creation of the NFA.  Avoided recursion on
	the (very unbalanced) parse tree.
	[BZ #611]
	* posix/regcomp.c (struct subexp_optimize, analyze_tree, calc_epsdest,
	re_dfa_add_tree_node, mark_opt_subexp_iter): Removed.
	(optimize_subexps, duplicate_tree, calc_first, calc_next,
	mark_opt_subexp): Rewritten.
	(preorder, postorder, lower_subexps, lower_subexp, link_nfa_nodes,
	create_token_tree, free_tree, free_token): New.
	(analyze): Accept a regex_t *.  Invoke the passes via the preorder and
	postorder generic visitors.  Do not initialize the fields in the
	re_dfa_t that represent the transitions.
	(free_dfa_content): Use free_token.
	(re_compile_internal): Analyze before UTF-8 optimizations.  Do not
	include optimization of subexpressions.
	(create_initial_state): Fetch the DFA node index from the first node's
	bin_tree_t *.
	(optimize_utf8): Abort on unexpected nodes, including OP_DUP_QUESTION.
	Return on COMPLEX_BRACKET.
	(duplicate_node_closure): Fix comment.
	(duplicate_node): Do not initialize the fields in the
	re_dfa_t that represent the transitions.
	(calc_eclosure, calc_inveclosure): Do not handle OP_DELETED_SUBEXP.
	(create_tree): Remove final argument.  All callers adjusted.  Rewritten
	to use create_token_tree.
	(parse_reg_exp, parse_branch, parse_expression, parse_bracket_exp,
	build_charclass_op): Use create_tree or create_token_tree instead
	of re_dfa_add_tree_node.
	(parse_dup_op): Likewise.  Also free the tree using free_tree for
	"<re>{0}", and lower OP_DUP_QUESTION to OP_ALT: "a?" is equivalent
	to "a|".  Adjust invocation of mark_opt_subexp.
	(parse_sub_exp): Create a single SUBEXP node.
	* posix/regex_internal.c (re_dfa_add_node): Remove last parameter,
	always perform as if it was 1.  Do not initialize OPT_SUBEXP and
	DUPLICATED, and initialize the DFA fields representing the transitions.
	* posix/regex_internal.h (re_dfa_add_node): Adjust prototype.
	(re_token_type_t): Move OP_DUP_PLUS and OP_DUP_QUESTION to the tokens
	section.  Add a tree-only code SUBEXP.  Remove OP_DELETED_SUBEXP.
	(bin_tree_t): Include a full re_token_t for TOKEN.  Turn FIRST and
	NEXT into pointers to trees.  Remove ECLOSURE.

2004-12-28  Paolo Bonzini  <bonzini@gnu.org >

	[BZ #605]
	* posix/regcomp.c (parse_bracket_exp): Do not modify DFA nodes
	that were already created.
	* posix/regex_internal.c (re_dfa_add_node): Set accept_mb field
	in the token if needed.
	(create_ci_newstate, create_cd_newstate): Set accept_mb field
	from the tokens' field.
	* posix/regex_internal.h (re_token_t): Add accept_mb field.
	(ACCEPT_MB_NODE): Removed.
	* posix/regexec.c (proceed_next_node, transit_states_mb,
	build_sifted_states, check_arrival_add_next_nodes): Use
	accept_mb instead of ACCEPT_MB_NODE.
Diffstat (limited to 'posix/regex_internal.h')
-rw-r--r--posix/regex_internal.h24
1 files changed, 11 insertions, 13 deletions
diff --git a/posix/regex_internal.h b/posix/regex_internal.h
index 694a69965a..f065cf449d 100644
--- a/posix/regex_internal.h
+++ b/posix/regex_internal.h
@@ -189,16 +189,16 @@ typedef enum
   OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
   OP_ALT = EPSILON_BIT | 2,
   OP_DUP_ASTERISK = EPSILON_BIT | 3,
-  OP_DUP_PLUS = EPSILON_BIT | 4,
-  OP_DUP_QUESTION = EPSILON_BIT | 5,
-  ANCHOR = EPSILON_BIT | 6,
-  OP_DELETED_SUBEXP = EPSILON_BIT | 7,
+  ANCHOR = EPSILON_BIT | 4,
 
   /* Tree type, these are used only by tree. */
   CONCAT = 16,
+  SUBEXP = 17,
 
   /* Token type, these are used only by token.  */
-  OP_OPEN_BRACKET = 17,
+  OP_DUP_PLUS = 18,
+  OP_DUP_QUESTION,
+  OP_OPEN_BRACKET,
   OP_CLOSE_BRACKET,
   OP_CHARSET_RANGE,
   OP_OPEN_DUP_NUM,
@@ -287,6 +287,7 @@ typedef struct
   unsigned int duplicated : 1;
   unsigned int opt_subexp : 1;
 #ifdef RE_ENABLE_I18N
+  unsigned int accept_mb : 1;
   /* These 2 bits can be moved into the union if needed (e.g. if running out
      of bits; move opr.c to opr.c.c and move the flags to opr.c.flags).  */
   unsigned int mb_partial : 1;
@@ -295,8 +296,6 @@ typedef struct
 } re_token_t;
 
 #define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
-#define ACCEPT_MB_NODE(type) \
-  ((type) >= OP_PERIOD && (type) <= OP_UTF8_PERIOD)
 
 struct re_string_t
 {
@@ -432,15 +431,14 @@ struct bin_tree_t
   struct bin_tree_t *parent;
   struct bin_tree_t *left;
   struct bin_tree_t *right;
+  struct bin_tree_t *first;
+  struct bin_tree_t *next;
+
+  re_token_t token;
 
   /* `node_idx' is the index in dfa->nodes, if `type' == 0.
      Otherwise `type' indicate the type of this node.  */
-  re_token_type_t type;
   int node_idx;
-
-  int first;
-  int next;
-  re_node_set eclosure;
 };
 typedef struct bin_tree_t bin_tree_t;
 
@@ -680,7 +678,7 @@ static void re_node_set_remove_at (re_node_set *set, int idx) internal_function;
   (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
 #define re_node_set_empty(p) ((p)->nelem = 0)
 #define re_node_set_free(set) re_free ((set)->elems)
-static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token, int mode) internal_function;
+static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token) internal_function;
 static re_dfastate_t *re_acquire_state (reg_errcode_t *err, re_dfa_t *dfa,
 					const re_node_set *nodes) internal_function;
 static re_dfastate_t *re_acquire_state_context (reg_errcode_t *err,