summary refs log tree commit diff
path: root/posix/regex_internal.h
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2003-11-18 23:40:59 +0000
committerUlrich Drepper <drepper@redhat.com>2003-11-18 23:40:59 +0000
commitad7f28c29d06ddb4506d0d75e089732740b5bd2b (patch)
treede9ee40c2d213a4113e3da2b8cedc3d505386bc1 /posix/regex_internal.h
parent5146ec9a2c092cb74b5cd0eb8b5e938b46f1631b (diff)
downloadglibc-ad7f28c29d06ddb4506d0d75e089732740b5bd2b.tar.gz
glibc-ad7f28c29d06ddb4506d0d75e089732740b5bd2b.tar.xz
glibc-ad7f28c29d06ddb4506d0d75e089732740b5bd2b.zip
Update.
	* posix/regex_internal.h (re_token_type_t): Remove unused ALT,
	END_OF_RE_TOKEN_T and SUBEXP.  Reorder values.  Add OP_UTF8_PERIOD
	and EPSILON_BIT.
	(IS_EPSILON_NODE): Just test if EPSILON_BIT is set.
	(ACCEPT_MB_NODE): Return 1 for OP_UTF8_PERIOD as well.
	* posix/regex_internal.c (create_ci_newstate, create_cd_newstate):
	Handle OP_UTF8_PERIOD.
	(re_string_reconstruct): Set valid_len for single byte char searching
	with no translation and case sensitivity.
	* posix/regcomp.c (re_compile_fastmap_iter, calc_first): Handle
	OP_UTF8_PERIOD.
	(re_compile_internal): Don't call optimize_utf8 if preg->translate
	!= NULL.
	(optimize_utf8): Remove BACK_SLASH case.
	Transform OP_PERIOD into OP_UTF8_PERIOD if the searching can be
	optimized.
	(parse_bracket_exp): Don't create SIMPLE_BRACKET if it doesn't have
	any bits set and COMPLEX_BRACKET is used.
	* posix/regexec.c (transit_state_mb): Fix comment typo.
	(group_nodes_into_DFAstates, check_node_accept): Handle
	OP_UTF8_PERIOD.
	(check_node_accept_bytes): Likewise.  Reorder slightly so that
	re_string_char_size_at and re_string_elem_size_at are called
	only when needed.
	* posix/bug-regex20.c (BRE, ERE): Define.
	(tests): Use them to make lines shorter.  Expect . to be
	optimized.  Add lots of new tests.
	(main): Run (ATM just case sensitive) test with backwards searching
	as well.

2003-11-18  Jakub Jelinek  <jakub@redhat.com>
Diffstat (limited to 'posix/regex_internal.h')
-rw-r--r--posix/regex_internal.h59
1 files changed, 27 insertions, 32 deletions
diff --git a/posix/regex_internal.h b/posix/regex_internal.h
index 9fcf865f65..f905d2b510 100644
--- a/posix/regex_internal.h
+++ b/posix/regex_internal.h
@@ -167,8 +167,31 @@ typedef enum
 {
   NON_TYPE = 0,
 
+  /* Node type, These are used by token, node, tree.  */
+  CHARACTER = 1,
+  END_OF_RE = 2,
+  SIMPLE_BRACKET = 3,
+  OP_BACK_REF = 4,
+  OP_PERIOD = 5,
+#ifdef RE_ENABLE_I18N
+  COMPLEX_BRACKET = 6,
+  OP_UTF8_PERIOD = 7,
+#endif /* RE_ENABLE_I18N */
+
+  EPSILON_BIT = 8,
+  OP_OPEN_SUBEXP = EPSILON_BIT | 0,
+  OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
+  OP_ALT = EPSILON_BIT | 2,
+  OP_DUP_ASTERISK = EPSILON_BIT | 3,
+  OP_DUP_PLUS = EPSILON_BIT | 4,
+  OP_DUP_QUESTION = EPSILON_BIT | 5,
+  ANCHOR = EPSILON_BIT | 6,
+
+  /* Tree type, these are used only by tree. */
+  CONCAT = 16,
+
   /* Token type, these are used only by token.  */
-  OP_OPEN_BRACKET,
+  OP_OPEN_BRACKET = 17,
   OP_CLOSE_BRACKET,
   OP_CHARSET_RANGE,
   OP_OPEN_DUP_NUM,
@@ -184,32 +207,8 @@ typedef enum
   OP_NOTWORD,
   OP_SPACE,
   OP_NOTSPACE,
-  BACK_SLASH,
+  BACK_SLASH
 
-  /* Tree type, these are used only by tree. */
-  CONCAT,
-  ALT,
-  SUBEXP,
-  SIMPLE_BRACKET,
-#ifdef RE_ENABLE_I18N
-  COMPLEX_BRACKET,
-#endif /* RE_ENABLE_I18N */
-
-  /* Node type, These are used by token, node, tree.  */
-  OP_OPEN_SUBEXP,
-  OP_CLOSE_SUBEXP,
-  OP_PERIOD,
-  CHARACTER,
-  END_OF_RE,
-  OP_ALT,
-  OP_DUP_ASTERISK,
-  OP_DUP_PLUS,
-  OP_DUP_QUESTION,
-  OP_BACK_REF,
-  ANCHOR,
-
-  /* Dummy marker.  */
-  END_OF_RE_TOKEN_T
 } re_token_type_t;
 
 #ifdef RE_ENABLE_I18N
@@ -284,13 +283,9 @@ typedef struct
 #endif
 } re_token_t;
 
-#define IS_EPSILON_NODE(type) \
-  ((type) == OP_ALT || (type) == OP_DUP_ASTERISK || (type) == OP_DUP_PLUS \
-   || (type) == OP_DUP_QUESTION || (type) == ANCHOR \
-   || (type) == OP_OPEN_SUBEXP || (type) == OP_CLOSE_SUBEXP)
-
+#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
 #define ACCEPT_MB_NODE(type) \
-  ((type) == COMPLEX_BRACKET || (type) == OP_PERIOD)
+  ((type) >= OP_PERIOD && (type) <= OP_UTF8_PERIOD)
 
 struct re_string_t
 {