about summary refs log tree commit diff
path: root/posix/regex_internal.h
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 2003-11-24 19:30:51 +0000
committer Ulrich Drepper <drepper@redhat.com> 2003-11-24 19:30:51 +0000
commit 65e6becf5b1b9ca1e911986d030b8b31b5dd4cfa (patch)
tree 119234eb952b9bd87c68ceb03f68826d4bbad4de /posix/regex_internal.h
parent 951d64082330765a22da6beac6e067ec054605e7 (diff)
download glibc-65e6becf5b1b9ca1e911986d030b8b31b5dd4cfa.tar.gz
glibc-65e6becf5b1b9ca1e911986d030b8b31b5dd4cfa.tar.xz
glibc-65e6becf5b1b9ca1e911986d030b8b31b5dd4cfa.zip
Update.
2003-11-24  Jakub Jelinek  <jakub@redhat.com>

	* posix/regex_internal.h (re_token_t): Add word_char bit.  Add
	comment.
	(re_dfa_t): Add sb_char field.
	(bitset_mask): New function.
	* posix/regcomp.c (free_dfa_content): Free sb_char.
	(init_dfa): Don't initialize word_char unnecessarily.
	Initialize sb_char.
	(duplicate_node): Don't duplicate !word_char CHARACTERs with
	NEXT_WORD_CONSTRAINT constraint or word_char CHARACTERs with
	NEXT_NOTWORD_CONSTRAINT.  Return -1 in *new_idx instead.
	(duplicate_node_closure): Handle clone_dest == -1 from
	duplicate_node.
	(peek_token): Initialize word_char bit.
	(parse_expression, parse_dup_op): Add comments.
	(parse_bracket_exp): Don't set bitmask bits for multi-byte char
	starting bytes here at the beginning.  Mask off the bits right
	before creating SIMPLE_BRACKET.
	(build_charclass_op): Likewise.
	* posix/regexec.c (group_nodes_into_DFAstates) <case OP_PERIOD>: Only
	set accept bits for single-byte characters.
	(group_nodes_into_DFAstates): Don't rely on characters 0 .. 127
	being single byte encoded and the rest multi-byte.
	* posix/bug-regex19.c (tests): Add new tests.
	(do_mb_tests): Initialize t to *test.
	(main): Fail even on do_mb_tests errors.
Diffstat (limited to 'posix/regex_internal.h')
-rw-r--r-- posix/regex_internal.h 15
1 files changed, 15 insertions, 0 deletions
diff --git a/posix/regex_internal.h b/posix/regex_internal.h
index 5111f6d793..f8e99ee06a 100644
--- a/posix/regex_internal.h
+++ b/posix/regex_internal.h
@@ -133,6 +133,7 @@ typedef unsigned int *re_bitset_ptr_t;
 static inline void bitset_not (bitset set);
 static inline void bitset_merge (bitset dest, const bitset src);
 static inline void bitset_not_merge (bitset dest, const bitset src);
+static inline void bitset_mask (bitset dest, const bitset src);
 
 #define PREV_WORD_CONSTRAINT 0x0001
 #define PREV_NOTWORD_CONSTRAINT 0x0002
@@ -281,8 +282,11 @@ typedef struct
   unsigned int constraint : 10;	/* context constraint */
   unsigned int duplicated : 1;
 #ifdef RE_ENABLE_I18N
+  /* These 2 bits can be moved into the union if needed (e.g. if running out
+     of bits; move opr.c to opr.c.c and move the flags to opr.c.flags).  */
   unsigned int mb_partial : 1;
 #endif
+  unsigned int word_char : 1;
 } re_token_t;
 
 #define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
@@ -601,6 +605,7 @@ struct re_dfa_t
   re_dfastate_t *init_state_begbuf;
   bin_tree_t *str_tree;
   bin_tree_storage_t *str_tree_storage;
+  re_bitset_ptr_t sb_char;
   int str_tree_storage_idx;
 
   /* number of subexpressions `re_nsub' is in regex_t.  */
@@ -711,6 +716,16 @@ bitset_not_merge (dest, src)
     dest[i] |= ~src[i];
 }
 
+static inline void	/* AND the bitset SRC into DEST, element by element.  */
+bitset_mask (dest, src)
+     bitset dest;	/* modified in place */
+     const bitset src;	/* only read here */
+{
+  int bitset_i;	/* indexes whole array elements, not individual bits */
+  for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i)	/* BITSET_UINTS is defined outside this hunk; presumably the element count of a bitset -- confirm */
+    dest[bitset_i] &= src[bitset_i];	/* a bit stays set in DEST only if it is also set in SRC */
+}
+
 #if defined RE_ENABLE_I18N && !defined RE_NO_INTERNAL_PROTOTYPES
 /* Inline functions for re_string.  */
 static inline int