about summary refs log tree commit diff
path: root/posix/regexec.c
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 2003-11-24 19:30:51 +0000
committer Ulrich Drepper <drepper@redhat.com> 2003-11-24 19:30:51 +0000
commit 65e6becf5b1b9ca1e911986d030b8b31b5dd4cfa (patch)
tree 119234eb952b9bd87c68ceb03f68826d4bbad4de /posix/regexec.c
parent 951d64082330765a22da6beac6e067ec054605e7 (diff)
download glibc-65e6becf5b1b9ca1e911986d030b8b31b5dd4cfa.tar.gz
glibc-65e6becf5b1b9ca1e911986d030b8b31b5dd4cfa.tar.xz
glibc-65e6becf5b1b9ca1e911986d030b8b31b5dd4cfa.zip
Update.
2003-11-24  Jakub Jelinek  <jakub@redhat.com>

	* posix/regex_internal.h (re_token_t): Add word_char bit.  Add
	comment.
	(re_dfa_t): Add sb_char field.
	(bitset_mask): New function.
	* posix/regcomp.c (free_dfa_content): Free sb_char.
	(init_dfa): Don't initialize word_char unnecessarily.
	Initialize sb_char.
	(duplicate_node): Don't duplicate !word_char CHARACTERs with
	NEXT_WORD_CONSTRAINT constraint or word_char CHARACTERs with
	NEXT_NOTWORD_CONSTRAINT.  Return -1 in *new_idx instead.
	(duplicate_node_closure): Handle clone_dest == -1 from
	duplicate_node.
	(peek_token): Initialize word_char bit.
	(parse_expression, parse_dup_op): Add comments.
	(parse_bracket_exp): Don't set bitmask bits for multi-byte char
	starting bytes here at the beginning.  Mask off the bits right
	before creating SIMPLE_BRACKET.
	(build_charclass_op): Likewise.
	* posix/regexec.c (group_nodes_into_DFAstates) <case OP_PERIOD>: Only
	set accept bits for single-byte characters.
	(group_nodes_into_DFAstates): Don't rely on characters 0 .. 127
	being single byte encoded and the rest multi-byte.
	* posix/bug-regex19.c (tests): Add new tests.
	(do_mb_tests): Initialize t to *test.
	(main): Fail even on do_mb_tests errors.
Diffstat (limited to 'posix/regexec.c')
-rw-r--r-- posix/regexec.c 37
1 files changed, 26 insertions, 11 deletions
diff --git a/posix/regexec.c b/posix/regexec.c
index 0b524856ca..58ac9c82c4 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -3341,7 +3341,12 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
 	}
       else if (type == OP_PERIOD)
 	{
-	  bitset_set_all (accepts);
+#ifdef RE_ENABLE_I18N
+	  if (dfa->mb_cur_max > 1)
+	    bitset_merge (accepts, dfa->sb_char);
+	  else
+#endif	  
+	    bitset_set_all (accepts);
 	  if (!(preg->syntax & RE_DOT_NEWLINE))
 	    bitset_clear (accepts, '\n');
 	  if (preg->syntax & RE_DOT_NOT_NULL)
@@ -3362,8 +3367,6 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
 	 match it the context.  */
       if (constraint)
 	{
-	  int word_char_max;
-
 	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
 	    {
 	      int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
@@ -3379,16 +3382,28 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
 	      continue;
 	    }
 
-	  /* This assumes ASCII compatible locale.  We cannot say
-	     anything about the non-ascii chars.  */
-	  word_char_max
-	    = dfa->mb_cur_max > 1 ? BITSET_UINTS / 2 : BITSET_UINTS;
 	  if (constraint & NEXT_WORD_CONSTRAINT)
-	    for (j = 0; j < word_char_max; ++j)
-	      accepts[j] &= dfa->word_char[j];
+	    {
+#ifdef RE_ENABLE_I18N
+	      if (dfa->mb_cur_max > 1)
+		for (j = 0; j < BITSET_UINTS; ++j)
+		  accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]);
+	      else
+#endif
+		for (j = 0; j < BITSET_UINTS; ++j)
+		  accepts[j] &= dfa->word_char[j];
+	    }
 	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
-	    for (j = 0; j < word_char_max; ++j)
-	      accepts[j] &= ~dfa->word_char[j];
+	    {
+#ifdef RE_ENABLE_I18N
+	      if (dfa->mb_cur_max > 1)
+		for (j = 0; j < BITSET_UINTS; ++j)
+		  accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]);
+	      else
+#endif
+		for (j = 0; j < BITSET_UINTS; ++j)
+		  accepts[j] &= ~dfa->word_char[j];
+	    }
 	}
 
       /* Then divide `accepts' into DFA states, or create a new