summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 15
-rw-r--r-- posix/bug-regex13.c 28
-rw-r--r-- posix/bug-regex19.c 3
-rw-r--r-- posix/regexec.c 44
4 files changed, 72 insertions, 18 deletions
diff --git a/ChangeLog b/ChangeLog
index 631375ba82..ddfe6defeb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2003-12-21  Jakub Jelinek  <jakub@redhat.com>
+
+	* posix/regcomp.c (duplicate_node, duplicate_node_closure): Revert
+	2003-11-24 changes.
+	* posix/regexec.c (group_nodes_into_DFAstates): For CHARACTER with
+	NEXT_{,NOT}WORD_CONSTRAINT check word_char bit.
+	* posix/bug-regex19.c (tests): Add new tests.
+
+	* posix/regexec.c (check_dst_limits_calc_pos): Fix formatting.
+
+	* posix/regcomp.c (parse_dup_op): Return NULL if dup_elem is NULL,
+	after checking syntax.  Optimize.
+	(calc_first): Fix comment.
+	* posix/bug-regex13.c (tests): Add new tests.
+
 2003-12-21  Roland McGrath  <roland@redhat.com>
 
 	* manual/arith.texi (Parsing of Integers): Typo fix.
diff --git a/posix/bug-regex13.c b/posix/bug-regex13.c
index e6b0ca1ecf..df1c95d64c 100644
--- a/posix/bug-regex13.c
+++ b/posix/bug-regex13.c
@@ -34,7 +34,33 @@ static struct
 } tests[] = {
   {RE_BACKSLASH_ESCAPE_IN_LISTS, "[0\\-9]", "1", -1}, /* It should not match.  */
   {RE_BACKSLASH_ESCAPE_IN_LISTS, "[0\\-9]", "-", 0}, /* It should match.  */
-  {RE_SYNTAX_POSIX_BASIC, "s1\n.*\ns3", "s1\ns2\ns3", 0}
+  {RE_SYNTAX_POSIX_BASIC, "s1\n.*\ns3", "s1\ns2\ns3", 0},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{0}c", "ac", 0},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{0}c", "abc", -1},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{0}c", "abbc", -1},
+  /* Nested duplication.  */
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{1}{1}c", "ac", -1},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{1}{1}c", "abc", 0},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{1}{1}c", "abbc", -1},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{2}{2}c", "ac", -1},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{2}{2}c", "abbc", -1},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{2}{2}c", "abbbbc", 0},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{2}{2}c", "abbbbbc", -1},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{0}{1}c", "ac", 0},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{0}{1}c", "abc", -1},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{0}{1}c", "abbc", -1},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{1}{0}c", "ac", 0},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{1}{0}c", "abc", -1},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{1}{0}c", "abbc", -1},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{0}*c", "ac", 0},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{0}*c", "abc", -1},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{0}*c", "abbc", -1},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{0}?c", "ac", 0},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{0}?c", "abc", -1},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{0}?c", "abbc", -1},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{0}+c", "ac", 0},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{0}+c", "abc", -1},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{0}+c", "abbc", -1},
 };
 
 int
diff --git a/posix/bug-regex19.c b/posix/bug-regex19.c
index f24e0aa730..4000b19b4d 100644
--- a/posix/bug-regex19.c
+++ b/posix/bug-regex19.c
@@ -246,6 +246,9 @@ static struct test_s
   {ERE, "(\\<|[A].)[A~C]", "DACC", 0, 1},
   {ERE, "(\\<|[A].)[A~C]", "B!A=", 0, 2},
   {ERE, "(\\<|[A].)[A~C]", "B~C", 0, 2},
+  {ERE, "^[^A]*\\bB", "==B", 0, 0},
+  {ERE, "^[^A]*\\bB", "CBD!=B", 0, 0},
+  {ERE, "[^A]*\\bB", "==B", 2, 2}
 };
 
 int
diff --git a/posix/regexec.c b/posix/regexec.c
index 72b26f112b..7f8fac8961 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -1746,7 +1746,7 @@ check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, from_node,
      int limit, subexp_idx, from_node, str_idx;
 {
   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
-      int node_idx;
+  int node_idx;
 
   /* If we are outside the range of the subexpression, return -1 or 1.  */
   if (str_idx < lim->subexp_from)
@@ -1761,23 +1761,23 @@ check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, from_node,
 
   /* Else, we are on the boundary: examine the nodes on the epsilon
      closure.  */
-      for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
-	{
-	  int node = eclosures->elems[node_idx];
+  for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
+    {
+      int node = eclosures->elems[node_idx];
       switch (dfa->nodes[node].type)
 	{
 	case OP_BACK_REF:
-	    {
-	      int bi = search_cur_bkref_entry (mctx, str_idx);
-	      for (; bi < mctx->nbkref_ents; ++bi)
-		{
-		  struct re_backref_cache_entry *ent = mctx->bkref_ents + bi;
+	  {
+	    int bi = search_cur_bkref_entry (mctx, str_idx);
+	    for (; bi < mctx->nbkref_ents; ++bi)
+	      {
+		struct re_backref_cache_entry *ent = mctx->bkref_ents + bi;
 		int dst, cpos;
 
 		/* If this backreference goes beyond the point we're
 		   examining, don't go any further.  */
-		  if (ent->str_idx > str_idx)
-		    break;
+		if (ent->str_idx > str_idx)
+		  break;
 
 		if (ent->node != node || ent->subexp_from != ent->subexp_to)
 		  continue;
@@ -1788,7 +1788,7 @@ check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, from_node,
 		   node, don't recurse because it would cause an
 		   infinite loop: a regex that exhibits this behavior
 		   is ()\1*\1*  */
-		      dst = dfa->edests[node].elems[0];
+		dst = dfa->edests[node].elems[0];
 		if (dst == from_node)
 		  {
 		    if (str_idx == lim->subexp_from)
@@ -1797,17 +1797,17 @@ check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, from_node,
 		      return 0;
 		  }
 
-		      cpos = check_dst_limits_calc_pos (dfa, mctx, limit,
-							dfa->eclosures + dst,
-							subexp_idx, dst,
-							str_idx);
+		cpos = check_dst_limits_calc_pos (dfa, mctx, limit,
+						  dfa->eclosures + dst,
+						  subexp_idx, dst,
+						  str_idx);
 
 		if (cpos == -1 && str_idx == lim->subexp_from)
 		  return -1;
 
 		if (cpos == 0 /* && str_idx == lim->lim->subexp_to */)
 		  return 0;
-	    }
+	      }
 	      break;
 	    }
 
@@ -3416,6 +3416,11 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
 	  if (constraint & NEXT_WORD_CONSTRAINT)
 	    {
 	      unsigned int any_set = 0;
+	      if (type == CHARACTER && !node->word_char)
+		{
+		  bitset_empty (accepts);
+		  continue;
+		}
 #ifdef RE_ENABLE_I18N
 	      if (dfa->mb_cur_max > 1)
 		for (j = 0; j < BITSET_UINTS; ++j)
@@ -3430,6 +3435,11 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
 	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
 	    {
 	      unsigned int any_set = 0;
+	      if (type == CHARACTER && node->word_char)
+		{
+		  bitset_empty (accepts);
+		  continue;
+		}
 #ifdef RE_ENABLE_I18N
 	      if (dfa->mb_cur_max > 1)
 		for (j = 0; j < BITSET_UINTS; ++j)