about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog13
-rw-r--r--posix/Makefile3
-rw-r--r--posix/bug-regex24.c59
-rw-r--r--posix/regex_internal.h6
-rw-r--r--posix/regexec.c25
5 files changed, 99 insertions, 7 deletions
diff --git a/ChangeLog b/ChangeLog
index b02b433c88..d883905126 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2004-11-12  Ulrich Drepper  <drepper@redhat.com>
+
+	* posix/Makefile (tests): Add bug-regex24.
+	* posix/bug-regex24.c: New file.
+
+2004-11-12  Paolo Bonzini  <bonzini@gnu.org>
+
+	* posix/regexec.c (check_dst_limits_calc_pos_1): Use the map to
+	cut recursive paths.  Make exit condition more precise.
+	(match_ctx_add_entry): Initialize the map.
+	* posix/regex_internal.h (struct re_backref_cache_entry): Add a map of
+	reachable subexpression nodes from each backreference cache entry.
+
 2004-11-10  Jakub Jelinek  <jakub@redhat.com>
 
 	* sysdeps/unix/sysv/linux/setreuid.c: Remove sys/syscall.h,
diff --git a/posix/Makefile b/posix/Makefile
index cd6a52c098..8bc15ad215 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -79,7 +79,8 @@ tests		:= tstgetopt testfnm runtests runptests	     \
 		   bug-regex8 bug-regex9 bug-regex10 bug-regex11 bug-regex12 \
 		   bug-regex13 bug-regex14 bug-regex15 bug-regex16 \
 		   bug-regex17 bug-regex18 bug-regex19 bug-regex20 \
-		   bug-regex21 bug-regex22 bug-regex23 tst-nice tst-nanosleep \
+		   bug-regex21 bug-regex22 bug-regex23 bug-regex24 \
+		   tst-nice tst-nanosleep \
 		   transbug tst-rxspencer tst-pcre tst-boost \
 		   bug-ga1 tst-vfork1 tst-vfork2 tst-waitid \
 		   tst-getaddrinfo2 bug-glob1 bug-glob2
diff --git a/posix/bug-regex24.c b/posix/bug-regex24.c
new file mode 100644
index 0000000000..83ea10bb62
--- /dev/null
+++ b/posix/bug-regex24.c
@@ -0,0 +1,59 @@
+#include <regex.h>
+#include <stdio.h>
+
+#define str "civic"
+
+#define N 10
+static const char *expected[N] =
+  {
+    str, "c", "i", "", "", "", "", "", "", ""
+  };
+
+static int
+do_test (void)
+{
+  regex_t rbuf;
+  static const char pat[] = "\
+^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$";
+
+  int err = regcomp (&rbuf, pat, REG_EXTENDED);
+  if (err != 0)
+    {
+      char errstr[300];
+      regerror (err, &rbuf, errstr, sizeof (errstr));
+      puts (errstr);
+      return err;
+    }
+
+  regmatch_t m[N];
+  err = regexec (&rbuf, str, N, m, 0);
+  if (err != 0)
+    {
+      puts ("regexec failed");
+      return 1;
+    }
+
+  int result = 0;
+  for (int i = 0; i < N; ++i)
+    if (m[i].rm_so == -1)
+      {
+	printf ("m[%d] unused\n", i);
+	result = 1;
+      }
+    else
+      {
+	int len = m[i].rm_eo - m[i].rm_so;
+
+	printf ("m[%d] = \"%.*s\"\n", i, len, str + m[i].rm_so);
+
+	if (strlen (expected[i]) != len
+	    || memcmp (expected[i], str + m[i].rm_so, len) != 0)
+	  result = 1;
+      }
+
+  return result;
+}
+
+#define TIMEOUT 30
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
diff --git a/posix/regex_internal.h b/posix/regex_internal.h
index 023056c028..14d95a5b84 100644
--- a/posix/regex_internal.h
+++ b/posix/regex_internal.h
@@ -548,9 +548,9 @@ struct re_backref_cache_entry
   int str_idx;
   int subexp_from;
   int subexp_to;
-  /* We need only one byte from the following field.  If other small
-     fields are added the type could be changed to 'char'.  */
-  int more;
+  char more;
+  char unused;
+  unsigned short int eps_reachable_subexps_map;
 };
 
 typedef struct
diff --git a/posix/regexec.c b/posix/regexec.c
index 79104119be..a03df2636a 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -1885,7 +1885,12 @@ check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx)
 	      {
 		int dst, cpos;
 
-		if (ent->node != node || ent->subexp_from != ent->subexp_to)
+		if (ent->node != node)
+		  continue;
+
+		if (subexp_idx <= 8 * sizeof (ent->eps_reachable_subexps_map)
+		    && (ent->eps_reachable_subexps_map
+			& (1 << (subexp_idx - 1))) == 0)
 		  continue;
 
 		/* Recurse trying to reach the OP_OPEN_SUBEXP and
@@ -1906,11 +1911,13 @@ check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx)
 		cpos = check_dst_limits_calc_pos_1 (mctx, boundaries,
 						    subexp_idx, dst, bkref_idx);
 
-		if (cpos == -1 && (boundaries & 1))
+		if (cpos == -1 /* && (boundaries & 1) */)
 		  return -1;
 
-		if (cpos == 0 /* && (boundaries & 2) */)
+		if (cpos == 0 && (boundaries & 2))
 		  return 0;
+
+		ent->eps_reachable_subexps_map &= ~(1 << (subexp_idx - 1));
 	      }
 	    while (ent++->more);
 	    break;
@@ -4169,6 +4176,18 @@ match_ctx_add_entry (mctx, node, str_idx, from, to)
   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
+
+  /* This is a cache that saves negative results of check_dst_limits_calc_pos.
+     If bit N is clear, means that this entry won't epsilon-transition to
+     an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
+     it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
+     such node.
+
+     A backreference does not epsilon-transition unless it is empty, so set
+     to all zeros if FROM != TO.  */
+  mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
+    = (from == to ? ~0 : 0);
+
   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
   if (mctx->max_mb_elem_len < to - from)
     mctx->max_mb_elem_len = to - from;