about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog10
-rw-r--r--posix/PCRE.tests19
-rw-r--r--posix/regcomp.c79
-rw-r--r--posix/regex_internal.c25
-rw-r--r--posix/regex_internal.h2
5 files changed, 91 insertions, 44 deletions
diff --git a/ChangeLog b/ChangeLog
index 8b1a34542b..0267ce381b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2004-11-23  Paolo Bonzini  <bonzini@gnu.org>
+
+	* posix/regcomp.c (analyze_tree): Always call calc_epsdest.
+	(calc_inveclosure): Use re_node_set_insert_last.
+	(parse_dup_op): Lower X{1,5} to (X(X(X(XX?)?)?)?)?
+	rather than X?X?X?X?X?.
+	* posix/regex_internal.h (re_node_set_insert_last): New declaration.
+	* posix/regex_internal.c (re_node_set_insert_last): New function.
+	* posix/PCRE.tests: Add testcases.
+
 2004-11-25  Ulrich Drepper  <drepper@redhat.com>
 
 	* dlfcn/dlfcn.h: Remove nonnull attribute from dlopen.
diff --git a/posix/PCRE.tests b/posix/PCRE.tests
index 7ea5b9e70c..0fb9cadafc 100644
--- a/posix/PCRE.tests
+++ b/posix/PCRE.tests
@@ -2365,3 +2365,22 @@ No match
  0: bc123bc
  1: bc
  2: bc
+
+/^a{2,5}$/
+    aa
+ 0: aa
+    aaa
+ 0: aaa
+    aaaa
+ 0: aaaa
+    aaaaa
+ 0: aaaaa
+    *** Failers
+No match
+    a
+No match
+    b
+No match
+    aaaaab
+No match
+    aaaaaa
diff --git a/posix/regcomp.c b/posix/regcomp.c
index dafad9bd0c..675f816f60 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -1269,8 +1269,8 @@ analyze_tree (dfa, node)
     calc_first (dfa, node);
   if (node->next == -1)
     calc_next (dfa, node);
-  if (node->eclosure.nelem == 0)
-    calc_epsdest (dfa, node);
+  calc_epsdest (dfa, node);
+
   /* Calculate "first" etc. for the left child.  */
   if (node->left != NULL)
     {
@@ -1626,7 +1626,7 @@ calc_inveclosure (dfa)
       for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
 	{
 	  dest = dfa->eclosures[src].elems[idx];
-	  re_node_set_insert (dfa->inveclosures + dest, src);
+	  re_node_set_insert_last (dfa->inveclosures + dest, src);
 	}
     }
 }
@@ -2538,7 +2538,7 @@ parse_dup_op (elem, regexp, dfa, token, syntax, err)
      reg_errcode_t *err;
 {
   re_token_t dup_token;
-  bin_tree_t *tree = NULL;
+  bin_tree_t *tree = NULL, *old_tree = NULL;
   int i, start, end, start_idx = re_string_cur_idx (regexp);
   re_token_t start_token = *token;
 
@@ -2598,12 +2598,14 @@ parse_dup_op (elem, regexp, dfa, token, syntax, err)
       end = (token->type == OP_DUP_QUESTION) ? 1 : -1;
     }
 
+  fetch_token (token, regexp, syntax);
+
   /* Treat "<re>{0}*" etc. as "<re>{0}".  */
-  if (BE (elem == NULL, 0))
-    start = end = 0;
+  if (BE (elem == NULL || (start == 0 && end == 0), 0))
+    return NULL;
 
   /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}".  */
-  else if (BE (start > 0, 0))
+  if (BE (start > 0, 0))
     {
       tree = elem;
       for (i = 2; i <= start; ++i)
@@ -2613,52 +2615,41 @@ parse_dup_op (elem, regexp, dfa, token, syntax, err)
 	  if (BE (elem == NULL || tree == NULL, 0))
 	    goto parse_dup_op_espace;
 	}
-    }
 
-  if (BE (end != start, 1))
-    {
-      dup_token.type = (end == -1 ? OP_DUP_ASTERISK : OP_DUP_QUESTION);
-      if (BE (start > 0, 0))
-	{
-          elem = duplicate_tree (elem, dfa);
-          if (BE (elem == NULL, 0))
-	    goto parse_dup_op_espace;
+      if (start == end)
+	return tree;
 
-          /* This subexpression will be marked as optional, so that
-             empty matches do not touch the registers.  */
-          mark_opt_subexp (elem, dfa);
+      /* Duplicate ELEM before it is marked optional.  */
+      elem = duplicate_tree (elem, dfa);
+      old_tree = tree;
+    }
+  else
+    old_tree = NULL;
 
-          /* Prepare the tree with the modifier.  */
-          elem = re_dfa_add_tree_node (dfa, elem, NULL, &dup_token);
-          tree = create_tree (dfa, tree, elem, CONCAT, 0);
-	}
-      else
-	{
-	  /* We do not need to duplicate the tree because we have not
-	     created it yet.  */
-          mark_opt_subexp (elem, dfa);
-          tree = elem = re_dfa_add_tree_node (dfa, elem, NULL, &dup_token);
-	}
+  mark_opt_subexp (elem, dfa);
+  dup_token.type = (end == -1 ? OP_DUP_ASTERISK : OP_DUP_QUESTION);
+  tree = re_dfa_add_tree_node (dfa, elem, NULL, &dup_token);
+  if (BE (tree == NULL, 0))
+    goto parse_dup_op_espace;
 
+  /* This loop is actually executed only when end != -1,
+     to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?...  We have
+     already created the start+1-th copy.  */
+  for (i = start + 2; i <= end; ++i)
+    {
+      elem = duplicate_tree (elem, dfa);
+      tree = create_tree (dfa, tree, elem, CONCAT, 0);
       if (BE (elem == NULL || tree == NULL, 0))
         goto parse_dup_op_espace;
 
-      /* This loop is actually executed only when end != -1,
-         to rewrite <re>{0,n} as <re>?<re>?<re>?...  We have
-         already created the start+1-th copy.  */
-      for (i = start + 2; i <= end; ++i)
-        {
-          elem = duplicate_tree (elem, dfa);
-          tree = create_tree (dfa, tree, elem, CONCAT, 0);
-          if (BE (elem == NULL || tree == NULL, 0))
-	    {
-	      *err = REG_ESPACE;
-	      return NULL;
-	    }
-        }
+      tree = re_dfa_add_tree_node (dfa, tree, NULL, &dup_token);
+      if (BE (tree == NULL, 0))
+        goto parse_dup_op_espace;
     }
 
-  fetch_token (token, regexp, syntax);
+  if (old_tree)
+    tree = create_tree (dfa, old_tree, tree, CONCAT, 0);
+
   return tree;
 
  parse_dup_op_espace:
diff --git a/posix/regex_internal.c b/posix/regex_internal.c
index bb1d73d9a0..cb439e5d7c 100644
--- a/posix/regex_internal.c
+++ b/posix/regex_internal.c
@@ -1250,6 +1250,31 @@ re_node_set_insert (set, elem)
   return 1;
 }
 
+/* Insert the new element ELEM to the re_node_set* SET.
+   SET should not already have any element greater than or equal to ELEM.
+   Return -1 if an error is occured, return 1 otherwise.  */
+
+static int
+re_node_set_insert_last (set, elem)
+     re_node_set *set;
+     int elem;
+{
+  /* Realloc if we need.  */
+  if (set->alloc == set->nelem)
+    {
+      int *new_array;
+      set->alloc = (set->alloc + 1) * 2;
+      new_array = re_realloc (set->elems, int, set->alloc);
+      if (BE (new_array == NULL, 0))
+	return -1;
+      set->elems = new_array;
+    }
+
+  /* Insert the new element.  */
+  set->elems[set->nelem++] = elem;
+  return 1;
+}
+
 /* Compare two node sets SET1 and SET2.
    return 1 if SET1 and SET2 are equivalent, return 0 otherwise.  */
 
diff --git a/posix/regex_internal.h b/posix/regex_internal.h
index a778032d77..703d409eb8 100644
--- a/posix/regex_internal.h
+++ b/posix/regex_internal.h
@@ -668,6 +668,8 @@ static reg_errcode_t re_node_set_init_union (re_node_set *dest,
 static reg_errcode_t re_node_set_merge (re_node_set *dest,
 					const re_node_set *src) internal_function;
 static int re_node_set_insert (re_node_set *set, int elem) internal_function;
+static int re_node_set_insert_last (re_node_set *set,
+				    int elem) internal_function;
 static int re_node_set_compare (const re_node_set *set1,
 				const re_node_set *set2) internal_function;
 static int re_node_set_contains (const re_node_set *set, int elem) internal_function;