about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog21
-rw-r--r--posix/regex_internal.c207
2 files changed, 151 insertions, 77 deletions
diff --git a/ChangeLog b/ChangeLog
index 655d83ce61..9fb6457d15 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2004-01-02  Paolo Bonzini  <bonzini@gnu.org>
+
+	* posix/regex_internal.c (re_node_set_add_intersect,
+	re_node_set_merge): Rewritten.
+	(re_node_set_insert, re_node_set_remove_at):
+	Avoid memmove, we know what direction we should copy and that we
+	are copying 32-bit words.
+	(re_node_set_compare): Iterate backwards.
+
 2003-12-30  Paul Eggert  <eggert@twinsun.com>
 
 	* time/mktime.c (verify): New macro.
@@ -8,6 +17,18 @@
 
 2004-01-02  Jakub Jelinek  <jakub@redhat.com>
 
+	* posix/regex_internal.h (re_match_context_t): Add dfa member.
+	* posix/regexec.c (match_ctx_free_subtops, search_cur_bkref_entry,
+	match_ctx_add_sublast, sift_ctx_init, acquire_init_state_context,
+	prune_impossible_nodes, check_halt_state_context, proceed_next_node,
+	sift_states_backward, update_cur_sifted_state, check_dst_limits,
+	check_dst_limits_calc_pos, sift_states_bkref, transit_state,
+	check_subexp_matching_top, transit_state_sb, transit_state_mb,
+	transit_state_bkref, get_subexp, get_subexp_sub, check_arrival,
+	check_arrival_add_next_nodes, expand_bkref_cache, check_node_accept):
+	Remove dfa parameter.  Get dfa from mctxt.  Adjust callers.
+	(re_search_internal): Initialize mctxt.dfa.
+
 	* posix/regex_internal.c (re_node_set_insert): Remove unused variables.
 
 	* posix/regex_internal.h (re_dfa_t): Add syntax field.
diff --git a/posix/regex_internal.c b/posix/regex_internal.c
index 2c6c407b02..ee386709d9 100644
--- a/posix/regex_internal.c
+++ b/posix/regex_internal.c
@@ -988,45 +988,86 @@ re_node_set_add_intersect (dest, src1, src2)
      re_node_set *dest;
      const re_node_set *src1, *src2;
 {
-  int i1, i2, id;
-  if (src1->nelem > 0 && src2->nelem > 0)
+  int i1, i2, is, id, delta, sbase;
+  if (src1->nelem == 0 || src2->nelem == 0)
+    return REG_NOERROR;
+
+  /* We need dest->nelem + 2 * elems_in_intersection; this is a
+     conservative estimate.  */
+  if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
     {
-      if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
-	{
-	  int new_alloc = src1->nelem + src2->nelem + dest->nelem;
-	  int *new_elems = re_realloc (dest->elems, int, new_alloc);
-	  if (BE (new_elems == NULL, 0))
-	    return REG_ESPACE;
-	  dest->elems = new_elems;
-	  dest->alloc = new_alloc;
-	}
+      int new_alloc = src1->nelem + src2->nelem + dest->alloc;
+      int *new_elems = re_realloc (dest->elems, int, new_alloc);
+      if (BE (new_elems == NULL, 0))
+        return REG_ESPACE;
+      dest->elems = new_elems;
+      dest->alloc = new_alloc;
     }
-  else
-    return REG_NOERROR;
 
-  for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
+  /* Find the items in the intersection of SRC1 and SRC2, and copy
+     into the top of DEST those that are not already in DEST itself.  */
+  sbase = dest->nelem + src1->nelem + src2->nelem;
+  i1 = src1->nelem - 1;
+  i2 = src2->nelem - 1;
+  id = dest->nelem - 1;
+  for (;;)
     {
-      if (src1->elems[i1] > src2->elems[i2])
+      if (src1->elems[i1] == src2->elems[i2])
 	{
-	  ++i2;
-	  continue;
+	  /* Try to find the item in DEST.  Maybe we could binary search?  */
+	  while (id >= 0 && dest->elems[id] > src1->elems[i1])
+	    --id;
+
+          if (id < 0 || dest->elems[id] != src1->elems[i1])
+            dest->elems[--sbase] = src1->elems[i1];
+
+	  if (--i1 < 0 || --i2 < 0)
+	    break;
 	}
-      if (src1->elems[i1] == src2->elems[i2])
+
+      /* Lower the highest of the two items.  */
+      else if (src1->elems[i1] < src2->elems[i2])
 	{
-	  while (id < dest->nelem && dest->elems[id] < src2->elems[i2])
-	    ++id;
-	  if (id < dest->nelem && dest->elems[id] == src2->elems[i2])
-	    ++id;
-	  else
-	    {
-	      memmove (dest->elems + id + 1, dest->elems + id,
-		       sizeof (int) * (dest->nelem - id));
-	      dest->elems[id++] = src2->elems[i2++];
-	      ++dest->nelem;
-	    }
+	  if (--i2 < 0)
+	    break;
+	}
+      else
+	{
+	  if (--i1 < 0)
+	    break;
 	}
-      ++i1;
     }
+
+  id = dest->nelem - 1;
+  is = dest->nelem + src1->nelem + src2->nelem - 1;
+  delta = is - sbase + 1;
+
+  /* Now copy.  When DELTA becomes zero, the remaining
+     DEST elements are already in place; this is more or
+     less the same loop that is in re_node_set_merge.  */
+  dest->nelem += delta;
+  if (delta > 0 && id >= 0)
+    for (;;)
+      {
+        if (dest->elems[is] > dest->elems[id])
+          {
+            /* Copy from the top.  */
+            dest->elems[id + delta--] = dest->elems[is--];
+            if (delta == 0)
+              break;
+          }
+        else
+          {
+            /* Slide from the bottom.  */
+            dest->elems[id + delta] = dest->elems[id];
+            if (--id < 0)
+              break;
+          }
+      }
+
+  /* Copy remaining SRC elements.  */
+  memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int));
+
   return REG_NOERROR;
 }
 
@@ -1091,10 +1132,10 @@ re_node_set_merge (dest, src)
      re_node_set *dest;
      const re_node_set *src;
 {
-  int si, di;
+  int is, id, sbase, delta;
   if (src == NULL || src->nelem == 0)
     return REG_NOERROR;
-  if (dest->alloc < src->nelem + dest->nelem)
+  if (dest->alloc < 2 * src->nelem + dest->nelem)
     {
       int new_alloc = 2 * (src->nelem + dest->alloc);
       int *new_buffer = re_realloc (dest->elems, int, new_alloc);
@@ -1104,52 +1145,65 @@ re_node_set_merge (dest, src)
       dest->alloc = new_alloc;
     }
 
-  for (si = 0, di = 0 ; si < src->nelem && di < dest->nelem ;)
+  if (BE (dest->nelem == 0, 0))
     {
-      int cp_from, ncp, mid, right, src_elem = src->elems[si];
-      /* Binary search the spot we will add the new element.  */
-      right = dest->nelem;
-      while (di < right)
-	{
-	  mid = (di + right) / 2;
-	  if (dest->elems[mid] < src_elem)
-	    di = mid + 1;
-	  else
-	    right = mid;
-	}
-      if (di >= dest->nelem)
-	break;
+      dest->nelem = src->nelem;
+      memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
+      return REG_NOERROR;
+    }
 
-      if (dest->elems[di] == src_elem)
-	{
-	  /* Skip since, DEST already has the element.  */
-	  ++di;
-	  ++si;
-	  continue;
-	}
+  /* Copy into the top of DEST the items of SRC that are not
+     found in DEST.  Maybe we could binary search in DEST?  */
+  for (sbase = dest->nelem + 2 * src->nelem,
+       is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
+    {
+      if (dest->elems[id] == src->elems[is])
+        is--, id--;
+      else if (dest->elems[id] < src->elems[is])
+        dest->elems[--sbase] = src->elems[is--];
+      else /* if (dest->elems[id] > src->elems[is]) */
+        --id;
+    }
 
-      /* Skip the src elements which are less than dest->elems[di].  */
-      cp_from = si;
-      while (si < src->nelem && src->elems[si] < dest->elems[di])
-	++si;
-      /* Copy these src elements.  */
-      ncp = si - cp_from;
-      memmove (dest->elems + di + ncp, dest->elems + di,
-	       sizeof (int) * (dest->nelem - di));
-      memcpy (dest->elems + di, src->elems + cp_from,
-	      sizeof (int) * ncp);
-      /* Update counters.  */
-      di += ncp;
-      dest->nelem += ncp;
+  if (is >= 0)
+    {
+      /* If DEST is exhausted, the remaining items of SRC must be unique.  */
+      sbase -= is + 1;
+      memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int));
     }
 
-  /* Copy remaining src elements.  */
-  if (si < src->nelem)
+  id = dest->nelem - 1;
+  is = dest->nelem + 2 * src->nelem - 1;
+  delta = is - sbase + 1;
+  if (delta == 0)
+    return REG_NOERROR;
+
+  /* Now copy.  When DELTA becomes zero, the remaining
+     DEST elements are already in place.  */
+  dest->nelem += delta;
+  for (;;)
     {
-      memcpy (dest->elems + di, src->elems + si,
-	      sizeof (int) * (src->nelem - si));
-      dest->nelem += src->nelem - si;
+      if (dest->elems[is] > dest->elems[id])
+        {
+	  /* Copy from the top.  */
+          dest->elems[id + delta--] = dest->elems[is--];
+	  if (delta == 0)
+	    break;
+	}
+      else
+        {
+          /* Slide from the bottom.  */
+          dest->elems[id + delta] = dest->elems[id];
+	  if (--id < 0)
+	    {
+	      /* Copy remaining SRC elements.  */
+	      memcpy (dest->elems, dest->elems + sbase,
+	              delta * sizeof (int));
+	      break;
+	    }
+	}
     }
+
   return REG_NOERROR;
 }
 
@@ -1196,8 +1250,8 @@ re_node_set_insert (set, elem)
   if (elem < set->elems[0])
     {
       idx = 0;
-      memmove (set->elems + 1, set->elems,
-	       sizeof (int) * set->nelem);
+      for (idx = set->nelem; idx > 0; idx--)
+        set->elems[idx] = set->elems[idx - 1];
     }
   else
     {
@@ -1221,7 +1275,7 @@ re_node_set_compare (set1, set2)
   int i;
   if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
     return 0;
-  for (i = 0 ; i < set1->nelem ; i++)
+  for (i = set1->nelem ; --i >= 0 ; )
     if (set1->elems[i] != set2->elems[i])
       return 0;
   return 1;
@@ -1259,10 +1313,9 @@ re_node_set_remove_at (set, idx)
 {
   if (idx < 0 || idx >= set->nelem)
     return;
-  if (idx < set->nelem - 1)
-    memmove (set->elems + idx, set->elems + idx + 1,
-	     sizeof (int) * (set->nelem - idx - 1));
   --set->nelem;
+  for (; idx < set->nelem; idx++)
+    set->elems[idx] = set->elems[idx + 1];
 }