about summary refs log tree commit diff
path: root/misc/tsearch.c
diff options
context:
space:
mode:
Diffstat (limited to 'misc/tsearch.c')
-rw-r--r--misc/tsearch.c612
1 files changed, 510 insertions, 102 deletions
diff --git a/misc/tsearch.c b/misc/tsearch.c
index 6af6536a72..466536bf34 100644
--- a/misc/tsearch.c
+++ b/misc/tsearch.c
@@ -1,5 +1,6 @@
-/* Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+/* Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
+   Contributed by Bernd Schmidt <crux@Pool.Informatik.RWTH-Aachen.DE>, 1997.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public License as
@@ -16,175 +17,584 @@
    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
    Boston, MA 02111-1307, USA.  */
 
-/* Tree search generalized from Knuth (6.2.2) Algorithm T just like
-   the AT&T man page says.
-
-   The node_t structure is for internal use only, lint doesn't grok it.
-
-   Written by reading the System V Interface Definition, not the code.
+/* Tree search for red/black trees.
+   The algorithm for adding nodes is taken from one of the many "Algorithms"
+   books by Robert Sedgewick, although the implementation differs.
+   The algorithm for deleting nodes can probably be found in a book named
+   "Introduction to Algorithms" by Cormen/Leiserson/Rivest.  At least that's
+   the book that my professor took most algorithms from during the "Data
+   Structures" course...
 
    Totally public domain.  */
-/*LINTLIBRARY*/
+
+/* Red/black trees are binary trees in which the edges are colored either red
+   or black.  They have the following properties:
+   1. The number of black edges on every path from the root to a leaf is
+      constant.
+   2. No two red edges are adjacent.
+   Therefore there is an upper bound on the length of every path, it's
+   O(log n) where n is the number of nodes in the tree.  No path can be longer
+   than 1+2*P where P is the length of the shortest path in the tree.
+   Useful for the implementation:
+   3. If one of the children of a node is NULL, then the other one is red
+      (if it exists).
+
+   In the implementation, not the edges are colored, but the nodes.  The color
+   interpreted as the color of the edge leading to this node.  The color is
+   meaningless for the root node, but we color the root node black for
+   convenience.  All added nodes are red initially.
+
+   Adding to a red/black tree is rather easy.  The right place is searched
+   with a usual binary tree search.  Additionally, whenever a node N is
+   reached that has two red successors, the successors are colored black and
+   the node itself colored red.  This moves red edges up the tree where they
+   pose less of a problem once we get to really insert the new node.  Changing
+   N's color to red may violate rule 2, however, so rotations may become
+   necessary to restore the invariants.  Adding a new red leaf may violate
+   the same rule, so afterwards an additional check is run and the tree
+   possibly rotated.
+
+   Deleting is hairy.  There are mainly two nodes involved: the node to be
+   deleted (n1), and another node that is to be unchained from the tree (n2).
+   If n1 has a successor (the node with a smallest key that is larger than
+   n1), then the successor becomes n2 and its contents are copied into n1,
+   otherwise n1 becomes n2.
+   Unchaining a node may violate rule 1: if n2 is black, one subtree is
+   missing one black edge afterwards.  The algorithm must try to move this
+   error upwards towards the root, so that the subtree that does not have
+   enough black edges becomes the whole tree.  Once that happens, the error
+   has disappeared.  It may not be necessary to go all the way up, since it
+   is possible that rotations and recoloring can fix the error before that.
+
+   Although the deletion algorithm must walk upwards through the tree, we
+   do not store parent pointers in the nodes.  Instead, delete allocates a
+   small array of parent pointers and fills it while descending the tree.
+   Since we know that the length of a path is O(log n), where n is the number
+   of nodes, this is likely to use less memory.  */
+
+/* Tree rotations look like this:
+      A                C
+     / \              / \
+    B   C            A   G
+   / \ / \  -->     / \
+   D E F G         B   F
+                  / \
+                 D   E
+
+   In this case, A has been rotated left.  This preserves the ordering of the
+   binary tree.  */
 
 #include <stdlib.h>
 #include <search.h>
 
-/* This routine is not very bad.  It makes many assumptions about
-   the compiler. It assumes that the first field in the node must be
-   the "key" field, which points to the datum. It is very tricky
-   stuff. H.J.  */
-
 typedef struct node_t
 {
+  /* Callers expect this to be the first element in the structure - do not
+     move!  */
   const void *key;
   struct node_t *left;
   struct node_t *right;
+  unsigned int red:1;
+} *node;
+
+#undef DEBUGGING
+
+#ifdef DEBUGGING
+
+/* Routines to check tree invariants.  */
+
+#include <assert.h>
+
+#define CHECK_TREE(a) check_tree(a)
+
+static void
+check_tree_recurse (node p, int d_sofar, int d_total)
+{
+  if (p == NULL)
+    {
+      assert (d_sofar == d_total);
+      return;
+    }
+
+  check_tree_recurse (p->left, d_sofar + (p->left && !p->left->red), d_total);
+  check_tree_recurse (p->right, d_sofar + (p->right && !p->right->red), d_total);
+  if (p->left)
+    assert (!(p->left->red && p->red));
+  if (p->right)
+    assert (!(p->right->red && p->red));
+}
+
+static void
+check_tree (node root)
+{
+  int cnt = 0;
+  node p;
+  if (root == NULL)
+    return;
+  root->red = 0;
+  for(p = root->left; p; p = p->left)
+    cnt += !p->red;
+  check_tree_recurse (root, 0, cnt);
 }
-node;
 
-/* Prototype fpr local function.  */
-static void trecurse __P ((const void *vroot, __action_fn_t action, int level));
 
+#else
 
-/* find or insert datum into search tree.
-char 	*key;		 key to be located
-node	**rootp;	 address of tree root
-int	(*compar)();	 ordering function
-*/
+#define CHECK_TREE(a)
+
+#endif
+
+/* Possibly "split" a node with two red successors, and/or fix up two red
+   edges in a row.  ROOTP is a pointer to the lowest node we visited, PARENTP
+   and GPARENTP pointers to its parent/grandparent.  P_R and GP_R contain the
+   comparison values that determined which way was taken in the tree to reach
+   ROOTP.  MODE is 1 if we need not do the split, but must check for two red
+   edges between GPARENTP and ROOTP.  */
+static void
+maybe_split_for_insert (node *rootp, node *parentp, node *gparentp,
+			int p_r, int gp_r, int mode)
+{
+  node root = *rootp;
+  node *rp, *lp;
+  rp = &(*rootp)->right;
+  lp = &(*rootp)->left;
+
+  /* See if we have to split this node (both successors red).  */
+  if (mode == 1
+      || ((*rp) != NULL && (*lp) != NULL && (*rp)->red && (*lp)->red))
+    {
+      /* This node becomes red, its successors black.  */
+      root->red = 1;
+      if (*rp)
+	(*rp)->red = 0;
+      if (*lp)
+	(*lp)->red = 0;
+
+      /* If the parent of this node is also red, we have to do
+	 rotations.  */
+      if (parentp != NULL && (*parentp)->red)
+	{
+	  node gp = *gparentp;
+	  node p = *parentp;
+	  /* There are two main cases:
+	     1. The edge types (left or right) of the two red edges differ.
+	     2. Both red edges are of the same type.
+	     There exist two symmetries of each case, so there is a total of
+	     4 cases.  */
+	  if ((p_r > 0) != (gp_r > 0))
+	    {
+	      /* Put the child at the top of the tree, with its parent
+		 and grandparent as successors.  */
+	      p->red = 1;
+	      gp->red = 1;
+	      root->red = 0;
+	      if (p_r < 0)
+		{
+		  /* Child is left of parent.  */
+		  p->left = *rp;
+		  *rp = p;
+		  gp->right = *lp;
+		  *lp = gp;
+		}
+	      else
+		{
+		  /* Child is right of parent.  */
+		  p->right = *lp;
+		  *lp = p;
+		  gp->left = *rp;
+		  *rp = gp;
+		}
+	      *gparentp = root;
+	    }
+	  else
+	    {
+	      *gparentp = *parentp;
+	      /* Parent becomes the top of the tree, grandparent and
+		 child are its successors.  */
+	      p->red = 0;
+	      gp->red = 1;
+	      if (p_r < 0)
+		{
+		  /* Left edges.  */
+		  gp->left = p->right;
+		  p->right = gp;
+		}
+	      else
+		{
+		  /* Right edges.  */
+		  gp->right = p->left;
+		  p->left = gp;
+		}
+	    }
+	}
+    }
+}
+
+/* Find or insert datum into search tree.
+   KEY is the key to be located, ROOTP is the address of tree root,
+   COMPAR the ordering function.  */
 void *
-__tsearch (key, vrootp, compar)
-     const void *key;
-     void **vrootp;
-     __compar_fn_t compar;
+__tsearch (const void *key, void **vrootp, __compar_fn_t compar)
 {
-  node *q;
-  node **rootp = (node **) vrootp;
+  node q;
+  node *parentp = NULL, *gparentp = NULL;
+  node *rootp = (node *) vrootp;
+  node *nextp;
+  int r = 0, p_r = 0, gp_r = 0; /* No they might not, Mr Compiler.  */
 
   if (rootp == NULL)
     return NULL;
 
-  while (*rootp != NULL)		/* Knuth's T1: */
-    {
-      int r;
+  /* This saves some additional tests below.  */
+  if (*rootp != NULL)
+    (*rootp)->red = 0;
+
+  CHECK_TREE (*rootp);
 
-      r = (*compar) (key, (*rootp)->key);
-      if (r == 0)			/* T2: */
-	return *rootp;			/* we found it! */
-      rootp = (r < 0)
-	      ? &(*rootp)->left		/* T3: follow left branch */
-	      : &(*rootp)->right;	/* T4: follow right branch */
+  nextp = rootp;
+  while (*nextp != NULL)
+    {
+      node root = *rootp;
+      r = (*compar) (key, root->key);
+      if (r == 0)
+	return root;
+
+      maybe_split_for_insert (rootp, parentp, gparentp, p_r, gp_r, 0);
+      /* If that did any rotations, parentp and gparentp are now garbage.
+	 That doesn't matter, because the values they contain are never
+	 used again in that case.  */
+
+      nextp = r < 0 ? &root->left : &root->right;
+      if (*nextp == NULL)
+	break;
+
+      gparentp = parentp;
+      parentp = rootp;
+      rootp = nextp;
+
+      gp_r = p_r;
+      p_r = r;
     }
 
-  q = (node *) malloc (sizeof (node));	/* T5: key not found */
-  if (q != NULL)			/* make new node */
+  q = (struct node_t *) malloc (sizeof (struct node_t));
+  if (q != NULL)
     {
-      *rootp = q;			/* link new node to old */
+      *nextp = q;			/* link new node to old */
       q->key = key;			/* initialize new node */
+      q->red = 1;
       q->left = q->right = NULL;
     }
+  if (nextp != rootp)
+    /* There may be two red edges in a row now, which we must avoid by
+       rotating the tree.  */
+    maybe_split_for_insert (nextp, rootp, parentp, r, p_r, 1);
 
   return q;
 }
 weak_alias (__tsearch, tsearch)
 
 
+/* Find datum in search tree.
+   KEY is the key to be located, ROOTP is the address of tree root,
+   COMPAR the ordering function.  */
 void *
 __tfind (key, vrootp, compar)
      const void *key;
      const void **vrootp;
      __compar_fn_t compar;
 {
-  node **rootp = (node **) vrootp;
+  node *rootp = (node *) vrootp;
 
   if (rootp == NULL)
     return NULL;
 
-  while (*rootp != NULL)		/* Knuth's T1: */
+  CHECK_TREE (*rootp);
+
+  while (*rootp != NULL)
     {
+      node root = *rootp;
       int r;
 
-      r = (*compar)(key, (*rootp)->key);
-      if (r == 0)			/* T2: */
-	return *rootp;			/* we found it! */
+      r = (*compar) (key, root->key);
+      if (r == 0)
+	return root;
 
-      rootp = (r < 0)
-	      ? &(*rootp)->left		/* T3: follow left branch */
-	      : &(*rootp)->right;	/* T4: follow right branch */
+      rootp = r < 0 ? &root->left : &root->right;
     }
-    return NULL;
+  return NULL;
 }
 weak_alias (__tfind, tfind)
 
 
-/* delete node with given key
-char	*key;		key to be deleted
-node	**rootp;	address of the root of tree
-int	(*compar)();	comparison function
-*/
+/* Delete node with given key.
+   KEY is the key to be deleted, ROOTP is the address of the root of tree,
+   COMPAR the comparison function.  */
 void *
-__tdelete (key, vrootp, compar)
-     const void *key;
-     void **vrootp;
-     __compar_fn_t compar;
+__tdelete (const void *key, void **vrootp, __compar_fn_t compar)
 {
-  node *p;
-  node *q;
-  node *r;
+  node p, q, r, retval;
   int cmp;
-  node **rootp = (node **) vrootp;
+  node *rootp = (node *) vrootp;
+  node root, unchained;
+  /* Stack of nodes so we remember the parents without recursion.  It's
+     _very_ unlikely that there are paths longer than 40 nodes.  The tree
+     would need to have around 250.000 nodes.  */
+  int stacksize = 40;
+  int sp = 0;
+  node **nodestack = alloca (sizeof (node *) * stacksize);
 
-  if (rootp == NULL || (p = *rootp) == NULL)
+  if (rootp == NULL)
     return NULL;
+  p = *rootp;
+  if (p == NULL)
+    return NULL;
+
+  CHECK_TREE (p);
 
   while ((cmp = (*compar) (key, (*rootp)->key)) != 0)
     {
+      if (sp == stacksize)
+	{
+	  node **newstack;
+	  stacksize += 20;
+	  newstack = alloca (sizeof (node *) * stacksize);
+	  memcpy (newstack, nodestack, sp * sizeof (node *));
+	  nodestack = newstack;
+	}
+
+      nodestack[sp++] = rootp;
       p = *rootp;
-      rootp = (cmp < 0)
-	      ? &(*rootp)->left		/* follow left branch */
-	      : &(*rootp)->right;	/* follow right branch */
+      rootp = ((cmp < 0)
+	       ? &(*rootp)->left
+	       : &(*rootp)->right);
       if (*rootp == NULL)
-	return NULL;			/* key not found */
+	return NULL;
     }
 
-  r = (*rootp)->right;			/* D1: */
-  q = (*rootp)->left;
-  if (q == NULL)			/* Left NULL? */
-    q = r;
-  else if (r != NULL)			/* Right link is NULL? */
+  /* This is bogus if the node to be deleted is the root... this routine
+     really should return an integer with 0 for success, -1 for failure
+     and errno = ESRCH or something.  */
+  retval = p;
+
+  /* We don't unchain the node we want to delete. Instead, we overwrite
+     it with its successor and unchain the successor.  If there is no
+     successor, we really unchain the node to be deleted.  */
+
+  root = *rootp;
+
+  r = root->right;
+  q = root->left;
+
+  if (q == NULL || r == NULL)
+    unchained = root;
+  else
     {
-      if (r->left == NULL)		/* D2: Find successor */
+      node *parent = rootp, *up = &root->right;
+      for (;;)
 	{
-	  r->left = q;
-	  q = r;
+	  if (sp == stacksize)
+	    {
+	      node **newstack;
+	      stacksize += 20;
+	      newstack = alloca (sizeof (node *) * stacksize);
+	      memcpy (newstack, nodestack, sp * sizeof (node *));
+	      nodestack = newstack;
+	    }
+	  nodestack[sp++] = parent;
+	  parent = up;
+	  if ((*up)->left == NULL)
+	    break;
+	  up = &(*up)->left;
 	}
+      unchained = *up;
+    }
+
+  /* We know that either the left or right successor of UNCHAINED is NULL.
+     R becomes the other one, it is chained into the parent of UNCHAINED.  */
+  r = unchained->left;
+  if (r == NULL)
+    r = unchained->right;
+  if (sp == 0)
+    *rootp = r;
+  else
+    {
+      q = *nodestack[sp-1];
+      if (unchained == q->right)
+	q->right = r;
       else
-	{				/* D3: Find (struct node_t *)0 link */
-	  for (q = r->left; q->left != NULL; q = r->left)
-	    r = q;
-	  r->left = q->right;
-	  q->left = (*rootp)->left;
-	  q->right = (*rootp)->right;
+	q->left = r;
+    }
+
+  if (unchained != root)
+    root->key = unchained->key;
+  if (!unchained->red)
+    {
+      /* Now we lost a black edge, which means that the number of black
+	 edges on every path is no longer constant.  We must balance the
+	 tree.  */
+      /* NODESTACK now contains all parents of R.  R is likely to be NULL
+	 in the first iteration.  */
+      /* NULL nodes are considered black throughout - this is necessary for
+	 correctness.  */
+      while (sp > 0 && (r == NULL || !r->red))
+	{
+	  node *pp = nodestack[sp - 1];
+	  p = *pp;
+	  /* Two symmetric cases.  */
+	  if (r == p->left)
+	    {
+	      /* Q is R's brother, P is R's parent.  The subtree with root
+		 R has one black edge less than the subtree with root Q.  */
+	      q = p->right;
+	      if (q != NULL && q->red)
+		{
+		  /* If Q is red, we know that P is black. We rotate P left
+		     so that Q becomes the top node in the tree, with P below
+		     it.  P is colored red, Q is colored black.
+		     This action does not change the black edge count for any
+		     leaf in the tree, but we will be able to recognize one
+		     of the following situations, which all require that Q
+		     is black.  */
+		  q->red = 0;
+		  p->red = 1;
+		  /* Left rotate p.  */
+		  p->right = q->left;
+		  q->left = p;
+		  *pp = q;
+		  /* Make sure pp is right if the case below tries to use
+		     it.  */
+		  nodestack[sp++] = pp = &q->left;
+		  q = p->right;
+		}
+	      /* We know that Q can't be NULL here.  We also know that Q is
+		 black.  */
+	      if ((q->left == NULL || !q->left->red)
+		  && (q->right == NULL || !q->right->red))
+		{
+		  /* Q has two black successors.  We can simply color Q red.
+		     The whole subtree with root P is now missing one black
+		     edge.  Note that this action can temporarily make the
+		     tree invalid (if P is red).  But we will exit the loop
+		     in that case and set P black, which both makes the tree
+		     valid and also makes the black edge count come out
+		     right.  If P is black, we are at least one step closer
+		     to the root and we'll try again the next iteration.  */
+		  q->red = 1;
+		  r = p;
+		}
+	      else
+		{
+		  /* Q is black, one of Q's successors is red.  We can
+		     repair the tree with one operation and will exit the
+		     loop afterwards.  */
+		  if (q->right == NULL || !q->right->red)
+		    {
+		      /* The left one is red.  We perform the same action as
+			 in maybe_split_for_insert where two red edges are
+			 adjacent but point in different directions:
+			 Q's left successor (let's call it Q2) becomes the
+			 top of the subtree we are looking at, its parent (Q)
+			 and grandparent (P) become its successors. The former
+			 successors of Q2 are placed below P and Q.
+			 P becomes black, and Q2 gets the color that P had.
+			 This changes the black edge count only for node R and
+			 its successors.  */
+		      node q2 = q->left;
+		      q2->red = p->red;
+		      p->right = q2->left;
+		      q->left = q2->right;
+		      q2->right = q;
+		      q2->left = p;
+		      *pp = q2;
+		      p->red = 0;
+		    }
+		  else
+		    {
+		      /* It's the right one.  Rotate P left. P becomes black,
+			 and Q gets the color that P had.  Q's right successor
+			 also becomes black.  This changes the black edge
+			 count only for node R and its successors.  */
+		      q->red = p->red;
+		      p->red = 0;
+
+		      q->right->red = 0;
+
+		      /* left rotate p */
+		      p->right = q->left;
+		      q->left = p;
+		      *pp = q;
+		    }
+
+		  /* We're done.  */
+		  sp = 1;
+		  r = NULL;
+		}
+	    }
+	  else
+	    {
+	      /* Comments: see above.  */
+	      q = p->left;
+	      if (q != NULL && q->red)
+		{
+		  q->red = 0;
+		  p->red = 1;
+		  p->left = q->right;
+		  q->right = p;
+		  *pp = q;
+		  nodestack[sp++] = pp = &q->right;
+		  q = p->left;
+		}
+	      if ((q->right == NULL || !q->right->red)
+		       && (q->left == NULL || !q->left->red))
+		{
+		  q->red = 1;
+		  r = p;
+		}
+	      else
+		{
+		  if (q->left == NULL || !q->left->red)
+		    {
+		      node q2 = q->right;
+		      q2->red = p->red;
+		      p->left = q2->right;
+		      q->right = q2->left;
+		      q2->left = q;
+		      q2->right = p;
+		      *pp = q2;
+		      p->red = 0;
+		    }
+		  else
+		    {
+		      q->red = p->red;
+		      p->red = 0;
+		      q->left->red = 0;
+		      p->left = q->right;
+		      q->right = p;
+		      *pp = q;
+		    }
+		  sp = 1;
+		  r = NULL;
+		}
+	    }
+	  --sp;
 	}
+      if (r != NULL)
+	r->red = 0;
     }
-  free ((struct node_t *) *rootp);	/* D4: Free node */
-  *rootp = q;				/* link parent to new node */
-  return p;
+
+  free (unchained);
+  return retval;
 }
 weak_alias (__tdelete, tdelete)
 
 
-/* Walk the nodes of a tree
-node	*root;		Root of the tree to be walked
-void	(*action)();	Function to be called at each node
-int	level;
-*/
+/* Walk the nodes of a tree.
+   ROOT is the root of the tree to be walked, ACTION the function to be
+   called at each node.  LEVEL is the level of ROOT in the whole tree.  */
 static void
-trecurse (vroot, action, level)
-     const void *vroot;
-     __action_fn_t action;
-     int level;
+trecurse (const void *vroot, __action_fn_t action, int level)
 {
-  node *root = (node *) vroot;
+  node root = (node ) vroot;
 
   if (root->left == NULL && root->right == NULL)
     (*action) (root, leaf, level);
@@ -201,17 +611,15 @@ trecurse (vroot, action, level)
 }
 
 
-/* void twalk(root, action)	Walk the nodes of a tree
-node	*root;			Root of the tree to be walked
-void	(*action)();		Function to be called at each node
-PTR
-*/
+/* Walk the nodes of a tree.
+   ROOT is the root of the tree to be walked, ACTION the function to be
+   called at each node.  */
 void
-__twalk (vroot, action)
-     const void *vroot;
-     __action_fn_t action;
+__twalk (const void *vroot, __action_fn_t action)
 {
-  const node *root = (node *) vroot;
+  const node root = (node) vroot;
+
+  CHECK_TREE (root);
 
   if (root != NULL && action != NULL)
     trecurse (root, action, 0);