about summary refs log tree commit diff
path: root/nptl
diff options
context:
space:
mode:
Diffstat (limited to 'nptl')
-rw-r--r--nptl/ChangeLog26
-rw-r--r--nptl/cond-perf.c100
-rw-r--r--nptl/sysdeps/unix/sysv/linux/fork.c105
-rw-r--r--nptl/sysdeps/unix/sysv/linux/fork.h26
-rw-r--r--nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h30
-rw-r--r--nptl/sysdeps/unix/sysv/linux/libc_pthread_init.c8
-rw-r--r--nptl/sysdeps/unix/sysv/linux/register-atfork.c176
-rw-r--r--nptl/sysdeps/unix/sysv/linux/unregister-atfork.c112
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h25
9 files changed, 414 insertions, 194 deletions
diff --git a/nptl/ChangeLog b/nptl/ChangeLog
index 791d2ece5a..7bd8025850 100644
--- a/nptl/ChangeLog
+++ b/nptl/ChangeLog
@@ -1,3 +1,29 @@
+2003-05-08  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/unix/sysv/linux/fork.c: Complete rewrite of the atfork
+	handler implementation.  It is now lockless in fork().
+	* sysdeps/unix/sysv/linux/register-atfork.c: Likewise.
+	* sysdeps/unix/sysv/linux/unregister-atfork.c: Likewise.
+	* sysdeps/unix/sysv/linux/fork.h: Don't include <link.h>.  Don't
+	declare the __fork_*_lists.
+	(struct fork_handler): Include pointers to all three functions.
+	Add next, refcntr and need_signal elements.
+	(__fork_handlers): New declaration.
+	(__register_atfork_malloc): Remove declaration.
+	(HAVE_register_atfork_malloc): Remove definition.
+	* sysdeps/unix/sysv/linux/libc_pthread_init.c: Remove
+	__pthread_child_handler variable.
+	(__libc_pthread_init): Use __register_atfork instead of explicitly
+	adding to the list.
+	* sysdeps/unix/sysv/linux/i386/lowlevellock.h: Define lll_futex_wait
+	and lll_futex_wake.
+	* sysdeps/unix/sysv/linux/x86_64/lowlevellock.h: Likewise.
+
+	* unwind.c (unwind_cleanup): Print error message and then abort.  This
+	function must never be reached.
+
+	* cond-perf.c: New file.
+
 2003-05-05  Ulrich Drepper  <drepper@redhat.com>
 
 	* sysdeps/i386/tls.h (TLS_INIT_TP): Include \n in error message.
diff --git a/nptl/cond-perf.c b/nptl/cond-perf.c
new file mode 100644
index 0000000000..c3305b391d
--- /dev/null
+++ b/nptl/cond-perf.c
@@ -0,0 +1,100 @@
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+static pthread_cond_t cond1 = PTHREAD_COND_INITIALIZER;
+static pthread_mutex_t mut1 = PTHREAD_MUTEX_INITIALIZER;
+
+static pthread_cond_t cond2 = PTHREAD_COND_INITIALIZER;
+static pthread_mutex_t mut2 = PTHREAD_MUTEX_INITIALIZER;
+
+static bool last_round;
+static int ntogo;
+static bool alldone;
+
+
+/* Consumer: count down NTOGO each round, then sleep on COND1.  */
+static void *
+cons (void *arg)
+{
+  pthread_mutex_lock (&mut1);
+  do
+    {
+      if (--ntogo == 0)
+	{
+	  /* Signal under MUT2 so that main() cannot miss the wakeup.  */
+	  pthread_mutex_lock (&mut2);
+	  alldone = true;
+	  pthread_cond_signal (&cond2);
+	  pthread_mutex_unlock (&mut2);
+	}
+      pthread_cond_wait (&cond1, &mut1);
+    }
+  while (! last_round);
+  pthread_mutex_unlock (&mut1);
+  return NULL;
+}
+
+
+/* Benchmark driver.  Options: -n THREADS, -r ROUNDS, and -k to keep
+   MUT1 locked across the broadcast.  Always returns 0.  */
+int
+main (int argc, char *argv[])
+{
+  int opt;
+  int err;
+  int nthreads = 10;
+  int nrounds = 100;
+  bool keeplock = false;
+
+  while ((opt = getopt (argc, argv, "n:r:k")) != -1)
+    switch (opt)
+      {
+      case 'n':
+	nthreads = atol (optarg);
+	break;
+      case 'r':
+	nrounds = atol (optarg);
+	break;
+      case 'k':
+	keeplock = true;
+	break;
+      }
+
+  /* Every consumer decrements NTOGO once per round.  */
+  ntogo = nthreads;
+  pthread_t th[nthreads];
+  int i;
+  for (i = 0; i < nthreads; ++i)
+    if ((err = pthread_create (&th[i], NULL, cons, (void *) (long) i)) != 0)
+      printf ("pthread_create: %s\n", strerror (err));
+
+  for (i = 0; i < nrounds; ++i)
+    {
+      /* Wait until the last consumer has reported in.  */
+      pthread_mutex_lock (&mut2);
+      while (! alldone)
+	pthread_cond_wait (&cond2, &mut2);
+      pthread_mutex_unlock (&mut2);
+
+      /* Taking MUT1 ensures that consumer is blocked on COND1.  */
+      pthread_mutex_lock (&mut1);
+      if (! keeplock)
+	pthread_mutex_unlock (&mut1);
+      ntogo = nthreads;
+      alldone = false;
+      if (i + 1 >= nrounds)
+	last_round = true;
+      pthread_cond_broadcast (&cond1);
+      if (keeplock)
+	pthread_mutex_unlock (&mut1);
+    }
+
+  for (i = 0; i < nthreads; ++i)
+    if ((err = pthread_join (th[i], NULL)) != 0)
+      printf ("pthread_join: %s\n", strerror (err));
+  return 0;
+}
diff --git a/nptl/sysdeps/unix/sysv/linux/fork.c b/nptl/sysdeps/unix/sysv/linux/fork.c
index 94d8995205..8bca6b4ab3 100644
--- a/nptl/sysdeps/unix/sysv/linux/fork.c
+++ b/nptl/sysdeps/unix/sysv/linux/fork.c
@@ -28,15 +28,15 @@
 #include <hp-timing.h>
 #include <ldsodefs.h>
 #include <bits/stdio-lock.h>
+#include <atomic.h>
 
 
 unsigned long int *__fork_generation_pointer;
 
 
-lll_lock_t __fork_lock = LLL_LOCK_INITIALIZER;
-LIST_HEAD (__fork_prepare_list);
-LIST_HEAD (__fork_parent_list);
-LIST_HEAD (__fork_child_list);
+
+/* The singly linked list of all currently registered fork handlers.  */
+struct fork_handler *__fork_handlers;
 
 
 static void
@@ -53,20 +53,62 @@ pid_t
 __libc_fork (void)
 {
   pid_t pid;
-  list_t *runp;
-
-  /* Get the lock so that the set of registered handlers is not
-     inconsistent or changes beneath us.  */
-  lll_lock (__fork_lock);
-
-  /* Run all the registered preparation handlers.  In reverse order.  */
-  list_for_each_prev (runp, &__fork_prepare_list)
+  struct used_handler
+  {
+    struct fork_handler *handler;
+    struct used_handler *next;
+  } *allp = NULL;
+
+  /* Run all the registered preparation handlers.  In reverse order.
+     While doing this we build up a list of all the entries.  */
+  struct fork_handler *runp;
+  while ((runp = __fork_handlers) != NULL)
     {
-      struct fork_handler *curp;
-
-      curp = list_entry (runp, struct fork_handler, list);
+      unsigned int oldval = runp->refcntr;
+
+      if (oldval == 0)
+	/* This means some other thread removed the list just after
+	   the pointer has been loaded.  Try again.  Either the list
+	   is empty or we can retry it.  */
+	continue;
+
+      /* Bump the counter of the entry we loaded, not of the current head.  */
+      if (atomic_compare_and_exchange_bool_acq (&runp->refcntr,
+						oldval + 1, oldval))
+	/* The value changed, try again.  */
+	continue;
+
+      /* We bumped the reference counter for the first entry in the
+	 list.  That means that none of the following entries will
+	 just go away.  The unloading code works in the order of the
+	 list.
+
+         While executing the registered handlers we are building a
+         list of all the entries so that we can go backward later on.  */
+      while (1)
+	{
+	  /* Execute the handler if there is one.  */
+	  if (runp->prepare_handler != NULL)
+	    runp->prepare_handler ();
+
+	  /* Create a new element for the list.  */
+	  struct used_handler *newp
+	    = (struct used_handler *) alloca (sizeof (*newp));
+	  newp->handler = runp;
+	  newp->next = allp;
+	  allp = newp;
+
+	  /* Advance to the next handler.  */
+	  runp = runp->next;
+	  if (runp == NULL)
+	    break;
+
+	  /* Bump the reference counter for the next entry.  */
+	  atomic_increment (&runp->refcntr);
+	}
 
-      curp->handler ();
+      /* We are done.  */
+      break;
     }
 
   _IO_list_lock ();
@@ -107,13 +149,22 @@ __libc_fork (void)
       _IO_list_resetlock ();
 
       /* Run the handlers registered for the child.  */
-      list_for_each (runp, &__fork_child_list)
+      while (allp != NULL)
 	{
-	  struct fork_handler *curp;
+	  if (allp->handler->child_handler != NULL)
+	    allp->handler->child_handler ();
 
-	  curp = list_entry (runp, struct fork_handler, list);
+	  /* Note that we do not have to wake any possible waiter.
+	     This is the only thread in the new process.  */
+	  --allp->handler->refcntr;
 
-	  curp->handler ();
+	  /* XXX We could at this point look through the object pool
+	     and mark all objects not on the __fork_handlers list as
+	     unused.  This is necessary in case the fork() happened
+	     while another thread called dlclose() and that call had
+	     to create a new list.  */
+
+	  allp = allp->next;
 	}
 
       /* Initialize the fork lock.  */
@@ -127,17 +178,17 @@ __libc_fork (void)
       _IO_list_unlock ();
 
       /* Run the handlers registered for the parent.  */
-      list_for_each (runp, &__fork_parent_list)
+      while (allp != NULL)
 	{
-	  struct fork_handler *curp;
+	  if (allp->handler->parent_handler != NULL)
+	    allp->handler->parent_handler ();
 
-	  curp = list_entry (runp, struct fork_handler, list);
+	  if (atomic_decrement_and_test (&allp->handler->refcntr)
+	      && allp->handler->need_signal)
+	    lll_futex_wake (allp->handler->refcntr, 1);
 
-	  curp->handler ();
+	  allp = allp->next;
 	}
-
-      /* Release the for lock.  */
-      lll_unlock (__fork_lock);
     }
 
   return pid;
diff --git a/nptl/sysdeps/unix/sysv/linux/fork.h b/nptl/sysdeps/unix/sysv/linux/fork.h
index e59ae87964..bcdf6217c1 100644
--- a/nptl/sysdeps/unix/sysv/linux/fork.h
+++ b/nptl/sysdeps/unix/sysv/linux/fork.h
@@ -17,7 +17,6 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
-#include <list.h>
 #include <lowlevellock.h>
 
 /* The fork generation counter, defined in libpthread.  */
@@ -26,23 +25,24 @@ extern unsigned long int __fork_generation attribute_hidden;
 /* Pointer to the fork generation counter in the thread library.  */
 extern unsigned long int *__fork_generation_pointer attribute_hidden;
 
-/* Lock to protect handling of fork handlers.  */
+/* Lock to protect allocation and deallocation of fork handlers.  */
 extern lll_lock_t __fork_lock attribute_hidden;
 
-/* Lists of registered fork handlers.  */
-extern list_t __fork_prepare_list attribute_hidden;
-extern list_t __fork_parent_list attribute_hidden;
-extern list_t __fork_child_list attribute_hidden;
-
-
 /* Elements of the fork handler lists.  */
 struct fork_handler
 {
-  list_t list;
-  void (*handler) (void);
+  struct fork_handler *next;
+  void (*prepare_handler) (void);
+  void (*parent_handler) (void);
+  void (*child_handler) (void);
   void *dso_handle;
+  unsigned int refcntr;
+  int need_signal;
 };
 
+/* The singly linked list of all currently registered fork handlers.  */
+extern struct fork_handler *__fork_handlers;
+
 
 /* Function to call to unregister fork handlers.  */
 extern void __unregister_atfork (void *dso_handle) attribute_hidden;
@@ -54,9 +54,3 @@ extern int __register_atfork (void (*__prepare) (void),
 			      void (*__parent) (void),
 			      void (*__child) (void),
 			      void *dso_handle);
-
-extern void __register_atfork_malloc (void (*prepare) (void),
-				      void (*parent) (void),
-				      void (*child) (void),
-				      void *dso_handle) attribute_hidden;
-#define HAVE_register_atfork_malloc
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h b/nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h
index e338cfa811..2318edfcdb 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h
+++ b/nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h
@@ -41,6 +41,36 @@
 #define LLL_MUTEX_LOCK_INITIALIZER (0)
 
 
+#define LLL_ENTER_KERNEL "int $0x80\n\t"
+
+#define lll_futex_wait(futex, val) \
+  do {									      \
+    int __ignore;							      \
+    register __typeof (val) _val asm ("edx") = (val);			      \
+    __asm __volatile ("xchgl %2, %%ebx\n\t"				      \
+		      LLL_ENTER_KERNEL					      \
+		      "xchgl %2, %%ebx"					      \
+		      : "=a" (__ignore)					      \
+		      : "0" (SYS_futex), "D" (&futex), "S" (0),		      \
+			"c" (FUTEX_WAIT), "d" (_val),			      \
+			"i" (offsetof (tcbhead_t, sysinfo)));		      \
+  } while (0)
+
+
+#define lll_futex_wake(futex, nr) \
+  do {									      \
+    int __ignore;							      \
+    register __typeof (nr) _nr asm ("edx") = (nr);			      \
+    __asm __volatile ("xchgl %2, %%ebx\n\t"				      \
+		      LLL_ENTER_KERNEL					      \
+		      "xchgl %2, %%ebx"					      \
+		      : "=a" (__ignore)					      \
+		      : "0" (SYS_futex), "D" (&futex), "c" (FUTEX_WAKE),      \
+			"d" (_nr), "i" (0),				      \
+			"i" (offsetof (tcbhead_t, sysinfo)));		      \
+  } while (0)
+
+
 /* Does not preserve %eax and %ecx.  */
 extern int __lll_mutex_lock_wait (int val, int *__futex)
      __attribute ((regparm (2))) attribute_hidden;
diff --git a/nptl/sysdeps/unix/sysv/linux/libc_pthread_init.c b/nptl/sysdeps/unix/sysv/linux/libc_pthread_init.c
index 3891501927..3d1c021819 100644
--- a/nptl/sysdeps/unix/sysv/linux/libc_pthread_init.c
+++ b/nptl/sysdeps/unix/sysv/linux/libc_pthread_init.c
@@ -27,9 +27,6 @@
 #include <bits/libc-lock.h>
 
 
-struct fork_handler __pthread_child_handler attribute_hidden;
-
-
 #ifdef TLS_MULTIPLE_THREADS_IN_TCB
 void
 #else
@@ -46,10 +43,7 @@ __libc_pthread_init (ptr, reclaim, functions)
   __fork_generation_pointer = ptr;
 
   /* Called by a child after fork.  */
-  __pthread_child_handler.handler = reclaim;
-
-  /* The fork handler needed by libpthread.  */
-  list_add_tail (&__pthread_child_handler.list, &__fork_child_list);
+  __register_atfork (NULL, NULL, reclaim, NULL);
 
 #ifdef SHARED
   /* We copy the content of the variable pointed to by the FUNCTIONS
diff --git a/nptl/sysdeps/unix/sysv/linux/register-atfork.c b/nptl/sysdeps/unix/sysv/linux/register-atfork.c
index 6dbc163175..299ae9de29 100644
--- a/nptl/sysdeps/unix/sysv/linux/register-atfork.c
+++ b/nptl/sysdeps/unix/sysv/linux/register-atfork.c
@@ -19,114 +19,92 @@
 
 #include <errno.h>
 #include <stdlib.h>
+#include <string.h>
 #include "fork.h"
 
 
-/* Defined in libc_pthread_init.c.  */
-extern struct fork_handler __pthread_child_handler attribute_hidden;
+/* Lock to protect allocation and deallocation of fork handlers.  */
+lll_lock_t __fork_lock = LLL_LOCK_INITIALIZER;
 
 
-int
-__register_atfork (prepare, parent, child, dso_handle)
-     void (*prepare) (void);
-     void (*parent) (void);
-     void (*child) (void);
-     void *dso_handle;
+/* Number of pre-allocated handler entries.  */
+#define NHANDLER 48
+
+/* Memory pool for fork handler structures.  */
+static struct fork_handler_pool
 {
-  struct fork_handler *new_prepare = NULL;
-  struct fork_handler *new_parent = NULL;
-  struct fork_handler *new_child = NULL;
+  struct fork_handler_pool *next;
+  struct fork_handler mem[NHANDLER];
+} fork_handler_pool;
 
-  if (prepare != NULL)
-    {
-      new_prepare = (struct fork_handler *) malloc (sizeof (*new_prepare));
-      if (new_prepare == NULL)
-	goto out1;
 
-      new_prepare->handler = prepare;
-      new_prepare->dso_handle = dso_handle;
-    }
+static struct fork_handler *
+fork_handler_alloc (void)
+{
+  struct fork_handler_pool *runp = &fork_handler_pool;
+  struct fork_handler *result = NULL;
+  unsigned int i;
 
-  if (parent != NULL)
+  do
     {
-      new_parent = (struct fork_handler *) malloc (sizeof (*new_parent));
-      if (new_parent == NULL)
-	goto out2;
-
-      new_parent->handler = parent;
-      new_parent->dso_handle = dso_handle;
+      /* Search for an empty entry.  */
+      for (i = 0; i < NHANDLER; ++i)
+	if (runp->mem[i].refcntr == 0)
+	  goto found;
     }
+  while ((runp = runp->next) != NULL);
 
-  if (child != NULL)
+  /* We have to allocate a new entry.  */
+  runp = (struct fork_handler_pool *) calloc (1, sizeof (*runp));
+  if (runp != NULL)
     {
-      new_child = (struct fork_handler *) malloc (sizeof (*new_child));
-      if (new_child == NULL)
-	{
-	  free (new_parent);
-	out2:
-	  free (new_prepare);
-	out1:
-	  return errno;
-	}
-
-      new_child->handler = child;
-      new_child->dso_handle = dso_handle;
+      /* Enqueue the new memory pool into the list.  */
+      runp->next = fork_handler_pool.next;
+      fork_handler_pool.next = runp;
+
+      /* We use the last entry on the page.  This means when we start
+	 searching from the front the next time we will find the first
+	 entry unused.  */
+      i = NHANDLER - 1;
+
+    found:
+      result = &runp->mem[i];
+      result->refcntr = 1;
+      result->need_signal = 0;
     }
 
-  /* Get the lock to not conflict with running forks.  */
-  lll_lock (__fork_lock);
-
-  /* Now that we have all the handlers allocate enqueue them.  */
-  if (new_prepare != NULL)
-    list_add_tail (&new_prepare->list, &__fork_prepare_list);
-  if (new_parent != NULL)
-    list_add_tail (&new_parent->list, &__fork_parent_list);
-  if (new_child != NULL)
-    list_add_tail (&new_child->list, &__fork_child_list);
-
-  /* Release the lock.  */
-  lll_unlock (__fork_lock);
-
-  return 0;
+  return result;
 }
 
 
-/* Three static memory blocks used when registering malloc.  */
-static struct fork_handler malloc_prepare;
-static struct fork_handler malloc_parent;
-static struct fork_handler malloc_child;
-
-
-void
-attribute_hidden
-__register_atfork_malloc (prepare, parent, child, dso_handle)
+int
+__register_atfork (prepare, parent, child, dso_handle)
      void (*prepare) (void);
      void (*parent) (void);
      void (*child) (void);
      void *dso_handle;
 {
-  /* Pre-fork handler.  */
-  malloc_prepare.handler = prepare;
-  malloc_prepare.dso_handle = dso_handle;
-
-  /* Parent handler.  */
-  malloc_parent.handler = parent;
-  malloc_parent.dso_handle = dso_handle;
-
-  /* Child handler.  */
-  malloc_child.handler = child;
-  malloc_child.dso_handle = dso_handle;
-
-  /* Get the lock to not conflict with running forks.  */
+  /* Get the lock to not conflict with other allocations.  */
   lll_lock (__fork_lock);
 
-  /* Now that we have all the handlers allocate enqueue them.  */
-  list_add_tail (&malloc_prepare.list, &__fork_prepare_list);
-  list_add_tail (&malloc_parent.list, &__fork_parent_list);
-  list_add_tail (&malloc_child.list, &__fork_child_list);
+  struct fork_handler *newp = fork_handler_alloc ();
+
+  if (newp != NULL)
+    {
+      /* Initialize the new record.  */
+      newp->prepare_handler = prepare;
+      newp->parent_handler = parent;
+      newp->child_handler = child;
+      newp->dso_handle = dso_handle;
+
+      newp->next = __fork_handlers;
+      __fork_handlers = newp;
+    }
 
   /* Release the lock.  */
   lll_unlock (__fork_lock);
+
+  return newp == NULL ? ENOMEM : 0;
 }
 
 
@@ -135,36 +113,22 @@ libc_freeres_fn (free_mem)
   /* Get the lock to not conflict with running forks.  */
   lll_lock (__fork_lock);
 
-  list_t *runp;
-  list_t *prevp;
-
-  list_for_each_prev_safe (runp, prevp, &__fork_prepare_list)
-    {
-      list_del (runp);
+  /* No more fork handlers.  */
+  __fork_handlers = NULL;
 
-      struct fork_handler *p = list_entry (runp, struct fork_handler, list);
-      if (p != &malloc_prepare)
-	free (p);
-    }
+  /* Free any dynamically allocated memory blocks of the object pool.  */
+  struct fork_handler_pool *runp = fork_handler_pool.next;
 
-  list_for_each_prev_safe (runp, prevp, &__fork_parent_list)
-    {
-      list_del (runp);
+  memset (&fork_handler_pool, '\0', sizeof (fork_handler_pool));
 
-      struct fork_handler *p = list_entry (runp, struct fork_handler, list);
-      if (p != &malloc_parent)
-	free (p);
-    }
+  /* Release the lock.  */
+  lll_unlock (__fork_lock);
 
-  list_for_each_prev_safe (runp, prevp, &__fork_child_list)
+  /* We can free the memory after releasing the lock.  */
+  while (runp != NULL)
     {
-      list_del (runp);
-
-      struct fork_handler *p = list_entry (runp, struct fork_handler, list);
-      if (p != &__pthread_child_handler && p != &malloc_child)
-	free (p);
+      struct fork_handler_pool *oldp = runp;  /* Save before advancing.  */
+      runp = runp->next;
+      free (oldp);
     }
-
-  /* Release the lock.  */
-  lll_unlock (__fork_lock);
 }
diff --git a/nptl/sysdeps/unix/sysv/linux/unregister-atfork.c b/nptl/sysdeps/unix/sysv/linux/unregister-atfork.c
index e9eb1918f6..ac4b3c5d4c 100644
--- a/nptl/sysdeps/unix/sysv/linux/unregister-atfork.c
+++ b/nptl/sysdeps/unix/sysv/linux/unregister-atfork.c
@@ -20,56 +20,92 @@
 #include <errno.h>
 #include <stdlib.h>
 #include "fork.h"
-
-
-/* Defined in libc_pthread_init.c.  */
-extern struct fork_handler __pthread_child_handler attribute_hidden;
-/* Three static memory blocks used when registering malloc.  */
-static struct fork_handler malloc_prepare;
-static struct fork_handler malloc_parent;
-static struct fork_handler malloc_child;
+#include <atomic.h>
 
 
 void
 __unregister_atfork (dso_handle)
      void *dso_handle;
 {
-  /* Get the lock to not conflict with running forks.  */
-  lll_lock (__fork_lock);
-
-  list_t *runp;
-  list_t *prevp;
-
-  list_for_each_prev_safe (runp, prevp, &__fork_prepare_list)
-    if (list_entry (runp, struct fork_handler, list)->dso_handle == dso_handle)
+  /* Check whether there is any entry in the list which we have to
+     remove.  It is likely that this is not the case so don't bother
+     getting the lock.
+
+     We do not worry about other threads adding entries for this DSO
+     right at this moment.  If this happens it is a race and we can do
+     whatever we please.  The program will crash anyway.  */
+  struct fork_handler *runp = __fork_handlers;
+  struct fork_handler *lastp = NULL;
+
+  while (runp != NULL)
+    if (runp->dso_handle == dso_handle)
+      break;
+    else
       {
-	list_del (runp);
-
-	struct fork_handler *p = list_entry (runp, struct fork_handler, list);
-	if (p != &malloc_prepare)
-	  free (p);
+	lastp = runp;
+	runp = runp->next;
       }
 
-  list_for_each_prev_safe (runp, prevp, &__fork_parent_list)
-    if (list_entry (runp, struct fork_handler, list)->dso_handle == dso_handle)
-      {
-	list_del (runp);
+  if (runp == NULL)
+    /* Nothing to do.  */
+    return;
 
-	struct fork_handler *p = list_entry (runp, struct fork_handler, list);
-	if (p != &malloc_parent)
-	  free (p);
-      }
-
-  list_for_each_prev_safe (runp, prevp, &__fork_child_list)
-    if (list_entry (runp, struct fork_handler, list)->dso_handle == dso_handle)
-      {
-	list_del (runp);
+  /* Get the lock to not conflict with additions or deletions.  Note
+     that there couldn't have been another thread deleting something.
+     The __unregister_atfork function is only called from the
+     dlclose() code which itself serializes the operations.  */
+  lll_lock (__fork_lock);
 
-	struct fork_handler *p = list_entry (runp, struct fork_handler, list);
-	if (p != &__pthread_child_handler && p != &malloc_child)
-	  free (p);
-      }
+  /* Collect the entries we remove in a separate list for later cleanup.  */
+  struct deleted_handler
+  {
+    struct fork_handler *handler;
+    struct deleted_handler *next;
+  } *deleted = NULL;
+
+  /* Remove the entries for the DSO which is unloaded from the list.
+     It's a singly linked list so concurrent readers are not disturbed.  */
+  do
+    {
+      if (runp->dso_handle == dso_handle)
+	{
+	  if (lastp == NULL)
+	    __fork_handlers = runp->next;
+	  else
+	    lastp->next = runp->next;
+
+	  /* We cannot overwrite the ->next element now.  Put the deleted
+	     entries in a separate list.  */
+	  struct deleted_handler *newp = alloca (sizeof (*newp));
+	  newp->handler = runp;
+	  newp->next = deleted;
+	  deleted = newp;
+	}
+      else
+	lastp = runp;
+
+      runp = runp->next;
+    }
+  while (runp != NULL);
 
   /* Release the lock.  */
   lll_unlock (__fork_lock);
+
+  /* Walk the list of all entries which have to be deleted.  */
+  while (deleted != NULL)
+    {
+      /* We need to be informed by possible current users.  */
+      deleted->handler->need_signal = 1;
+      /* Make sure this gets written out first.  */
+      atomic_write_barrier ();
+
+      /* Decrement the reference counter.  If it does not reach zero
+	 wait for the last user.  */
+      atomic_decrement (&deleted->handler->refcntr);
+      unsigned int val;
+      while ((val = deleted->handler->refcntr) != 0)
+	lll_futex_wait (deleted->handler->refcntr, val);
+
+      deleted = deleted->next;
+    }
 }
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h b/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h
index b54ebbfca4..8e1742b671 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h
@@ -41,6 +41,31 @@
 #define LLL_MUTEX_LOCK_INITIALIZER (0)
 
 
+#define lll_futex_wait(futex, val) \
+  do {									      \
+    int __ignore;							      \
+    register __typeof (val) _val asm ("edx") = (val);			      \
+    __asm __volatile ("xorq %%r10, %%r10\n\t"				      \
+		      "syscall"						      \
+		      : "=a" (__ignore)					      \
+		      : "0" (SYS_futex), "D" (&futex), "S" (FUTEX_WAIT),      \
+			"d" (_val)					      \
+		      : "memory", "cc", "r10", "r11", "cx");		      \
+  } while (0)
+
+
+#define lll_futex_wake(futex, nr) \
+  do {									      \
+    int __ignore;							      \
+    register __typeof (nr) _nr asm ("edx") = (nr);			      \
+    __asm __volatile ("syscall"						      \
+		      : "=a" (__ignore)					      \
+		      : "0" (SYS_futex), "D" (&futex), "S" (FUTEX_WAKE),      \
+			"d" (_nr)					      \
+		      : "memory", "cc", "r10", "r11", "cx");		      \
+  } while (0)
+
+
 /* Does not preserve %eax and %ecx.  */
 extern int __lll_mutex_lock_wait (int *__futex, int __val) attribute_hidden;
 /* Does not preserver %eax, %ecx, and %edx.  */