about summary refs log tree commit diff
path: root/stdlib
diff options
context:
space:
mode:
Diffstat (limited to 'stdlib')
-rw-r--r--stdlib/Makefile5
-rw-r--r--stdlib/cxa_thread_atexit_impl.c90
-rw-r--r--stdlib/tst-tls-atexit-nodelete.c24
-rw-r--r--stdlib/tst-tls-atexit.c60
4 files changed, 146 insertions, 33 deletions
diff --git a/stdlib/Makefile b/stdlib/Makefile
index 7fc5a807fa..402466a623 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -74,7 +74,7 @@ tests		:= tst-strtol tst-strtod testmb testrand testsort testdiv   \
 		   tst-makecontext3 bug-getcontext bug-fmtmsg1		    \
 		   tst-secure-getenv tst-strtod-overflow tst-strtod-round   \
 		   tst-tininess tst-strtod-underflow tst-tls-atexit	    \
-		   tst-setcontext3
+		   tst-setcontext3 tst-tls-atexit-nodelete
 tests-static	:= tst-secure-getenv
 
 modules-names	= tst-tls-atexit-lib
@@ -159,6 +159,9 @@ tst-tls-atexit-lib.so-no-z-defs = yes
 $(objpfx)tst-tls-atexit: $(shared-thread-library) $(libdl)
 $(objpfx)tst-tls-atexit.out: $(objpfx)tst-tls-atexit-lib.so
 
+$(objpfx)tst-tls-atexit-nodelete: $(shared-thread-library) $(libdl)
+$(objpfx)tst-tls-atexit-nodelete.out: $(objpfx)tst-tls-atexit-lib.so
+
 $(objpfx)tst-setcontext3.out: tst-setcontext3.sh $(objpfx)tst-setcontext3
 	$(SHELL) $< $(common-objpfx) '$(test-program-prefix-before-env)' \
 		 '$(run-program-env)' '$(test-program-prefix-after-env)' \
diff --git a/stdlib/cxa_thread_atexit_impl.c b/stdlib/cxa_thread_atexit_impl.c
index 70c97dabc2..8e26380799 100644
--- a/stdlib/cxa_thread_atexit_impl.c
+++ b/stdlib/cxa_thread_atexit_impl.c
@@ -16,6 +16,61 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
+/* CONCURRENCY NOTES:
+
+   This documents concurrency for the non-POD TLS destructor registration,
+   calling and destruction.  The functions __cxa_thread_atexit_impl,
+   _dl_close_worker and __call_tls_dtors are the three main routines that may
+   run concurrently and access shared data.  The shared data in all possible
+   combinations of all three functions are the link map list, a link map for a
+   DSO and the link map member l_tls_dtor_count.
+
+   __cxa_thread_atexit_impl acquires the load_lock before accessing any shared
+   state and hence multiple of its instances can safely execute concurrently.
+
+   _dl_close_worker acquires the load_lock before accessing any shared state as
+   well and hence can concurrently execute multiple of its own instances as
+   well as those of __cxa_thread_atexit_impl safely.  Not all accesses to
+   l_tls_dtor_count are protected by the load lock, so we need to synchronize
+   using atomics.
+
+   __call_tls_dtors accesses the l_tls_dtor_count without taking the lock; it
+   decrements the value by one.  It does not need the big lock because it does
+   not access any other shared state except for the current DSO link map and
+   its member l_tls_dtor_count.
+
+   Correspondingly, _dl_close_worker loads l_tls_dtor_count and if it is zero,
+   unloads the DSO, thus deallocating the current link map.  This is the goal
+   of maintaining l_tls_dtor_count - to unload the DSO and free resources if
+   there are no pending destructors to be called.
+
+   We want to eliminate the inconsistent state where the DSO is unloaded in
+   _dl_close_worker before it is used in __call_tls_dtors.  This could happen
+   if __call_tls_dtors uses the link map after it sets l_tls_dtor_count to 0,
+   since _dl_close_worker will conclude from the 0 l_tls_dtor_count value that
+   it is safe to unload the DSO.  Hence, to ensure that this does not happen,
+   the following conditions must be met:
+
+   1. In _dl_close_worker, the l_tls_dtor_count load happens before the DSO
+      is unload and its link map is freed
+   2. The link map dereference in __call_tls_dtors happens before the
+      l_tls_dtor_count dereference.
+
+   To ensure this, the l_tls_dtor_count decrement in __call_tls_dtors should
+   have release semantics and the load in _dl_close_worker should have acquire
+   semantics.
+
+   Concurrent executions of __call_tls_dtors should only ensure that the value
+   is accessed atomically; no reordering constraints need to be considered.
+   Likewise for the increment of l_tls_dtor_count in __cxa_thread_atexit_impl.
+
+   There is still a possibility on concurrent execution of _dl_close_worker and
+   __call_tls_dtors where _dl_close_worker reads the value of l_tls_dtor_count
+   as 1, __call_tls_dtors decrements the value of l_tls_dtor_count but
+   _dl_close_worker does not unload the DSO, having read the old value.  This
+   is not very different from a case where __call_tls_dtors is called after
+   _dl_close_worker on the DSO and hence is an accepted execution.  */
+
 #include <stdlib.h>
 #include <ldsodefs.h>
 
@@ -49,9 +104,11 @@ __cxa_thread_atexit_impl (dtor_func func, void *obj, void *dso_symbol)
   new->next = tls_dtor_list;
   tls_dtor_list = new;
 
-  /* See if we already encountered the DSO.  */
+  /* We have to acquire the big lock to prevent a racing dlclose from pulling
+     our DSO from underneath us while we're setting up our destructor.  */
   __rtld_lock_lock_recursive (GL(dl_load_lock));
 
+  /* See if we already encountered the DSO.  */
   if (__glibc_unlikely (dso_symbol_cache != dso_symbol))
     {
       ElfW(Addr) caller = (ElfW(Addr)) dso_symbol;
@@ -62,16 +119,17 @@ __cxa_thread_atexit_impl (dtor_func func, void *obj, void *dso_symbol)
 	 program (we hope).  */
       lm_cache = l ? l : GL(dl_ns)[LM_ID_BASE]._ns_loaded;
     }
-  /* A destructor could result in a thread_local construction and the former
-     could have cleared the flag.  */
-  if (lm_cache->l_type == lt_loaded && lm_cache->l_tls_dtor_count == 0)
-    lm_cache->l_flags_1 |= DF_1_NODELETE;
-
-  new->map = lm_cache;
-  new->map->l_tls_dtor_count++;
 
+  /* This increment may only be concurrently observed either by the decrement
+     in __call_tls_dtors since the other l_tls_dtor_count access in
+     _dl_close_worker is protected by the load lock.  The execution in
+     __call_tls_dtors does not really depend on this value beyond the fact that
+     it should be atomic, so Relaxed MO should be sufficient.  */
+  atomic_fetch_add_relaxed (&lm_cache->l_tls_dtor_count, 1);
   __rtld_lock_unlock_recursive (GL(dl_load_lock));
 
+  new->map = lm_cache;
+
   return 0;
 }
 
@@ -83,19 +141,15 @@ __call_tls_dtors (void)
   while (tls_dtor_list)
     {
       struct dtor_list *cur = tls_dtor_list;
-      tls_dtor_list = tls_dtor_list->next;
 
+      tls_dtor_list = tls_dtor_list->next;
       cur->func (cur->obj);
 
-      __rtld_lock_lock_recursive (GL(dl_load_lock));
-
-      /* Allow DSO unload if count drops to zero.  */
-      cur->map->l_tls_dtor_count--;
-      if (cur->map->l_tls_dtor_count == 0 && cur->map->l_type == lt_loaded)
-        cur->map->l_flags_1 &= ~DF_1_NODELETE;
-
-      __rtld_lock_unlock_recursive (GL(dl_load_lock));
-
+      /* Ensure that the MAP dereference happens before
+	 l_tls_dtor_count decrement.  That way, we protect this access from a
+	 potential DSO unload in _dl_close_worker, which happens when
+	 l_tls_dtor_count is 0.  See CONCURRENCY NOTES for more detail.  */
+      atomic_fetch_add_release (&cur->map->l_tls_dtor_count, -1);
       free (cur);
     }
 }
diff --git a/stdlib/tst-tls-atexit-nodelete.c b/stdlib/tst-tls-atexit-nodelete.c
new file mode 100644
index 0000000000..ff290fa987
--- /dev/null
+++ b/stdlib/tst-tls-atexit-nodelete.c
@@ -0,0 +1,24 @@
+/* Verify that a RTLD_NODELETE DSO is not unloaded even if its TLS objects are
+   destroyed.
+
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define NO_DELETE 1
+#define H2_RTLD_FLAGS (RTLD_LAZY | RTLD_NODELETE)
+#define LOADED_IS_GOOD true
+#include "tst-tls-atexit.c"
diff --git a/stdlib/tst-tls-atexit.c b/stdlib/tst-tls-atexit.c
index cea655decc..e9839d8b15 100644
--- a/stdlib/tst-tls-atexit.c
+++ b/stdlib/tst-tls-atexit.c
@@ -16,12 +16,20 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-/* This test dynamically loads a DSO and spawns a thread that subsequently
-   calls into the DSO to register a destructor for an object in the DSO and
-   then calls dlclose on the handle for the DSO.  When the thread exits, the
-   DSO should not be unloaded or else the destructor called during thread exit
-   will crash.  Further in the main thread, the DSO is opened and closed again,
-   at which point the DSO should be unloaded.  */
+/* For the default case, i.e. NO_DELETE not defined, the test dynamically loads
+   a DSO and spawns a thread that subsequently calls into the DSO to register a
+   destructor for an object in the DSO and then calls dlclose on the handle for
+   the DSO.  When the thread exits, the DSO should not be unloaded or else the
+   destructor called during thread exit will crash.  Further in the main
+   thread, the DSO is opened and closed again, at which point the DSO should be
+   unloaded.
+
+   When NO_DELETE is defined, the DSO is loaded twice, once with just RTLD_LAZY
+   flag and the second time with the RTLD_NODELETE flag set.  The thread is
+   spawned, destructor registered and then thread exits without closing the
+   DSO.  In the main thread, the first handle is then closed, followed by the
+   second handle.  In the end, the DSO should remain loaded due to the
+   RTLD_NODELETE flag being set in the second dlopen call.  */
 
 #include <dlfcn.h>
 #include <pthread.h>
@@ -31,6 +39,14 @@
 #include <errno.h>
 #include <link.h>
 
+#ifndef NO_DELETE
+# define LOADED_IS_GOOD false
+#endif
+
+#ifndef H2_RTLD_FLAGS
+# define H2_RTLD_FLAGS (RTLD_LAZY)
+#endif
+
 #define DSO_NAME "$ORIGIN/tst-tls-atexit-lib.so"
 
 /* Walk through the map in the _r_debug structure to see if our lib is still
@@ -43,7 +59,10 @@ is_loaded (void)
   for (; lm; lm = lm->l_next)
     if (lm->l_type == lt_loaded && lm->l_name
 	&& strcmp (basename (DSO_NAME), basename (lm->l_name)) == 0)
-	  return true;
+      {
+	printf ("%s is still loaded\n", lm->l_name);
+	return true;
+      }
   return false;
 }
 
@@ -63,7 +82,9 @@ reg_dtor_and_close (void *h)
 
   reg_dtor ();
 
+#ifndef NO_DELETE
   dlclose (h);
+#endif
 
   return NULL;
 }
@@ -104,19 +125,30 @@ do_test (void)
       return 1;
     }
 
+#ifndef NO_DELETE
   if (spawn_thread (h1) != 0)
     return 1;
+#endif
 
-  /* Now this should unload the DSO.  FIXME: This is a bug, calling dlclose
-     like this is actually wrong, but it works because cxa_thread_atexit_impl
-     has a bug which results in dlclose allowing this to work.  */
-  dlclose (h1);
+  void *h2 = dlopen (DSO_NAME, H2_RTLD_FLAGS);
+  if (h2 == NULL)
+    {
+      printf ("h2: Unable to load DSO: %s\n", dlerror ());
+      return 1;
+    }
 
-  /* Check link maps to ensure that the DSO has unloaded.  */
-  if (is_loaded ())
+#ifdef NO_DELETE
+  if (spawn_thread (h1) != 0)
     return 1;
 
-  return 0;
+  dlclose (h1);
+#endif
+  dlclose (h2);
+
+  /* Check link maps to ensure that the DSO has unloaded.  In the normal case,
+     the DSO should be unloaded if there are no uses.  However, if one of the
+     dlopen calls were with RTLD_NODELETE, the DSO should remain loaded.  */
+  return is_loaded () == LOADED_IS_GOOD ? 0 : 1;
 }
 
 #define TEST_FUNCTION do_test ()