about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- elf/dl-misc.c 18
-rw-r--r-- elf/dl-reloc.c 26
-rw-r--r-- elf/dl-support.c 3
-rw-r--r-- elf/dl-tls.c 139
-rw-r--r-- elf/rtld.c 24
-rw-r--r-- nptl/Makefile 14
-rw-r--r-- nptl/allocatestack.c 5
-rw-r--r-- nptl/tst-tls7a.c (renamed from nptl/tst-tls7.c) 11
-rw-r--r-- nptl/tst-tls7amod.c (renamed from nptl/tst-tls7mod.c) 0
-rw-r--r-- sysdeps/generic/ldsodefs.h 3
10 files changed, 196 insertions, 47 deletions
diff --git a/elf/dl-misc.c b/elf/dl-misc.c
index b3c56ab6b0..ec70298d42 100644
--- a/elf/dl-misc.c
+++ b/elf/dl-misc.c
@@ -530,12 +530,21 @@ __signal_safe_memalign (size_t boundary, size_t size)
 void * weak_function
 __signal_safe_malloc (size_t size)
 {
+  if (!GLRO(dl_async_signal_safe))
+    return malloc (size);
+
   return __signal_safe_memalign (1, size);
 }
 
 void weak_function
 __signal_safe_free (void *ptr)
 {
+  if (!GLRO(dl_async_signal_safe))
+    {
+      free (ptr);
+      return;
+    }
+
   if (ptr == NULL)
     return;
 
@@ -549,6 +558,9 @@ __signal_safe_free (void *ptr)
 void * weak_function
 __signal_safe_realloc (void *ptr, size_t size)
 {
+  if (!GLRO(dl_async_signal_safe))
+    return realloc (ptr, size);
+
   if (size == 0)
     {
       __signal_safe_free (ptr);
@@ -567,7 +579,8 @@ __signal_safe_realloc (void *ptr, size_t size)
   if (new_ptr == NULL)
     return NULL;
 
-  memcpy (new_ptr, ptr, old_size);
+  /* Copy over the old block (but not its header).  */
+  memcpy (new_ptr, ptr, old_size - sizeof (*header));
   __signal_safe_free (ptr);
 
   return new_ptr;
@@ -576,6 +589,9 @@ __signal_safe_realloc (void *ptr, size_t size)
 void * weak_function
 __signal_safe_calloc (size_t nmemb, size_t size)
 {
+  if (!GLRO(dl_async_signal_safe))
+    return calloc (nmemb, size);
+
   void *ptr = __signal_safe_malloc (nmemb * size);
   if (ptr == NULL)
     return NULL;
diff --git a/elf/dl-reloc.c b/elf/dl-reloc.c
index 8696453566..1625f35d1b 100644
--- a/elf/dl-reloc.c
+++ b/elf/dl-reloc.c
@@ -72,6 +72,9 @@ _dl_try_allocate_static_tls (struct link_map *map)
 
   size_t offset = GL(dl_tls_static_used) + (freebytes - n * map->l_tls_align
 					    - map->l_tls_firstbyte_offset);
+
+  if (!GLRO(dl_async_signal_safe))
+    map->l_tls_offset = GL(dl_tls_static_used) = offset;
 #elif TLS_DTV_AT_TP
   /* dl_tls_static_used includes the TCB at the beginning.  */
   size_t offset = (ALIGN_UP(GL(dl_tls_static_used)
@@ -83,9 +86,15 @@ _dl_try_allocate_static_tls (struct link_map *map)
   if (used > GL(dl_tls_static_size))
     goto fail;
 
+  if (!GLRO(dl_async_signal_safe)) {
+    map->l_tls_offset = offset;
+    map->l_tls_firstbyte_offset = GL(dl_tls_static_used);
+    GL(dl_tls_static_used) = used;
+  }
 #else
 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
 #endif
+  if (GLRO(dl_async_signal_safe)) {
   /* We've computed the new value we want, now try to install it.  */
   ptrdiff_t val;
   if ((val = map->l_tls_offset) == NO_TLS_OFFSET)
@@ -110,14 +119,15 @@ _dl_try_allocate_static_tls (struct link_map *map)
       goto fail;
     }
   /* We installed the value; now update the globals.  */
-#if TLS_TCB_AT_TP
+#if TLS_TCB_AT_TP // second
   GL(dl_tls_static_used) = offset;
-#elif TLS_DTV_AT_TP
+#elif TLS_DTV_AT_TP // second
   map->l_tls_firstbyte_offset = GL(dl_tls_static_used);
   GL(dl_tls_static_used) = used;
-#else
-# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
-#endif
+#else // second
+# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"  // second
+#endif // second
+  }
 
   /* If the object is not yet relocated we cannot initialize the
      static TLS region.  Delay it.  */
@@ -147,12 +157,14 @@ _dl_allocate_static_tls (struct link_map *map)
      (including this one) and update this map's TLS entry. A signal handler
      accessing TLS would try to do the same update and break.  */
   sigset_t old;
-  _dl_mask_all_signals (&old);
+  if (GLRO(dl_async_signal_safe))
+    _dl_mask_all_signals (&old);
   int err = -1;
   if (map->l_tls_offset != FORCED_DYNAMIC_TLS_OFFSET)
     err = _dl_try_allocate_static_tls (map);
 
-  _dl_unmask_signals (&old);
+  if (GLRO(dl_async_signal_safe))
+    _dl_unmask_signals (&old);
   if (err != 0)
     {
       _dl_signal_error (0, map->l_name, NULL, N_("\
diff --git a/elf/dl-support.c b/elf/dl-support.c
index febc5e2f70..9349134fa0 100644
--- a/elf/dl-support.c
+++ b/elf/dl-support.c
@@ -73,6 +73,9 @@ const char *_google_exec_origin_dir;
 /* Nonzero if runtime lookup should not update the .got/.plt.  */
 int _dl_bind_not;
 
+/* Nonzero if TLS handling should be async-signal-safe.  */
+int _dl_async_signal_safe;
+
 /* A dummy link map for the executable, used by dlopen to access the global
    scope.  We don't export any symbols ourselves, so this can be minimal.  */
 static struct link_map _dl_main_map =
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
index 5659adbba5..47242097fc 100644
--- a/elf/dl-tls.c
+++ b/elf/dl-tls.c
@@ -409,14 +409,14 @@ _dl_resize_dtv (dtv_t *dtv)
 	 dl-minimal.c malloc instead of the real malloc.  We can't free
 	 it, we have to abandon the old storage.  */
 
-      newp = malloc ((2 + newsize) * sizeof (dtv_t));
+      newp = __signal_safe_malloc ((2 + newsize) * sizeof (dtv_t));
       if (newp == NULL)
 	oom ();
       memcpy (newp, &dtv[-1], (2 + oldsize) * sizeof (dtv_t));
     }
   else
     {
-      newp = realloc (&dtv[-1],
+      newp = __signal_safe_realloc (&dtv[-1],
 		      (2 + newsize) * sizeof (dtv_t));
       if (newp == NULL)
 	oom ();
@@ -534,13 +534,15 @@ _dl_allocate_tls (void *mem)
 }
 rtld_hidden_def (_dl_allocate_tls)
 
+/* Clear the given dtv.  (We have this here because __signal_safe_free is
+   not visible to nptl/allocatestack.c.)  */
+
 void
 _dl_clear_dtv (dtv_t *dtv)
 {
   for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
-    if (/*! dtv[1 + cnt].pointer.is_static */ 1
-	&& dtv[1 + cnt].pointer.val != TLS_DTV_UNALLOCATED)
-      __signal_safe_free (dtv[1 + cnt].pointer.val);
+    __signal_safe_free (dtv[1 + cnt].pointer.to_free);
+
   memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));
 }
 
@@ -584,8 +586,53 @@ rtld_hidden_def (_dl_deallocate_tls)
 #  define GET_ADDR_OFFSET ti->ti_offset
 # endif
 
+/* Allocate one DTV entry.  */
+static struct dtv_pointer
+allocate_dtv_entry (size_t alignment, size_t size)
+{
+  if (powerof2 (alignment) && alignment <= _Alignof (max_align_t))
+    {
+      /* The alignment is supported by malloc.  */
+      void *ptr = malloc (size);
+      return (struct dtv_pointer) { ptr, ptr };
+    }
+
+  /* Emulate memalign by manually aligning a pointer returned by
+     malloc.  First compute the size with an overflow check.  */
+  size_t alloc_size = size + alignment;
+  if (alloc_size < size)
+    return (struct dtv_pointer) {};
+
+  /* Perform the allocation.  This is the pointer we need to free
+     later.  */
+  void *start = malloc (alloc_size);
+  if (start == NULL)
+    return (struct dtv_pointer) {};
+
+  /* Find the aligned position within the larger allocation.  */
+  void *aligned = (void *) roundup ((uintptr_t) start, alignment);
+
+  return (struct dtv_pointer) { .val = aligned, .to_free = start };
+}
+
+static struct dtv_pointer
+allocate_and_init (struct link_map *map)
+{
+  struct dtv_pointer result = allocate_dtv_entry
+    (map->l_tls_align, map->l_tls_blocksize);
+  if (result.val == NULL)
+    oom ();
+
+  /* Initialize the memory.  */
+  memset (__mempcpy (result.val, map->l_tls_initimage,
+		     map->l_tls_initimage_size),
+	  '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
+
+  return result;
+}
+
 static void
-allocate_and_init (dtv_t *dtv, struct link_map *map)
+signal_safe_allocate_and_init (dtv_t *dtv, struct link_map *map)
 {
   void *newp;
   newp = __signal_safe_memalign (map->l_tls_align, map->l_tls_blocksize);
@@ -642,13 +689,18 @@ _dl_update_slotinfo (unsigned long int req_modid)
       size_t total = 0;
       sigset_t old;
 
-      _dl_mask_all_signals (&old);
-      /* We use the signal mask as a lock against reentrancy here.
-         Check that a signal taken before the lock didn't already
-         update us.  */
-      dtv = THREAD_DTV ();
-      if (dtv[0].counter >= listp->slotinfo[idx].gen)
-        goto out;
+      if (GLRO(dl_async_signal_safe)) {
+	_dl_mask_all_signals (&old);
+	/* We use the signal mask as a lock against reentrancy here.
+	   Check that a signal taken before the lock didn't already
+	   update us.  */
+	dtv = THREAD_DTV ();
+	if (dtv[0].counter >= listp->slotinfo[idx].gen)
+	  {
+	    _dl_unmask_signals (&old);
+	    return the_map;
+	  }
+      }
       /* We have to look through the entire dtv slotinfo list.  */
       listp =  GL(dl_tls_dtv_slotinfo_list);
       do
@@ -674,13 +726,13 @@ _dl_update_slotinfo (unsigned long int req_modid)
 	      struct link_map *map = listp->slotinfo[cnt].map;
 	      if (map == NULL)
 		{
-		  if (dtv[-1].counter >= total + cnt)
+		  if (dtv[-1].counter >= modid)
 		    {
 		      /* If this modid was used at some point the memory
 			 might still be allocated.  */
-		      __signal_safe_free (dtv[total + cnt].pointer.to_free);
-		      dtv[total + cnt].pointer.val = TLS_DTV_UNALLOCATED;
-		      dtv[total + cnt].pointer.to_free = NULL;
+		      __signal_safe_free (dtv[modid].pointer.to_free);
+		      dtv[modid].pointer.val = TLS_DTV_UNALLOCATED;
+		      dtv[modid].pointer.to_free = NULL;
 		    }
 
 		  continue;
@@ -718,8 +770,8 @@ _dl_update_slotinfo (unsigned long int req_modid)
 
       /* This will be the new maximum generation counter.  */
       dtv[0].counter = new_gen;
-   out:
-      _dl_unmask_signals (&old);
+      if (GLRO(dl_async_signal_safe))
+	_dl_unmask_signals (&old);
     }
 
   return the_map;
@@ -745,6 +797,50 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
 
       the_map = listp->slotinfo[idx].map;
     }
+
+  if (!GLRO(dl_async_signal_safe)) {
+
+  /* Make sure that, if a dlopen running in parallel forces the
+     variable into static storage, we'll wait until the address in the
+     static TLS block is set up, and use that.  If we're undecided
+     yet, make sure we make the decision holding the lock as well.  */
+  if (__glibc_unlikely (the_map->l_tls_offset
+			!= FORCED_DYNAMIC_TLS_OFFSET))
+    {
+      __rtld_lock_lock_recursive (GL(dl_load_lock));
+      if (__glibc_likely (the_map->l_tls_offset == NO_TLS_OFFSET))
+	{
+	  the_map->l_tls_offset = FORCED_DYNAMIC_TLS_OFFSET;
+	  __rtld_lock_unlock_recursive (GL(dl_load_lock));
+	}
+      else if (__glibc_likely (the_map->l_tls_offset
+			       != FORCED_DYNAMIC_TLS_OFFSET))
+	{
+#if TLS_TCB_AT_TP
+	  void *p = (char *) THREAD_SELF - the_map->l_tls_offset;
+#elif TLS_DTV_AT_TP
+	  void *p = (char *) THREAD_SELF + the_map->l_tls_offset + TLS_PRE_TCB_SIZE;
+#else
+# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
+#endif
+	  __rtld_lock_unlock_recursive (GL(dl_load_lock));
+
+	  dtv[GET_ADDR_MODULE].pointer.to_free = NULL;
+	  dtv[GET_ADDR_MODULE].pointer.val = p;
+
+	  return (char *) p + GET_ADDR_OFFSET;
+	}
+      else
+	__rtld_lock_unlock_recursive (GL(dl_load_lock));
+    }
+  struct dtv_pointer result = allocate_and_init (the_map);
+  dtv[GET_ADDR_MODULE].pointer = result;
+  assert (result.to_free != NULL);
+
+  return (char *) result.val + GET_ADDR_OFFSET;
+
+  } else {
+
   sigset_t old;
   _dl_mask_all_signals (&old);
 
@@ -778,11 +874,11 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
 
   if (offset == FORCED_DYNAMIC_TLS_OFFSET)
     {
-      allocate_and_init (&dtv[GET_ADDR_MODULE], the_map);
+      signal_safe_allocate_and_init (&dtv[GET_ADDR_MODULE], the_map);
     }
   else
     {
-      void **pp = &dtv[GET_ADDR_MODULE].pointer.val;
+      void ** volatile pp = &dtv[GET_ADDR_MODULE].pointer.val;
       while (atomic_forced_read (*pp) == TLS_DTV_UNALLOCATED)
 	{
 	  /* for lack of a better (safe) thing to do, just spin.
@@ -803,6 +899,7 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
   _dl_unmask_signals (&old);
 
   return (char *) dtv[GET_ADDR_MODULE].pointer.val + GET_ADDR_OFFSET;
+  }
 }
 
 
diff --git a/elf/rtld.c b/elf/rtld.c
index 9250cf4f94..f4e382d9c7 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -2502,6 +2502,9 @@ process_envvars (enum mode *modep)
   enum mode mode = normal;
   char *debug_output = NULL;
 
+  /* Enable async-signal-safe TLS by default.  */
+  GLRO(dl_async_signal_safe) = 1;
+
   /* This is the default place for profiling data file.  */
   GLRO(dl_profile_output)
     = &"/var/tmp\0/var/profile"[__libc_enable_secure ? 9 : 0];
@@ -2525,6 +2528,10 @@ process_envvars (enum mode *modep)
 	  /* Warning level, verbose or not.  */
 	  if (memcmp (envline, "WARN", 4) == 0)
 	    GLRO(dl_verbose) = envline[5] != '\0';
+#if 0 /* enable to get runtime control over async signal safety */
+	  if (memcmp (envline, "SAFE", 4) == 0)
+	    GLRO(dl_async_signal_safe) = 1;
+#endif
 	  break;
 
 	case 5:
@@ -2537,7 +2544,12 @@ process_envvars (enum mode *modep)
 	  if (memcmp (envline, "AUDIT", 5) == 0)
 	    audit_list_string = &envline[6];
 	  break;
-
+#if 0 /* enable to get runtime control over async signal safety */
+	case 6:
+	  if (memcmp (envline, "UNSAFE", 6) == 0)
+	    GLRO(dl_async_signal_safe) = 0;
+	  break;
+#endif
 	case 7:
 	  /* Print information about versions.  */
 	  if (memcmp (envline, "VERBOSE", 7) == 0)
@@ -2666,6 +2678,16 @@ process_envvars (enum mode *modep)
   /* The caller wants this information.  */
   *modep = mode;
 
+#if 0 /* enable this to help debug async-safe TLS */
+  if (GLRO(dl_debug_mask))
+    {
+      if (GLRO(dl_async_signal_safe))
+	_dl_printf ("TLS is async-signal-safe\n");
+      else
+	_dl_printf ("TLS is NOT async-signal-safe\n");
+    }
+#endif /* for async-safe TLS */
+
   /* Extra security for SUID binaries.  Remove all dangerous environment
      variables.  */
   if (__builtin_expect (__libc_enable_secure, 0))
diff --git a/nptl/Makefile b/nptl/Makefile
index 093df4fa1f..2ffc54bfd1 100644
--- a/nptl/Makefile
+++ b/nptl/Makefile
@@ -374,7 +374,7 @@ tests += tst-cancelx2 tst-cancelx3 tst-cancelx4 tst-cancelx5 \
 	 tst-oncex3 tst-oncex4
 ifeq ($(build-shared),yes)
 tests += tst-atfork2 tst-tls4 tst-_res1 tst-fini1 tst-compat-forwarder
-tests += tst-tls7
+tests += tst-tls7a
 tests-internal += tst-tls3 tst-tls3-malloc tst-tls5 tst-stackguard1
 tests-nolibpthread += tst-fini1
 ifeq ($(have-z-execstack),yes)
@@ -390,7 +390,7 @@ modules-names = tst-atfork2mod tst-tls3mod tst-tls4moda tst-tls4modb \
 		tst-tls5modd tst-tls5mode tst-tls5modf tst-stack4mod \
 		tst-_res1mod1 tst-_res1mod2 tst-fini1mod \
 		tst-join7mod tst-compat-forwarder-mod
-modules-names += tst-tls7mod
+modules-names += tst-tls7amod
 ifneq ($(with-clang),yes)
 modules-names += tst-execstack-mod
 endif
@@ -408,7 +408,7 @@ tst-tls5modc.so-no-z-defs = yes
 tst-tls5modd.so-no-z-defs = yes
 tst-tls5mode.so-no-z-defs = yes
 tst-tls5modf.so-no-z-defs = yes
-tst-tls7mod.so-no-z-defs = yes
+tst-tls7amod.so-no-z-defs = yes
 
 ifeq ($(build-shared),yes)
 # Build all the modules even when not actually running test programs.
@@ -603,11 +603,9 @@ $(objpfx)tst-tls5: $(objpfx)tst-tls5mod.so $(shared-thread-library)
 LDFLAGS-tst-tls5 = $(no-as-needed)
 LDFLAGS-tst-tls5mod.so = -Wl,-soname,tst-tls5mod.so
 
-# ensure free(malloc()) isn't optimized out
-CFLAGS-tst-tls7.c = -fno-builtin-malloc -fno-builtin-free
-$(objpfx)tst-tls7: $(libdl) $(shared-thread-library)
-$(objpfx)tst-tls7.out: $(objpfx)tst-tls7mod.so
-$(objpfx)tst-tls7mod.so: $(shared-thread-library)
+$(objpfx)tst-tls7a: $(libdl) $(shared-thread-library)
+$(objpfx)tst-tls7a.out: $(objpfx)tst-tls7amod.so
+$(objpfx)tst-tls7amod.so: $(shared-thread-library)
 
 ifeq ($(build-shared),yes)
 $(objpfx)tst-tls6.out: tst-tls6.sh $(objpfx)tst-tls5 \
diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
index 49bfc66b95..9f6a75695e 100644
--- a/nptl/allocatestack.c
+++ b/nptl/allocatestack.c
@@ -1231,6 +1231,7 @@ __nptl_setxid (struct xid_command *cmdp)
 static inline void __attribute__((always_inline))
 init_one_static_tls (struct pthread *curp, struct link_map *map)
 {
+  dtv_t *dtv = GET_DTV (TLS_TPADJ (curp));
 # if TLS_TCB_AT_TP
   void *dest = (char *) curp - map->l_tls_offset;
 # elif TLS_DTV_AT_TP
@@ -1243,15 +1244,11 @@ init_one_static_tls (struct pthread *curp, struct link_map *map)
   memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
 	  '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
 
-#if 0 /* still needed? dtv refs gone in current code */
-  /* Fill in the DTV slot so that a later LD/GD access will find it.  */
-  dtv[map->l_tls_modid].pointer.is_static = true;
   /* Pairs against the read barrier in tls_get_attr_tail, guaranteeing
      any thread waiting for an update to pointer.val sees the
      initimage write.  */
   atomic_write_barrier ();
   dtv[map->l_tls_modid].pointer.val = dest;
-#endif
 }
 
 void
diff --git a/nptl/tst-tls7.c b/nptl/tst-tls7a.c
index 3e85a6ec85..60530037c4 100644
--- a/nptl/tst-tls7.c
+++ b/nptl/tst-tls7a.c
@@ -20,7 +20,6 @@
    from a signal handler.  */
 
 #include <assert.h>
-#include <atomic.h>
 #include <dlfcn.h>
 #include <pthread.h>
 #include <semaphore.h>
@@ -34,7 +33,9 @@ spin (void *ignored)
   while (1)
     {
       /* busywork */
-      free (malloc (128));
+      void *volatile p;
+      p = malloc (128);
+      free (p);
     }
 
   /* never reached */
@@ -48,7 +49,7 @@ action (int signo, siginfo_t *info, void *ignored)
 {
   sem_t *sem = info->si_value.sival_ptr;
 
-  atomic_read_barrier ();
+  __asm ("" ::: "memory");  // atomic_read_barrier
   assert (tls7mod_action != NULL);
   (*tls7mod_action) (signo, info, ignored);
 
@@ -75,7 +76,7 @@ do_test (void)
 
   for (int i = 0; i < NITERS; ++i)
     {
-      void *h = dlopen ("tst-tls7mod.so", RTLD_LAZY);
+      void *h = dlopen ("tst-tls7amod.so", RTLD_LAZY);
       if (h == NULL)
         {
           puts ("dlopen failed");
@@ -88,7 +89,7 @@ do_test (void)
           puts ("dlsym for action failed");
           exit (1);
         }
-      atomic_write_barrier ();
+      __asm ("" ::: "memory");  // atomic_write_barrier
 
       struct sigaction sa;
       sa.sa_sigaction = action;
diff --git a/nptl/tst-tls7mod.c b/nptl/tst-tls7amod.c
index da5af56370..da5af56370 100644
--- a/nptl/tst-tls7mod.c
+++ b/nptl/tst-tls7amod.c
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index a5822302b4..7ea931817b 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -521,6 +521,9 @@ struct rtld_global_ro
      linking.  */
   EXTERN int _dl_dynamic_weak;
 
+  /* Nonzero if TLS handling should be async-signal-safe.  */
+  EXTERN int _dl_async_signal_safe;
+
   /* Default floating-point control word.  */
   EXTERN fpu_control_t _dl_fpu_control;