about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 15
-rw-r--r-- NEWS 14
-rw-r--r-- elf/tlsdeschtab.h 8
-rw-r--r-- sysdeps/aarch64/dl-tlsdesc.S 37
-rw-r--r-- sysdeps/aarch64/dl-tlsdesc.h 3
-rw-r--r-- sysdeps/aarch64/tlsdesc.c 36
6 files changed, 91 insertions, 22 deletions
diff --git a/ChangeLog b/ChangeLog
index 5e93d9e99e..1d4a376983 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2015-06-17  Szabolcs Nagy  <szabolcs.nagy@arm.com>
+
+	[BZ #18034]
+	* sysdeps/aarch64/dl-tlsdesc.h (_dl_tlsdesc_return_lazy): Declare.
+	* sysdeps/aarch64/dl-tlsdesc.S (_dl_tlsdesc_return_lazy): Define.
+	(_dl_tlsdesc_undefweak): Guarantee TLSDESC entry and argument load-load
+	ordering using ldar.
+	(_dl_tlsdesc_dynamic): Likewise.
+	(_dl_tlsdesc_return_lazy): Likewise.
+	* sysdeps/aarch64/tlsdesc.c (_dl_tlsdesc_resolve_rela_fixup): Use
+	relaxed atomics instead of volatile and synchronize with release store.
+	(_dl_tlsdesc_resolve_hold_fixup): Use relaxed atomics instead of
+	volatile.
+	* elf/tlsdeschtab.h (_dl_tlsdesc_resolve_early_return_p): Likewise.
+
 2015-06-15  Andrew Senkevich  <andrew.senkevich@intel.com>
 
 	* sysdeps/unix/sysv/linux/x86_64/libmvec.abilist: New symbols added.
diff --git a/NEWS b/NEWS
index 33cba7b652..b215276b07 100644
--- a/NEWS
+++ b/NEWS
@@ -15,13 +15,13 @@ Version 2.22
   17581, 17588, 17596, 17620, 17621, 17628, 17631, 17692, 17711, 17715,
   17776, 17779, 17792, 17836, 17912, 17916, 17930, 17932, 17944, 17949,
   17964, 17965, 17967, 17969, 17978, 17987, 17991, 17996, 17998, 17999,
-  18007, 18019, 18020, 18029, 18030, 18032, 18036, 18038, 18039, 18042,
-  18043, 18046, 18047, 18049, 18068, 18080, 18093, 18100, 18104, 18110,
-  18111, 18116, 18125, 18128, 18138, 18185, 18196, 18197, 18206, 18210,
-  18211, 18217, 18220, 18221, 18234, 18244, 18247, 18287, 18319, 18324,
-  18333, 18346, 18397, 18409, 18410, 18412, 18418, 18422, 18434, 18444,
-  18468, 18469, 18470, 18479, 18483, 18495, 18496, 18497, 18498, 18507,
-  18512, 18519, 18520, 18522, 18527, 18528, 18529, 18530.
+  18007, 18019, 18020, 18029, 18030, 18032, 18034, 18036, 18038, 18039,
+  18042, 18043, 18046, 18047, 18049, 18068, 18080, 18093, 18100, 18104,
+  18110, 18111, 18116, 18125, 18128, 18138, 18185, 18196, 18197, 18206,
+  18210, 18211, 18217, 18220, 18221, 18234, 18244, 18247, 18287, 18319,
+  18324, 18333, 18346, 18397, 18409, 18410, 18412, 18418, 18422, 18434,
+  18444, 18468, 18469, 18470, 18479, 18483, 18495, 18496, 18497, 18498,
+  18507, 18512, 18519, 18520, 18522, 18527, 18528, 18529, 18530.
 
 * Cache information can be queried via sysconf() function on s390 e.g. with
   _SC_LEVEL1_ICACHE_SIZE as argument.
diff --git a/elf/tlsdeschtab.h b/elf/tlsdeschtab.h
index d13b4e57c7..fb0eb88e7e 100644
--- a/elf/tlsdeschtab.h
+++ b/elf/tlsdeschtab.h
@@ -20,6 +20,8 @@
 #ifndef TLSDESCHTAB_H
 # define TLSDESCHTAB_H 1
 
+#include <atomic.h>
+
 # ifdef SHARED
 
 #  include <inline-hashtab.h>
@@ -138,17 +140,17 @@ _dl_make_tlsdesc_dynamic (struct link_map *map, size_t ti_offset)
 static int
 _dl_tlsdesc_resolve_early_return_p (struct tlsdesc volatile *td, void *caller)
 {
-  if (caller != td->entry)
+  if (caller != atomic_load_relaxed (&td->entry))
     return 1;
 
   __rtld_lock_lock_recursive (GL(dl_load_lock));
-  if (caller != td->entry)
+  if (caller != atomic_load_relaxed (&td->entry))
     {
       __rtld_lock_unlock_recursive (GL(dl_load_lock));
       return 1;
     }
 
-  td->entry = _dl_tlsdesc_resolve_hold;
+  atomic_store_relaxed (&td->entry, _dl_tlsdesc_resolve_hold);
 
   return 0;
 }
diff --git a/sysdeps/aarch64/dl-tlsdesc.S b/sysdeps/aarch64/dl-tlsdesc.S
index be9b9b394c..c7adf79baf 100644
--- a/sysdeps/aarch64/dl-tlsdesc.S
+++ b/sysdeps/aarch64/dl-tlsdesc.S
@@ -79,6 +79,29 @@ _dl_tlsdesc_return:
 	cfi_endproc
 	.size	_dl_tlsdesc_return, .-_dl_tlsdesc_return
 
+	/* Same as _dl_tlsdesc_return but with synchronization for
+	   lazy relocation.
+	   Prototype:
+	   _dl_tlsdesc_return_lazy (tlsdesc *);
+	 */
+	.hidden _dl_tlsdesc_return_lazy
+	.global	_dl_tlsdesc_return_lazy
+	.type	_dl_tlsdesc_return_lazy,%function
+	cfi_startproc
+	.align 2
+_dl_tlsdesc_return_lazy:
+	/* The ldar here happens after the load from [x0] at the call site
+	   (that is generated by the compiler as part of the TLS access ABI),
+	   so it reads the same value (this function is the final value of
+	   td->entry) and thus it synchronizes with the release store to
+	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
+	   from [x0,#8] here happens after the initialization of td->arg.  */
+	ldar	xzr, [x0]
+	ldr	x0, [x0, #8]
+	RET
+	cfi_endproc
+	.size	_dl_tlsdesc_return_lazy, .-_dl_tlsdesc_return_lazy
+
 	/* Handler for undefined weak TLS symbols.
 	   Prototype:
 	   _dl_tlsdesc_undefweak (tlsdesc *);
@@ -96,6 +119,13 @@ _dl_tlsdesc_return:
 _dl_tlsdesc_undefweak:
 	str	x1, [sp, #-16]!
 	cfi_adjust_cfa_offset(16)
+	/* The ldar here happens after the load from [x0] at the call site
+	   (that is generated by the compiler as part of the TLS access ABI),
+	   so it reads the same value (this function is the final value of
+	   td->entry) and thus it synchronizes with the release store to
+	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
+	   from [x0,#8] here happens after the initialization of td->arg.  */
+	ldar	xzr, [x0]
 	ldr	x0, [x0, #8]
 	mrs	x1, tpidr_el0
 	sub	x0, x0, x1
@@ -152,6 +182,13 @@ _dl_tlsdesc_dynamic:
 	stp	x3,  x4, [sp, #32+16*1]
 
 	mrs	x4, tpidr_el0
+	/* The ldar here happens after the load from [x0] at the call site
+	   (that is generated by the compiler as part of the TLS access ABI),
+	   so it reads the same value (this function is the final value of
+	   td->entry) and thus it synchronizes with the release store to
+	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
+	   from [x0,#8] here happens after the initialization of td->arg.  */
+	ldar	xzr, [x0]
 	ldr	x1, [x0,#8]
 	ldr	x0, [x4]
 	ldr	x3, [x1,#16]
diff --git a/sysdeps/aarch64/dl-tlsdesc.h b/sysdeps/aarch64/dl-tlsdesc.h
index 7a1285e044..e6c0078eb6 100644
--- a/sysdeps/aarch64/dl-tlsdesc.h
+++ b/sysdeps/aarch64/dl-tlsdesc.h
@@ -46,6 +46,9 @@ extern ptrdiff_t attribute_hidden
 _dl_tlsdesc_return (struct tlsdesc *);
 
 extern ptrdiff_t attribute_hidden
+_dl_tlsdesc_return_lazy (struct tlsdesc *);
+
+extern ptrdiff_t attribute_hidden
 _dl_tlsdesc_undefweak (struct tlsdesc *);
 
 extern ptrdiff_t attribute_hidden
diff --git a/sysdeps/aarch64/tlsdesc.c b/sysdeps/aarch64/tlsdesc.c
index 4821f8c08a..9f3ff9b662 100644
--- a/sysdeps/aarch64/tlsdesc.c
+++ b/sysdeps/aarch64/tlsdesc.c
@@ -25,6 +25,7 @@
 #include <dl-tlsdesc.h>
 #include <dl-unmap-segments.h>
 #include <tlsdeschtab.h>
+#include <atomic.h>
 
 /* The following functions take an entry_check_offset argument.  It's
    computed by the caller as an offset between its entry point and the
@@ -39,11 +40,15 @@
 
 void
 attribute_hidden
-_dl_tlsdesc_resolve_rela_fixup (struct tlsdesc volatile *td,
-				struct link_map *l)
+_dl_tlsdesc_resolve_rela_fixup (struct tlsdesc *td, struct link_map *l)
 {
-  const ElfW(Rela) *reloc = td->arg;
+  const ElfW(Rela) *reloc = atomic_load_relaxed (&td->arg);
 
+  /* After GL(dl_load_lock) is grabbed only one caller can see td->entry in
+     its initial state in _dl_tlsdesc_resolve_early_return_p; other
+     concurrent callers will return and retry calling td->entry.  The updated
+     td->entry synchronizes with the single writer so all read accesses here
+     can use relaxed order.  */
   if (_dl_tlsdesc_resolve_early_return_p
       (td, (void*)(D_PTR (l, l_info[ADDRIDX (DT_TLSDESC_PLT)]) + l->l_addr)))
     return;
@@ -86,8 +91,10 @@ _dl_tlsdesc_resolve_rela_fixup (struct tlsdesc volatile *td,
 
   if (!sym)
     {
-      td->arg = (void*) reloc->r_addend;
-      td->entry = _dl_tlsdesc_undefweak;
+      atomic_store_relaxed (&td->arg, (void *) reloc->r_addend);
+      /* This release store synchronizes with the ldar acquire load
+	 instruction in _dl_tlsdesc_undefweak.  */
+      atomic_store_release (&td->entry, _dl_tlsdesc_undefweak);
     }
   else
     {
@@ -96,16 +103,22 @@ _dl_tlsdesc_resolve_rela_fixup (struct tlsdesc volatile *td,
 #  else
       if (!TRY_STATIC_TLS (l, result))
 	{
-	  td->arg = _dl_make_tlsdesc_dynamic (result, sym->st_value
+	  void *p = _dl_make_tlsdesc_dynamic (result, sym->st_value
 					      + reloc->r_addend);
-	  td->entry = _dl_tlsdesc_dynamic;
+	  atomic_store_relaxed (&td->arg, p);
+	  /* This release store synchronizes with the ldar acquire load
+	     instruction in _dl_tlsdesc_dynamic.  */
+	  atomic_store_release (&td->entry, _dl_tlsdesc_dynamic);
 	}
       else
 #  endif
 	{
-	  td->arg = (void*) (sym->st_value + result->l_tls_offset
+	  void *p = (void*) (sym->st_value + result->l_tls_offset
 			     + reloc->r_addend);
-	  td->entry = _dl_tlsdesc_return;
+	  atomic_store_relaxed (&td->arg, p);
+	  /* This release store synchronizes with the ldar acquire load
+	     instruction in _dl_tlsdesc_return_lazy.  */
+	  atomic_store_release (&td->entry, _dl_tlsdesc_return_lazy);
 	}
     }
 
@@ -120,11 +133,10 @@ _dl_tlsdesc_resolve_rela_fixup (struct tlsdesc volatile *td,
 
 void
 attribute_hidden
-_dl_tlsdesc_resolve_hold_fixup (struct tlsdesc volatile *td,
-				void *caller)
+_dl_tlsdesc_resolve_hold_fixup (struct tlsdesc *td, void *caller)
 {
   /* Maybe we're lucky and can return early.  */
-  if (caller != td->entry)
+  if (caller != atomic_load_relaxed (&td->entry))
     return;
 
   /* Locking here will stop execution until the running resolver runs