about summary refs log tree commit diff
path: root/sysdeps
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps')
-rw-r--r-- sysdeps/aarch64/dl-tlsdesc.S 37
-rw-r--r-- sysdeps/aarch64/dl-tlsdesc.h 3
-rw-r--r-- sysdeps/aarch64/tlsdesc.c 36
3 files changed, 64 insertions, 12 deletions
diff --git a/sysdeps/aarch64/dl-tlsdesc.S b/sysdeps/aarch64/dl-tlsdesc.S
index be9b9b394c..c7adf79baf 100644
--- a/sysdeps/aarch64/dl-tlsdesc.S
+++ b/sysdeps/aarch64/dl-tlsdesc.S
@@ -79,6 +79,29 @@ _dl_tlsdesc_return:
 	cfi_endproc
 	.size	_dl_tlsdesc_return, .-_dl_tlsdesc_return
 
+	/* Same as _dl_tlsdesc_return but with synchronization for
+	   lazy relocation.
+	   Prototype:
+	   _dl_tlsdesc_return_lazy (tlsdesc *);
+	 */
+	.hidden _dl_tlsdesc_return_lazy
+	.global	_dl_tlsdesc_return_lazy
+	.type	_dl_tlsdesc_return_lazy,%function
+	cfi_startproc
+	.align 2
+_dl_tlsdesc_return_lazy:
+	/* The ldar here happens after the load from [x0] at the call site
+	   (that is generated by the compiler as part of the TLS access ABI),
+	   so it reads the same value (this function is the final value of
+	   td->entry) and thus it synchronizes with the release store to
+	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
+	   from [x0,#8] here happens after the initialization of td->arg.  */
+	ldar	xzr, [x0]
+	ldr	x0, [x0, #8]
+	RET
+	cfi_endproc
+	.size	_dl_tlsdesc_return_lazy, .-_dl_tlsdesc_return_lazy
+
 	/* Handler for undefined weak TLS symbols.
 	   Prototype:
 	   _dl_tlsdesc_undefweak (tlsdesc *);
@@ -96,6 +119,13 @@ _dl_tlsdesc_return:
 _dl_tlsdesc_undefweak:
 	str	x1, [sp, #-16]!
 	cfi_adjust_cfa_offset(16)
+	/* The ldar here happens after the load from [x0] at the call site
+	   (that is generated by the compiler as part of the TLS access ABI),
+	   so it reads the same value (this function is the final value of
+	   td->entry) and thus it synchronizes with the release store to
+	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
+	   from [x0,#8] here happens after the initialization of td->arg.  */
+	ldar	xzr, [x0]
 	ldr	x0, [x0, #8]
 	mrs	x1, tpidr_el0
 	sub	x0, x0, x1
@@ -152,6 +182,13 @@ _dl_tlsdesc_dynamic:
 	stp	x3,  x4, [sp, #32+16*1]
 
 	mrs	x4, tpidr_el0
+	/* The ldar here happens after the load from [x0] at the call site
+	   (that is generated by the compiler as part of the TLS access ABI),
+	   so it reads the same value (this function is the final value of
+	   td->entry) and thus it synchronizes with the release store to
+	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
+	   from [x0,#8] here happens after the initialization of td->arg.  */
+	ldar	xzr, [x0]
 	ldr	x1, [x0,#8]
 	ldr	x0, [x4]
 	ldr	x3, [x1,#16]
diff --git a/sysdeps/aarch64/dl-tlsdesc.h b/sysdeps/aarch64/dl-tlsdesc.h
index 7a1285e044..e6c0078eb6 100644
--- a/sysdeps/aarch64/dl-tlsdesc.h
+++ b/sysdeps/aarch64/dl-tlsdesc.h
@@ -46,6 +46,9 @@ extern ptrdiff_t attribute_hidden
 _dl_tlsdesc_return (struct tlsdesc *);
 
 extern ptrdiff_t attribute_hidden
+_dl_tlsdesc_return_lazy (struct tlsdesc *);
+
+extern ptrdiff_t attribute_hidden
 _dl_tlsdesc_undefweak (struct tlsdesc *);
 
 extern ptrdiff_t attribute_hidden
diff --git a/sysdeps/aarch64/tlsdesc.c b/sysdeps/aarch64/tlsdesc.c
index 4821f8c08a..9f3ff9b662 100644
--- a/sysdeps/aarch64/tlsdesc.c
+++ b/sysdeps/aarch64/tlsdesc.c
@@ -25,6 +25,7 @@
 #include <dl-tlsdesc.h>
 #include <dl-unmap-segments.h>
 #include <tlsdeschtab.h>
+#include <atomic.h>
 
 /* The following functions take an entry_check_offset argument.  It's
    computed by the caller as an offset between its entry point and the
@@ -39,11 +40,15 @@
 
 void
 attribute_hidden
-_dl_tlsdesc_resolve_rela_fixup (struct tlsdesc volatile *td,
-				struct link_map *l)
+_dl_tlsdesc_resolve_rela_fixup (struct tlsdesc *td, struct link_map *l)
 {
-  const ElfW(Rela) *reloc = td->arg;
+  const ElfW(Rela) *reloc = atomic_load_relaxed (&td->arg);
 
+  /* After GL(dl_load_lock) is grabbed only one caller can see td->entry in
+     initial state in _dl_tlsdesc_resolve_early_return_p; other concurrent
+     callers will return and retry calling td->entry.  The updated td->entry
+     synchronizes with the single writer so all read accesses here can use
+     relaxed order.  */
   if (_dl_tlsdesc_resolve_early_return_p
       (td, (void*)(D_PTR (l, l_info[ADDRIDX (DT_TLSDESC_PLT)]) + l->l_addr)))
     return;
@@ -86,8 +91,10 @@ _dl_tlsdesc_resolve_rela_fixup (struct tlsdesc volatile *td,
 
   if (!sym)
     {
-      td->arg = (void*) reloc->r_addend;
-      td->entry = _dl_tlsdesc_undefweak;
+      atomic_store_relaxed (&td->arg, (void *) reloc->r_addend);
+      /* This release store synchronizes with the ldar acquire load
+	 instruction in _dl_tlsdesc_undefweak.  */
+      atomic_store_release (&td->entry, _dl_tlsdesc_undefweak);
     }
   else
     {
@@ -96,16 +103,22 @@ _dl_tlsdesc_resolve_rela_fixup (struct tlsdesc volatile *td,
 #  else
       if (!TRY_STATIC_TLS (l, result))
 	{
-	  td->arg = _dl_make_tlsdesc_dynamic (result, sym->st_value
+	  void *p = _dl_make_tlsdesc_dynamic (result, sym->st_value
 					      + reloc->r_addend);
-	  td->entry = _dl_tlsdesc_dynamic;
+	  atomic_store_relaxed (&td->arg, p);
+	  /* This release store synchronizes with the ldar acquire load
+	     instruction in _dl_tlsdesc_dynamic.  */
+	  atomic_store_release (&td->entry, _dl_tlsdesc_dynamic);
 	}
       else
 #  endif
 	{
-	  td->arg = (void*) (sym->st_value + result->l_tls_offset
+	  void *p = (void*) (sym->st_value + result->l_tls_offset
 			     + reloc->r_addend);
-	  td->entry = _dl_tlsdesc_return;
+	  atomic_store_relaxed (&td->arg, p);
+	  /* This release store synchronizes with the ldar acquire load
+	     instruction in _dl_tlsdesc_return_lazy.  */
+	  atomic_store_release (&td->entry, _dl_tlsdesc_return_lazy);
 	}
     }
 
@@ -120,11 +133,10 @@ _dl_tlsdesc_resolve_rela_fixup (struct tlsdesc volatile *td,
 
 void
 attribute_hidden
-_dl_tlsdesc_resolve_hold_fixup (struct tlsdesc volatile *td,
-				void *caller)
+_dl_tlsdesc_resolve_hold_fixup (struct tlsdesc *td, void *caller)
 {
   /* Maybe we're lucky and can return early.  */
-  if (caller != td->entry)
+  if (caller != atomic_load_relaxed (&td->entry))
     return;
 
   /* Locking here will stop execution until the running resolver runs