summary refs log tree commit diff
path: root/sysdeps/aarch64/dl-machine.h
diff options
context:
space:
mode:
authorSzabolcs Nagy <szabolcs.nagy@arm.com>2017-09-27 16:55:14 +0100
committerSzabolcs Nagy <szabolcs.nagy@arm.com>2017-11-03 14:41:35 +0000
commitb7cf203b5c17dd6d9878537d41e0c7cc3d270a67 (patch)
treed6b6f9502a9335229a372decf16f4d050897ceb0 /sysdeps/aarch64/dl-machine.h
parent0cc5b022f817eeaa81735ae58717b5dabae92941 (diff)
downloadglibc-b7cf203b5c17dd6d9878537d41e0c7cc3d270a67.tar.gz
glibc-b7cf203b5c17dd6d9878537d41e0c7cc3d270a67.tar.xz
glibc-b7cf203b5c17dd6d9878537d41e0c7cc3d270a67.zip
aarch64: Disable lazy symbol binding of TLSDESC
Always do TLS descriptor initialization at load time during relocation
processing to avoid barriers at every TLS access. In non-dlopened shared
libraries the overhead of tls access vs static global access is > 3x
bigger when lazy initialization is used (_dl_tlsdesc_return_lazy)
compared to bind-now (_dl_tlsdesc_return) so the barriers dominate tls
access performance.

TLSDESC relocs are in DT_JMPREL which are processed at load time using
elf_machine_lazy_rel which is only supposed to do lightweight
initialization using the DT_TLSDESC_PLT trampoline (the trampoline code
jumps to the entry point in DT_TLSDESC_GOT which does the lazy tlsdesc
initialization at runtime).  This patch changes elf_machine_lazy_rel
in aarch64 to do the symbol binding and initialization as if DF_BIND_NOW
was set, so the non-lazy code path of elf/do-rel.h was replicated.

The static linker could be changed to emit TLSDESC relocs in DT_REL*,
which are processed non-lazily, but the goal of this patch is to always
guarantee bind-now semantics, even if the binary was produced with an
old linker, so the barriers can be dropped in tls descriptor functions.

After this change the synchronizing ldar instructions can be dropped
as well as the lazy initialization machinery including the DT_TLSDESC_GOT
setup.

I believe this should be done on all targets, including ones where no
barrier is needed for lazy initialization.  There is very little gain in
optimizing for large number of symbolic tlsdesc relocations which is an
extremely uncommon case.  And currently the tlsdesc entries are only
readonly protected with -z now and some hardennings against writable
JUMPSLOT relocs don't work for TLSDESC so they are a security hazard.
(But to fix that the static linker has to be changed.)

	* sysdeps/aarch64/dl-machine.h (elf_machine_lazy_rel): Do symbol
	binding and initialization non-lazily for R_AARCH64_TLSDESC.
Diffstat (limited to 'sysdeps/aarch64/dl-machine.h')
-rw-r--r--sysdeps/aarch64/dl-machine.h19
1 files changed, 14 insertions, 5 deletions
diff --git a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h
index e7656129f5..837e28134b 100644
--- a/sysdeps/aarch64/dl-machine.h
+++ b/sysdeps/aarch64/dl-machine.h
@@ -399,12 +399,21 @@ elf_machine_lazy_rel (struct link_map *map,
     }
   else if (__builtin_expect (r_type == AARCH64_R(TLSDESC), 1))
     {
-      struct tlsdesc volatile *td =
-	(struct tlsdesc volatile *)reloc_addr;
+      const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
+      const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
+      const ElfW (Sym) *sym = &symtab[symndx];
+      const struct r_found_version *version = NULL;
 
-      td->arg = (void*)reloc;
-      td->entry = (void*)(D_PTR (map, l_info[ADDRIDX (DT_TLSDESC_PLT)])
-			  + map->l_addr);
+      if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
+	{
+	  const ElfW (Half) *vernum =
+	    (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
+	  version = &map->l_versions[vernum[symndx] & 0x7fff];
+	}
+
+      /* Always initialize TLS descriptors completely, because lazy
+	 initialization requires synchronization at every TLS access.  */
+      elf_machine_rela (map, reloc, sym, version, reloc_addr, skip_ifunc);
     }
   else if (__glibc_unlikely (r_type == AARCH64_R(IRELATIVE)))
     {