author     Marcus Shawcroft <marcus.shawcroft@arm.com>  2014-02-10 15:36:16 +0000
committer  Marcus Shawcroft <marcus.shawcroft@arm.com>  2014-02-11 11:36:00 +0000
commit     75eff3fe90f96783f31f58fa84af1b77e57d1ae4
tree       28a8fda330f53de0eccd291f9c5e52a2056a983a
parent     d35f1e80730ce5afbd4694f2d2e537e4af32a797
Relocate AArch64 from ports to libc.
This patch moves the AArch64 port to the main sysdeps hierarchy.  The
move is essentially:

  git mv ports/sysdeps/aarch64 sysdeps/aarch64
  git mv ports/sysdeps/unix/sysv/linux/aarch64 sysdeps/unix/sysv/linux/aarch64

The README has been updated, and ChangeLog.aarch64 has been updated
along the lines of the ARM move.  The AArch64 build has been tested
to confirm that there were no changes in objdump -dr output or in
the shared objects.
Diffstat (limited to 'sysdeps/aarch64/dl-tlsdesc.S')
 sysdeps/aarch64/dl-tlsdesc.S | 329 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 329 insertions(+), 0 deletions(-)
diff --git a/sysdeps/aarch64/dl-tlsdesc.S b/sysdeps/aarch64/dl-tlsdesc.S
new file mode 100644
index 0000000000..ded5471bea
--- /dev/null
+++ b/sysdeps/aarch64/dl-tlsdesc.S
@@ -0,0 +1,329 @@
+/* Thread-local storage handling in the ELF dynamic linker.
+   AArch64 version.
+   Copyright (C) 2011-2014 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <tls.h>
+#include "tlsdesc.h"
+
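+/* The TLSDESC entry points below must return the TP offset in x0 and
+   preserve every other register.  The C functions called on the slow
+   paths follow the standard AAPCS64, which lets a callee clobber
+   q0-q7 and q16-q31 outright and the top halves of q8-q15, so before
+   any such call all thirty-two q registers are saved.  Each q
+   register is 16 bytes, hence 32 bytes per stored pair.  */
+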
+#define NSAVEDQREGPAIRS	16
+#define SAVE_Q_REGISTERS				\
+	stp	q0, q1,	[sp, #-32*NSAVEDQREGPAIRS]!;	\
+	cfi_adjust_cfa_offset (32*NSAVEDQREGPAIRS);	\
+	stp	 q2,  q3, [sp, #32*1];			\
+	stp	 q4,  q5, [sp, #32*2];			\
+	stp	 q6,  q7, [sp, #32*3];			\
+	stp	 q8,  q9, [sp, #32*4];			\
+	stp	q10, q11, [sp, #32*5];			\
+	stp	q12, q13, [sp, #32*6];			\
+	stp	q14, q15, [sp, #32*7];			\
+	stp	q16, q17, [sp, #32*8];			\
+	stp	q18, q19, [sp, #32*9];			\
+	stp	q20, q21, [sp, #32*10];			\
+	stp	q22, q23, [sp, #32*11];			\
+	stp	q24, q25, [sp, #32*12];			\
+	stp	q26, q27, [sp, #32*13];			\
+	stp	q28, q29, [sp, #32*14];			\
+	stp	q30, q31, [sp, #32*15];
+
+#define RESTORE_Q_REGISTERS				\
+	ldp	 q2,  q3, [sp, #32*1];			\
+	ldp	 q4,  q5, [sp, #32*2];			\
+	ldp	 q6,  q7, [sp, #32*3];			\
+	ldp	 q8,  q9, [sp, #32*4];			\
+	ldp	q10, q11, [sp, #32*5];			\
+	ldp	q12, q13, [sp, #32*6];			\
+	ldp	q14, q15, [sp, #32*7];			\
+	ldp	q16, q17, [sp, #32*8];			\
+	ldp	q18, q19, [sp, #32*9];			\
+	ldp	q20, q21, [sp, #32*10];			\
+	ldp	q22, q23, [sp, #32*11];			\
+	ldp	q24, q25, [sp, #32*12];			\
+	ldp	q26, q27, [sp, #32*13];			\
+	ldp	q28, q29, [sp, #32*14];			\
+	ldp	q30, q31, [sp, #32*15];			\
+	ldp	 q0,  q1, [sp], #32*NSAVEDQREGPAIRS;	\
+	cfi_adjust_cfa_offset (-32*NSAVEDQREGPAIRS);
+
+	.text
+
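+	/* For reference, a compiler-generated TLS descriptor call
+	   site looks roughly like the sketch below (small code model;
+	   "var" stands for any TLS variable).  The "blr" lands on one
+	   of the entry points defined in this file:
+
+		adrp	x0, :tlsdesc:var
+		ldr	x1, [x0, #:tlsdesc_lo12:var]
+		add	x0, x0, :tlsdesc_lo12:var
+		.tlsdesccall var
+		blr	x1		// TP offset of var in x0
+		mrs	x1, tpidr_el0
+		add	x0, x1, x0	// runtime address of var
+	 */
+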
+	/* Compute the thread pointer offset for symbols in the static
+	   TLS block.  The offset is the same for all threads.
+	   Prototype:
+	   _dl_tlsdesc_return (tlsdesc *);
+	 */
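+	/* In C terms, roughly (a sketch; "struct tlsdesc" is declared
+	   in tlsdesc.h with the entry point in the first word and the
+	   argument in the second):
+
+	   ptrdiff_t
+	   _dl_tlsdesc_return (struct tlsdesc *tdp)
+	   {
+	     return (ptrdiff_t) tdp->arg;
+	   }
+	 */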
+	.hidden _dl_tlsdesc_return
+	.global	_dl_tlsdesc_return
+	.type	_dl_tlsdesc_return,%function
+	cfi_startproc
+	.align 2
+_dl_tlsdesc_return:
+	ldr	x0, [x0, #8]	/* Return tdp->arg, the precomputed TP offset.  */
+	RET
+	cfi_endproc
+	.size	_dl_tlsdesc_return, .-_dl_tlsdesc_return
+
+	/* Handler for undefined weak TLS symbols.
+	   Prototype:
+	   _dl_tlsdesc_undefweak (tlsdesc *);
+
+	   The second word of the descriptor contains the addend.
+	   Return the addend minus the thread pointer. This ensures
+	   that when the caller adds on the thread pointer it gets back
+	   the addend.  */
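+
+	/* Roughly, in C (a sketch; __thread_pointer stands for
+	   tpidr_el0, as in the _dl_tlsdesc_dynamic comment below):
+
+	   ptrdiff_t
+	   _dl_tlsdesc_undefweak (struct tlsdesc *tdp)
+	   {
+	     return (ptrdiff_t) tdp->arg - __thread_pointer;
+	   }
+	 */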
+
+	.hidden _dl_tlsdesc_undefweak
+	.global	_dl_tlsdesc_undefweak
+	.type	_dl_tlsdesc_undefweak,%function
+	cfi_startproc
+	.align  2
+_dl_tlsdesc_undefweak:
+	str	x1, [sp, #-16]!
+	cfi_adjust_cfa_offset (16)
+	ldr	x0, [x0, #8]		/* x0 = tdp->arg, the addend.  */
+	mrs	x1, tpidr_el0
+	sub	x0, x0, x1		/* Return addend - thread pointer.  */
+	ldr	x1, [sp], #16
+	cfi_adjust_cfa_offset (-16)
+	RET
+	cfi_endproc
+	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
+
+#ifdef SHARED
+	/* Handler for dynamic TLS symbols.
+	   Prototype:
+	   _dl_tlsdesc_dynamic (tlsdesc *);
+
+	   The second word of the descriptor points to a
+	   tlsdesc_dynamic_arg structure.
+
+	   Returns the offset between the thread pointer and the
+	   object referenced by the argument.
+
+	   ptrdiff_t
+	   __attribute__ ((__regparm__ (1)))
+	   _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
+	   {
+	     struct tlsdesc_dynamic_arg *td = tdp->arg;
+	     dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET);
+	     if (__builtin_expect (td->gen_count <= dtv[0].counter
+		&& (dtv[td->tlsinfo.ti_module].pointer.val
+		    != TLS_DTV_UNALLOCATED),
+		1))
+	       return dtv[td->tlsinfo.ti_module].pointer.val
+		+ td->tlsinfo.ti_offset
+		- __thread_pointer;
+
+	     return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
+	   }
+	 */
+
+	.hidden _dl_tlsdesc_dynamic
+	.global	_dl_tlsdesc_dynamic
+	.type	_dl_tlsdesc_dynamic,%function
+	cfi_startproc
+	.align 2
+_dl_tlsdesc_dynamic:
+# define NSAVEXREGPAIRS 2
+	stp	x29, x30, [sp,#-(32+16*NSAVEXREGPAIRS)]!
+	cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
+	mov	x29, sp
+
+	/* Save just enough registers to support the fast path; if we
+	   fall into the slow path, we will save additional registers.  */
+
+	stp	x1,  x2, [sp, #32+16*0]
+	stp	x3,  x4, [sp, #32+16*1]
+
+	mrs	x4, tpidr_el0		/* x4 = thread pointer.  */
+	ldr	x1, [x0, #8]		/* x1 = td = tdp->arg.  */
+	ldr	x0, [x4]		/* x0 = dtv.  */
+	ldr	x3, [x1, #16]		/* x3 = td->gen_count.  */
+	ldr	x2, [x0]		/* x2 = dtv[0].counter.  */
+	cmp	x3, x2
+	b.hi	2f			/* Descriptor newer than the DTV: slow path.  */
+	ldr	x2, [x1]		/* x2 = td->tlsinfo.ti_module.  */
+	add	x0, x0, x2, lsl #4	/* x0 = &dtv[ti_module]; entries are 16 bytes.  */
+	ldr	x0, [x0]		/* x0 = dtv[ti_module].pointer.val.  */
+	cmn	x0, #0x1		/* TLS_DTV_UNALLOCATED is (void *) -1.  */
+	b.eq	2f
+	ldr	x1, [x1, #8]		/* x1 = td->tlsinfo.ti_offset.  */
+	add	x0, x0, x1
+	sub	x0, x0, x4		/* Return the TP-relative offset.  */
+1:
+	ldp	 x1,  x2, [sp, #32+16*0]
+	ldp	 x3,  x4, [sp, #32+16*1]
+
+	ldp	x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
+	cfi_adjust_cfa_offset (-(32+16*NSAVEXREGPAIRS))
+# undef NSAVEXREGPAIRS
+	RET
+2:
+	/* This is the slow path.  We need to call __tls_get_addr, which
+	   means we must save and restore all the registers that the
+	   callee may clobber.  */
+
+	/* Save the remaining caller-saved registers.  */
+# define NSAVEXREGPAIRS 7
+	stp	 x5,  x6, [sp, #-16*NSAVEXREGPAIRS]!
+	cfi_adjust_cfa_offset (16*NSAVEXREGPAIRS)
+	stp	 x7,  x8, [sp, #16*1]
+	stp	 x9, x10, [sp, #16*2]
+	stp	x11, x12, [sp, #16*3]
+	stp	x13, x14, [sp, #16*4]
+	stp	x15, x16, [sp, #16*5]
+	stp	x17, x18, [sp, #16*6]
+
+	SAVE_Q_REGISTERS
+
+	mov	x0, x1			/* x0 = &td->tlsinfo (td's first member).  */
+	bl	__tls_get_addr
+
+	mrs	x1, tpidr_el0
+	sub	x0, x0, x1		/* Convert the address to a TP offset.  */
+
+	RESTORE_Q_REGISTERS
+
+	ldp	 x7,  x8, [sp, #16*1]
+	ldp	 x9, x10, [sp, #16*2]
+	ldp	x11, x12, [sp, #16*3]
+	ldp	x13, x14, [sp, #16*4]
+	ldp	x15, x16, [sp, #16*5]
+	ldp	x17, x18, [sp, #16*6]
+	ldp	 x5,  x6, [sp], #16*NSAVEXREGPAIRS
+	cfi_adjust_cfa_offset (-16*NSAVEXREGPAIRS)
+	b	1b
+	cfi_endproc
+	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
+# undef NSAVEXREGPAIRS
+#endif
+
+	/* This function is a wrapper for a lazy resolver for TLSDESC
+	   RELA relocations.
+	   When the actual resolver returns, it will have adjusted the
+	   TLS descriptor such that we can tail-call it to return the
+	   TP offset of the symbol.  */
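+
+	/* In C terms the wrapper behaves roughly like this sketch
+	   (_dl_tlsdesc_resolve_rela_fixup is defined in tlsdesc.c; the
+	   link_map argument is recovered from the GOT, as noted at the
+	   call below):
+
+	   ptrdiff_t
+	   _dl_tlsdesc_resolve_rela (struct tlsdesc *tdp)
+	   {
+	     _dl_tlsdesc_resolve_rela_fixup (tdp, link_map);
+	     return tdp->entry (tdp);
+	   }
+	 */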
+
+	.hidden _dl_tlsdesc_resolve_rela
+	.global	_dl_tlsdesc_resolve_rela
+	.type	_dl_tlsdesc_resolve_rela,%function
+	cfi_startproc
+	.align 2
+_dl_tlsdesc_resolve_rela:
+#define	NSAVEXREGPAIRS 9
+	stp	x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]!
+	cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
+	mov	x29, sp
+	stp	 x1,  x4, [sp, #32+16*0]
+	stp	 x5,  x6, [sp, #32+16*1]
+	stp	 x7,  x8, [sp, #32+16*2]
+	stp	 x9, x10, [sp, #32+16*3]
+	stp	x11, x12, [sp, #32+16*4]
+	stp	x13, x14, [sp, #32+16*5]
+	stp	x15, x16, [sp, #32+16*6]
+	stp	x17, x18, [sp, #32+16*7]
+	str	x0,       [sp, #32+16*8]
+
+	SAVE_Q_REGISTERS
+
+	/* The linker-built PLT stub that entered this resolver pushed
+	   x2 and x3 and left x3 pointing at the GOT; GOT[1] holds the
+	   link_map of this object.  */
+	ldr	x1, [x3, #8]
+	bl	_dl_tlsdesc_resolve_rela_fixup
+
+	RESTORE_Q_REGISTERS
+
+	ldr	x0, [sp, #32+16*8]
+	ldr	x1, [x0]
+	blr	x1
+
+	ldp	 x1,  x4, [sp, #32+16*0]
+	ldp	 x5,  x6, [sp, #32+16*1]
+	ldp	 x7,  x8, [sp, #32+16*2]
+	ldp	 x9, x10, [sp, #32+16*3]
+	ldp	x11, x12, [sp, #32+16*4]
+	ldp	x13, x14, [sp, #32+16*5]
+	ldp	x15, x16, [sp, #32+16*6]
+	ldp	x17, x18, [sp, #32+16*7]
+	ldp	x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
+	cfi_adjust_cfa_offset (-(32+16*NSAVEXREGPAIRS))
+	/* Pop x2 and x3, which the PLT stub pushed before branching here.  */
+	ldp	x2, x3, [sp], #16
+	cfi_adjust_cfa_offset (-16)
+	RET
+#undef NSAVEXREGPAIRS
+	cfi_endproc
+	.size	_dl_tlsdesc_resolve_rela, .-_dl_tlsdesc_resolve_rela
+
+	/* This function is a placeholder for lazy resolving of TLS
+	   relocations.  Once some thread starts resolving a TLS
+	   relocation, it sets up the TLS descriptor to use this
+	   resolver, such that other threads that would attempt to
+	   resolve it concurrently may skip the call to the original
+	   lazy resolver and go straight to a condition wait.
+
+	   When the actual resolver returns, it will have adjusted the
+	   TLS descriptor such that we can tail-call it to return the
+	   TP offset of the symbol.  */
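+
+	/* Roughly, in C (a sketch; _dl_tlsdesc_resolve_hold_fixup is
+	   defined in tlsdesc.c and blocks until the descriptor has
+	   been resolved, "caller_pc" being the address of this entry
+	   point):
+
+	   ptrdiff_t
+	   _dl_tlsdesc_resolve_hold (struct tlsdesc *tdp)
+	   {
+	     _dl_tlsdesc_resolve_hold_fixup (tdp, caller_pc);
+	     return tdp->entry (tdp);
+	   }
+	 */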
+
+	.hidden _dl_tlsdesc_resolve_hold
+	.global	_dl_tlsdesc_resolve_hold
+	.type	_dl_tlsdesc_resolve_hold,%function
+	cfi_startproc
+	.align 2
+_dl_tlsdesc_resolve_hold:
+#define	NSAVEXREGPAIRS 10
+1:
+	stp	x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]!
+	cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
+	mov	x29, sp
+	stp	 x1,  x2, [sp, #32+16*0]
+	stp	 x3,  x4, [sp, #32+16*1]
+	stp	 x5,  x6, [sp, #32+16*2]
+	stp	 x7,  x8, [sp, #32+16*3]
+	stp	 x9, x10, [sp, #32+16*4]
+	stp	x11, x12, [sp, #32+16*5]
+	stp	x13, x14, [sp, #32+16*6]
+	stp	x15, x16, [sp, #32+16*7]
+	stp	x17, x18, [sp, #32+16*8]
+	str	x0,       [sp, #32+16*9]
+
+	SAVE_Q_REGISTERS
+
+	adr	x1, 1b			/* Pass this entry point's address as the caller.  */
+	bl	_dl_tlsdesc_resolve_hold_fixup
+
+	RESTORE_Q_REGISTERS
+
+	ldr	x0, [sp, #32+16*9]
+	ldr	x1, [x0]
+	blr	x1
+
+	ldp	 x1,  x2, [sp, #32+16*0]
+	ldp	 x3,  x4, [sp, #32+16*1]
+	ldp	 x5,  x6, [sp, #32+16*2]
+	ldp	 x7,  x8, [sp, #32+16*3]
+	ldp	 x9, x10, [sp, #32+16*4]
+	ldp	x11, x12, [sp, #32+16*5]
+	ldp	x13, x14, [sp, #32+16*6]
+	ldp	x15, x16, [sp, #32+16*7]
+	ldp	x17, x18, [sp, #32+16*8]
+	ldp	x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
+	cfi_adjust_cfa_offset (-(32+16*NSAVEXREGPAIRS))
+	RET
+	cfi_endproc
+	.size	_dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold
+#undef NSAVEXREGPAIRS