about summary refs log tree commit diff
path: root/REORG.TODO/sysdeps/aarch64/dl-tlsdesc.S
diff options
context:
space:
mode:
Diffstat (limited to 'REORG.TODO/sysdeps/aarch64/dl-tlsdesc.S')
-rw-r--r--REORG.TODO/sysdeps/aarch64/dl-tlsdesc.S373
1 files changed, 373 insertions, 0 deletions
diff --git a/REORG.TODO/sysdeps/aarch64/dl-tlsdesc.S b/REORG.TODO/sysdeps/aarch64/dl-tlsdesc.S
new file mode 100644
index 0000000000..94679a04ca
--- /dev/null
+++ b/REORG.TODO/sysdeps/aarch64/dl-tlsdesc.S
@@ -0,0 +1,373 @@
+/* Thread-local storage handling in the ELF dynamic linker.
+   AArch64 version.
+   Copyright (C) 2011-2017 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <tls.h>
+#include "tlsdesc.h"
+
+#define NSAVEDQREGPAIRS	16
+#define SAVE_Q_REGISTERS				\
+	stp	q0, q1,	[sp, #-32*NSAVEDQREGPAIRS]!;	\
+	cfi_adjust_cfa_offset (32*NSAVEDQREGPAIRS);	\
+	stp	 q2,  q3, [sp, #32*1];			\
+	stp	 q4,  q5, [sp, #32*2];			\
+	stp	 q6,  q7, [sp, #32*3];			\
+	stp	 q8,  q9, [sp, #32*4];			\
+	stp	q10, q11, [sp, #32*5];			\
+	stp	q12, q13, [sp, #32*6];			\
+	stp	q14, q15, [sp, #32*7];			\
+	stp	q16, q17, [sp, #32*8];			\
+	stp	q18, q19, [sp, #32*9];			\
+	stp	q20, q21, [sp, #32*10];			\
+	stp	q22, q23, [sp, #32*11];			\
+	stp	q24, q25, [sp, #32*12];			\
+	stp	q26, q27, [sp, #32*13];			\
+	stp	q28, q29, [sp, #32*14];			\
+	stp	q30, q31, [sp, #32*15];
+
+#define RESTORE_Q_REGISTERS				\
+	ldp	 q2,  q3, [sp, #32*1];			\
+	ldp	 q4,  q5, [sp, #32*2];			\
+	ldp	 q6,  q7, [sp, #32*3];			\
+	ldp	 q8,  q9, [sp, #32*4];			\
+	ldp	q10, q11, [sp, #32*5];			\
+	ldp	q12, q13, [sp, #32*6];			\
+	ldp	q14, q15, [sp, #32*7];			\
+	ldp	q16, q17, [sp, #32*8];			\
+	ldp	q18, q19, [sp, #32*9];			\
+	ldp	q20, q21, [sp, #32*10];			\
+	ldp	q22, q23, [sp, #32*11];			\
+	ldp	q24, q25, [sp, #32*12];			\
+	ldp	q26, q27, [sp, #32*13];			\
+	ldp	q28, q29, [sp, #32*14];			\
+	ldp	q30, q31, [sp, #32*15];			\
+	ldp	 q0,  q1, [sp], #32*NSAVEDQREGPAIRS;	\
+	cfi_adjust_cfa_offset (-32*NSAVEDQREGPAIRS);
+
+	.text
+
+	/* Compute the thread pointer offset for symbols in the static
+	   TLS block. The offset is the same for all threads.
+	   Prototype:
+	   _dl_tlsdesc_return (tlsdesc *) ;
+	 */
+	.hidden _dl_tlsdesc_return
+	.global	_dl_tlsdesc_return
+	.type	_dl_tlsdesc_return,%function
+	cfi_startproc
+	.align 2
+_dl_tlsdesc_return:
+	DELOUSE (0)
+	ldr	PTR_REG (0), [x0, #PTR_SIZE]
+	RET
+	cfi_endproc
+	.size	_dl_tlsdesc_return, .-_dl_tlsdesc_return
+
+	/* Same as _dl_tlsdesc_return but with synchronization for
+	   lazy relocation.
+	   Prototype:
+	   _dl_tlsdesc_return_lazy (tlsdesc *) ;
+	 */
+	.hidden _dl_tlsdesc_return_lazy
+	.global	_dl_tlsdesc_return_lazy
+	.type	_dl_tlsdesc_return_lazy,%function
+	cfi_startproc
+	.align 2
+_dl_tlsdesc_return_lazy:
+	/* The ldar here happens after the load from [x0] at the call site
+	   (that is generated by the compiler as part of the TLS access ABI),
+	   so it reads the same value (this function is the final value of
+	   td->entry) and thus it synchronizes with the release store to
+	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
+	   from [x0,#PTR_SIZE] here happens after the initialization of td->arg. */
+	DELOUSE (0)
+	ldar	PTR_REG (zr), [x0]
+	ldr	PTR_REG (0), [x0, #PTR_SIZE]
+	RET
+	cfi_endproc
+	.size	_dl_tlsdesc_return_lazy, .-_dl_tlsdesc_return_lazy
+
+	/* Handler for undefined weak TLS symbols.
+	   Prototype:
+	   _dl_tlsdesc_undefweak (tlsdesc *);
+
+	   The second word of the descriptor contains the addend.
+	   Return the addend minus the thread pointer. This ensures
+	   that when the caller adds on the thread pointer it gets back
+	   the addend.  */
+
+	.hidden _dl_tlsdesc_undefweak
+	.global	_dl_tlsdesc_undefweak
+	.type	_dl_tlsdesc_undefweak,%function
+	cfi_startproc
+	.align  2
+_dl_tlsdesc_undefweak:
+	str	x1, [sp, #-16]!
+	cfi_adjust_cfa_offset (16)
+	/* The ldar here happens after the load from [x0] at the call site
+	   (that is generated by the compiler as part of the TLS access ABI),
+	   so it reads the same value (this function is the final value of
+	   td->entry) and thus it synchronizes with the release store to
+	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
+	   from [x0,#8] here happens after the initialization of td->arg.  */
+	DELOUSE (0)
+	ldar	PTR_REG (zr), [x0]
+	ldr	PTR_REG (0), [x0, #PTR_SIZE]
+	mrs	x1, tpidr_el0
+	sub	PTR_REG (0), PTR_REG (0), PTR_REG (1)
+	ldr	x1, [sp], #16
+	cfi_adjust_cfa_offset (-16)
+	RET
+	cfi_endproc
+	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
+
+#ifdef SHARED
+	/* Handler for dynamic TLS symbols.
+	   Prototype:
+	   _dl_tlsdesc_dynamic (tlsdesc *) ;
+
+	   The second word of the descriptor points to a
+	   tlsdesc_dynamic_arg structure.
+
+	   Returns the offset between the thread pointer and the
+	   object referenced by the argument.
+
+	   ptrdiff_t
+	   __attribute__ ((__regparm__ (1)))
+	   _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
+	   {
+	     struct tlsdesc_dynamic_arg *td = tdp->arg;
+	     dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV);
+	     if (__builtin_expect (td->gen_count <= dtv[0].counter
+		&& (dtv[td->tlsinfo.ti_module].pointer.val
+		    != TLS_DTV_UNALLOCATED),
+		1))
+	       return dtv[td->tlsinfo.ti_module].pointer.val
+		+ td->tlsinfo.ti_offset
+		- __thread_pointer;
+
+	     return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
+	   }
+	 */
+
+	.hidden _dl_tlsdesc_dynamic
+	.global	_dl_tlsdesc_dynamic
+	.type	_dl_tlsdesc_dynamic,%function
+	cfi_startproc
+	.align 2
+_dl_tlsdesc_dynamic:
+# define NSAVEXREGPAIRS 2
+	stp	x29, x30, [sp,#-(32+16*NSAVEXREGPAIRS)]!
+	cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
+	mov	x29, sp
+	DELOUSE (0)
+
+	/* Save just enough registers to support fast path, if we fall
+	   into slow path we will save additional registers.  */
+
+	stp	x1,  x2, [sp, #32+16*0]
+	stp	x3,  x4, [sp, #32+16*1]
+
+	mrs	x4, tpidr_el0
+	/* The ldar here happens after the load from [x0] at the call site
+	   (that is generated by the compiler as part of the TLS access ABI),
+	   so it reads the same value (this function is the final value of
+	   td->entry) and thus it synchronizes with the release store to
+	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
+	   from [x0,#PTR_SIZE] here happens after the initialization of td->arg.  */
+	ldar	PTR_REG (zr), [x0]
+	ldr	PTR_REG (1), [x0,#TLSDESC_ARG]
+	ldr	PTR_REG (0), [x4,#TCBHEAD_DTV]
+	ldr	PTR_REG (3), [x1,#TLSDESC_GEN_COUNT]
+	ldr	PTR_REG (2), [x0,#DTV_COUNTER]
+	cmp	PTR_REG (3), PTR_REG (2)
+	b.hi	2f
+	ldr	PTR_REG (2), [x1,#TLSDESC_MODID]
+	add	PTR_REG (0), PTR_REG (0), PTR_REG (2), lsl #(PTR_LOG_SIZE + 1)
+	ldr	PTR_REG (0), [x0] /* Load val member of DTV entry.  */
+	cmp	x0, #TLS_DTV_UNALLOCATED
+	b.eq	2f
+	ldr	PTR_REG (1), [x1,#TLSDESC_MODOFF]
+	add	PTR_REG (0), PTR_REG (0), PTR_REG (1)
+	sub	PTR_REG (0), PTR_REG (0), PTR_REG (4)
+1:
+	ldp	 x1,  x2, [sp, #32+16*0]
+	ldp	 x3,  x4, [sp, #32+16*1]
+
+	ldp	x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
+	cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
+# undef NSAVEXREGPAIRS
+	RET
+2:
+	/* This is the slow path. We need to call __tls_get_addr() which
+	   means we need to save and restore all the register that the
+	   callee will trash.  */
+
+	/* Save the remaining registers that we must treat as caller save.  */
+# define NSAVEXREGPAIRS 7
+	stp	 x5,  x6, [sp, #-16*NSAVEXREGPAIRS]!
+	cfi_adjust_cfa_offset (16*NSAVEXREGPAIRS)
+	stp	 x7,  x8, [sp, #16*1]
+	stp	 x9, x10, [sp, #16*2]
+	stp	x11, x12, [sp, #16*3]
+	stp	x13, x14, [sp, #16*4]
+	stp	x15, x16, [sp, #16*5]
+	stp	x17, x18, [sp, #16*6]
+
+	SAVE_Q_REGISTERS
+
+	mov	x0, x1
+	bl	__tls_get_addr
+
+	mrs	x1, tpidr_el0
+	sub	PTR_REG (0), PTR_REG (0), PTR_REG (1)
+
+	RESTORE_Q_REGISTERS
+
+	ldp	 x7,  x8, [sp, #16*1]
+	ldp	 x9, x10, [sp, #16*2]
+	ldp	x11, x12, [sp, #16*3]
+	ldp	x13, x14, [sp, #16*4]
+	ldp	x15, x16, [sp, #16*5]
+	ldp	x17, x18, [sp, #16*6]
+	ldp	 x5,  x6, [sp], #16*NSAVEXREGPAIRS
+	cfi_adjust_cfa_offset (-16*NSAVEXREGPAIRS)
+	b	1b
+	cfi_endproc
+	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
+# undef NSAVEXREGPAIRS
+#endif
+
+	/* This function is a wrapper for a lazy resolver for TLS_DESC
+	   RELA relocations.
+	   When the actual resolver returns, it will have adjusted the
+	   TLS descriptor such that we can tail-call it for it to return
+	   the TP offset of the symbol.  */
+
+	.hidden _dl_tlsdesc_resolve_rela
+	.global	_dl_tlsdesc_resolve_rela
+	.type	_dl_tlsdesc_resolve_rela,%function
+	cfi_startproc
+	.align 2
+_dl_tlsdesc_resolve_rela:
+#define	NSAVEXREGPAIRS 9
+	stp	x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]!
+	cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
+	mov	x29, sp
+	stp	 x1,  x4, [sp, #32+16*0]
+	stp	 x5,  x6, [sp, #32+16*1]
+	stp	 x7,  x8, [sp, #32+16*2]
+	stp	 x9, x10, [sp, #32+16*3]
+	stp	x11, x12, [sp, #32+16*4]
+	stp	x13, x14, [sp, #32+16*5]
+	stp	x15, x16, [sp, #32+16*6]
+	stp	x17, x18, [sp, #32+16*7]
+	str	x0,       [sp, #32+16*8]
+
+	SAVE_Q_REGISTERS
+
+	DELOUSE (3)
+	ldr	PTR_REG (1), [x3, #PTR_SIZE]
+	bl	_dl_tlsdesc_resolve_rela_fixup
+
+	RESTORE_Q_REGISTERS
+
+	ldr	x0, [sp, #32+16*8]
+	DELOUSE (0)
+	ldr	PTR_REG (1), [x0]
+	blr	x1
+
+	ldp	 x1,  x4, [sp, #32+16*0]
+	ldp	 x5,  x6, [sp, #32+16*1]
+	ldp	 x7,  x8, [sp, #32+16*2]
+	ldp	 x9, x10, [sp, #32+16*3]
+	ldp	x11, x12, [sp, #32+16*4]
+	ldp	x13, x14, [sp, #32+16*5]
+	ldp	x15, x16, [sp, #32+16*6]
+	ldp	x17, x18, [sp, #32+16*7]
+	ldp	x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
+	cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
+	ldp	x2, x3, [sp], #16
+	cfi_adjust_cfa_offset (-16)
+	RET
+#undef NSAVEXREGPAIRS
+	cfi_endproc
+	.size	_dl_tlsdesc_resolve_rela, .-_dl_tlsdesc_resolve_rela
+
+	/* This function is a placeholder for lazy resolving of TLS
+	relocations.  Once some thread starts resolving a TLS
+	relocation, it sets up the TLS descriptor to use this
+	resolver, such that other threads that would attempt to
+	resolve it concurrently may skip the call to the original lazy
+	resolver and go straight to a condition wait.
+
+	When the actual resolver returns, it will have adjusted the
+	TLS descriptor such that we can tail-call it for it to return
+	the TP offset of the symbol.  */
+
+	.hidden _dl_tlsdesc_resolve_hold
+	.global	_dl_tlsdesc_resolve_hold
+	.type	_dl_tlsdesc_resolve_hold,%function
+	cfi_startproc
+	.align 2
+_dl_tlsdesc_resolve_hold:
+#define	NSAVEXREGPAIRS 10
+1:
+	stp	x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]!
+	cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
+	mov	x29, sp
+	stp	 x1,  x2, [sp, #32+16*0]
+	stp	 x3,  x4, [sp, #32+16*1]
+	stp	 x5,  x6, [sp, #32+16*2]
+	stp	 x7,  x8, [sp, #32+16*3]
+	stp	 x9, x10, [sp, #32+16*4]
+	stp	x11, x12, [sp, #32+16*5]
+	stp	x13, x14, [sp, #32+16*6]
+	stp	x15, x16, [sp, #32+16*7]
+	stp	x17, x18, [sp, #32+16*8]
+	str	x0,       [sp, #32+16*9]
+
+	SAVE_Q_REGISTERS
+
+	adr	x1, 1b
+	bl	_dl_tlsdesc_resolve_hold_fixup
+
+	RESTORE_Q_REGISTERS
+
+	ldr	x0, [sp, #32+16*9]
+	DELOUSE (0)
+	ldr	PTR_REG (1), [x0]
+	blr	x1
+
+	ldp	 x1,  x2, [sp, #32+16*0]
+	ldp	 x3,  x4, [sp, #32+16*1]
+	ldp	 x5,  x6, [sp, #32+16*2]
+	ldp	 x7,  x8, [sp, #32+16*3]
+	ldp	 x9, x10, [sp, #32+16*4]
+	ldp	x11, x12, [sp, #32+16*5]
+	ldp	x13, x14, [sp, #32+16*6]
+	ldp	x15, x16, [sp, #32+16*7]
+	ldp	x17, x18, [sp, #32+16*8]
+	ldp	x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
+	cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
+	RET
+	cfi_endproc
+	.size	_dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold
+#undef NSAVEXREGPAIRS