diff options
Diffstat (limited to 'REORG.TODO/sysdeps/aarch64/dl-tlsdesc.S')
-rw-r--r-- | REORG.TODO/sysdeps/aarch64/dl-tlsdesc.S | 373 |
1 files changed, 373 insertions, 0 deletions
diff --git a/REORG.TODO/sysdeps/aarch64/dl-tlsdesc.S b/REORG.TODO/sysdeps/aarch64/dl-tlsdesc.S new file mode 100644 index 0000000000..94679a04ca --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/dl-tlsdesc.S @@ -0,0 +1,373 @@ +/* Thread-local storage handling in the ELF dynamic linker. + AArch64 version. + Copyright (C) 2011-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <tls.h> +#include "tlsdesc.h" + +#define NSAVEDQREGPAIRS 16 +#define SAVE_Q_REGISTERS \ + stp q0, q1, [sp, #-32*NSAVEDQREGPAIRS]!; \ + cfi_adjust_cfa_offset (32*NSAVEDQREGPAIRS); \ + stp q2, q3, [sp, #32*1]; \ + stp q4, q5, [sp, #32*2]; \ + stp q6, q7, [sp, #32*3]; \ + stp q8, q9, [sp, #32*4]; \ + stp q10, q11, [sp, #32*5]; \ + stp q12, q13, [sp, #32*6]; \ + stp q14, q15, [sp, #32*7]; \ + stp q16, q17, [sp, #32*8]; \ + stp q18, q19, [sp, #32*9]; \ + stp q20, q21, [sp, #32*10]; \ + stp q22, q23, [sp, #32*11]; \ + stp q24, q25, [sp, #32*12]; \ + stp q26, q27, [sp, #32*13]; \ + stp q28, q29, [sp, #32*14]; \ + stp q30, q31, [sp, #32*15]; + +#define RESTORE_Q_REGISTERS \ + ldp q2, q3, [sp, #32*1]; \ + ldp q4, q5, [sp, #32*2]; \ + ldp q6, q7, [sp, #32*3]; \ + ldp q8, q9, [sp, #32*4]; \ + ldp q10, q11, [sp, #32*5]; \ + ldp q12, q13, [sp, #32*6]; \ + ldp q14, q15, [sp, #32*7]; \ + ldp q16, q17, [sp, #32*8]; \ + ldp q18, q19, [sp, #32*9]; \ + ldp q20, q21, [sp, #32*10]; \ + ldp q22, q23, [sp, #32*11]; \ + ldp q24, q25, [sp, #32*12]; \ + ldp q26, q27, [sp, #32*13]; \ + ldp q28, q29, [sp, #32*14]; \ + ldp q30, q31, [sp, #32*15]; \ + ldp q0, q1, [sp], #32*NSAVEDQREGPAIRS; \ + cfi_adjust_cfa_offset (-32*NSAVEDQREGPAIRS); + + .text + + /* Compute the thread pointer offset for symbols in the static + TLS block. The offset is the same for all threads. + Prototype: + _dl_tlsdesc_return (tlsdesc *) ; + */ + .hidden _dl_tlsdesc_return + .global _dl_tlsdesc_return + .type _dl_tlsdesc_return,%function + cfi_startproc + .align 2 +_dl_tlsdesc_return: + DELOUSE (0) + ldr PTR_REG (0), [x0, #PTR_SIZE] + RET + cfi_endproc + .size _dl_tlsdesc_return, .-_dl_tlsdesc_return + + /* Same as _dl_tlsdesc_return but with synchronization for + lazy relocation. + Prototype: + _dl_tlsdesc_return_lazy (tlsdesc *) ; + */ + .hidden _dl_tlsdesc_return_lazy + .global _dl_tlsdesc_return_lazy + .type _dl_tlsdesc_return_lazy,%function + cfi_startproc + .align 2 +_dl_tlsdesc_return_lazy: + /* The ldar here happens after the load from [x0] at the call site + (that is generated by the compiler as part of the TLS access ABI), + so it reads the same value (this function is the final value of + td->entry) and thus it synchronizes with the release store to + td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load + from [x0,#PTR_SIZE] here happens after the initialization of td->arg. */ + DELOUSE (0) + ldar PTR_REG (zr), [x0] + ldr PTR_REG (0), [x0, #PTR_SIZE] + RET + cfi_endproc + .size _dl_tlsdesc_return_lazy, .-_dl_tlsdesc_return_lazy + + /* Handler for undefined weak TLS symbols. + Prototype: + _dl_tlsdesc_undefweak (tlsdesc *); + + The second word of the descriptor contains the addend. + Return the addend minus the thread pointer. This ensures + that when the caller adds on the thread pointer it gets back + the addend. */ + + .hidden _dl_tlsdesc_undefweak + .global _dl_tlsdesc_undefweak + .type _dl_tlsdesc_undefweak,%function + cfi_startproc + .align 2 +_dl_tlsdesc_undefweak: + str x1, [sp, #-16]! + cfi_adjust_cfa_offset (16) + /* The ldar here happens after the load from [x0] at the call site + (that is generated by the compiler as part of the TLS access ABI), + so it reads the same value (this function is the final value of + td->entry) and thus it synchronizes with the release store to + td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load + from [x0,#8] here happens after the initialization of td->arg. */ + DELOUSE (0) + ldar PTR_REG (zr), [x0] + ldr PTR_REG (0), [x0, #PTR_SIZE] + mrs x1, tpidr_el0 + sub PTR_REG (0), PTR_REG (0), PTR_REG (1) + ldr x1, [sp], #16 + cfi_adjust_cfa_offset (-16) + RET + cfi_endproc + .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak + +#ifdef SHARED + /* Handler for dynamic TLS symbols. + Prototype: + _dl_tlsdesc_dynamic (tlsdesc *) ; + + The second word of the descriptor points to a + tlsdesc_dynamic_arg structure. + + Returns the offset between the thread pointer and the + object referenced by the argument. + + ptrdiff_t + __attribute__ ((__regparm__ (1))) + _dl_tlsdesc_dynamic (struct tlsdesc *tdp) + { + struct tlsdesc_dynamic_arg *td = tdp->arg; + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV); + if (__builtin_expect (td->gen_count <= dtv[0].counter + && (dtv[td->tlsinfo.ti_module].pointer.val + != TLS_DTV_UNALLOCATED), + 1)) + return dtv[td->tlsinfo.ti_module].pointer.val + + td->tlsinfo.ti_offset + - __thread_pointer; + + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; + } + */ + + .hidden _dl_tlsdesc_dynamic + .global _dl_tlsdesc_dynamic + .type _dl_tlsdesc_dynamic,%function + cfi_startproc + .align 2 +_dl_tlsdesc_dynamic: +# define NSAVEXREGPAIRS 2 + stp x29, x30, [sp,#-(32+16*NSAVEXREGPAIRS)]! + cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS) + mov x29, sp + DELOUSE (0) + + /* Save just enough registers to support fast path, if we fall + into slow path we will save additional registers. */ + + stp x1, x2, [sp, #32+16*0] + stp x3, x4, [sp, #32+16*1] + + mrs x4, tpidr_el0 + /* The ldar here happens after the load from [x0] at the call site + (that is generated by the compiler as part of the TLS access ABI), + so it reads the same value (this function is the final value of + td->entry) and thus it synchronizes with the release store to + td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load + from [x0,#PTR_SIZE] here happens after the initialization of td->arg. */ + ldar PTR_REG (zr), [x0] + ldr PTR_REG (1), [x0,#TLSDESC_ARG] + ldr PTR_REG (0), [x4,#TCBHEAD_DTV] + ldr PTR_REG (3), [x1,#TLSDESC_GEN_COUNT] + ldr PTR_REG (2), [x0,#DTV_COUNTER] + cmp PTR_REG (3), PTR_REG (2) + b.hi 2f + ldr PTR_REG (2), [x1,#TLSDESC_MODID] + add PTR_REG (0), PTR_REG (0), PTR_REG (2), lsl #(PTR_LOG_SIZE + 1) + ldr PTR_REG (0), [x0] /* Load val member of DTV entry. */ + cmp x0, #TLS_DTV_UNALLOCATED + b.eq 2f + ldr PTR_REG (1), [x1,#TLSDESC_MODOFF] + add PTR_REG (0), PTR_REG (0), PTR_REG (1) + sub PTR_REG (0), PTR_REG (0), PTR_REG (4) +1: + ldp x1, x2, [sp, #32+16*0] + ldp x3, x4, [sp, #32+16*1] + + ldp x29, x30, [sp], #(32+16*NSAVEXREGPAIRS) + cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS) +# undef NSAVEXREGPAIRS + RET +2: + /* This is the slow path. We need to call __tls_get_addr() which + means we need to save and restore all the register that the + callee will trash. */ + + /* Save the remaining registers that we must treat as caller save. */ +# define NSAVEXREGPAIRS 7 + stp x5, x6, [sp, #-16*NSAVEXREGPAIRS]! + cfi_adjust_cfa_offset (16*NSAVEXREGPAIRS) + stp x7, x8, [sp, #16*1] + stp x9, x10, [sp, #16*2] + stp x11, x12, [sp, #16*3] + stp x13, x14, [sp, #16*4] + stp x15, x16, [sp, #16*5] + stp x17, x18, [sp, #16*6] + + SAVE_Q_REGISTERS + + mov x0, x1 + bl __tls_get_addr + + mrs x1, tpidr_el0 + sub PTR_REG (0), PTR_REG (0), PTR_REG (1) + + RESTORE_Q_REGISTERS + + ldp x7, x8, [sp, #16*1] + ldp x9, x10, [sp, #16*2] + ldp x11, x12, [sp, #16*3] + ldp x13, x14, [sp, #16*4] + ldp x15, x16, [sp, #16*5] + ldp x17, x18, [sp, #16*6] + ldp x5, x6, [sp], #16*NSAVEXREGPAIRS + cfi_adjust_cfa_offset (-16*NSAVEXREGPAIRS) + b 1b + cfi_endproc + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic +# undef NSAVEXREGPAIRS +#endif + + /* This function is a wrapper for a lazy resolver for TLS_DESC + RELA relocations. + When the actual resolver returns, it will have adjusted the + TLS descriptor such that we can tail-call it for it to return + the TP offset of the symbol. */ + + .hidden _dl_tlsdesc_resolve_rela + .global _dl_tlsdesc_resolve_rela + .type _dl_tlsdesc_resolve_rela,%function + cfi_startproc + .align 2 +_dl_tlsdesc_resolve_rela: +#define NSAVEXREGPAIRS 9 + stp x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]! + cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS) + mov x29, sp + stp x1, x4, [sp, #32+16*0] + stp x5, x6, [sp, #32+16*1] + stp x7, x8, [sp, #32+16*2] + stp x9, x10, [sp, #32+16*3] + stp x11, x12, [sp, #32+16*4] + stp x13, x14, [sp, #32+16*5] + stp x15, x16, [sp, #32+16*6] + stp x17, x18, [sp, #32+16*7] + str x0, [sp, #32+16*8] + + SAVE_Q_REGISTERS + + DELOUSE (3) + ldr PTR_REG (1), [x3, #PTR_SIZE] + bl _dl_tlsdesc_resolve_rela_fixup + + RESTORE_Q_REGISTERS + + ldr x0, [sp, #32+16*8] + DELOUSE (0) + ldr PTR_REG (1), [x0] + blr x1 + + ldp x1, x4, [sp, #32+16*0] + ldp x5, x6, [sp, #32+16*1] + ldp x7, x8, [sp, #32+16*2] + ldp x9, x10, [sp, #32+16*3] + ldp x11, x12, [sp, #32+16*4] + ldp x13, x14, [sp, #32+16*5] + ldp x15, x16, [sp, #32+16*6] + ldp x17, x18, [sp, #32+16*7] + ldp x29, x30, [sp], #(32+16*NSAVEXREGPAIRS) + cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS) + ldp x2, x3, [sp], #16 + cfi_adjust_cfa_offset (-16) + RET +#undef NSAVEXREGPAIRS + cfi_endproc + .size _dl_tlsdesc_resolve_rela, .-_dl_tlsdesc_resolve_rela + + /* This function is a placeholder for lazy resolving of TLS + relocations. Once some thread starts resolving a TLS + relocation, it sets up the TLS descriptor to use this + resolver, such that other threads that would attempt to + resolve it concurrently may skip the call to the original lazy + resolver and go straight to a condition wait. + + When the actual resolver returns, it will have adjusted the + TLS descriptor such that we can tail-call it for it to return + the TP offset of the symbol. */ + + .hidden _dl_tlsdesc_resolve_hold + .global _dl_tlsdesc_resolve_hold + .type _dl_tlsdesc_resolve_hold,%function + cfi_startproc + .align 2 +_dl_tlsdesc_resolve_hold: +#define NSAVEXREGPAIRS 10 +1: + stp x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]! + cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS) + mov x29, sp + stp x1, x2, [sp, #32+16*0] + stp x3, x4, [sp, #32+16*1] + stp x5, x6, [sp, #32+16*2] + stp x7, x8, [sp, #32+16*3] + stp x9, x10, [sp, #32+16*4] + stp x11, x12, [sp, #32+16*5] + stp x13, x14, [sp, #32+16*6] + stp x15, x16, [sp, #32+16*7] + stp x17, x18, [sp, #32+16*8] + str x0, [sp, #32+16*9] + + SAVE_Q_REGISTERS + + adr x1, 1b + bl _dl_tlsdesc_resolve_hold_fixup + + RESTORE_Q_REGISTERS + + ldr x0, [sp, #32+16*9] + DELOUSE (0) + ldr PTR_REG (1), [x0] + blr x1 + + ldp x1, x2, [sp, #32+16*0] + ldp x3, x4, [sp, #32+16*1] + ldp x5, x6, [sp, #32+16*2] + ldp x7, x8, [sp, #32+16*3] + ldp x9, x10, [sp, #32+16*4] + ldp x11, x12, [sp, #32+16*5] + ldp x13, x14, [sp, #32+16*6] + ldp x15, x16, [sp, #32+16*7] + ldp x17, x18, [sp, #32+16*8] + ldp x29, x30, [sp], #(32+16*NSAVEXREGPAIRS) + cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS) + RET + cfi_endproc + .size _dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold +#undef NSAVEXREGPAIRS |