From 0aac205a814a8511e98d02b91a8dc908f1c53cde Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Mon, 26 Feb 2024 06:37:03 -0800 Subject: x86: Update _dl_tlsdesc_dynamic to preserve caller-saved registers Compiler generates the following instruction sequence for GNU2 dynamic TLS access: leaq tls_var@TLSDESC(%rip), %rax call *tls_var@TLSCALL(%rax) or leal tls_var@TLSDESC(%ebx), %eax call *tls_var@TLSCALL(%eax) CALL instruction is transparent to compiler which assumes all registers, except for EFLAGS and RAX/EAX, are unchanged after CALL. When _dl_tlsdesc_dynamic is called, it calls __tls_get_addr on the slow path. __tls_get_addr is a normal function which doesn't preserve any caller-saved registers. _dl_tlsdesc_dynamic saved and restored integer caller-saved registers, but didn't preserve any other caller-saved registers. Add _dl_tlsdesc_dynamic IFUNC functions for FNSAVE, FXSAVE, XSAVE and XSAVEC to save and restore all caller-saved registers. This fixes BZ #31372. Add GLRO(dl_x86_64_runtime_resolve) with GLRO(dl_x86_tlsdesc_dynamic) to optimize elf_machine_runtime_setup. Reviewed-by: Noah Goldstein --- sysdeps/i386/dl-tlsdesc.S | 115 ++++++++++++++++++++-------------------------- 1 file changed, 50 insertions(+), 65 deletions(-) (limited to 'sysdeps/i386/dl-tlsdesc.S') diff --git a/sysdeps/i386/dl-tlsdesc.S b/sysdeps/i386/dl-tlsdesc.S index 90d93caa0c..f002feee56 100644 --- a/sysdeps/i386/dl-tlsdesc.S +++ b/sysdeps/i386/dl-tlsdesc.S @@ -18,8 +18,27 @@ #include #include +#include +#include #include "tlsdesc.h" +#ifndef DL_STACK_ALIGNMENT +/* Due to GCC bug: + + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 + + __tls_get_addr may be called with 4-byte stack alignment. Although + this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume + that stack will be always aligned at 16 bytes. */ +# define DL_STACK_ALIGNMENT 4 +#endif + +/* True if _dl_tlsdesc_dynamic should align stack for STATE_SAVE or align + stack to MINIMUM_ALIGNMENT bytes before calling ___tls_get_addr. */ +#define DL_RUNTIME_RESOLVE_REALIGN_STACK \ + (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \ + || MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT) + .text /* This function is used to compute the TP offset for symbols in @@ -65,69 +84,35 @@ _dl_tlsdesc_undefweak: .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak #ifdef SHARED - .hidden _dl_tlsdesc_dynamic - .global _dl_tlsdesc_dynamic - .type _dl_tlsdesc_dynamic,@function - - /* This function is used for symbols that need dynamic TLS. - - %eax points to the TLS descriptor, such that 0(%eax) points to - _dl_tlsdesc_dynamic itself, and 4(%eax) points to a struct - tlsdesc_dynamic_arg object. It must return in %eax the offset - between the thread pointer and the object denoted by the - argument, without clobbering any registers. - - The assembly code that follows is a rendition of the following - C code, hand-optimized a little bit. - -ptrdiff_t -__attribute__ ((__regparm__ (1))) -_dl_tlsdesc_dynamic (struct tlsdesc *tdp) -{ - struct tlsdesc_dynamic_arg *td = tdp->arg; - dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET); - if (__builtin_expect (td->gen_count <= dtv[0].counter - && (dtv[td->tlsinfo.ti_module].pointer.val - != TLS_DTV_UNALLOCATED), - 1)) - return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset - - __thread_pointer; - - return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; -} -*/ - cfi_startproc - .align 16 -_dl_tlsdesc_dynamic: - /* Like all TLS resolvers, preserve call-clobbered registers. - We need two scratch regs anyway. */ - subl $28, %esp - cfi_adjust_cfa_offset (28) - movl %ecx, 20(%esp) - movl %edx, 24(%esp) - movl TLSDESC_ARG(%eax), %eax - movl %gs:DTV_OFFSET, %edx - movl TLSDESC_GEN_COUNT(%eax), %ecx - cmpl (%edx), %ecx - ja .Lslow - movl TLSDESC_MODID(%eax), %ecx - movl (%edx,%ecx,8), %edx - cmpl $-1, %edx - je .Lslow - movl TLSDESC_MODOFF(%eax), %eax - addl %edx, %eax -.Lret: - movl 20(%esp), %ecx - subl %gs:0, %eax - movl 24(%esp), %edx - addl $28, %esp - cfi_adjust_cfa_offset (-28) - ret - .p2align 4,,7 -.Lslow: - cfi_adjust_cfa_offset (28) - call HIDDEN_JUMPTARGET (___tls_get_addr) - jmp .Lret - cfi_endproc - .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic +# define USE_FNSAVE +# define MINIMUM_ALIGNMENT 4 +# define STATE_SAVE_ALIGNMENT 4 +# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_fnsave +# include "dl-tlsdesc-dynamic.h" +# undef _dl_tlsdesc_dynamic +# undef MINIMUM_ALIGNMENT +# undef USE_FNSAVE + +# define MINIMUM_ALIGNMENT 16 + +# define USE_FXSAVE +# define STATE_SAVE_ALIGNMENT 16 +# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_fxsave +# include "dl-tlsdesc-dynamic.h" +# undef _dl_tlsdesc_dynamic +# undef USE_FXSAVE + +# define USE_XSAVE +# define STATE_SAVE_ALIGNMENT 64 +# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_xsave +# include "dl-tlsdesc-dynamic.h" +# undef _dl_tlsdesc_dynamic +# undef USE_XSAVE + +# define USE_XSAVEC +# define STATE_SAVE_ALIGNMENT 64 +# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_xsavec +# include "dl-tlsdesc-dynamic.h" +# undef _dl_tlsdesc_dynamic +# undef USE_XSAVEC #endif /* SHARED */ -- cgit 1.4.1