-rw-r--r--	sysdeps/x86_64/dl-trampoline.S	  4
-rw-r--r--	sysdeps/x86_64/dl-trampoline.h	113
2 files changed, 61 insertions, 56 deletions
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index f669805ac5..580d2b6499 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -57,22 +57,26 @@
 #define VMOVA vmovdqa64
 #define VEC(i) zmm##i
 #define _dl_runtime_profile _dl_runtime_profile_avx512
+# define SECTION(p) p##.evex512
 #include "dl-trampoline.h"
 #undef _dl_runtime_profile
 #undef VEC
 #undef VMOVA
 #undef VEC_SIZE
+#undef SECTION

 #if MINIMUM_X86_ISA_LEVEL <= AVX_X86_ISA_LEVEL
 # define VEC_SIZE 32
 # define VMOVA vmovdqa
 # define VEC(i) ymm##i
+# define SECTION(p) p##.avx
 # define _dl_runtime_profile _dl_runtime_profile_avx
 # include "dl-trampoline.h"
 # undef _dl_runtime_profile
 # undef VEC
 # undef VMOVA
 # undef VEC_SIZE
+# undef SECTION
 #endif

 #if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
diff --git a/sysdeps/x86_64/dl-trampoline.h b/sysdeps/x86_64/dl-trampoline.h
index 03bd91b3e9..3c419047ec 100644
--- a/sysdeps/x86_64/dl-trampoline.h
+++ b/sysdeps/x86_64/dl-trampoline.h
@@ -16,7 +16,11 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */

-	.text
+#ifndef SECTION
+# define SECTION(p) p
+#endif
+
+	.section SECTION(.text),"ax",@progbits
 #ifdef _dl_runtime_resolve

 # undef REGISTER_SAVE_AREA
@@ -219,19 +223,19 @@ _dl_runtime_profile:
 	/* We always store the XMM registers even if AVX is available.
 	   This is to provide backward binary compatibility for existing
 	   audit modules.  */
-	movaps %xmm0, (LR_XMM_OFFSET)(%rsp)
-	movaps %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)
-	movaps %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
-	movaps %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
-	movaps %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
-	movaps %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
-	movaps %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
-	movaps %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)
+	VMOVA %xmm0, (LR_XMM_OFFSET + XMM_SIZE*0)(%rsp)
+	VMOVA %xmm1, (LR_XMM_OFFSET + XMM_SIZE*1)(%rsp)
+	VMOVA %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
+	VMOVA %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
+	VMOVA %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
+	VMOVA %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
+	VMOVA %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
+	VMOVA %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)

 # ifdef RESTORE_AVX
 	/* This is to support AVX audit modules.  */
-	VMOVA %VEC(0), (LR_VECTOR_OFFSET)(%rsp)
-	VMOVA %VEC(1), (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
+	VMOVA %VEC(0), (LR_VECTOR_OFFSET + VECTOR_SIZE*0)(%rsp)
+	VMOVA %VEC(1), (LR_VECTOR_OFFSET + VECTOR_SIZE*1)(%rsp)
 	VMOVA %VEC(2), (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
 	VMOVA %VEC(3), (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
 	VMOVA %VEC(4), (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
@@ -241,8 +245,8 @@ _dl_runtime_profile:

 	/* Save xmm0-xmm7 registers to detect if any of them are
 	   changed by audit module.  */
-	vmovdqa %xmm0, (LR_SIZE)(%rsp)
-	vmovdqa %xmm1, (LR_SIZE + XMM_SIZE)(%rsp)
+	vmovdqa %xmm0, (LR_SIZE + XMM_SIZE*0)(%rsp)
+	vmovdqa %xmm1, (LR_SIZE + XMM_SIZE*1)(%rsp)
 	vmovdqa %xmm2, (LR_SIZE + XMM_SIZE*2)(%rsp)
 	vmovdqa %xmm3, (LR_SIZE + XMM_SIZE*3)(%rsp)
 	vmovdqa %xmm4, (LR_SIZE + XMM_SIZE*4)(%rsp)
@@ -265,84 +269,84 @@ _dl_runtime_profile:
 	movq LR_R8_OFFSET(%rsp), %r8
 	movq LR_R9_OFFSET(%rsp), %r9

-	movaps (LR_XMM_OFFSET)(%rsp), %xmm0
-	movaps (LR_XMM_OFFSET + XMM_SIZE)(%rsp), %xmm1
-	movaps (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2
-	movaps (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm3
-	movaps (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm4
-	movaps (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm5
-	movaps (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6
-	movaps (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7
+	VMOVA (LR_XMM_OFFSET + XMM_SIZE*0)(%rsp), %xmm0
+	VMOVA (LR_XMM_OFFSET + XMM_SIZE*1)(%rsp), %xmm1
+	VMOVA (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2
+	VMOVA (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm3
+	VMOVA (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm4
+	VMOVA (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm5
+	VMOVA (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6
+	VMOVA (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7

 # ifdef RESTORE_AVX
 	/* Check if any xmm0-xmm7 registers are changed by audit
 	   module.  */
-	vpcmpeqq (LR_SIZE)(%rsp), %xmm0, %xmm8
+	vpcmpeqb (LR_SIZE)(%rsp), %xmm0, %xmm8
 	vpmovmskb %xmm8, %esi
-	cmpl $0xffff, %esi
+	incw %si
 	je 2f
 	vmovdqa %xmm0, (LR_VECTOR_OFFSET)(%rsp)
 	jmp 1f
 2:	VMOVA (LR_VECTOR_OFFSET)(%rsp), %VEC(0)
 	vmovdqa %xmm0, (LR_XMM_OFFSET)(%rsp)

-1:	vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8
+1:	vpcmpeqb (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8
 	vpmovmskb %xmm8, %esi
-	cmpl $0xffff, %esi
+	incw %si
 	je 2f
 	vmovdqa %xmm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
 	jmp 1f
 2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %VEC(1)
 	vmovdqa %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)

-1:	vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8
+1:	vpcmpeqb (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8
 	vpmovmskb %xmm8, %esi
-	cmpl $0xffff, %esi
+	incw %si
 	je 2f
 	vmovdqa %xmm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
 	jmp 1f
 2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %VEC(2)
 	vmovdqa %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)

-1:	vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8
+1:	vpcmpeqb (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8
 	vpmovmskb %xmm8, %esi
-	cmpl $0xffff, %esi
+	incw %si
 	je 2f
 	vmovdqa %xmm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
 	jmp 1f
 2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %VEC(3)
 	vmovdqa %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)

-1:	vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8
+1:	vpcmpeqb (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8
 	vpmovmskb %xmm8, %esi
-	cmpl $0xffff, %esi
+	incw %si
 	je 2f
 	vmovdqa %xmm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
 	jmp 1f
 2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %VEC(4)
 	vmovdqa %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)

-1:	vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8
+1:	vpcmpeqb (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8
 	vpmovmskb %xmm8, %esi
-	cmpl $0xffff, %esi
+	incw %si
 	je 2f
 	vmovdqa %xmm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
 	jmp 1f
 2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %VEC(5)
 	vmovdqa %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)

-1:	vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8
+1:	vpcmpeqb (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8
 	vpmovmskb %xmm8, %esi
-	cmpl $0xffff, %esi
+	incw %si
 	je 2f
 	vmovdqa %xmm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
 	jmp 1f
 2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %VEC(6)
 	vmovdqa %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)

-1:	vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8
+1:	vpcmpeqb (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8
 	vpmovmskb %xmm8, %esi
-	cmpl $0xffff, %esi
+	incw %si
 	je 2f
 	vmovdqa %xmm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
 	jmp 1f
@@ -352,8 +356,8 @@ _dl_runtime_profile:
 1:
 # endif

-	mov 16(%rbx), %R10_LP	# Anything in framesize?
-	test %R10_LP, %R10_LP
+	mov 16(%rbx), %RCX_LP	# Anything in framesize?
+	test %RCX_LP, %RCX_LP
 	jns 3f

 	/* There's nothing in the frame size, so there
@@ -385,14 +389,11 @@ _dl_runtime_profile:
 	   returned from _dl_profile_fixup */

 	lea LR_RSP_OFFSET(%rbx), %RSI_LP	# stack
-	add $8, %R10_LP
-	and $-16, %R10_LP
-	mov %R10_LP, %RCX_LP
-	sub %R10_LP, %RSP_LP
+	add $8, %RCX_LP
+	and $-16, %RCX_LP
+	sub %RCX_LP, %RSP_LP
 	mov %RSP_LP, %RDI_LP
-	shr $3, %RCX_LP
-	rep
-	movsq
+	rep movsb

 	movq 24(%rdi), %rcx	# Get back register content.
 	movq 32(%rdi), %rsi
@@ -428,8 +429,8 @@ _dl_runtime_profile:
 	movq %rax, LRV_RAX_OFFSET(%rcx)
 	movq %rdx, LRV_RDX_OFFSET(%rcx)

-	movaps %xmm0, LRV_XMM0_OFFSET(%rcx)
-	movaps %xmm1, LRV_XMM1_OFFSET(%rcx)
+	VMOVA %xmm0, LRV_XMM0_OFFSET(%rcx)
+	VMOVA %xmm1, LRV_XMM1_OFFSET(%rcx)

 # ifdef RESTORE_AVX
 	/* This is to support AVX audit modules.  */
@@ -438,8 +439,8 @@ _dl_runtime_profile:

 	/* Save xmm0/xmm1 registers to detect if they are changed
 	   by audit module.  */
-	vmovdqa %xmm0, (LRV_SIZE)(%rcx)
-	vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE)(%rcx)
+	vmovdqa %xmm0, (LRV_SIZE + XMM_SIZE*0)(%rcx)
+	vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE*1)(%rcx)
 # endif

 	fstpt LRV_ST0_OFFSET(%rcx)
@@ -454,20 +455,20 @@ _dl_runtime_profile:
 	movq LRV_RAX_OFFSET(%rsp), %rax
 	movq LRV_RDX_OFFSET(%rsp), %rdx

-	movaps LRV_XMM0_OFFSET(%rsp), %xmm0
-	movaps LRV_XMM1_OFFSET(%rsp), %xmm1
+	VMOVA LRV_XMM0_OFFSET(%rsp), %xmm0
+	VMOVA LRV_XMM1_OFFSET(%rsp), %xmm1

 # ifdef RESTORE_AVX
 	/* Check if xmm0/xmm1 registers are changed by audit module.  */
-	vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2
+	vpcmpeqb (LRV_SIZE)(%rsp), %xmm0, %xmm2
 	vpmovmskb %xmm2, %esi
-	cmpl $0xffff, %esi
+	incw %si
 	jne 1f
 	VMOVA LRV_VECTOR0_OFFSET(%rsp), %VEC(0)

-1:	vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
+1:	vpcmpeqb (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
 	vpmovmskb %xmm2, %esi
-	cmpl $0xffff, %esi
+	incw %si
 	jne 1f
 	VMOVA LRV_VECTOR1_OFFSET(%rsp), %VEC(1)
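Note on the equality test rewritten above: when every byte lane of the register matches the saved copy, vpcmpeqb sets all 16 lanes to 0xff and vpmovmskb therefore produces 0xffff, so incrementing the low 16 bits with incw wraps them to zero and sets ZF; the following je/jne can branch directly, which is what lets the patch drop each cmpl $0xffff, %esi. A minimal standalone sketch of the same check, assuming AVX; the helper name xmm_blocks_equal is hypothetical and not part of this patch:

	/* Return 1 in %eax if the two 16-byte blocks at %rdi and %rsi
	   are identical, else 0, using the vpcmpeqb/vpmovmskb/incw
	   pattern from the trampoline.  */
	.text
	.globl	xmm_blocks_equal
	.type	xmm_blocks_equal, @function
xmm_blocks_equal:
	vmovdqu	(%rdi), %xmm0
	vpcmpeqb (%rsi), %xmm0, %xmm1	/* 0xff in each matching byte lane */
	vpmovmskb %xmm1, %ecx		/* 16-bit mask of lane results */
	xorl	%eax, %eax
	incw	%cx			/* 0xffff + 1 wraps to 0, sets ZF */
	sete	%al
	ret
	.size	xmm_blocks_equal, .-xmm_blocks_equal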