diff options
author | Igor Zamyatin <igor.zamyatin@intel.com> | 2014-03-13 11:10:22 -0700 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2014-03-13 11:19:08 -0700 |
commit | 2d63a517e4084ec80403cd9f278690fa8b676cc4 (patch) | |
tree | d5d5dd025d9a59bd41fedf9c16799271b5dbb722 /sysdeps/x86_64/dl-trampoline.h | |
parent | 44c4e5d598bfcbb309f05ceb7a57ab02662e7f34 (diff) | |
download | glibc-2d63a517e4084ec80403cd9f278690fa8b676cc4.tar.gz glibc-2d63a517e4084ec80403cd9f278690fa8b676cc4.tar.xz glibc-2d63a517e4084ec80403cd9f278690fa8b676cc4.zip |
Save and restore AVX-512 zmm registers to x86-64 ld.so
AVX-512 ISA adds 512-bit zmm registers. This patch updates _dl_runtime_profile to pass zmm registers to run-time audit. It also changes _dl_x86_64_save_sse and _dl_x86_64_restore_sse to upport zmm registers, which are called when only when RTLD_PREPARE_FOREIGN_CALL is used. Its performance impact is minimum. * config.h.in (HAVE_AVX512_SUPPORT): New #undef. (HAVE_AVX512_ASM_SUPPORT): Likewise. * sysdeps/x86_64/bits/link.h (La_x86_64_zmm): New. (La_x86_64_vector): Add zmm. * sysdeps/x86_64/Makefile (tests): Add tst-audit10. (modules-names): Add tst-auditmod10a and tst-auditmod10b. ($(objpfx)tst-audit10): New target. ($(objpfx)tst-audit10.out): Likewise. (tst-audit10-ENV): New. (AVX512-CFLAGS): Likewise. (CFLAGS-tst-audit10.c): Likewise. (CFLAGS-tst-auditmod10a.c): Likewise. (CFLAGS-tst-auditmod10b.c): Likewise. * sysdeps/x86_64/configure.ac: Set config-cflags-avx512, HAVE_AVX512_SUPPORT and HAVE_AVX512_ASM_SUPPORT. * sysdeps/x86_64/configure: Regenerated. * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Add AVX-512 zmm register support. (_dl_x86_64_save_sse): Likewise. (_dl_x86_64_restore_sse): Likewise. * sysdeps/x86_64/dl-trampoline.h: Updated to support different size vector registers. * sysdeps/x86_64/link-defines.sym (YMM_SIZE): New. (ZMM_SIZE): Likewise. * sysdeps/x86_64/tst-audit10.c: New file. * sysdeps/x86_64/tst-auditmod10a.c: Likewise. * sysdeps/x86_64/tst-auditmod10b.c: Likewise.
Diffstat (limited to 'sysdeps/x86_64/dl-trampoline.h')
-rw-r--r-- | sysdeps/x86_64/dl-trampoline.h | 40 |
1 files changed, 20 insertions, 20 deletions
diff --git a/sysdeps/x86_64/dl-trampoline.h b/sysdeps/x86_64/dl-trampoline.h index 5d1b75ff54..161af0ff57 100644 --- a/sysdeps/x86_64/dl-trampoline.h +++ b/sysdeps/x86_64/dl-trampoline.h @@ -19,14 +19,14 @@ #ifdef RESTORE_AVX /* This is to support AVX audit modules. */ - vmovdqu %ymm0, (LR_VECTOR_OFFSET)(%rsp) - vmovdqu %ymm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp) - vmovdqu %ymm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp) - vmovdqu %ymm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp) - vmovdqu %ymm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp) - vmovdqu %ymm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp) - vmovdqu %ymm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp) - vmovdqu %ymm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp) + VMOV %VEC(0), (LR_VECTOR_OFFSET)(%rsp) + VMOV %VEC(1), (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp) + VMOV %VEC(2), (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp) + VMOV %VEC(3), (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp) + VMOV %VEC(4), (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp) + VMOV %VEC(5), (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp) + VMOV %VEC(6), (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp) + VMOV %VEC(7), (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp) /* Save xmm0-xmm7 registers to detect if any of them are changed by audit module. */ @@ -72,7 +72,7 @@ je 2f vmovdqa %xmm0, (LR_VECTOR_OFFSET)(%rsp) jmp 1f -2: vmovdqu (LR_VECTOR_OFFSET)(%rsp), %ymm0 +2: VMOV (LR_VECTOR_OFFSET)(%rsp), %VEC(0) vmovdqa %xmm0, (LR_XMM_OFFSET)(%rsp) 1: vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8 @@ -81,7 +81,7 @@ je 2f vmovdqa %xmm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp) jmp 1f -2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %ymm1 +2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %VEC(1) vmovdqa %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp) 1: vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8 @@ -90,7 +90,7 @@ je 2f vmovdqa %xmm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp) jmp 1f -2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %ymm2 +2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %VEC(2) vmovdqa %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp) 1: vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8 @@ -99,7 +99,7 @@ je 2f vmovdqa %xmm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp) jmp 1f -2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %ymm3 +2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %VEC(3) vmovdqa %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp) 1: vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8 @@ -108,7 +108,7 @@ je 2f vmovdqa %xmm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp) jmp 1f -2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %ymm4 +2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %VEC(4) vmovdqa %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp) 1: vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8 @@ -117,7 +117,7 @@ je 2f vmovdqa %xmm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp) jmp 1f -2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %ymm5 +2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %VEC(5) vmovdqa %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp) 1: vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8 @@ -126,7 +126,7 @@ je 2f vmovdqa %xmm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp) jmp 1f -2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %ymm6 +2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %VEC(6) vmovdqa %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp) 1: vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8 @@ -135,7 +135,7 @@ je 2f vmovdqa %xmm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp) jmp 1f -2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %ymm7 +2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %VEC(7) vmovdqa %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp) 1: @@ -213,8 +213,8 @@ #ifdef RESTORE_AVX /* This is to support AVX audit modules. */ - vmovdqu %ymm0, LRV_VECTOR0_OFFSET(%rcx) - vmovdqu %ymm1, LRV_VECTOR1_OFFSET(%rcx) + VMOV %VEC(0), LRV_VECTOR0_OFFSET(%rcx) + VMOV %VEC(1), LRV_VECTOR1_OFFSET(%rcx) /* Save xmm0/xmm1 registers to detect if they are changed by audit module. */ @@ -243,13 +243,13 @@ vpmovmskb %xmm2, %esi cmpl $0xffff, %esi jne 1f - vmovdqu LRV_VECTOR0_OFFSET(%rsp), %ymm0 + VMOV LRV_VECTOR0_OFFSET(%rsp), %VEC(0) 1: vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2 vpmovmskb %xmm2, %esi cmpl $0xffff, %esi jne 1f - vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1 + VMOV LRV_VECTOR1_OFFSET(%rsp), %VEC(1) 1: #endif |