diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2017-08-23 06:16:12 -0700 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2017-08-23 06:26:44 -0700 |
commit | b9eaca8fa0a9628a992e0f1478aaadde576804e1 (patch) | |
tree | 10d8dcd9696ac057b485e94e7a003d3856de71dc /sysdeps/x86_64/fpu/svml_d_sincos8_core.S | |
parent | 5a706f649de3952271930a8340db4ca8aa50f485 (diff) | |
download | glibc-b9eaca8fa0a9628a992e0f1478aaadde576804e1.tar.gz glibc-b9eaca8fa0a9628a992e0f1478aaadde576804e1.tar.xz glibc-b9eaca8fa0a9628a992e0f1478aaadde576804e1.zip |
x86_64: Replace AVX512F .byte sequences with instructions
Since binutils 2.25 or later is required to build glibc, we can replace AVX512F .byte sequences with AVX512F instructions. Tested on x86-64 and x32. There are no code differences in libmvec.so and libmvec.a.

* sysdeps/x86_64/fpu/svml_d_sincos8_core.S: Replace AVX512F .byte sequences with AVX512F instructions.
* sysdeps/x86_64/fpu/svml_d_wrapper_impl.h: Likewise.
* sysdeps/x86_64/fpu/svml_s_sincosf16_core.S: Likewise.
* sysdeps/x86_64/fpu/svml_s_wrapper_impl.h: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S: Likewise.
Diffstat (limited to 'sysdeps/x86_64/fpu/svml_d_sincos8_core.S')
-rw-r--r-- | sysdeps/x86_64/fpu/svml_d_sincos8_core.S | 41 |
1 file changed, 4 insertions, 37 deletions
```diff
diff --git a/sysdeps/x86_64/fpu/svml_d_sincos8_core.S b/sysdeps/x86_64/fpu/svml_d_sincos8_core.S
index c104539821..cdea30409a 100644
--- a/sysdeps/x86_64/fpu/svml_d_sincos8_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_sincos8_core.S
@@ -35,32 +35,10 @@ END (_ZGVeN8vl8l8_sincos)
 	cfi_def_cfa_register (%rbp)
 	andq $-64, %rsp
 	subq $320, %rsp
-	/* Encoding for vmovups %zmm0, 256(%rsp). */
-	.byte 0x62
-	.byte 0xf1
-	.byte 0x7c
-	.byte 0x48
-	.byte 0x11
-	.byte 0x44
-	.byte 0x24
-	.byte 0x04
+	vmovups %zmm0, 256(%rsp)
 	lea (%rsp), %rdi
-	/* Encoding for vmovups %zmm1, 128(%rdi). */
-	.byte 0x62
-	.byte 0xf1
-	.byte 0x7c
-	.byte 0x48
-	.byte 0x11
-	.byte 0x4f
-	.byte 0x02
-	/* Encoding for vmovups %zmm2, 192(%rdi). */
-	.byte 0x62
-	.byte 0xf1
-	.byte 0x7c
-	.byte 0x48
-	.byte 0x11
-	.byte 0x57
-	.byte 0x03
+	vmovups %zmm1, 128(%rdi)
+	vmovups %zmm2, 192(%rdi)
 	lea 64(%rsp), %rsi
 	call HIDDEN_JUMPTARGET(\callee)
 	vmovdqu 288(%rsp), %ymm0
@@ -142,18 +120,7 @@ END (_ZGVeN8vl8l8_sincos)
 	subl $280, %esp
 	vmovdqa %ymm1, -208(%ebp)
 	vmovdqa %ymm2, -240(%ebp)
-	/* Encoding for vmovapd %zmm0, -304(%ebp). */
-	.byte 0x67
-	.byte 0x62
-	.byte 0xf1
-	.byte 0xfd
-	.byte 0x48
-	.byte 0x29
-	.byte 0x85
-	.byte 0xd0
-	.byte 0xfe
-	.byte 0xff
-	.byte 0xff
+	vmovapd %zmm0, -304(%ebp)
 	call HIDDEN_JUMPTARGET(\callee)
 	leal 32(%r12), %esi
 	vmovupd -272(%ebp), %ymm0
```