From b9eaca8fa0a9628a992e0f1478aaadde576804e1 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Wed, 23 Aug 2017 06:16:12 -0700 Subject: x86_64: Replace AVX512F .byte sequences with instructions Since binutils 2.25 or later is required to build glibc, we can replace AVX512F .byte sequences with AVX512F instructions. Tested on x86-64 and x32. There are no code differences in libmvec.so and libmvec.a. * sysdeps/x86_64/fpu/svml_d_sincos8_core.S: Replace AVX512F .byte sequences with AVX512F instructions. * sysdeps/x86_64/fpu/svml_d_wrapper_impl.h: Likewise. * sysdeps/x86_64/fpu/svml_s_sincosf16_core.S: Likewise. * sysdeps/x86_64/fpu/svml_s_wrapper_impl.h: Likewise. * sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S: Likewise. * sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S: Likewise. --- sysdeps/x86_64/fpu/svml_s_wrapper_impl.h | 57 ++++---------------------------- 1 file changed, 6 insertions(+), 51 deletions(-) (limited to 'sysdeps/x86_64/fpu/svml_s_wrapper_impl.h') diff --git a/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h b/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h index cd6d58361c..00b86cd377 100644 --- a/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h +++ b/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h @@ -246,29 +246,14 @@ cfi_def_cfa_register (%rbp) andq $-64, %rsp subq $128, %rsp -/* Below is encoding for vmovups %zmm0, (%rsp). */ - .byte 0x62 - .byte 0xf1 - .byte 0x7c - .byte 0x48 - .byte 0x11 - .byte 0x04 - .byte 0x24 + vmovups %zmm0, (%rsp) vmovupd (%rsp), %ymm0 call HIDDEN_JUMPTARGET(\callee) vmovupd %ymm0, 64(%rsp) vmovupd 32(%rsp), %ymm0 call HIDDEN_JUMPTARGET(\callee) vmovupd %ymm0, 96(%rsp) -/* Below is encoding for vmovups 64(%rsp), %zmm0. */ - .byte 0x62 - .byte 0xf1 - .byte 0x7c - .byte 0x48 - .byte 0x10 - .byte 0x44 - .byte 0x24 - .byte 0x01 + vmovups 64(%rsp), %zmm0 movq %rbp, %rsp cfi_def_cfa_register (%rsp) popq %rbp @@ -286,23 +271,8 @@ cfi_def_cfa_register (%rbp) andq $-64, %rsp subq $192, %rsp -/* Below is encoding for vmovups %zmm0, (%rsp). */ - .byte 0x62 - .byte 0xf1 - .byte 0x7c - .byte 0x48 - .byte 0x11 - .byte 0x04 - .byte 0x24 -/* Below is encoding for vmovups %zmm1, 64(%rsp). */ - .byte 0x62 - .byte 0xf1 - .byte 0x7c - .byte 0x48 - .byte 0x11 - .byte 0x4c - .byte 0x24 - .byte 0x01 + vmovups %zmm0, (%rsp) + vmovups %zmm1, 64(%rsp) vmovups (%rsp), %ymm0 vmovups 64(%rsp), %ymm1 call HIDDEN_JUMPTARGET(\callee) @@ -311,15 +281,7 @@ vmovups 96(%rsp), %ymm1 call HIDDEN_JUMPTARGET(\callee) vmovups %ymm0, 160(%rsp) -/* Below is encoding for vmovups 128(%rsp), %zmm0. */ - .byte 0x62 - .byte 0xf1 - .byte 0x7c - .byte 0x48 - .byte 0x10 - .byte 0x44 - .byte 0x24 - .byte 0x02 + vmovups 128(%rsp), %zmm0 movq %rbp, %rsp cfi_def_cfa_register (%rsp) popq %rbp @@ -340,14 +302,7 @@ pushq %r13 subq $176, %rsp movq %rsi, %r13 -/* Below is encoding for vmovaps %zmm0, (%rsp). */ - .byte 0x62 - .byte 0xf1 - .byte 0x7c - .byte 0x48 - .byte 0x29 - .byte 0x04 - .byte 0x24 + vmovaps %zmm0, (%rsp) movq %rdi, %r12 vmovaps (%rsp), %ymm0 call HIDDEN_JUMPTARGET(\callee) -- cgit 1.4.1