about summary refs log tree commit diff
path: root/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S
diff options
context:
space:
mode:
author: H.J. Lu <hjl.tools@gmail.com> 2017-08-23 06:16:12 -0700
committer: H.J. Lu <hjl.tools@gmail.com> 2017-08-23 06:26:44 -0700
commit: b9eaca8fa0a9628a992e0f1478aaadde576804e1 (patch)
tree: 10d8dcd9696ac057b485e94e7a003d3856de71dc /sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S
parent: 5a706f649de3952271930a8340db4ca8aa50f485 (diff)
download: glibc-b9eaca8fa0a9628a992e0f1478aaadde576804e1.tar.gz
glibc-b9eaca8fa0a9628a992e0f1478aaadde576804e1.tar.xz
glibc-b9eaca8fa0a9628a992e0f1478aaadde576804e1.zip
x86_64: Replace AVX512F .byte sequences with instructions
Since binutils 2.25 or later is required to build glibc, we can replace
AVX512F .byte sequences with AVX512F instructions.

Tested on x86-64 and x32.  There are no code differences in libmvec.so
and libmvec.a.

	* sysdeps/x86_64/fpu/svml_d_sincos8_core.S: Replace AVX512F
	.byte sequences with AVX512F instructions.
	* sysdeps/x86_64/fpu/svml_d_wrapper_impl.h: Likewise.
	* sysdeps/x86_64/fpu/svml_s_sincosf16_core.S: Likewise.
	* sysdeps/x86_64/fpu/svml_s_wrapper_impl.h: Likewise.
	* sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S:
	Likewise.
	* sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S:
	Likewise.
Diffstat (limited to 'sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S')
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S | 63
1 file changed, 6 insertions, 57 deletions
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S
index f73ab7de7c..8fa4255d6d 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S
@@ -510,40 +510,11 @@ libmvec_hidden_def(_ZGVeN16vl4l4_sincosf_skx)
         cfi_def_cfa_register (%rbp)
         andq      $-64, %rsp
         subq      $384, %rsp
-        /* Encoding for vmovups %zmm1, 128(%rsp).  */
-        .byte 0x62
-        .byte 0xf1
-        .byte 0x7c
-        .byte 0x48
-        .byte 0x11
-        .byte 0x4c
-        .byte 0x24
-        .byte 0x02
+        vmovups   %zmm1, 128(%rsp)
         lea       (%rsp), %rdi
-        /* Encoding for vmovups %zmm2, 192(%rdi).  */
-        .byte 0x62
-        .byte 0xf1
-        .byte 0x7c
-        .byte 0x48
-        .byte 0x11
-        .byte 0x57
-        .byte 0x03
-        /* Encoding for vmovups %zmm3, 256(%rdi).  */
-        .byte 0x62
-        .byte 0xf1
-        .byte 0x7c
-        .byte 0x48
-        .byte 0x11
-        .byte 0x5f
-        .byte 0x04
-        /* Encoding for vmovups %zmm4, 320(%rdi).  */
-        .byte 0x62
-        .byte 0xf1
-        .byte 0x7c
-        .byte 0x48
-        .byte 0x11
-        .byte 0x67
-        .byte 0x05
+        vmovups   %zmm2, 192(%rdi)
+        vmovups   %zmm3, 256(%rdi)
+        vmovups   %zmm4, 320(%rdi)
         lea       64(%rsp), %rsi
         call      HIDDEN_JUMPTARGET(\callee)
         movq      128(%rsp), %rdx
@@ -661,30 +632,8 @@ libmvec_hidden_def(_ZGVeN16vl4l4_sincosf_skx)
         leal    -112(%rbp), %esi
         leal    -176(%rbp), %edi
         subl    $296, %esp
-        /* Encoding for vmovdqa64 %zmm1, -240(%ebp).  */
-        .byte 0x67
-        .byte 0x62
-        .byte 0xf1
-        .byte 0xfd
-        .byte 0x48
-        .byte 0x7f
-        .byte 0x8d
-        .byte 0x10
-        .byte 0xff
-        .byte 0xff
-        .byte 0xff
-        /* Encoding for vmovdqa64 %zmm2, -304(%ebp).  */
-        .byte 0x67
-        .byte 0x62
-        .byte 0xf1
-        .byte 0xfd
-        .byte 0x48
-        .byte 0x7f
-        .byte 0x95
-        .byte 0xd0
-        .byte 0xfe
-        .byte 0xff
-        .byte 0xff
+        vmovdqa64 %zmm1, -240(%ebp)
+        vmovdqa64 %zmm2, -304(%ebp)
         call    HIDDEN_JUMPTARGET(\callee)
         movl    -240(%ebp), %eax
         vmovss  -176(%ebp), %xmm0