summary refs log tree commit diff
path: root/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h
diff options
context:
space:
mode:
author H.J. Lu <hjl.tools@gmail.com> 2017-08-23 06:16:12 -0700
committer H.J. Lu <hjl.tools@gmail.com> 2017-08-23 06:26:44 -0700
commit b9eaca8fa0a9628a992e0f1478aaadde576804e1 (patch)
tree 10d8dcd9696ac057b485e94e7a003d3856de71dc /sysdeps/x86_64/fpu/svml_s_wrapper_impl.h
parent 5a706f649de3952271930a8340db4ca8aa50f485 (diff)
download glibc-b9eaca8fa0a9628a992e0f1478aaadde576804e1.tar.gz
glibc-b9eaca8fa0a9628a992e0f1478aaadde576804e1.tar.xz
glibc-b9eaca8fa0a9628a992e0f1478aaadde576804e1.zip
x86_64: Replace AVX512F .byte sequences with instructions
Since binutils 2.25 or later is required to build glibc, we can replace
AVX512F .byte sequences with AVX512F instructions.

Tested on x86-64 and x32.  There are no code differences in libmvec.so
and libmvec.a.

	* sysdeps/x86_64/fpu/svml_d_sincos8_core.S: Replace AVX512F
	.byte sequences with AVX512F instructions.
	* sysdeps/x86_64/fpu/svml_d_wrapper_impl.h: Likewise.
	* sysdeps/x86_64/fpu/svml_s_sincosf16_core.S: Likewise.
	* sysdeps/x86_64/fpu/svml_s_wrapper_impl.h: Likewise.
	* sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S:
	Likewise.
	* sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S:
	Likewise.
Diffstat (limited to 'sysdeps/x86_64/fpu/svml_s_wrapper_impl.h')
-rw-r--r-- sysdeps/x86_64/fpu/svml_s_wrapper_impl.h 57
1 files changed, 6 insertions, 51 deletions
diff --git a/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h b/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h
index cd6d58361c..00b86cd377 100644
--- a/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h
+++ b/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h
@@ -246,29 +246,14 @@
         cfi_def_cfa_register (%rbp)
         andq      $-64, %rsp
         subq      $128, %rsp
-/* Below is encoding for vmovups %zmm0, (%rsp).  */
-        .byte   0x62
-        .byte   0xf1
-        .byte   0x7c
-        .byte   0x48
-        .byte   0x11
-        .byte   0x04
-        .byte   0x24
+        vmovups   %zmm0, (%rsp)
         vmovupd   (%rsp), %ymm0
         call      HIDDEN_JUMPTARGET(\callee)
         vmovupd   %ymm0, 64(%rsp)
         vmovupd   32(%rsp), %ymm0
         call      HIDDEN_JUMPTARGET(\callee)
         vmovupd   %ymm0, 96(%rsp)
-/* Below is encoding for vmovups 64(%rsp), %zmm0.  */
-        .byte   0x62
-        .byte   0xf1
-        .byte   0x7c
-        .byte   0x48
-        .byte   0x10
-        .byte   0x44
-        .byte   0x24
-        .byte   0x01
+        vmovups   64(%rsp), %zmm0
         movq      %rbp, %rsp
         cfi_def_cfa_register (%rsp)
         popq      %rbp
@@ -286,23 +271,8 @@
         cfi_def_cfa_register (%rbp)
         andq      $-64, %rsp
         subq      $192, %rsp
-/* Below is encoding for vmovups %zmm0, (%rsp).  */
-        .byte   0x62
-        .byte   0xf1
-        .byte   0x7c
-        .byte   0x48
-        .byte   0x11
-        .byte   0x04
-        .byte   0x24
-/* Below is encoding for vmovups %zmm1, 64(%rsp).  */
-        .byte   0x62
-        .byte   0xf1
-        .byte   0x7c
-        .byte   0x48
-        .byte   0x11
-        .byte   0x4c
-        .byte   0x24
-        .byte   0x01
+        vmovups   %zmm0, (%rsp)
+        vmovups   %zmm1, 64(%rsp)
         vmovups   (%rsp), %ymm0
         vmovups   64(%rsp), %ymm1
         call      HIDDEN_JUMPTARGET(\callee)
@@ -311,15 +281,7 @@
         vmovups   96(%rsp), %ymm1
         call      HIDDEN_JUMPTARGET(\callee)
         vmovups   %ymm0, 160(%rsp)
-/* Below is encoding for vmovups 128(%rsp), %zmm0.  */
-        .byte   0x62
-        .byte   0xf1
-        .byte   0x7c
-        .byte   0x48
-        .byte   0x10
-        .byte   0x44
-        .byte   0x24
-        .byte   0x02
+        vmovups   128(%rsp), %zmm0
         movq      %rbp, %rsp
         cfi_def_cfa_register (%rsp)
         popq      %rbp
@@ -340,14 +302,7 @@
         pushq     %r13
         subq      $176, %rsp
         movq      %rsi, %r13
-/* Below is encoding for vmovaps %zmm0, (%rsp).  */
-        .byte	0x62
-        .byte	0xf1
-        .byte	0x7c
-        .byte	0x48
-        .byte	0x29
-        .byte	0x04
-        .byte	0x24
+        vmovaps   %zmm0, (%rsp)
         movq      %rdi, %r12
         vmovaps   (%rsp), %ymm0
         call      HIDDEN_JUMPTARGET(\callee)