about summary refs log tree commit diff
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2016-03-13 00:26:57 -0800
committerH.J. Lu <hjl.tools@gmail.com>2016-03-31 09:00:47 -0700
commit0db56470f1bee39a252daf2728d818296b179a9e (patch)
tree5d376065cc6fce95724fefee7bd867abd7f05b0b
parent7df7c6a195d6bc6ffdd90db0786d5de9c67d037a (diff)
downloadglibc-hjl/erms/master.tar.gz
glibc-hjl/erms/master.tar.xz
glibc-hjl/erms/master.zip
Add memmove/memset-avx512-unaligned-erms-no-vzeroupper.S hjl/erms/master
-rw-r--r--sysdeps/x86_64/multiarch/Makefile4
-rw-r--r--sysdeps/x86_64/multiarch/ifunc-impl-list.c48
-rw-r--r--sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms-no-vzeroupper.S12
-rw-r--r--sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms-no-vzeroupper.S19
4 files changed, 82 insertions, 1 deletions
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 8878efbc8f..0218ffae4d 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -24,9 +24,11 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
 		   memmove-sse2-unaligned-erms \
 		   memmove-avx-unaligned-erms \
 		   memmove-avx512-unaligned-erms \
+		   memmove-avx512-unaligned-erms-no-vzeroupper \
 		   memset-sse2-unaligned-erms \
 		   memset-avx2-unaligned-erms \
-		   memset-avx512-unaligned-erms
+		   memset-avx512-unaligned-erms \
+		   memset-avx512-unaligned-erms-no-vzeroupper
 CFLAGS-varshift.c += -msse4
 CFLAGS-strcspn-c.c += -msse4
 CFLAGS-strpbrk-c.c += -msse4
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 1e880f6edc..a621ae0ed3 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -57,7 +57,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __memmove_chk_avx512_unaligned_2)
 	      IFUNC_IMPL_ADD (array, i, __memmove_chk,
 			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __memmove_chk_avx512_no_vzeroupper_unaligned_2)
+	      IFUNC_IMPL_ADD (array, i, __memmove_chk,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
 			      __memmove_chk_avx512_unaligned_erms)
+	      IFUNC_IMPL_ADD (array, i, __memmove_chk,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __memmove_chk_avx512_no_vzeroupper_unaligned_erms)
 #endif
 	      IFUNC_IMPL_ADD (array, i, __memmove_chk,
 			      HAS_ARCH_FEATURE (AVX_Usable),
@@ -101,7 +107,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __memmove_avx512_unaligned_2)
 	      IFUNC_IMPL_ADD (array, i, memmove,
 			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __memmove_avx512_no_vzeroupper_unaligned_2)
+	      IFUNC_IMPL_ADD (array, i, memmove,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
 			      __memmove_avx512_unaligned_erms)
+	      IFUNC_IMPL_ADD (array, i, memmove,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __memmove_avx512_no_vzeroupper_unaligned_erms)
 #endif
 	      IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3),
 			      __memmove_ssse3_back)
@@ -137,9 +149,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __memset_chk_avx512_unaligned_erms)
 	      IFUNC_IMPL_ADD (array, i, __memset_chk,
 			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __memset_chk_avx512_no_vzeroupper_unaligned_erms)
+	      IFUNC_IMPL_ADD (array, i, __memset_chk,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
 			      __memset_chk_avx512_unaligned)
 	      IFUNC_IMPL_ADD (array, i, __memset_chk,
 			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __memset_chk_avx512_no_vzeroupper_unaligned)
+	      IFUNC_IMPL_ADD (array, i, __memset_chk,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
 			      __memset_chk_avx512_no_vzeroupper)
 #endif
 	      )
@@ -167,9 +185,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __memset_avx512_unaligned_erms)
 	      IFUNC_IMPL_ADD (array, i, memset,
 			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __memset_avx512_no_vzeroupper_unaligned_erms)
+	      IFUNC_IMPL_ADD (array, i, memset,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
 			      __memset_avx512_unaligned)
 	      IFUNC_IMPL_ADD (array, i, memset,
 			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __memset_avx512_no_vzeroupper_unaligned)
+	      IFUNC_IMPL_ADD (array, i, memset,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
 			      __memset_avx512_no_vzeroupper)
 #endif
 	     )
@@ -338,7 +362,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __memcpy_chk_avx512_unaligned_2)
 	      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
 			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __memcpy_chk_avx512_no_vzeroupper_unaligned_2)
+	      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
 			      __memcpy_chk_avx512_unaligned_erms)
+	      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __memcpy_chk_avx512_no_vzeroupper_unaligned_erms)
 #endif
 	      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
 			      HAS_ARCH_FEATURE (AVX_Usable),
@@ -386,7 +416,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __memcpy_avx512_unaligned_2)
 	      IFUNC_IMPL_ADD (array, i, memcpy,
 			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __memcpy_avx512_no_vzeroupper_unaligned_2)
+	      IFUNC_IMPL_ADD (array, i, memcpy,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
 			      __memcpy_avx512_unaligned_erms)
+	      IFUNC_IMPL_ADD (array, i, memcpy,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __memcpy_avx512_no_vzeroupper_unaligned_erms)
 #endif
 	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, memcpy, 1,
@@ -407,7 +443,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __mempcpy_chk_avx512_unaligned_2)
 	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
 			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __mempcpy_chk_avx512_no_vzeroupper_unaligned_2)
+	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
 			      __mempcpy_chk_avx512_unaligned_erms)
+	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __mempcpy_chk_avx512_no_vzeroupper_unaligned_erms)
 #endif
 	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
 			      HAS_ARCH_FEATURE (AVX_Usable),
@@ -442,7 +484,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __mempcpy_avx512_unaligned_2)
 	      IFUNC_IMPL_ADD (array, i, mempcpy,
 			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __mempcpy_avx512_no_vzeroupper_unaligned_2)
+	      IFUNC_IMPL_ADD (array, i, mempcpy,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
 			      __mempcpy_avx512_unaligned_erms)
+	      IFUNC_IMPL_ADD (array, i, mempcpy,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __mempcpy_avx512_no_vzeroupper_unaligned_erms)
 #endif
 	      IFUNC_IMPL_ADD (array, i, mempcpy,
 			      HAS_ARCH_FEATURE (AVX_Usable),
diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms-no-vzeroupper.S
new file mode 100644
index 0000000000..3ba2851c3e
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms-no-vzeroupper.S
@@ -0,0 +1,12 @@
+#ifdef HAVE_AVX512_ASM_SUPPORT
+# define VEC_SIZE	64
+# define VEC(i)		zmm##i
+# define VMOVU		vmovdqu64
+# define VMOVA		vmovdqa64
+# define VZEROUPPER
+
+# define SECTION(p)		p##.avx512
+# define MEMMOVE_SYMBOL(p,s)	p##_avx512_no_vzeroupper_##s
+
+# include "memmove-vec-unaligned-erms.S"
+#endif
diff --git a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms-no-vzeroupper.S
new file mode 100644
index 0000000000..db246306b5
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms-no-vzeroupper.S
@@ -0,0 +1,19 @@
+#ifdef HAVE_AVX512_ASM_SUPPORT
+# define VEC_SIZE	64
+# define VEC(i)		zmm##i
+# define VMOVU		vmovdqu64
+# define VMOVA		vmovdqa64
+# define VZEROUPPER
+# define VZEROUPPER_SHORT_RETURN rep
+
+# define VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
+  vmovd d, %xmm0; \
+  movq r, %rax; \
+  vpbroadcastb %xmm0, %xmm0; \
+  vpbroadcastq %xmm0, %zmm0
+
+# define SECTION(p)		p##.avx512
+# define MEMSET_SYMBOL(p,s)	p##_avx512_no_vzeroupper_##s
+
+# include "memset-vec-unaligned-erms.S"
+#endif