diff options
4 files changed, 82 insertions, 1 deletion
# Reviewer summary of the patch below. These lines sit ahead of the first "diff --git" header and belong to no hunk.
# - Makefile: appends memmove-avx512-unaligned-erms-no-vzeroupper and memset-avx512-unaligned-erms-no-vzeroupper to sysdep_routines.
# - ifunc-impl-list.c: adds IFUNC_IMPL_ADD entries, each conditioned on HAS_ARCH_FEATURE (AVX512F_Usable), for the
#   *_avx512_no_vzeroupper_unaligned_2 and *_avx512_no_vzeroupper_unaligned_erms variants of __memmove_chk, memmove,
#   __memcpy_chk, memcpy, __mempcpy_chk and mempcpy, and for the *_avx512_no_vzeroupper_unaligned_erms and
#   *_avx512_no_vzeroupper_unaligned variants of __memset_chk and memset.
# - New .S files: inside #ifdef HAVE_AVX512_ASM_SUPPORT they set VEC_SIZE to 64, VEC(i) to zmm##i, VMOVU/VMOVA to
#   vmovdqu64/vmovdqa64, define VZEROUPPER with an empty expansion, build symbol names with the
#   _avx512_no_vzeroupper_ infix via MEMMOVE_SYMBOL / MEMSET_SYMBOL, and then include the shared template
#   (memmove-vec-unaligned-erms.S / memset-vec-unaligned-erms.S). The memset file additionally defines
#   VZEROUPPER_SHORT_RETURN as rep and supplies VDUP_TO_VEC0_AND_SET_RETURN(d, r), which loads d into xmm0,
#   copies r to rax and broadcasts xmm0 into zmm0.
# - NOTE(review): presumably the templates expand VZEROUPPER on their return paths, so the empty definition is what
#   drops the instruction -- confirm against the template sources, which are not part of this patch.
# - NOTE(review): the new ifunc entries sit in a conditional region closed by the visible #endif; the opening
#   condition is outside the hunk context -- confirm it is the AVX512 assembler-support guard.
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile index 8878efbc8f..0218ffae4d 100644 --- a/sysdeps/x86_64/multiarch/Makefile +++ b/sysdeps/x86_64/multiarch/Makefile @@ -24,9 +24,11 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \ memmove-sse2-unaligned-erms \ memmove-avx-unaligned-erms \ memmove-avx512-unaligned-erms \ + memmove-avx512-unaligned-erms-no-vzeroupper \ memset-sse2-unaligned-erms \ memset-avx2-unaligned-erms \ - memset-avx512-unaligned-erms + memset-avx512-unaligned-erms \ + memset-avx512-unaligned-erms-no-vzeroupper CFLAGS-varshift.c += -msse4 CFLAGS-strcspn-c.c += -msse4 CFLAGS-strpbrk-c.c += -msse4 diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c index 1e880f6edc..a621ae0ed3 100644 --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c @@ -57,7 +57,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __memmove_chk_avx512_unaligned_2) IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_ARCH_FEATURE (AVX512F_Usable), + __memmove_chk_avx512_no_vzeroupper_unaligned_2) + IFUNC_IMPL_ADD (array, i, __memmove_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), __memmove_chk_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __memmove_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memmove_chk_avx512_no_vzeroupper_unaligned_erms) #endif IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_ARCH_FEATURE (AVX_Usable), @@ -101,7 +107,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __memmove_avx512_unaligned_2) IFUNC_IMPL_ADD (array, i, memmove, HAS_ARCH_FEATURE (AVX512F_Usable), + __memmove_avx512_no_vzeroupper_unaligned_2) + IFUNC_IMPL_ADD (array, i, memmove, + HAS_ARCH_FEATURE (AVX512F_Usable), __memmove_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, memmove, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memmove_avx512_no_vzeroupper_unaligned_erms) #endif IFUNC_IMPL_ADD (array, i, 
memmove, HAS_CPU_FEATURE (SSSE3), __memmove_ssse3_back) @@ -137,9 +149,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __memset_chk_avx512_unaligned_erms) IFUNC_IMPL_ADD (array, i, __memset_chk, HAS_ARCH_FEATURE (AVX512F_Usable), + __memset_chk_avx512_no_vzeroupper_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __memset_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), __memset_chk_avx512_unaligned) IFUNC_IMPL_ADD (array, i, __memset_chk, HAS_ARCH_FEATURE (AVX512F_Usable), + __memset_chk_avx512_no_vzeroupper_unaligned) + IFUNC_IMPL_ADD (array, i, __memset_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), __memset_chk_avx512_no_vzeroupper) #endif ) @@ -167,9 +185,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __memset_avx512_unaligned_erms) IFUNC_IMPL_ADD (array, i, memset, HAS_ARCH_FEATURE (AVX512F_Usable), + __memset_avx512_no_vzeroupper_unaligned_erms) + IFUNC_IMPL_ADD (array, i, memset, + HAS_ARCH_FEATURE (AVX512F_Usable), __memset_avx512_unaligned) IFUNC_IMPL_ADD (array, i, memset, HAS_ARCH_FEATURE (AVX512F_Usable), + __memset_avx512_no_vzeroupper_unaligned) + IFUNC_IMPL_ADD (array, i, memset, + HAS_ARCH_FEATURE (AVX512F_Usable), __memset_avx512_no_vzeroupper) #endif ) @@ -338,7 +362,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __memcpy_chk_avx512_unaligned_2) IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_ARCH_FEATURE (AVX512F_Usable), + __memcpy_chk_avx512_no_vzeroupper_unaligned_2) + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), __memcpy_chk_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memcpy_chk_avx512_no_vzeroupper_unaligned_erms) #endif IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_ARCH_FEATURE (AVX_Usable), @@ -386,7 +416,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __memcpy_avx512_unaligned_2) IFUNC_IMPL_ADD (array, i, memcpy, HAS_ARCH_FEATURE 
(AVX512F_Usable), + __memcpy_avx512_no_vzeroupper_unaligned_2) + IFUNC_IMPL_ADD (array, i, memcpy, + HAS_ARCH_FEATURE (AVX512F_Usable), __memcpy_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, memcpy, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memcpy_avx512_no_vzeroupper_unaligned_erms) #endif IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned) IFUNC_IMPL_ADD (array, i, memcpy, 1, @@ -407,7 +443,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __mempcpy_chk_avx512_unaligned_2) IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_ARCH_FEATURE (AVX512F_Usable), + __mempcpy_chk_avx512_no_vzeroupper_unaligned_2) + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), __mempcpy_chk_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), + __mempcpy_chk_avx512_no_vzeroupper_unaligned_erms) #endif IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_ARCH_FEATURE (AVX_Usable), @@ -442,7 +484,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __mempcpy_avx512_unaligned_2) IFUNC_IMPL_ADD (array, i, mempcpy, HAS_ARCH_FEATURE (AVX512F_Usable), + __mempcpy_avx512_no_vzeroupper_unaligned_2) + IFUNC_IMPL_ADD (array, i, mempcpy, + HAS_ARCH_FEATURE (AVX512F_Usable), __mempcpy_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, mempcpy, + HAS_ARCH_FEATURE (AVX512F_Usable), + __mempcpy_avx512_no_vzeroupper_unaligned_erms) #endif IFUNC_IMPL_ADD (array, i, mempcpy, HAS_ARCH_FEATURE (AVX_Usable), diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms-no-vzeroupper.S new file mode 100644 index 0000000000..3ba2851c3e --- /dev/null +++ b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms-no-vzeroupper.S @@ -0,0 +1,12 @@ +#ifdef HAVE_AVX512_ASM_SUPPORT +# define VEC_SIZE 64 +# define VEC(i) zmm##i +# define VMOVU vmovdqu64 +# define VMOVA vmovdqa64 +# define VZEROUPPER + +# define 
SECTION(p) p##.avx512 +# define MEMMOVE_SYMBOL(p,s) p##_avx512_no_vzeroupper_##s + +# include "memmove-vec-unaligned-erms.S" +#endif diff --git a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms-no-vzeroupper.S new file mode 100644 index 0000000000..db246306b5 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms-no-vzeroupper.S @@ -0,0 +1,19 @@ +#ifdef HAVE_AVX512_ASM_SUPPORT +# define VEC_SIZE 64 +# define VEC(i) zmm##i +# define VMOVU vmovdqu64 +# define VMOVA vmovdqa64 +# define VZEROUPPER +# define VZEROUPPER_SHORT_RETURN rep + +# define VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ + vmovd d, %xmm0; \ + movq r, %rax; \ + vpbroadcastb %xmm0, %xmm0; \ + vpbroadcastq %xmm0, %zmm0 + +# define SECTION(p) p##.avx512 +# define MEMSET_SYMBOL(p,s) p##_avx512_no_vzeroupper_##s + +# include "memset-vec-unaligned-erms.S" +#endif |