From 72276d6e8843db6df5971b06787f0a5e39bda138 Mon Sep 17 00:00:00 2001 From: Andrew Senkevich Date: Sat, 16 Jan 2016 00:49:45 +0300 Subject: Added memcpy/memmove family optimized with AVX512 for KNL hardware. Added AVX512 implementations of memcpy, mempcpy, memmove, memcpy_chk, mempcpy_chk, memmove_chk. They show an average improvement of more than 30% over the AVX versions on KNL hardware (performance results are in the thread). * sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Added new files. * sysdeps/x86_64/multiarch/ifunc-impl-list.c: Added new tests. * sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S: New file. * sysdeps/x86_64/multiarch/mempcpy-avx512-no-vzeroupper.S: Likewise. * sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S: Likewise. * sysdeps/x86_64/multiarch/memcpy.S: Added new IFUNC branch. * sysdeps/x86_64/multiarch/memcpy_chk.S: Likewise. * sysdeps/x86_64/multiarch/memmove.c: Likewise. * sysdeps/x86_64/multiarch/memmove_chk.c: Likewise. * sysdeps/x86_64/multiarch/mempcpy.S: Likewise. * sysdeps/x86_64/multiarch/mempcpy_chk.S: Likewise. 
--- sysdeps/x86_64/multiarch/memmove_chk.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'sysdeps/x86_64/multiarch/memmove_chk.c') diff --git a/sysdeps/x86_64/multiarch/memmove_chk.c b/sysdeps/x86_64/multiarch/memmove_chk.c index 60ed98fd40..f64da63180 100644 --- a/sysdeps/x86_64/multiarch/memmove_chk.c +++ b/sysdeps/x86_64/multiarch/memmove_chk.c @@ -26,10 +26,19 @@ extern __typeof (__memmove_chk) __memmove_chk_sse2 attribute_hidden; extern __typeof (__memmove_chk) __memmove_chk_ssse3 attribute_hidden; extern __typeof (__memmove_chk) __memmove_chk_ssse3_back attribute_hidden; extern __typeof (__memmove_chk) __memmove_chk_avx_unaligned attribute_hidden; +# ifdef HAVE_AVX512_ASM_SUPPORT + extern __typeof (__memmove_chk) __memmove_chk_avx512_no_vzeroupper attribute_hidden; +# endif #include "debug/memmove_chk.c" libc_ifunc (__memmove_chk, +#ifdef HAVE_AVX512_ASM_SUPPORT + HAS_ARCH_FEATURE (AVX512F_Usable) + && HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER) + ? __memmove_chk_avx512_no_vzeroupper + : +#endif HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load) ? __memmove_chk_avx_unaligned : (HAS_CPU_FEATURE (SSSE3) ? (HAS_ARCH_FEATURE (Fast_Copy_Backward) -- cgit 1.4.1