diff options
author | Andrew Senkevich <andrew.senkevich@intel.com> | 2016-01-15 22:32:07 +0300 |
---|---|---|
committer | Andrew Senkevich <andrew.senkevich@intel.com> | 2016-01-15 22:32:07 +0300 |
commit | 6e0be40a24ccdacd69c3bdf2f70a59a24308b309 (patch) | |
tree | 402b7a3c5e99bd9ab30b2bd1b82f25832fef4d7b /sysdeps/x86_64/multiarch/memmove.c | |
parent | a3e5b4feeb54cb92657ec2bc6d9be1fcef9e8575 (diff) | |
download | glibc-6e0be40a24ccdacd69c3bdf2f70a59a24308b309.tar.gz glibc-6e0be40a24ccdacd69c3bdf2f70a59a24308b309.tar.xz glibc-6e0be40a24ccdacd69c3bdf2f70a59a24308b309.zip |
Added memcpy/memmove family optimized with AVX512 for KNL hardware.
Added AVX512 implementations of memcpy, mempcpy, memmove, memcpy_chk, mempcpy_chk, and memmove_chk. They show an average improvement of more than 30% over the AVX versions on KNL hardware (performance results are attached in the thread <https://sourceware.org/ml/libc-alpha/2016-01/msg00258.html>). * sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Added new files. * sysdeps/x86_64/multiarch/ifunc-impl-list.c: Added new tests. * sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S: New file. * sysdeps/x86_64/multiarch/mempcpy-avx512-no-vzeroupper.S: Likewise. * sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S: Likewise. * sysdeps/x86_64/multiarch/memcpy.S: Added new IFUNC branch. * sysdeps/x86_64/multiarch/memcpy_chk.S: Likewise. * sysdeps/x86_64/multiarch/memmove.c: Likewise. * sysdeps/x86_64/multiarch/memmove_chk.c: Likewise. * sysdeps/x86_64/multiarch/mempcpy.S: Likewise. * sysdeps/x86_64/multiarch/mempcpy_chk.S: Likewise.
Diffstat (limited to 'sysdeps/x86_64/multiarch/memmove.c')
-rw-r--r-- | sysdeps/x86_64/multiarch/memmove.c | 13 |
1 files changed, 11 insertions, 2 deletions
diff --git a/sysdeps/x86_64/multiarch/memmove.c b/sysdeps/x86_64/multiarch/memmove.c index e8445317a2..8da5640bb0 100644 --- a/sysdeps/x86_64/multiarch/memmove.c +++ b/sysdeps/x86_64/multiarch/memmove.c @@ -36,6 +36,9 @@ extern __typeof (__redirect_memmove) __memmove_sse2 attribute_hidden; extern __typeof (__redirect_memmove) __memmove_ssse3 attribute_hidden; extern __typeof (__redirect_memmove) __memmove_ssse3_back attribute_hidden; extern __typeof (__redirect_memmove) __memmove_avx_unaligned attribute_hidden; +# ifdef HAVE_AVX512_ASM_SUPPORT + extern __typeof (__redirect_memmove) __memmove_avx512_no_vzeroupper attribute_hidden; +# endif #endif @@ -49,12 +52,18 @@ extern __typeof (__redirect_memmove) __memmove_avx_unaligned attribute_hidden; ifunc symbol properly. */ extern __typeof (__redirect_memmove) __libc_memmove; libc_ifunc (__libc_memmove, - HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load) +#ifdef HAVE_AVX512_ASM_SUPPORT + HAS_ARCH_FEATURE (AVX512F_Usable) + && HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER) + ? __memmove_avx512_no_vzeroupper + : +#endif + (HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load) ? __memmove_avx_unaligned : (HAS_CPU_FEATURE (SSSE3) ? (HAS_ARCH_FEATURE (Fast_Copy_Backward) ? __memmove_ssse3_back : __memmove_ssse3) - : __memmove_sse2)); + : __memmove_sse2))); strong_alias (__libc_memmove, memmove) |