From 72276d6e8843db6df5971b06787f0a5e39bda138 Mon Sep 17 00:00:00 2001 From: Andrew Senkevich Date: Sat, 16 Jan 2016 00:49:45 +0300 Subject: Added memcpy/memmove family optimized with AVX512 for KNL hardware. Added AVX512 implementations of memcpy, mempcpy, memmove, memcpy_chk, mempcpy_chk, memmove_chk. It shows average improvement more than 30% over AVX versions on KNL hardware (performance results in the thread ). * sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Added new files. * sysdeps/x86_64/multiarch/ifunc-impl-list.c: Added new tests. * sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S: New file. * sysdeps/x86_64/multiarch/mempcpy-avx512-no-vzeroupper.S: Likewise. * sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S: Likewise. * sysdeps/x86_64/multiarch/memcpy.S: Added new IFUNC branch. * sysdeps/x86_64/multiarch/memcpy_chk.S: Likewise. * sysdeps/x86_64/multiarch/memmove.c: Likewise. * sysdeps/x86_64/multiarch/memmove_chk.c: Likewise. * sysdeps/x86_64/multiarch/mempcpy.S: Likewise. * sysdeps/x86_64/multiarch/mempcpy_chk.S: Likewise. --- sysdeps/x86_64/multiarch/ifunc-impl-list.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'sysdeps/x86_64/multiarch/ifunc-impl-list.c') diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c index 5f600dc1c2..d9ff016659 100644 --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c @@ -24,7 +24,7 @@ #include "init-arch.h" /* Maximum number of IFUNC implementations. */ -#define MAX_IFUNC 4 +#define MAX_IFUNC 5 /* Fill ARRAY of MAX elements with IFUNC implementations for function NAME supported on target machine and return the number of valid @@ -46,8 +46,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __memcmp_ssse3) IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_sse2)) - /* Support sysdeps/x86_64/multiarch/memmove_chk.S. */ + /* Support sysdeps/x86_64/multiarch/memmove_chk.c. */ IFUNC_IMPL (i, name, __memmove_chk, + IFUNC_IMPL_ADD (array, i, __memmove_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memmove_chk_avx512_no_vzeroupper) IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_ARCH_FEATURE (AVX_Usable), __memmove_chk_avx_unaligned) @@ -65,6 +68,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, memmove, HAS_ARCH_FEATURE (AVX_Usable), __memmove_avx_unaligned) + IFUNC_IMPL_ADD (array, i, memmove, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memmove_avx512_no_vzeroupper) IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3), __memmove_ssse3_back) IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3), @@ -253,6 +259,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, #ifdef SHARED /* Support sysdeps/x86_64/multiarch/memcpy_chk.S. */ IFUNC_IMPL (i, name, __memcpy_chk, + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memcpy_chk_avx512_no_vzeroupper) IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_ARCH_FEATURE (AVX_Usable), __memcpy_chk_avx_unaligned) @@ -274,11 +283,17 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __memcpy_ssse3_back) IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3), __memcpy_ssse3) + IFUNC_IMPL_ADD (array, i, memcpy, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memcpy_avx512_no_vzeroupper) IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned) IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2)) /* Support sysdeps/x86_64/multiarch/mempcpy_chk.S. */ IFUNC_IMPL (i, name, __mempcpy_chk, + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), + __mempcpy_chk_avx512_no_vzeroupper) IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_ARCH_FEATURE (AVX_Usable), __mempcpy_chk_avx_unaligned) @@ -293,6 +308,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/mempcpy.S. */ IFUNC_IMPL (i, name, mempcpy, + IFUNC_IMPL_ADD (array, i, mempcpy, + HAS_ARCH_FEATURE (AVX512F_Usable), + __mempcpy_avx512_no_vzeroupper) IFUNC_IMPL_ADD (array, i, mempcpy, HAS_ARCH_FEATURE (AVX_Usable), __mempcpy_avx_unaligned) -- cgit 1.4.1