From d2538b91568af2a63c9d8649ce11756d4dfbdac3 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Fri, 9 Jun 2017 05:45:43 -0700 Subject: x86-64: Optimize strrchr/wcsrchr with AVX2 Optimize strrchr/wcsrchr with AVX2 to check 32 bytes with vector instructions. It is as fast as SSE2 version for small data sizes and up to 1X faster for large data sizes on Haswell. Select AVX2 version on AVX2 machines where vzeroupper is preferred and AVX unaligned load is fast. * sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add strrchr-sse2, strrchr-avx2, wcsrchr-sse2 and wcsrchr-avx2. * sysdeps/x86_64/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list): Add tests for __strrchr_avx2, __strrchr_sse2, __wcsrchr_avx2 and __wcsrchr_sse2. * sysdeps/x86_64/multiarch/strrchr-avx2.S: New file. * sysdeps/x86_64/multiarch/strrchr-sse2.S: Likewise. * sysdeps/x86_64/multiarch/strrchr.c: Likewise. * sysdeps/x86_64/multiarch/wcsrchr-avx2.S: Likewise. * sysdeps/x86_64/multiarch/wcsrchr-sse2.S: Likewise. * sysdeps/x86_64/multiarch/wcsrchr.c: Likewise. --- sysdeps/x86_64/multiarch/ifunc-impl-list.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'sysdeps/x86_64/multiarch/ifunc-impl-list.c') diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c index 5670eb7e9e..81f4d9b7af 100644 --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c @@ -251,6 +251,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __strchrnul_avx2) IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_sse2)) + /* Support sysdeps/x86_64/multiarch/strrchr.c. */ + IFUNC_IMPL (i, name, strrchr, + IFUNC_IMPL_ADD (array, i, strrchr, + HAS_ARCH_FEATURE (AVX2_Usable), + __strrchr_avx2) + IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_sse2)) + /* Support sysdeps/x86_64/multiarch/strcmp.S. */ IFUNC_IMPL (i, name, strcmp, IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSE4_2), @@ -342,6 +349,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __wcschr_avx2) IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_sse2)) + /* Support sysdeps/x86_64/multiarch/wcsrchr.c. */ + IFUNC_IMPL (i, name, wcsrchr, + IFUNC_IMPL_ADD (array, i, wcsrchr, + HAS_ARCH_FEATURE (AVX2_Usable), + __wcsrchr_avx2) + IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_sse2)) + /* Support sysdeps/x86_64/multiarch/wcscpy.S. */ IFUNC_IMPL (i, name, wcscpy, IFUNC_IMPL_ADD (array, i, wcscpy, HAS_CPU_FEATURE (SSSE3), -- cgit 1.4.1