summary refs log tree commit diff
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2017-06-09 05:42:16 -0700
committerH.J. Lu <hjl.tools@gmail.com>2017-06-09 05:42:29 -0700
commit8fe57365bfb5a417d911ab715a5671b3b1d7b155 (patch)
tree9b6a729305aaa4f9b30e8a016469fbbd5a7bc664
parentdc485ceb2ac596d27294cc1942adf3181f15e8bf (diff)
downloadglibc-8fe57365bfb5a417d911ab715a5671b3b1d7b155.tar.gz
glibc-8fe57365bfb5a417d911ab715a5671b3b1d7b155.tar.xz
glibc-8fe57365bfb5a417d911ab715a5671b3b1d7b155.zip
x86-64: Optimize strchr/strchrnul/wcschr with AVX2
Optimize strchr/strchrnul/wcschr with AVX2 to search 32 bytes with vector
instructions.  It is as fast as SSE2 versions for size <= 16 bytes and up
to 1X faster for or size > 16 bytes on Haswell.  Select AVX2 version on
AVX2 machines where vzeroupper is preferred and AVX unaligned load is fast.

NB: It uses TZCNT instead of BSF since TZCNT produces the same result
as BSF for non-zero input.  TZCNT is faster than BSF and is executed
as BSF if machine doesn't support TZCNT.

	* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
	strchr-sse2, strchrnul-sse2, strchr-avx2, strchrnul-avx2,
	wcschr-sse2 and wcschr-avx2.
	* sysdeps/x86_64/multiarch/ifunc-impl-list.c
	(__libc_ifunc_impl_list): Add tests for __strchr_avx2,
	__strchrnul_avx2, __strchrnul_sse2, __wcschr_avx2 and
	__wcschr_sse2.
	* sysdeps/x86_64/multiarch/strchr-avx2.S: New file.
	* sysdeps/x86_64/multiarch/strchr-sse2.S: Likewise.
	* sysdeps/x86_64/multiarch/strchr.c: Likewise.
	* sysdeps/x86_64/multiarch/strchrnul-avx2.S: Likewise.
	* sysdeps/x86_64/multiarch/strchrnul-sse2.S: Likewise.
	* sysdeps/x86_64/multiarch/strchrnul.c: Likewise.
	* sysdeps/x86_64/multiarch/wcschr-avx2.S: Likewise.
	* sysdeps/x86_64/multiarch/wcschr-sse2.S: Likewise.
	* sysdeps/x86_64/multiarch/wcschr.c: Likewise.
	* sysdeps/x86_64/multiarch/strchr.S: Removed.
-rw-r--r--ChangeLog20
-rw-r--r--sysdeps/x86_64/multiarch/Makefile2
-rw-r--r--sysdeps/x86_64/multiarch/ifunc-impl-list.c19
-rw-r--r--sysdeps/x86_64/multiarch/strchr-avx2.S254
-rw-r--r--sysdeps/x86_64/multiarch/strchr-sse2.S28
-rw-r--r--sysdeps/x86_64/multiarch/strchr.S57
-rw-r--r--sysdeps/x86_64/multiarch/strchr.c55
-rw-r--r--sysdeps/x86_64/multiarch/strchrnul-avx2.S3
-rw-r--r--sysdeps/x86_64/multiarch/strchrnul-sse2.S26
-rw-r--r--sysdeps/x86_64/multiarch/strchrnul.c34
-rw-r--r--sysdeps/x86_64/multiarch/wcschr-avx2.S3
-rw-r--r--sysdeps/x86_64/multiarch/wcschr-sse2.S30
-rw-r--r--sysdeps/x86_64/multiarch/wcschr.c39
13 files changed, 512 insertions, 58 deletions
diff --git a/ChangeLog b/ChangeLog
index fc3dff043a..e8eb9e7b48 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,26 @@
 2017-06-09  H.J. Lu  <hongjiu.lu@intel.com>
 
 	* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
+	strchr-sse2, strchrnul-sse2, strchr-avx2, strchrnul-avx2,
+	wcschr-sse2 and wcschr-avx2.
+	* sysdeps/x86_64/multiarch/ifunc-impl-list.c
+	(__libc_ifunc_impl_list): Add tests for __strchr_avx2,
+	__strchrnul_avx2, __strchrnul_sse2, __wcschr_avx2 and
+	__wcschr_sse2.
+	* sysdeps/x86_64/multiarch/strchr-avx2.S: New file.
+	* sysdeps/x86_64/multiarch/strchr-sse2.S: Likewise.
+	* sysdeps/x86_64/multiarch/strchr.c: Likewise.
+	* sysdeps/x86_64/multiarch/strchrnul-avx2.S: Likewise.
+	* sysdeps/x86_64/multiarch/strchrnul-sse2.S: Likewise.
+	* sysdeps/x86_64/multiarch/strchrnul.c: Likewise.
+	* sysdeps/x86_64/multiarch/wcschr-avx2.S: Likewise.
+	* sysdeps/x86_64/multiarch/wcschr-sse2.S: Likewise.
+	* sysdeps/x86_64/multiarch/wcschr.c: Likewise.
+	* sysdeps/x86_64/multiarch/strchr.S: Removed.
+
+2017-06-09  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
 	strlen-sse2, strnlen-sse2, strlen-avx2, strnlen-avx2,
 	wcslen-sse2, wcslen-avx2 and wcsnlen-avx2.
 	* sysdeps/x86_64/multiarch/ifunc-impl-list.c
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 1846ae1acb..4523f51095 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -13,6 +13,7 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
 		   memcpy-ssse3-back \
 		   memmove-ssse3-back \
 		   memmove-avx512-no-vzeroupper strcasecmp_l-ssse3 \
+		   strchr-sse2 strchrnul-sse2 strchr-avx2 strchrnul-avx2 \
 		   strlen-sse2 strnlen-sse2 strlen-avx2 strnlen-avx2 \
 		   strncase_l-ssse3 strcat-ssse3 strncat-ssse3\
 		   strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \
@@ -37,6 +38,7 @@ sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c \
 		   wmemcmp-avx2-movbe \
 		   wmemchr-sse2 wmemchr-avx2 \
 		   wcscpy-ssse3 wcscpy-c \
+		   wcschr-sse2 wcschr-avx2 \
 		   wcsnlen-sse4_1 wcsnlen-c \
 		   wcslen-sse2 wcslen-avx2 wcsnlen-avx2
 endif
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 9d499ffe0b..8dda1b040a 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -229,11 +229,21 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_sse2))
 
-  /* Support sysdeps/x86_64/multiarch/strchr.S.  */
+  /* Support sysdeps/x86_64/multiarch/strchr.c.  */
   IFUNC_IMPL (i, name, strchr,
+	      IFUNC_IMPL_ADD (array, i, strchr,
+			      HAS_ARCH_FEATURE (AVX2_Usable),
+			      __strchr_avx2)
 	      IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2_no_bsf)
 	      IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2))
 
+  /* Support sysdeps/x86_64/multiarch/strchrnul.c.  */
+  IFUNC_IMPL (i, name, strchrnul,
+	      IFUNC_IMPL_ADD (array, i, strchrnul,
+			      HAS_ARCH_FEATURE (AVX2_Usable),
+			      __strchrnul_avx2)
+	      IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_sse2))
+
   /* Support sysdeps/x86_64/multiarch/strcmp.S.  */
   IFUNC_IMPL (i, name, strcmp,
 	      IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSE4_2),
@@ -318,6 +328,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2))
 
+  /* Support sysdeps/x86_64/multiarch/wcschr.c.  */
+  IFUNC_IMPL (i, name, wcschr,
+	      IFUNC_IMPL_ADD (array, i, wcschr,
+			      HAS_ARCH_FEATURE (AVX2_Usable),
+			      __wcschr_avx2)
+	      IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_sse2))
+
   /* Support sysdeps/x86_64/multiarch/wcscpy.S.  */
   IFUNC_IMPL (i, name, wcscpy,
 	      IFUNC_IMPL_ADD (array, i, wcscpy, HAS_CPU_FEATURE (SSSE3),
diff --git a/sysdeps/x86_64/multiarch/strchr-avx2.S b/sysdeps/x86_64/multiarch/strchr-avx2.S
new file mode 100644
index 0000000000..e4292d33ab
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strchr-avx2.S
@@ -0,0 +1,254 @@
+/* strchr/strchrnul optimized with AVX2.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+# ifndef STRCHR
+#  define STRCHR	__strchr_avx2
+# endif
+
+# ifdef USE_AS_WCSCHR
+#  define VPBROADCAST	vpbroadcastd
+#  define VPCMPEQ	vpcmpeqd
+#  define CHAR_REG	esi
+# else
+#  define VPBROADCAST	vpbroadcastb
+#  define VPCMPEQ	vpcmpeqb
+#  define CHAR_REG	sil
+# endif
+
+# ifndef VZEROUPPER
+#  define VZEROUPPER	vzeroupper
+# endif
+
+# define VEC_SIZE 32
+
+	.section .text.avx,"ax",@progbits
+ENTRY (STRCHR)
+	movl	%edi, %ecx
+	/* Broadcast CHAR to YMM0.  */
+	vmovd	%esi, %xmm0
+	vpxor	%xmm9, %xmm9, %xmm9
+	VPBROADCAST %xmm0, %ymm0
+	/* Check if we may cross page boundary with one vector load.  */
+	andl	$(2 * VEC_SIZE - 1), %ecx
+	cmpl	$VEC_SIZE, %ecx
+	ja	L(cros_page_boundary)
+
+	/* Check the first VEC_SIZE bytes.  Search for both CHAR and the
+	   null byte.  */
+	vmovdqu	(%rdi), %ymm8
+	VPCMPEQ %ymm8, %ymm0, %ymm1
+	VPCMPEQ %ymm8, %ymm9, %ymm2
+	vpor	%ymm1, %ymm2, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x0)
+
+	/* Align data for aligned loads in the loop.  */
+	addq	$VEC_SIZE, %rdi
+	andl	$(VEC_SIZE - 1), %ecx
+	andq	$-VEC_SIZE, %rdi
+
+	jmp	L(more_4x_vec)
+
+	.p2align 4
+L(cros_page_boundary):
+	andl	$(VEC_SIZE - 1), %ecx
+	andq	$-VEC_SIZE, %rdi
+	vmovdqu	(%rdi), %ymm8
+	VPCMPEQ %ymm8, %ymm0, %ymm1
+	VPCMPEQ %ymm8, %ymm9, %ymm2
+	vpor	%ymm1, %ymm2, %ymm1
+	vpmovmskb %ymm1, %eax
+	/* Remove the leading bytes.  */
+	sarl	%cl, %eax
+	testl	%eax, %eax
+	jz	L(aligned_more)
+	/* Found CHAR or the null byte.  */
+	tzcntl	%eax, %eax
+	addq	%rcx, %rax
+# ifdef USE_AS_STRCHRNUL
+	addq	%rdi, %rax
+# else
+	xorl	%edx, %edx
+	leaq	(%rdi, %rax), %rax
+	cmp	(%rax), %CHAR_REG
+	cmovne	%rdx, %rax
+# endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(aligned_more):
+	addq	$VEC_SIZE, %rdi
+
+L(more_4x_vec):
+	/* Check the first 4 * VEC_SIZE.  Only one VEC_SIZE at a time
+	   since data is only aligned to VEC_SIZE.  */
+	vmovdqa	(%rdi), %ymm8
+	VPCMPEQ %ymm8, %ymm0, %ymm1
+	VPCMPEQ %ymm8, %ymm9, %ymm2
+	vpor	%ymm1, %ymm2, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x0)
+
+	vmovdqa	VEC_SIZE(%rdi), %ymm8
+	VPCMPEQ %ymm8, %ymm0, %ymm1
+	VPCMPEQ %ymm8, %ymm9, %ymm2
+	vpor	%ymm1, %ymm2, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x1)
+
+	vmovdqa	(VEC_SIZE * 2)(%rdi), %ymm8
+	VPCMPEQ %ymm8, %ymm0, %ymm1
+	VPCMPEQ %ymm8, %ymm9, %ymm2
+	vpor	%ymm1, %ymm2, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x2)
+
+	vmovdqa	(VEC_SIZE * 3)(%rdi), %ymm8
+	VPCMPEQ %ymm8, %ymm0, %ymm1
+	VPCMPEQ %ymm8, %ymm9, %ymm2
+	vpor	%ymm1, %ymm2, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x3)
+
+	addq	$(VEC_SIZE * 4), %rdi
+
+	/* Align data to 4 * VEC_SIZE.  */
+	movq	%rdi, %rcx
+	andl	$(4 * VEC_SIZE - 1), %ecx
+	andq	$-(4 * VEC_SIZE), %rdi
+
+	.p2align 4
+L(loop_4x_vec):
+	/* Compare 4 * VEC at a time forward.  */
+	vmovdqa	(%rdi), %ymm5
+	vmovdqa	VEC_SIZE(%rdi), %ymm6
+	vmovdqa	(VEC_SIZE * 2)(%rdi), %ymm7
+	vmovdqa	(VEC_SIZE * 3)(%rdi), %ymm8
+
+	VPCMPEQ %ymm5, %ymm0, %ymm1
+	VPCMPEQ %ymm6, %ymm0, %ymm2
+	VPCMPEQ %ymm7, %ymm0, %ymm3
+	VPCMPEQ %ymm8, %ymm0, %ymm4
+
+	VPCMPEQ %ymm5, %ymm9, %ymm5
+	VPCMPEQ %ymm6, %ymm9, %ymm6
+	VPCMPEQ %ymm7, %ymm9, %ymm7
+	VPCMPEQ %ymm8, %ymm9, %ymm8
+
+	vpor	%ymm1, %ymm5, %ymm1
+	vpor	%ymm2, %ymm6, %ymm2
+	vpor	%ymm3, %ymm7, %ymm3
+	vpor	%ymm4, %ymm8, %ymm4
+
+	vpor	%ymm1, %ymm2, %ymm5
+	vpor	%ymm3, %ymm4, %ymm6
+
+	vpor	%ymm5, %ymm6, %ymm5
+
+	vpmovmskb %ymm5, %eax
+	testl	%eax, %eax
+	jnz	L(4x_vec_end)
+
+	addq	$(VEC_SIZE * 4), %rdi
+
+	jmp	L(loop_4x_vec)
+
+	.p2align 4
+L(first_vec_x0):
+	/* Found CHAR or the null byte.  */
+	tzcntl	%eax, %eax
+# ifdef USE_AS_STRCHRNUL
+	addq	%rdi, %rax
+# else
+	xorl	%edx, %edx
+	leaq	(%rdi, %rax), %rax
+	cmp	(%rax), %CHAR_REG
+	cmovne	%rdx, %rax
+# endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(first_vec_x1):
+	tzcntl	%eax, %eax
+# ifdef USE_AS_STRCHRNUL
+	addq	$VEC_SIZE, %rax
+	addq	%rdi, %rax
+# else
+	xorl	%edx, %edx
+	leaq	VEC_SIZE(%rdi, %rax), %rax
+	cmp	(%rax), %CHAR_REG
+	cmovne	%rdx, %rax
+# endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(first_vec_x2):
+	tzcntl	%eax, %eax
+# ifdef USE_AS_STRCHRNUL
+	addq	$(VEC_SIZE * 2), %rax
+	addq	%rdi, %rax
+# else
+	xorl	%edx, %edx
+	leaq	(VEC_SIZE * 2)(%rdi, %rax), %rax
+	cmp	(%rax), %CHAR_REG
+	cmovne	%rdx, %rax
+# endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(4x_vec_end):
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x0)
+	vpmovmskb %ymm2, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x1)
+	vpmovmskb %ymm3, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x2)
+	vpmovmskb %ymm4, %eax
+	testl	%eax, %eax
+L(first_vec_x3):
+	tzcntl	%eax, %eax
+# ifdef USE_AS_STRCHRNUL
+	addq	$(VEC_SIZE * 3), %rax
+	addq	%rdi, %rax
+# else
+	xorl	%edx, %edx
+	leaq	(VEC_SIZE * 3)(%rdi, %rax), %rax
+	cmp	(%rax), %CHAR_REG
+	cmovne	%rdx, %rax
+# endif
+	VZEROUPPER
+	ret
+
+END (STRCHR)
+#endif
diff --git a/sysdeps/x86_64/multiarch/strchr-sse2.S b/sysdeps/x86_64/multiarch/strchr-sse2.S
new file mode 100644
index 0000000000..c4d1c0c773
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strchr-sse2.S
@@ -0,0 +1,28 @@
+/* strchr optimized with SSE2.
+   Copyright (C) 2009-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+# define strchr __strchr_sse2
+
+# undef weak_alias
+# define weak_alias(strchr, index)
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(strchr)
+#endif
+
+#include "../strchr.S"
diff --git a/sysdeps/x86_64/multiarch/strchr.S b/sysdeps/x86_64/multiarch/strchr.S
deleted file mode 100644
index c9f54ca2e2..0000000000
--- a/sysdeps/x86_64/multiarch/strchr.S
+++ /dev/null
@@ -1,57 +0,0 @@
-/* Multiple versions of strchr
-   Copyright (C) 2009-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-
-/* Define multiple versions only for the definition in libc.  */
-#if IS_IN (libc)
-	.text
-ENTRY(strchr)
-	.type	strchr, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-	leaq	__strchr_sse2(%rip), %rax
-2:	HAS_ARCH_FEATURE (Slow_BSF)
-	jz	3f
-	leaq    __strchr_sse2_no_bsf(%rip), %rax
-3:	ret
-END(strchr)
-
-
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type __strchr_sse2, @function; \
-	.align 16; \
-	.globl __strchr_sse2; \
-	.hidden __strchr_sse2; \
-	__strchr_sse2: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size __strchr_sse2, .-__strchr_sse2
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal strchr calls through a PLT.
-   The speedup we get from using SSE4.2 instruction is likely eaten away
-   by the indirect call in the PLT.  */
-# define libc_hidden_builtin_def(name) \
-	.globl __GI_strchr; __GI_strchr = __strchr_sse2
-#endif
-
-#include "../strchr.S"
diff --git a/sysdeps/x86_64/multiarch/strchr.c b/sysdeps/x86_64/multiarch/strchr.c
new file mode 100644
index 0000000000..22af16e5c9
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strchr.c
@@ -0,0 +1,55 @@
+/* Multiple versions of strchr.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2009-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define strchr __redirect_strchr
+# include <string.h>
+# undef strchr
+
+# define SYMBOL_NAME strchr
+# include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_no_bsf) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+  const struct cpu_features* cpu_features = __get_cpu_features ();
+
+  if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
+      && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
+      && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+    return OPTIMIZE (avx2);
+
+  if (CPU_FEATURES_ARCH_P (cpu_features, Slow_BSF))
+    return OPTIMIZE (sse2_no_bsf);
+
+  return OPTIMIZE (sse2);
+}
+
+libc_ifunc_redirected (__redirect_strchr, strchr, IFUNC_SELECTOR ());
+weak_alias (strchr, index)
+# ifdef SHARED
+__hidden_ver1 (strchr, __GI_strchr, __redirect_strchr)
+  __attribute__((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strchrnul-avx2.S b/sysdeps/x86_64/multiarch/strchrnul-avx2.S
new file mode 100644
index 0000000000..fa0cc09760
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strchrnul-avx2.S
@@ -0,0 +1,3 @@
+#define STRCHR __strchrnul_avx2
+#define USE_AS_STRCHRNUL 1
+#include "strchr-avx2.S"
diff --git a/sysdeps/x86_64/multiarch/strchrnul-sse2.S b/sysdeps/x86_64/multiarch/strchrnul-sse2.S
new file mode 100644
index 0000000000..4d199b3de8
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strchrnul-sse2.S
@@ -0,0 +1,26 @@
+/* strchrnul optimized with SSE2.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+# define __strchrnul __strchrnul_sse2
+
+# undef weak_alias
+# define weak_alias(__strchrnul, strchrnul)
+#endif
+
+#include "../strchrnul.S"
diff --git a/sysdeps/x86_64/multiarch/strchrnul.c b/sysdeps/x86_64/multiarch/strchrnul.c
new file mode 100644
index 0000000000..4a4c55a937
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strchrnul.c
@@ -0,0 +1,34 @@
+/* Multiple versions of strchrnul.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strchrnul __redirect_strchrnul
+# define __strchrnul __redirect___strchrnul
+# include <string.h>
+# undef __strchrnul
+# undef strchrnul
+
+# define SYMBOL_NAME strchrnul
+# include "ifunc-avx2.h"
+
+libc_ifunc_redirected (__redirect_strchrnul, __strchrnul,
+		       IFUNC_SELECTOR ());
+weak_alias (__strchrnul, strchrnul)
+#endif
diff --git a/sysdeps/x86_64/multiarch/wcschr-avx2.S b/sysdeps/x86_64/multiarch/wcschr-avx2.S
new file mode 100644
index 0000000000..67726b6837
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcschr-avx2.S
@@ -0,0 +1,3 @@
+#define STRCHR __wcschr_avx2
+#define USE_AS_WCSCHR 1
+#include "strchr-avx2.S"
diff --git a/sysdeps/x86_64/multiarch/wcschr-sse2.S b/sysdeps/x86_64/multiarch/wcschr-sse2.S
new file mode 100644
index 0000000000..9a1b45aaac
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcschr-sse2.S
@@ -0,0 +1,30 @@
+/* wcschr optimized with SSE2.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+# define __wcschr __wcschr_sse2
+
+# undef weak_alias
+# define weak_alias(__wcschr, wcschr)
+# undef libc_hidden_def
+# define libc_hidden_def(__wcschr)
+# undef libc_hidden_weak
+# define libc_hidden_weak(wcschr)
+#endif
+
+#include "../wcschr.S"
diff --git a/sysdeps/x86_64/multiarch/wcschr.c b/sysdeps/x86_64/multiarch/wcschr.c
new file mode 100644
index 0000000000..70d36926e1
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcschr.c
@@ -0,0 +1,39 @@
+/* Multiple versions of wcschr.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define wcschr __redirect_wcschr
+# define __wcschr __redirect___wcschr
+# include <wchar.h>
+# undef wcschr
+# undef __wcschr
+
+# define SYMBOL_NAME wcschr
+# include "ifunc-avx2.h"
+
+libc_ifunc_redirected (__redirect_wcschr, __wcschr, IFUNC_SELECTOR ());
+weak_alias (__wcschr, wcschr);
+# ifdef SHARED
+__hidden_ver1 (__wcschr, __GI___wcschr, __redirect___wcschr)
+  __attribute__((visibility ("hidden")));
+__hidden_ver1 (wcschr, __GI_wcschr, __redirect_wcschr)
+  __attribute__((weak, visibility ("hidden")));
+# endif
+#endif