about summary refs log tree commit diff
path: root/sysdeps/x86_64/rawmemchr.S
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/x86_64/rawmemchr.S')
-rw-r--r--sysdeps/x86_64/rawmemchr.S184
1 files changed, 6 insertions, 178 deletions
diff --git a/sysdeps/x86_64/rawmemchr.S b/sysdeps/x86_64/rawmemchr.S
index 4c1a3383b9..ba7e5202e6 100644
--- a/sysdeps/x86_64/rawmemchr.S
+++ b/sysdeps/x86_64/rawmemchr.S
@@ -17,185 +17,13 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <sysdep.h>
+#define RAWMEMCHR	__rawmemchr
 
-	.text
-ENTRY (__rawmemchr)
-	movd	%rsi, %xmm1
-	mov	%rdi, %rcx
+#define DEFAULT_IMPL_V1	"multiarch/rawmemchr-sse2.S"
+#define DEFAULT_IMPL_V3	"multiarch/rawmemchr-avx2.S"
+#define DEFAULT_IMPL_V4	"multiarch/rawmemchr-evex.S"
 
-	punpcklbw %xmm1, %xmm1
-	punpcklbw %xmm1, %xmm1
-
-	and	$63, %rcx
-	pshufd	$0, %xmm1, %xmm1
-
-	cmp	$48, %rcx
-	ja	L(crosscache)
-
-	movdqu	(%rdi), %xmm0
-	pcmpeqb	%xmm1, %xmm0
-/* Check if there is a match.  */
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-
-	jnz	L(matches)
-	add	$16, %rdi
-	and	$-16, %rdi
-	jmp	L(loop_prolog)
-
-	.p2align 4
-L(crosscache):
-	and	$15, %rcx
-	and	$-16, %rdi
-	movdqa	(%rdi), %xmm0
-
-	pcmpeqb	%xmm1, %xmm0
-/* Check if there is a match.  */
-	pmovmskb %xmm0, %eax
-/* Remove the leading bytes.  */
-	sar	%cl, %eax
-	test	%eax, %eax
-	je	L(unaligned_no_match)
-/* Check which byte is a match.  */
-	bsf	%eax, %eax
-
-	add	%rdi, %rax
-	add	%rcx, %rax
-	ret
-
-	.p2align 4
-L(unaligned_no_match):
-	add	$16, %rdi
-
-	.p2align 4
-L(loop_prolog):
-	movdqa	(%rdi), %xmm0
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(matches)
-
-	movdqa	16(%rdi), %xmm2
-	pcmpeqb	%xmm1, %xmm2
-	pmovmskb %xmm2, %eax
-	test	%eax, %eax
-	jnz	L(matches16)
-
-	movdqa	32(%rdi), %xmm3
-	pcmpeqb	%xmm1, %xmm3
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(matches32)
-
-	movdqa	48(%rdi), %xmm4
-	pcmpeqb	%xmm1, %xmm4
-	add	$64, %rdi
-	pmovmskb %xmm4, %eax
-	test	%eax, %eax
-	jnz	L(matches0)
-
-	test	$0x3f, %rdi
-	jz	L(align64_loop)
-
-	movdqa	(%rdi), %xmm0
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(matches)
-
-	movdqa	16(%rdi), %xmm2
-	pcmpeqb	%xmm1, %xmm2
-	pmovmskb %xmm2, %eax
-	test	%eax, %eax
-	jnz	L(matches16)
-
-	movdqa	32(%rdi), %xmm3
-	pcmpeqb	%xmm1, %xmm3
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(matches32)
-
-	movdqa	48(%rdi), %xmm3
-	pcmpeqb	%xmm1, %xmm3
-	pmovmskb %xmm3, %eax
-
-	add	$64, %rdi
-	test	%eax, %eax
-	jnz	L(matches0)
-
-	and	$-64, %rdi
-
-	.p2align 4
-L(align64_loop):
-	movdqa	(%rdi), %xmm0
-	movdqa	16(%rdi), %xmm2
-	movdqa	32(%rdi), %xmm3
-	movdqa	48(%rdi), %xmm4
-
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	%xmm1, %xmm2
-	pcmpeqb	%xmm1, %xmm3
-	pcmpeqb	%xmm1, %xmm4
-
-	pmaxub	%xmm0, %xmm3
-	pmaxub	%xmm2, %xmm4
-	pmaxub	%xmm3, %xmm4
-	pmovmskb %xmm4, %eax
-
-	add	$64, %rdi
-
-	test	%eax, %eax
-	jz	L(align64_loop)
-
-	sub	$64, %rdi
-
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(matches)
-
-	pmovmskb %xmm2, %eax
-	test	%eax, %eax
-	jnz	L(matches16)
-
-	movdqa	32(%rdi), %xmm3
-	pcmpeqb	%xmm1, %xmm3
-
-	pcmpeqb	48(%rdi), %xmm1
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(matches32)
-
-	pmovmskb %xmm1, %eax
-	bsf	%eax, %eax
-	lea	48(%rdi, %rax), %rax
-	ret
-
-	.p2align 4
-L(matches0):
-	bsf	%eax, %eax
-	lea	-16(%rax, %rdi), %rax
-	ret
-
-	.p2align 4
-L(matches):
-	bsf	%eax, %eax
-	add	%rdi, %rax
-	ret
-
-	.p2align 4
-L(matches16):
-	bsf	%eax, %eax
-	lea	16(%rax, %rdi), %rax
-	ret
-
-	.p2align 4
-L(matches32):
-	bsf	%eax, %eax
-	lea	32(%rax, %rdi), %rax
-	ret
-
-END (__rawmemchr)
+#include "isa-default-impl.h"
 
 weak_alias (__rawmemchr, rawmemchr)
-libc_hidden_builtin_def (__rawmemchr)
+libc_hidden_def (__rawmemchr)