about summary refs log tree commit diff
path: root/sysdeps/x86_64/memchr.S
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/x86_64/memchr.S')
-rw-r--r-- sysdeps/x86_64/memchr.S | 78
1 files changed, 68 insertions, 10 deletions
diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S
index 891ee70aef..205345b43d 100644
--- a/sysdeps/x86_64/memchr.S
+++ b/sysdeps/x86_64/memchr.S
@@ -20,8 +20,17 @@
 
 /* fast SSE2 version with using pmaxub and 64 byte loop */
 
+#  ifdef __CHKP__	/* RETURN = upper-bound check of the result pointer, then ret.  */
+#   define RETURN \
+	bndcu  (%rax), %bnd0; /* check address in %rax against upper bound of %bnd0 */ \
+	ret
+#  else	/* !__CHKP__: RETURN is a plain ret.  */
+#   define RETURN ret
+#  endif	/* __CHKP__ */
+
 	.text
 ENTRY(memchr)
+
 	movd	%rsi, %xmm1
 	mov	%rdi, %rcx
 
@@ -33,6 +42,10 @@ ENTRY(memchr)
 	and	$63, %rcx
 	pshufd	$0, %xmm1, %xmm1
 
+#ifdef __CHKP__
+        bndcl  	(%rdi), %bnd0
+        bndcu  	(%rdi), %bnd0
+#endif
 	cmp	$48, %rcx
 	ja	L(crosscache)
 
@@ -72,7 +85,7 @@ L(crosscache):
 	jbe	L(return_null)
 	add	%rdi, %rax
 	add	%rcx, %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(unaligned_no_match):
@@ -85,24 +98,36 @@ L(unaligned_no_match):
 
 	.p2align 4
 L(loop_prolog):
+#ifdef __CHKP__
+        bndcu  	(%rdi), %bnd0
+#endif
 	movdqa	(%rdi), %xmm0
 	pcmpeqb	%xmm1, %xmm0
 	pmovmskb %xmm0, %eax
 	test	%eax, %eax
 	jnz	L(matches)
 
+#ifdef __CHKP__
+        bndcu  	16(%rdi), %bnd0
+#endif
 	movdqa	16(%rdi), %xmm2
 	pcmpeqb	%xmm1, %xmm2
 	pmovmskb %xmm2, %eax
 	test	%eax, %eax
 	jnz	L(matches16)
 
+#ifdef __CHKP__
+        bndcu  	32(%rdi), %bnd0
+#endif
 	movdqa	32(%rdi), %xmm3
 	pcmpeqb	%xmm1, %xmm3
 	pmovmskb %xmm3, %eax
 	test	%eax, %eax
 	jnz	L(matches32)
 
+#ifdef __CHKP__
+        bndcu  	48(%rdi), %bnd0
+#endif
 	movdqa	48(%rdi), %xmm4
 	pcmpeqb	%xmm1, %xmm4
 	add	$64, %rdi
@@ -116,24 +141,36 @@ L(loop_prolog):
 	sub	$64, %rdx
 	jbe	L(exit_loop)
 
+#ifdef __CHKP__
+        bndcu  	(%rdi), %bnd0
+#endif
 	movdqa	(%rdi), %xmm0
 	pcmpeqb	%xmm1, %xmm0
 	pmovmskb %xmm0, %eax
 	test	%eax, %eax
 	jnz	L(matches)
 
+#ifdef __CHKP__
+        bndcu  	16(%rdi), %bnd0
+#endif
 	movdqa	16(%rdi), %xmm2
 	pcmpeqb	%xmm1, %xmm2
 	pmovmskb %xmm2, %eax
 	test	%eax, %eax
 	jnz	L(matches16)
 
+#ifdef __CHKP__
+        bndcu  	32(%rdi), %bnd0
+#endif
 	movdqa	32(%rdi), %xmm3
 	pcmpeqb	%xmm1, %xmm3
 	pmovmskb %xmm3, %eax
 	test	%eax, %eax
 	jnz	L(matches32)
 
+#ifdef __CHKP__
+        bndcu  	48(%rdi), %bnd0
+#endif
 	movdqa	48(%rdi), %xmm3
 	pcmpeqb	%xmm1, %xmm3
 	pmovmskb %xmm3, %eax
@@ -151,6 +188,9 @@ L(loop_prolog):
 L(align64_loop):
 	sub	$64, %rdx
 	jbe	L(exit_loop)
+#ifdef __CHKP__
+        bndcu  	(%rdi), %bnd0
+#endif
 	movdqa	(%rdi), %xmm0
 	movdqa	16(%rdi), %xmm2
 	movdqa	32(%rdi), %xmm3
@@ -192,25 +232,34 @@ L(align64_loop):
 	pmovmskb %xmm1, %eax
 	bsf	%eax, %eax
 	lea	48(%rdi, %rax), %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(exit_loop):
 	add	$32, %rdx
 	jle	L(exit_loop_32)
 
+#ifdef __CHKP__
+        bndcu  	(%rdi), %bnd0
+#endif
 	movdqa	(%rdi), %xmm0
 	pcmpeqb	%xmm1, %xmm0
 	pmovmskb %xmm0, %eax
 	test	%eax, %eax
 	jnz	L(matches)
 
+#ifdef __CHKP__
+        bndcu   16(%rdi), %bnd0
+#endif
 	movdqa	16(%rdi), %xmm2
 	pcmpeqb	%xmm1, %xmm2
 	pmovmskb %xmm2, %eax
 	test	%eax, %eax
 	jnz	L(matches16)
 
+#ifdef __CHKP__
+        bndcu   32(%rdi), %bnd0
+#endif
 	movdqa	32(%rdi), %xmm3
 	pcmpeqb	%xmm1, %xmm3
 	pmovmskb %xmm3, %eax
@@ -219,6 +268,9 @@ L(exit_loop):
 	sub	$16, %rdx
 	jle	L(return_null)
 
+#ifdef __CHKP__
+        bndcu   48(%rdi), %bnd0
+#endif
 	pcmpeqb	48(%rdi), %xmm1
 	pmovmskb %xmm1, %eax
 	test	%eax, %eax
@@ -229,6 +281,9 @@ L(exit_loop):
 	.p2align 4
 L(exit_loop_32):
 	add	$32, %rdx
+#ifdef __CHKP__
+        bndcu   (%rdi), %bnd0
+#endif
 	movdqa	(%rdi), %xmm0
 	pcmpeqb	%xmm1, %xmm0
 	pmovmskb %xmm0, %eax
@@ -237,6 +292,9 @@ L(exit_loop_32):
 	sub	$16, %rdx
 	jbe	L(return_null)
 
+#ifdef __CHKP__
+        bndcu   16(%rdi), %bnd0
+#endif
 	pcmpeqb	16(%rdi), %xmm1
 	pmovmskb %xmm1, %eax
 	test	%eax, %eax
@@ -248,25 +306,25 @@ L(exit_loop_32):
 L(matches0):
 	bsf	%eax, %eax
 	lea	-16(%rax, %rdi), %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(matches):
 	bsf	%eax, %eax
 	add	%rdi, %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(matches16):
 	bsf	%eax, %eax
 	lea	16(%rax, %rdi), %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(matches32):
 	bsf	%eax, %eax
 	lea	32(%rax, %rdi), %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(matches_1):
@@ -274,7 +332,7 @@ L(matches_1):
 	sub	%rax, %rdx
 	jbe	L(return_null)
 	add	%rdi, %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(matches16_1):
@@ -282,7 +340,7 @@ L(matches16_1):
 	sub	%rax, %rdx
 	jbe	L(return_null)
 	lea	16(%rdi, %rax), %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(matches32_1):
@@ -290,7 +348,7 @@ L(matches32_1):
 	sub	%rax, %rdx
 	jbe	L(return_null)
 	lea	32(%rdi, %rax), %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(matches48_1):
@@ -298,7 +356,7 @@ L(matches48_1):
 	sub	%rax, %rdx
 	jbe	L(return_null)
 	lea	48(%rdi, %rax), %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(return_null):