Diffstat (limited to 'sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S')
 sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S | 76 ++++++++++++++++++++++++
 1 file changed, 76 insertions(+), 0 deletions(-)
diff --git a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
index 028c6d3d74..a3535ad500 100644
--- a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
@@ -25,6 +25,14 @@
 #  define STRCAT  __strcat_sse2_unaligned
 # endif
 
+# ifdef __CHKP__
+#  define RETURN \
+	bndcu	-1(%rdi, %rax), %bnd0; \
+	ret
+# else
+#  define RETURN ret
+# endif
+
 # define USE_AS_STRCAT
 
 .text
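
The added RETURN macro and the entry check in the next hunk rely on the Intel MPX bound-check instructions: bndcl raises a #BR exception when the checked effective address lies below the lower bound held in the named bound register, bndcu when it lies above the upper bound; on hardware without MPX these encodings behave as NOPs. A minimal annotated sketch of the two forms used in this file (illustration only, not part of the patch; it assumes, per the MPX calling convention, that %bnd0 carries the caller-supplied bounds of the destination pointer passed in %rdi):

	# at entry (the check added below):
	bndcl	(%rdi), %bnd0		# #BR if %rdi is below the lower bound in %bnd0
	bndcu	(%rdi), %bnd0		# #BR if %rdi is above the upper bound in %bnd0
	# at exit (the RETURN macro above):
	bndcu	-1(%rdi, %rax), %bnd0	# #BR if %rdi + %rax - 1 is above the upper bound
	ret
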
@@ -37,6 +45,10 @@ ENTRY (STRCAT)
 /* Inline corresponding strlen file, temporary until new strcpy
    implementation gets merged.  */
 
+# ifdef __CHKP__
+	bndcl	(%rdi), %bnd0
+	bndcu	(%rdi), %bnd0
+# endif
 	xor	%rax, %rax
 	mov	%edi, %ecx
 	and	$0x3f, %ecx
@@ -67,84 +79,132 @@ L(align16_start):
 	pxor	%xmm1, %xmm1
 	pxor	%xmm2, %xmm2
 	pxor	%xmm3, %xmm3
+# ifdef __CHKP__
+	bndcu	16(%rax), %bnd0
+# endif
 	pcmpeqb	16(%rax), %xmm0
 	pmovmskb %xmm0, %edx
 	test	%edx, %edx
 	jnz	L(exit16)
 
+# ifdef __CHKP__
+	bndcu	32(%rax), %bnd0
+# endif
 	pcmpeqb	32(%rax), %xmm1
 	pmovmskb %xmm1, %edx
 	test	%edx, %edx
 	jnz	L(exit32)
 
+# ifdef __CHKP__
+	bndcu	48(%rax), %bnd0
+# endif
 	pcmpeqb	48(%rax), %xmm2
 	pmovmskb %xmm2, %edx
 	test	%edx, %edx
 	jnz	L(exit48)
 
+# ifdef __CHKP__
+	bndcu	64(%rax), %bnd0
+# endif
 	pcmpeqb	64(%rax), %xmm3
 	pmovmskb %xmm3, %edx
 	test	%edx, %edx
 	jnz	L(exit64)
 
+# ifdef __CHKP__
+	bndcu	80(%rax), %bnd0
+# endif
 	pcmpeqb	80(%rax), %xmm0
 	add	$64, %rax
 	pmovmskb %xmm0, %edx
 	test	%edx, %edx
 	jnz	L(exit16)
 
+# ifdef __CHKP__
+	bndcu	32(%rax), %bnd0
+# endif
 	pcmpeqb	32(%rax), %xmm1
 	pmovmskb %xmm1, %edx
 	test	%edx, %edx
 	jnz	L(exit32)
 
+# ifdef __CHKP__
+	bndcu	48(%rax), %bnd0
+# endif
 	pcmpeqb	48(%rax), %xmm2
 	pmovmskb %xmm2, %edx
 	test	%edx, %edx
 	jnz	L(exit48)
 
+# ifdef __CHKP__
+	bndcu	64(%rax), %bnd0
+# endif
 	pcmpeqb	64(%rax), %xmm3
 	pmovmskb %xmm3, %edx
 	test	%edx, %edx
 	jnz	L(exit64)
 
+# ifdef __CHKP__
+	bndcu	80(%rax), %bnd0
+# endif
 	pcmpeqb	80(%rax), %xmm0
 	add	$64, %rax
 	pmovmskb %xmm0, %edx
 	test	%edx, %edx
 	jnz	L(exit16)
 
+# ifdef __CHKP__
+	bndcu	32(%rax), %bnd0
+# endif
 	pcmpeqb	32(%rax), %xmm1
 	pmovmskb %xmm1, %edx
 	test	%edx, %edx
 	jnz	L(exit32)
 
+# ifdef __CHKP__
+	bndcu	48(%rax), %bnd0
+# endif
 	pcmpeqb	48(%rax), %xmm2
 	pmovmskb %xmm2, %edx
 	test	%edx, %edx
 	jnz	L(exit48)
 
+# ifdef __CHKP__
+	bndcu	64(%rax), %bnd0
+# endif
 	pcmpeqb	64(%rax), %xmm3
 	pmovmskb %xmm3, %edx
 	test	%edx, %edx
 	jnz	L(exit64)
 
+# ifdef __CHKP__
+	bndcu	80(%rax), %bnd0
+# endif
 	pcmpeqb	80(%rax), %xmm0
 	add	$64, %rax
 	pmovmskb %xmm0, %edx
 	test	%edx, %edx
 	jnz	L(exit16)
 
+# ifdef __CHKP__
+	bndcu	32(%rax), %bnd0
+# endif
 	pcmpeqb	32(%rax), %xmm1
 	pmovmskb %xmm1, %edx
 	test	%edx, %edx
 	jnz	L(exit32)
 
+# ifdef __CHKP__
+	bndcu	48(%rax), %bnd0
+# endif
 	pcmpeqb	48(%rax), %xmm2
 	pmovmskb %xmm2, %edx
 	test	%edx, %edx
 	jnz	L(exit48)
 
+# ifdef __CHKP__
+	bndcu	64(%rax), %bnd0
+# endif
 	pcmpeqb	64(%rax), %xmm3
 	pmovmskb %xmm3, %edx
 	test	%edx, %edx
@@ -153,6 +213,9 @@ L(align16_start):
 	test	$0x3f, %rax
 	jz	L(align64_loop)
 
+# ifdef __CHKP__
+	bndcu	80(%rax), %bnd0
+# endif
 	pcmpeqb	80(%rax), %xmm0
 	add	$80, %rax
 	pmovmskb %xmm0, %edx
@@ -162,6 +225,9 @@ L(align16_start):
 	test	$0x3f, %rax
 	jz	L(align64_loop)
 
+# ifdef __CHKP__
+	bndcu	16(%rax), %bnd0
+# endif
 	pcmpeqb	16(%rax), %xmm1
 	add	$16, %rax
 	pmovmskb %xmm1, %edx
@@ -171,6 +237,9 @@ L(align16_start):
 	test	$0x3f, %rax
 	jz	L(align64_loop)
 
+# ifdef __CHKP__
+	bndcu	16(%rax), %bnd0
+# endif
 	pcmpeqb	16(%rax), %xmm2
 	add	$16, %rax
 	pmovmskb %xmm2, %edx
@@ -180,6 +249,9 @@ L(align16_start):
 	test	$0x3f, %rax
 	jz	L(align64_loop)
 
+# ifdef __CHKP__
+	bndcu	16(%rax), %bnd0
+# endif
 	pcmpeqb	16(%rax), %xmm3
 	add	$16, %rax
 	pmovmskb %xmm3, %edx
@@ -187,8 +259,12 @@ L(align16_start):
 	jnz	L(exit)
 
 	add	$16, %rax
+
 	.p2align 4
 	L(align64_loop):
+# ifdef __CHKP__
+	bndcu	(%rax), %bnd0
+# endif
 	movaps	(%rax),	%xmm4
 	pminub	16(%rax),	%xmm4
 	movaps	32(%rax),	%xmm5
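
The rest of the change repeats a single pattern through the inlined strlen code and the align64_loop prologue: before each load of the next 16-byte block, check the first byte of that block against the upper bound in %bnd0. Schematically (OFFSET and %xmmK are placeholders for the per-instance offset and scratch register; this only summarizes the pattern, it is not additional patch content):

# ifdef __CHKP__
	bndcu	OFFSET(%rax), %bnd0	# #BR if %rax + OFFSET exceeds the object's upper bound
# endif
	pcmpeqb	OFFSET(%rax), %xmmK	# unchanged: look for a zero byte in this 16-byte block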