about summary refs log tree commit diff
path: root/sysdeps/x86_64
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/x86_64')
-rw-r--r--sysdeps/x86_64/Makefile3
-rw-r--r--sysdeps/x86_64/Versions7
-rw-r--r--sysdeps/x86_64/memchr.S78
-rw-r--r--sysdeps/x86_64/memcmp.S85
-rw-r--r--sysdeps/x86_64/memrchr.S13
-rw-r--r--sysdeps/x86_64/memset.S30
-rw-r--r--sysdeps/x86_64/multiarch/Makefile23
-rw-r--r--sysdeps/x86_64/multiarch/Versions13
-rw-r--r--sysdeps/x86_64/multiarch/bcopy.S5
-rw-r--r--sysdeps/x86_64/multiarch/ifunc-impl-list.c6
-rw-r--r--sysdeps/x86_64/multiarch/memcmp-sse4.S339
-rw-r--r--sysdeps/x86_64/multiarch/memcpy-c.c80
-rw-r--r--sysdeps/x86_64/multiarch/memcpy-ssse3-back-1.S5
-rw-r--r--sysdeps/x86_64/multiarch/memcpy-ssse3-back.S26
-rw-r--r--sysdeps/x86_64/multiarch/memcpy.S35
-rw-r--r--sysdeps/x86_64/multiarch/memcpy_chk-c.c1
-rw-r--r--sysdeps/x86_64/multiarch/memcpy_chk.S14
-rw-r--r--sysdeps/x86_64/multiarch/memmove-c.c118
-rw-r--r--sysdeps/x86_64/multiarch/memmove-ssse3-back-1.S6
-rw-r--r--sysdeps/x86_64/multiarch/memmove-ssse3-back.S8
-rw-r--r--sysdeps/x86_64/multiarch/memmove.c34
-rw-r--r--sysdeps/x86_64/multiarch/memmove_chk-c.c1
-rw-r--r--sysdeps/x86_64/multiarch/memmove_chk.c10
-rw-r--r--sysdeps/x86_64/multiarch/mempcpy-c.c36
-rw-r--r--sysdeps/x86_64/multiarch/mempcpy-ssse3-back-1.S6
-rw-r--r--sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S12
-rw-r--r--sysdeps/x86_64/multiarch/mempcpy.S37
-rw-r--r--sysdeps/x86_64/multiarch/mempcpy_chk-c.c1
-rw-r--r--sysdeps/x86_64/multiarch/mempcpy_chk.S14
-rw-r--r--sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S76
-rw-r--r--sysdeps/x86_64/multiarch/strchr.S22
-rw-r--r--sysdeps/x86_64/multiarch/strcmp-sse42.S238
-rw-r--r--sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S174
-rw-r--r--sysdeps/x86_64/multiarch/strrchr.S16
-rw-r--r--sysdeps/x86_64/multiarch/wcscpy-ssse3.S171
-rw-r--r--sysdeps/x86_64/rawmemchr.S53
-rw-r--r--sysdeps/x86_64/stpcpy_chk-c.c1
-rw-r--r--sysdeps/x86_64/strcat.S53
-rw-r--r--sysdeps/x86_64/strchr.S13
-rw-r--r--sysdeps/x86_64/strchrnul.S13
-rw-r--r--sysdeps/x86_64/strcmp.S238
-rw-r--r--sysdeps/x86_64/strcpy.S40
-rw-r--r--sysdeps/x86_64/strcpy_chk-c.c1
-rw-r--r--sysdeps/x86_64/strcpy_chk.S2
-rw-r--r--sysdeps/x86_64/strcspn.S33
-rw-r--r--sysdeps/x86_64/strlen.S19
-rw-r--r--sysdeps/x86_64/strrchr.S13
-rw-r--r--sysdeps/x86_64/strspn.S33
-rw-r--r--sysdeps/x86_64/strtok.S3
-rw-r--r--sysdeps/x86_64/wcschr.S26
-rw-r--r--sysdeps/x86_64/wcscmp.S8
-rw-r--r--sysdeps/x86_64/wcslen.S73
-rw-r--r--sysdeps/x86_64/wcsrchr.S51
53 files changed, 2308 insertions, 108 deletions
diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
index 08db331923..db6838dc93 100644
--- a/sysdeps/x86_64/Makefile
+++ b/sysdeps/x86_64/Makefile
@@ -18,6 +18,9 @@ endif
 ifeq ($(subdir),string)
 sysdep_routines += cacheinfo strcasecmp_l-nonascii strncase_l-nonascii
 gen-as-const-headers += locale-defines.sym
+ifeq ($(enable-mpx), yes)
+sysdep_routines += strcpy_chk-c stpcpy_chk-c
+endif
 endif
 
 ifeq ($(subdir),elf)
diff --git a/sysdeps/x86_64/Versions b/sysdeps/x86_64/Versions
index a437f85e6e..1de589c65b 100644
--- a/sysdeps/x86_64/Versions
+++ b/sysdeps/x86_64/Versions
@@ -2,6 +2,13 @@ libc {
   GLIBC_2.14 {
     memcpy;
   }
+%ifdef __CHKP__
+  GLIBC_2.17 {
+    chkp_memset_nobnd;
+    chkp_memset_nochk;
+    chkp_memset_nobnd_nochk;
+  }
+%endif
 }
 libm {
   GLIBC_2.1 {
diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S
index 891ee70aef..205345b43d 100644
--- a/sysdeps/x86_64/memchr.S
+++ b/sysdeps/x86_64/memchr.S
@@ -20,8 +20,17 @@
 
 /* fast SSE2 version with using pmaxub and 64 byte loop */
 
+#  ifdef __CHKP__
+#   define RETURN \
+	bndcu  (%rax), %bnd0; \
+	ret
+#  else
+#   define RETURN ret
+#  endif
+
 	.text
 ENTRY(memchr)
+
 	movd	%rsi, %xmm1
 	mov	%rdi, %rcx
 
@@ -33,6 +42,10 @@ ENTRY(memchr)
 	and	$63, %rcx
 	pshufd	$0, %xmm1, %xmm1
 
+#ifdef __CHKP__
+        bndcl  	(%rdi), %bnd0
+        bndcu  	(%rdi), %bnd0
+#endif
 	cmp	$48, %rcx
 	ja	L(crosscache)
 
@@ -72,7 +85,7 @@ L(crosscache):
 	jbe	L(return_null)
 	add	%rdi, %rax
 	add	%rcx, %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(unaligned_no_match):
@@ -85,24 +98,36 @@ L(unaligned_no_match):
 
 	.p2align 4
 L(loop_prolog):
+#ifdef __CHKP__
+        bndcu  	(%rdi), %bnd0
+#endif
 	movdqa	(%rdi), %xmm0
 	pcmpeqb	%xmm1, %xmm0
 	pmovmskb %xmm0, %eax
 	test	%eax, %eax
 	jnz	L(matches)
 
+#ifdef __CHKP__
+        bndcu  	16(%rdi), %bnd0
+#endif
 	movdqa	16(%rdi), %xmm2
 	pcmpeqb	%xmm1, %xmm2
 	pmovmskb %xmm2, %eax
 	test	%eax, %eax
 	jnz	L(matches16)
 
+#ifdef __CHKP__
+        bndcu  	32(%rdi), %bnd0
+#endif
 	movdqa	32(%rdi), %xmm3
 	pcmpeqb	%xmm1, %xmm3
 	pmovmskb %xmm3, %eax
 	test	%eax, %eax
 	jnz	L(matches32)
 
+#ifdef __CHKP__
+        bndcu  	48(%rdi), %bnd0
+#endif
 	movdqa	48(%rdi), %xmm4
 	pcmpeqb	%xmm1, %xmm4
 	add	$64, %rdi
@@ -116,24 +141,36 @@ L(loop_prolog):
 	sub	$64, %rdx
 	jbe	L(exit_loop)
 
+#ifdef __CHKP__
+        bndcu  	(%rdi), %bnd0
+#endif
 	movdqa	(%rdi), %xmm0
 	pcmpeqb	%xmm1, %xmm0
 	pmovmskb %xmm0, %eax
 	test	%eax, %eax
 	jnz	L(matches)
 
+#ifdef __CHKP__
+        bndcu  	16(%rdi), %bnd0
+#endif
 	movdqa	16(%rdi), %xmm2
 	pcmpeqb	%xmm1, %xmm2
 	pmovmskb %xmm2, %eax
 	test	%eax, %eax
 	jnz	L(matches16)
 
+#ifdef __CHKP__
+        bndcu  	32(%rdi), %bnd0
+#endif
 	movdqa	32(%rdi), %xmm3
 	pcmpeqb	%xmm1, %xmm3
 	pmovmskb %xmm3, %eax
 	test	%eax, %eax
 	jnz	L(matches32)
 
+#ifdef __CHKP__
+        bndcu  	48(%rdi), %bnd0
+#endif
 	movdqa	48(%rdi), %xmm3
 	pcmpeqb	%xmm1, %xmm3
 	pmovmskb %xmm3, %eax
@@ -151,6 +188,9 @@ L(loop_prolog):
 L(align64_loop):
 	sub	$64, %rdx
 	jbe	L(exit_loop)
+#ifdef __CHKP__
+        bndcu  	(%rdi), %bnd0
+#endif
 	movdqa	(%rdi), %xmm0
 	movdqa	16(%rdi), %xmm2
 	movdqa	32(%rdi), %xmm3
@@ -192,25 +232,34 @@ L(align64_loop):
 	pmovmskb %xmm1, %eax
 	bsf	%eax, %eax
 	lea	48(%rdi, %rax), %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(exit_loop):
 	add	$32, %rdx
 	jle	L(exit_loop_32)
 
+#ifdef __CHKP__
+        bndcu  	(%rdi), %bnd0
+#endif
 	movdqa	(%rdi), %xmm0
 	pcmpeqb	%xmm1, %xmm0
 	pmovmskb %xmm0, %eax
 	test	%eax, %eax
 	jnz	L(matches)
 
+#ifdef __CHKP__
+        bndcu   16(%rdi), %bnd0
+#endif
 	movdqa	16(%rdi), %xmm2
 	pcmpeqb	%xmm1, %xmm2
 	pmovmskb %xmm2, %eax
 	test	%eax, %eax
 	jnz	L(matches16)
 
+#ifdef __CHKP__
+        bndcu   32(%rdi), %bnd0
+#endif
 	movdqa	32(%rdi), %xmm3
 	pcmpeqb	%xmm1, %xmm3
 	pmovmskb %xmm3, %eax
@@ -219,6 +268,9 @@ L(exit_loop):
 	sub	$16, %rdx
 	jle	L(return_null)
 
+#ifdef __CHKP__
+        bndcu   48(%rdi), %bnd0
+#endif
 	pcmpeqb	48(%rdi), %xmm1
 	pmovmskb %xmm1, %eax
 	test	%eax, %eax
@@ -229,6 +281,9 @@ L(exit_loop):
 	.p2align 4
 L(exit_loop_32):
 	add	$32, %rdx
+#ifdef __CHKP__
+        bndcu   (%rdi), %bnd0
+#endif
 	movdqa	(%rdi), %xmm0
 	pcmpeqb	%xmm1, %xmm0
 	pmovmskb %xmm0, %eax
@@ -237,6 +292,9 @@ L(exit_loop_32):
 	sub	$16, %rdx
 	jbe	L(return_null)
 
+#ifdef __CHKP__
+        bndcu   16(%rdi), %bnd0
+#endif
 	pcmpeqb	16(%rdi), %xmm1
 	pmovmskb %xmm1, %eax
 	test	%eax, %eax
@@ -248,25 +306,25 @@ L(exit_loop_32):
 L(matches0):
 	bsf	%eax, %eax
 	lea	-16(%rax, %rdi), %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(matches):
 	bsf	%eax, %eax
 	add	%rdi, %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(matches16):
 	bsf	%eax, %eax
 	lea	16(%rax, %rdi), %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(matches32):
 	bsf	%eax, %eax
 	lea	32(%rax, %rdi), %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(matches_1):
@@ -274,7 +332,7 @@ L(matches_1):
 	sub	%rax, %rdx
 	jbe	L(return_null)
 	add	%rdi, %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(matches16_1):
@@ -282,7 +340,7 @@ L(matches16_1):
 	sub	%rax, %rdx
 	jbe	L(return_null)
 	lea	16(%rdi, %rax), %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(matches32_1):
@@ -290,7 +348,7 @@ L(matches32_1):
 	sub	%rax, %rdx
 	jbe	L(return_null)
 	lea	32(%rdi, %rax), %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(matches48_1):
@@ -298,7 +356,7 @@ L(matches48_1):
 	sub	%rax, %rdx
 	jbe	L(return_null)
 	lea	48(%rdi, %rax), %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(return_null):
diff --git a/sysdeps/x86_64/memcmp.S b/sysdeps/x86_64/memcmp.S
index d5c072c7f4..77a7bcaafe 100644
--- a/sysdeps/x86_64/memcmp.S
+++ b/sysdeps/x86_64/memcmp.S
@@ -23,6 +23,11 @@
 ENTRY (memcmp)
 	test	%rdx, %rdx
 	jz	L(finz)
+#ifdef __CHKP__
+	bndcl	(%rdi), %bnd0
+	bndcl	(%rsi), %bnd1
+#endif
+	pxor	%xmm0, %xmm0
 	cmpq	$1, %rdx
 	jle	L(finr1b)
 	subq	%rdi, %rsi
@@ -86,6 +91,10 @@ L(s16b):
 
 	.p2align 4,, 4
 L(finr1b):
+#ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rsi), %bnd1
+#endif
 	movzbl	(%rdi), %eax
 	movzbl  (%rsi), %edx
 L(finz1):
@@ -132,6 +141,10 @@ L(gt32):
 	andq	$15, %r8
 	jz	L(16am)
 	/* Both pointers may be misaligned.  */
+#ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rdi, %rsi), %bnd1
+#endif
 	movdqu	(%rdi),	%xmm1
 	movdqu	(%rdi, %rsi), %xmm0
 	pcmpeqb   %xmm0, %xmm1
@@ -146,6 +159,10 @@ L(16am):
 	jz      L(ATR)
 	testq	$16, %rdi
 	jz	L(A32)
+#ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rdi, %rsi), %bnd1
+#endif
 	movdqu	(%rdi, %rsi), %xmm0
 	pcmpeqb   (%rdi), %xmm0
 	pmovmskb  %xmm0, %edx
@@ -160,6 +177,10 @@ L(A32):
 	/* Pre-unroll to be ready for unrolled 64B loop.  */
 	testq	$32, %rdi
 	jz	L(A64)
+#ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rdi, %rsi), %bnd1
+#endif
 	movdqu    (%rdi,%rsi), %xmm0
 	pcmpeqb   (%rdi), %xmm0
 	pmovmskb  %xmm0, %edx
@@ -167,6 +188,10 @@ L(A32):
 	jnz       L(neq)
 	addq       $16, %rdi
 
+#ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rdi, %rsi), %bnd1
+#endif
 	movdqu    (%rdi,%rsi), %xmm0
 	pcmpeqb  (%rdi), %xmm0
 	pmovmskb  %xmm0, %edx
@@ -181,6 +206,10 @@ L(A64):
         jge	L(mt32)
 
 L(A64main):
+#ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rdi, %rsi), %bnd1
+#endif
 	movdqu    (%rdi,%rsi), %xmm0
 	pcmpeqb   (%rdi), %xmm0
 	pmovmskb  %xmm0, %edx
@@ -188,6 +217,10 @@ L(A64main):
 	jnz       L(neq)
 	addq       $16, %rdi
 
+#ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rdi, %rsi), %bnd1
+#endif
 	movdqu    (%rdi,%rsi), %xmm0
 	pcmpeqb   (%rdi), %xmm0
 	pmovmskb  %xmm0, %edx
@@ -195,6 +228,10 @@ L(A64main):
 	jnz       L(neq)
 	addq       $16, %rdi
 
+#ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rdi, %rsi), %bnd1
+#endif
 	movdqu    (%rdi,%rsi), %xmm0
 	pcmpeqb   (%rdi), %xmm0
 	pmovmskb  %xmm0, %edx
@@ -202,6 +239,10 @@ L(A64main):
 	jnz       L(neq)
 	addq       $16, %rdi
 
+#ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rdi, %rsi), %bnd1
+#endif
 	movdqu    (%rdi,%rsi), %xmm0
 	pcmpeqb  (%rdi), %xmm0
 	pmovmskb  %xmm0, %edx
@@ -219,6 +260,10 @@ L(mt32):
         jge	L(mt16)
 
 L(A32main):
+#ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rdi, %rsi), %bnd1
+#endif
 	movdqu    (%rdi,%rsi), %xmm0
 	pcmpeqb   (%rdi), %xmm0
 	pmovmskb  %xmm0, %edx
@@ -226,6 +271,10 @@ L(A32main):
 	jnz       L(neq)
 	addq       $16, %rdi
 
+#ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rdi, %rsi), %bnd1
+#endif
 	movdqu    (%rdi,%rsi), %xmm0
 	pcmpeqb  (%rdi), %xmm0
 	pmovmskb  %xmm0, %edx
@@ -258,6 +307,10 @@ L(ATR):
 	testq	$16, %rdi
 	jz	L(ATR32)
 
+#ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rdi, %rsi), %bnd1
+#endif
 	movdqa    (%rdi,%rsi), %xmm0
 	pcmpeqb   (%rdi), %xmm0
 	pmovmskb  %xmm0, %edx
@@ -273,6 +326,10 @@ L(ATR32):
 	testq	$32, %rdi
 	jz	L(ATR64)
 
+#ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rdi, %rsi), %bnd1
+#endif
 	movdqa    (%rdi,%rsi), %xmm0
 	pcmpeqb   (%rdi), %xmm0
 	pmovmskb  %xmm0, %edx
@@ -280,6 +337,10 @@ L(ATR32):
 	jnz       L(neq)
 	addq       $16, %rdi
 
+#ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rdi, %rsi), %bnd1
+#endif
 	movdqa    (%rdi,%rsi), %xmm0
 	pcmpeqb   (%rdi), %xmm0
 	pmovmskb  %xmm0, %edx
@@ -292,6 +353,10 @@ L(ATR64):
 	je	   L(mt32)
 
 L(ATR64main):
+#ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rdi, %rsi), %bnd1
+#endif
 	movdqa    (%rdi,%rsi), %xmm0
 	pcmpeqb   (%rdi), %xmm0
 	pmovmskb  %xmm0, %edx
@@ -299,6 +364,10 @@ L(ATR64main):
 	jnz       L(neq)
 	addq       $16, %rdi
 
+#ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rdi, %rsi), %bnd1
+#endif
 	movdqa    (%rdi,%rsi), %xmm0
 	pcmpeqb   (%rdi), %xmm0
 	pmovmskb  %xmm0, %edx
@@ -306,6 +375,10 @@ L(ATR64main):
 	jnz       L(neq)
 	addq       $16, %rdi
 
+#ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rdi, %rsi), %bnd1
+#endif
 	movdqa    (%rdi,%rsi), %xmm0
 	pcmpeqb   (%rdi), %xmm0
 	pmovmskb  %xmm0, %edx
@@ -313,6 +386,10 @@ L(ATR64main):
 	jnz       L(neq)
 	addq       $16, %rdi
 
+#ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rdi, %rsi), %bnd1
+#endif
 	movdqa    (%rdi,%rsi), %xmm0
 	pcmpeqb   (%rdi), %xmm0
 	pmovmskb  %xmm0, %edx
@@ -328,6 +405,10 @@ L(ATR64main):
         jge	L(mt16)
 
 L(ATR32res):
+#ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rdi, %rsi), %bnd1
+#endif
 	movdqa    (%rdi,%rsi), %xmm0
 	pcmpeqb   (%rdi), %xmm0
 	pmovmskb  %xmm0, %edx
@@ -335,6 +416,10 @@ L(ATR32res):
 	jnz       L(neq)
 	addq       $16, %rdi
 
+#ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rdi, %rsi), %bnd1
+#endif
 	movdqa    (%rdi,%rsi), %xmm0
 	pcmpeqb   (%rdi), %xmm0
 	pmovmskb  %xmm0, %edx
diff --git a/sysdeps/x86_64/memrchr.S b/sysdeps/x86_64/memrchr.S
index 5a659feede..3afa97c0b7 100644
--- a/sysdeps/x86_64/memrchr.S
+++ b/sysdeps/x86_64/memrchr.S
@@ -27,6 +27,11 @@ ENTRY (memrchr)
 	sub	$16, %rdx
 	jbe	L(length_less16)
 
+#ifdef __CHKP__
+        bndcl  (%rdi), %bnd0
+        bndcu  -1(%rdi, %rdx), %bnd0
+#endif
+
 	punpcklbw	%xmm1, %xmm1
 	punpcklbw	%xmm1, %xmm1
 
@@ -284,6 +289,10 @@ L(length_less16_offset0):
 	test	%edx, %edx
 	jz	L(return_null)
 
+#ifdef __CHKP__
+        bndcl  (%rdi), %bnd0
+        bndcu  -1(%rdi, %rdx), %bnd0
+#endif
 	mov	%dl, %cl
 	pcmpeqb	(%rdi), %xmm1
 
@@ -314,6 +323,10 @@ L(length_less16):
 	and	$15, %rcx
 	jz	L(length_less16_offset0)
 
+#ifdef __CHKP__
+        bndcl  (%rdi), %bnd0
+        bndcu  -1(%rdi, %rdx), %bnd0
+#endif
 	mov	%rdi, %rcx
 	and	$15, %rcx
 	mov	%cl, %dh
diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
index 6c69f4b442..ccb2aae480 100644
--- a/sysdeps/x86_64/memset.S
+++ b/sysdeps/x86_64/memset.S
@@ -26,6 +26,14 @@
 	.text
 #if !defined NOT_IN_libc
ENTRY(__bzero)
+	testq	%rsi, %rsi
+	jz	L(only_return)
+
+#if defined __CHKP__ && defined __CHKWR__
+	bndcl	(%rdi), %bnd0
+	bndcu	-1(%rdi, %rsi), %bnd0
+#endif
+
 	movq	%rdi, %rax /* Set return value.  */
 	movq	%rsi, %rdx /* Set n.  */
 	pxor	%xmm8, %xmm8
@@ -53,7 +62,20 @@ ENTRY_CHK (__memset_chk)
 END_CHK (__memset_chk)
 #endif
 
+#if defined __CHKP__ && defined __CHKWR__
+ENTRY (chkp_memset_nochk)
+	jmp	L(entry_from_chkp_memset_nochk)
+END (chkp_memset_nochk)
+#endif
+
 ENTRY (memset)
+	testq	%rdx, %rdx
+	jz	L(only_return)
+#if defined __CHKP__ && defined __CHKWR__
+	bndcl	(%rdi), %bnd0
+	bndcu	-1(%rdi, %rdx), %bnd0
+L(entry_from_chkp_memset_nochk):
+#endif
 	movd	%esi, %xmm8
 	movq	%rdi, %rax
 	punpcklbw	%xmm8, %xmm8
@@ -71,6 +93,9 @@ L(entry_from_bzero):
 L(return):
 	rep
 	ret
+L(only_return):
+	movq	%rdi, %rax
+	ret
 	ALIGN (4)
 L(between_32_64_bytes):
 	movdqu	%xmm8, 16(%rdi)
@@ -129,6 +154,11 @@ L(between8_16bytes):
 END (memset)
 libc_hidden_builtin_def (memset)
 
+#if defined __CHKP__ && defined __CHKWR__
+weak_alias (memset, chkp_memset_nobnd)
+weak_alias (chkp_memset_nochk, chkp_memset_nobnd_nochk)
+#endif
+
 #if defined PIC && !defined NOT_IN_libc && !defined USE_MULTIARCH
 strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
 	.section .gnu.warning.__memset_zero_constant_len_parameter
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 203d16eed3..bdf7964d14 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -26,6 +26,29 @@ CFLAGS-strstr.c += -msse4
 CFLAGS-strcasestr.c += -msse4
 CFLAGS-strcasestr-nonascii.c += -msse4
 endif
+
+ifeq ($(enable-mpx), yes)
+sysdep_routines += memcpy-ssse3-back-1 mempcpy-ssse3-back-1 memmove-ssse3-back-1 \
+						 memcpy-c memmove-c mempcpy-c memcpy_chk-c mempcpy_chk-c memmove_chk-c
+# These are C versions written with intrinsics; bounds checks have to be added manually as intrinsics.
+CFLAGS-varshift.c +=  -fno-chkp-check-read -fno-chkp-check-write
+CFLAGS-strcspn-c.c += -fno-chkp-check-read -fno-chkp-check-write
+CFLAGS-strpbrk-c.c += -fno-chkp-check-read -fno-chkp-check-write
+CFLAGS-strspn-c.c +=  -fno-chkp-check-read -fno-chkp-check-write
+CFLAGS-strstr.c +=    -fno-chkp-check-read -fno-chkp-check-write
+CFLAGS-strcasestr.c += -fno-chkp-check-read -fno-chkp-check-write
+CFLAGS-strcasestr-nonascii.c += -fno-chkp-check-read -fno-chkp-check-write
+# Bounds checks are inserted manually in these routines.
+CFLAGS-memcpy-c.c += -fno-chkp-check-read -fno-chkp-check-write
+CFLAGS-mempcpy-c.c += -fno-chkp-check-read -fno-chkp-check-write
+CFLAGS-memmove-c.c += -fno-chkp-check-read -fno-chkp-check-write
+endif
+
+ifeq ($(enable-mpx-write-only), yes)
+CFLAGS-memcpy-c.c += -D__CHKWR__
+CFLAGS-memmove-c.c += -D__CHKWR__
+endif
+
 endif
 
 ifeq ($(subdir),wcsmbs)
diff --git a/sysdeps/x86_64/multiarch/Versions b/sysdeps/x86_64/multiarch/Versions
index 59b185ac8d..5325bdece6 100644
--- a/sysdeps/x86_64/multiarch/Versions
+++ b/sysdeps/x86_64/multiarch/Versions
@@ -2,4 +2,17 @@ libc {
   GLIBC_PRIVATE {
     __get_cpu_features;
   }
+%ifdef __CHKP__
+  GLIBC_2.17 {
+   chkp_memcpy_nobnd;
+   chkp_memmove_nobnd;
+   chkp_mempcpy_nobnd;
+   chkp_memcpy_nobnd_nochk;
+   chkp_memmove_nobnd_nochk;
+   chkp_mempcpy_nobnd_nochk;
+   chkp_memcpy_nochk;
+   chkp_memmove_nochk;
+   chkp_mempcpy_nochk;
+  }
+%endif
 }
diff --git a/sysdeps/x86_64/multiarch/bcopy.S b/sysdeps/x86_64/multiarch/bcopy.S
index 639f02bde3..9809d471ba 100644
--- a/sysdeps/x86_64/multiarch/bcopy.S
+++ b/sysdeps/x86_64/multiarch/bcopy.S
@@ -3,5 +3,10 @@
 	.text
 ENTRY(bcopy)
 	xchg	%rdi, %rsi
+#ifdef __CHKP__
+	bndmov %bnd0, %bnd2
+	bndmov %bnd1, %bnd0
+	bndmov %bnd2, %bnd1
+#endif
 	jmp	__libc_memmove	/* Branch to IFUNC memmove.  */
 END(bcopy)
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index d0992e113f..e3a4163c5b 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -44,6 +44,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, memcmp, HAS_SSSE3, __memcmp_ssse3)
 	      IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_sse2))
 
+#ifndef __CHKP__
+  /* A specific implementation is used when glibc is built with Intel MPX.  */
   /* Support sysdeps/x86_64/multiarch/memmove_chk.S.  */
   IFUNC_IMPL (i, name, __memmove_chk,
 	      IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3,
@@ -60,6 +62,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3,
 			      __memmove_ssse3)
 	      IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_sse2))
+#endif
 
   /* Support sysdeps/x86_64/multiarch/stpncpy.S.  */
   IFUNC_IMPL (i, name, stpncpy,
@@ -207,6 +210,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, wmemcmp, 1, __wmemcmp_sse2))
 
 #ifdef SHARED
+#ifndef __CHKP__
+  /* We use specific version of memcpy, memcpy_chk, mempcpy if Intel MPX is enabled.  */
   /* Support sysdeps/x86_64/multiarch/memcpy_chk.S.  */
   IFUNC_IMPL (i, name, __memcpy_chk,
 	      IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3,
@@ -240,6 +245,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3,
 			      __mempcpy_ssse3)
 	      IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_sse2))
+#endif
 
   /* Support sysdeps/x86_64/multiarch/strncmp.S.  */
   IFUNC_IMPL (i, name, strncmp,
diff --git a/sysdeps/x86_64/multiarch/memcmp-sse4.S b/sysdeps/x86_64/multiarch/memcmp-sse4.S
index 1ed4200f4c..b5c6675d31 100644
--- a/sysdeps/x86_64/multiarch/memcmp-sse4.S
+++ b/sysdeps/x86_64/multiarch/memcmp-sse4.S
@@ -48,6 +48,13 @@ ENTRY (MEMCMP)
 # ifdef USE_AS_WMEMCMP
 	shl	$2, %rdx
 # endif
+# ifdef __CHKP__
+	testq	%rdx, %rdx
+	jz	L(NoEntryCheck)
+	bndcl	(%rdi), %bnd0
+	bndcl	(%rsi), %bnd1
+L(NoEntryCheck):
+# endif
 	pxor	%xmm0, %xmm0
 	cmp	$79, %rdx
 	ja	L(79bytesormore)
@@ -70,6 +77,10 @@ L(firstbyte):
 
 	ALIGN (4)
 L(79bytesormore):
+# ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rsi), %bnd1
+# endif
 	movdqu	(%rsi), %xmm1
 	movdqu	(%rdi), %xmm2
 	pxor	%xmm1, %xmm2
@@ -90,21 +101,37 @@ L(79bytesormore):
 L(less128bytes):
 	sub	$64, %rdx
 
+# ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rsi), %bnd1
+# endif
 	movdqu	(%rdi), %xmm2
 	pxor	(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(16bytesin256)
 
+# ifdef __CHKP__
+	bndcu	16(%rdi), %bnd0
+	bndcu	16(%rsi), %bnd1
+# endif
 	movdqu	16(%rdi), %xmm2
 	pxor	16(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(32bytesin256)
 
+# ifdef __CHKP__
+	bndcu	32(%rdi), %bnd0
+	bndcu	32(%rsi), %bnd1
+# endif
 	movdqu	32(%rdi), %xmm2
 	pxor	32(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(48bytesin256)
 
+# ifdef __CHKP__
+	bndcu	48(%rdi), %bnd0
+	bndcu	48(%rsi), %bnd1
+# endif
 	movdqu	48(%rdi), %xmm2
 	pxor	48(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
@@ -112,11 +139,19 @@ L(less128bytes):
 	cmp	$32, %rdx
 	jb	L(less32bytesin64)
 
+# ifdef __CHKP__
+	bndcu	64(%rdi), %bnd0
+	bndcu	64(%rsi), %bnd1
+# endif
 	movdqu	64(%rdi), %xmm2
 	pxor	64(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(80bytesin256)
 
+# ifdef __CHKP__
+	bndcu	80(%rdi), %bnd0
+	bndcu	80(%rsi), %bnd1
+# endif
 	movdqu	80(%rdi), %xmm2
 	pxor	80(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
@@ -139,41 +174,73 @@ L(128bytesormore):
 L(less256bytes):
 	sub	$128, %rdx
 
+# ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rsi), %bnd1
+# endif
 	movdqu	(%rdi), %xmm2
 	pxor	(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(16bytesin256)
 
+# ifdef __CHKP__
+	bndcu	16(%rdi), %bnd0
+	bndcu	16(%rsi), %bnd1
+# endif
 	movdqu	16(%rdi), %xmm2
 	pxor	16(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(32bytesin256)
 
+# ifdef __CHKP__
+	bndcu	32(%rdi), %bnd0
+	bndcu	32(%rsi), %bnd1
+# endif
 	movdqu	32(%rdi), %xmm2
 	pxor	32(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(48bytesin256)
 
+# ifdef __CHKP__
+	bndcu	48(%rdi), %bnd0
+	bndcu	48(%rsi), %bnd1
+# endif
 	movdqu	48(%rdi), %xmm2
 	pxor	48(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(64bytesin256)
 
+# ifdef __CHKP__
+	bndcu	64(%rdi), %bnd0
+	bndcu	64(%rsi), %bnd1
+# endif
 	movdqu	64(%rdi), %xmm2
 	pxor	64(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(80bytesin256)
 
+# ifdef __CHKP__
+	bndcu	80(%rdi), %bnd0
+	bndcu	80(%rsi), %bnd1
+# endif
 	movdqu	80(%rdi), %xmm2
 	pxor	80(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(96bytesin256)
 
+# ifdef __CHKP__
+	bndcu	96(%rdi), %bnd0
+	bndcu	96(%rsi), %bnd1
+# endif
 	movdqu	96(%rdi), %xmm2
 	pxor	96(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(112bytesin256)
 
+# ifdef __CHKP__
+	bndcu	112(%rdi), %bnd0
+	bndcu	112(%rsi), %bnd1
+# endif
 	movdqu	112(%rdi), %xmm2
 	pxor	112(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
@@ -188,11 +255,19 @@ L(less256bytes):
 	cmp	$32, %rdx
 	jb	L(less32bytesin128)
 
+# ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rsi), %bnd1
+# endif
 	movdqu	(%rdi), %xmm2
 	pxor	(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(16bytesin256)
 
+# ifdef __CHKP__
+	bndcu	16(%rdi), %bnd0
+	bndcu	16(%rsi), %bnd1
+# endif
 	movdqu	16(%rdi), %xmm2
 	pxor	16(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
@@ -207,81 +282,145 @@ L(less32bytesin128):
 
 L(less512bytes):
 	sub	$256, %rdx
+# ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rsi), %bnd1
+# endif
 	movdqu	(%rdi), %xmm2
 	pxor	(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(16bytesin256)
 
+# ifdef __CHKP__
+	bndcu	16(%rdi), %bnd0
+	bndcu	16(%rsi), %bnd1
+# endif
 	movdqu	16(%rdi), %xmm2
 	pxor	16(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(32bytesin256)
 
+# ifdef __CHKP__
+	bndcu	32(%rdi), %bnd0
+	bndcu	32(%rsi), %bnd1
+# endif
 	movdqu	32(%rdi), %xmm2
 	pxor	32(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(48bytesin256)
 
+# ifdef __CHKP__
+	bndcu	48(%rdi), %bnd0
+	bndcu	48(%rsi), %bnd1
+# endif
 	movdqu	48(%rdi), %xmm2
 	pxor	48(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(64bytesin256)
 
+# ifdef __CHKP__
+	bndcu	64(%rdi), %bnd0
+	bndcu	64(%rsi), %bnd1
+# endif
 	movdqu	64(%rdi), %xmm2
 	pxor	64(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(80bytesin256)
 
+# ifdef __CHKP__
+	bndcu	80(%rdi), %bnd0
+	bndcu	80(%rsi), %bnd1
+# endif
 	movdqu	80(%rdi), %xmm2
 	pxor	80(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(96bytesin256)
 
+# ifdef __CHKP__
+	bndcu	96(%rdi), %bnd0
+	bndcu	96(%rsi), %bnd1
+# endif
 	movdqu	96(%rdi), %xmm2
 	pxor	96(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(112bytesin256)
 
+# ifdef __CHKP__
+	bndcu	112(%rdi), %bnd0
+	bndcu	112(%rsi), %bnd1
+# endif
 	movdqu	112(%rdi), %xmm2
 	pxor	112(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(128bytesin256)
 
+# ifdef __CHKP__
+	bndcu	128(%rdi), %bnd0
+	bndcu	128(%rsi), %bnd1
+# endif
 	movdqu	128(%rdi), %xmm2
 	pxor	128(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(144bytesin256)
 
+# ifdef __CHKP__
+	bndcu	144(%rdi), %bnd0
+	bndcu	144(%rsi), %bnd1
+# endif
 	movdqu	144(%rdi), %xmm2
 	pxor	144(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(160bytesin256)
 
+# ifdef __CHKP__
+	bndcu	160(%rdi), %bnd0
+	bndcu	160(%rsi), %bnd1
+# endif
 	movdqu	160(%rdi), %xmm2
 	pxor	160(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(176bytesin256)
 
+# ifdef __CHKP__
+	bndcu	176(%rdi), %bnd0
+	bndcu	176(%rsi), %bnd1
+# endif
 	movdqu	176(%rdi), %xmm2
 	pxor	176(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(192bytesin256)
 
+# ifdef __CHKP__
+	bndcu	192(%rdi), %bnd0
+	bndcu	192(%rsi), %bnd1
+# endif
 	movdqu	192(%rdi), %xmm2
 	pxor	192(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(208bytesin256)
 
+# ifdef __CHKP__
+	bndcu	208(%rdi), %bnd0
+	bndcu	208(%rsi), %bnd1
+# endif
 	movdqu	208(%rdi), %xmm2
 	pxor	208(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(224bytesin256)
 
+# ifdef __CHKP__
+	bndcu	224(%rdi), %bnd0
+	bndcu	224(%rsi), %bnd1
+# endif
 	movdqu	224(%rdi), %xmm2
 	pxor	224(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(240bytesin256)
 
+# ifdef __CHKP__
+	bndcu	240(%rdi), %bnd0
+	bndcu	240(%rsi), %bnd1
+# endif
 	movdqu	240(%rdi), %xmm2
 	pxor	240(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
@@ -299,11 +438,19 @@ L(less512bytes):
 	cmp	$32, %rdx
 	jb	L(less32bytesin256)
 
+# ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rsi), %bnd1
+# endif
 	movdqu	(%rdi), %xmm2
 	pxor	(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(16bytesin256)
 
+# ifdef __CHKP__
+	bndcu	16(%rdi), %bnd0
+	bndcu	16(%rsi), %bnd1
+# endif
 	movdqu	16(%rdi), %xmm2
 	pxor	16(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
@@ -331,18 +478,34 @@ L(512bytesormore):
 	sub	$64, %rdx
 	ALIGN (4)
 L(64bytesormore_loop):
+# ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rsi), %bnd1
+# endif
 	movdqu	(%rdi), %xmm2
 	pxor	(%rsi), %xmm2
 	movdqa	%xmm2, %xmm1
 
+# ifdef __CHKP__
+	bndcu	16(%rdi), %bnd0
+	bndcu	16(%rsi), %bnd1
+# endif
 	movdqu	16(%rdi), %xmm3
 	pxor	16(%rsi), %xmm3
 	por	%xmm3, %xmm1
 
+# ifdef __CHKP__
+	bndcu	32(%rdi), %bnd0
+	bndcu	32(%rsi), %bnd1
+# endif
 	movdqu	32(%rdi), %xmm4
 	pxor	32(%rsi), %xmm4
 	por	%xmm4, %xmm1
 
+# ifdef __CHKP__
+	bndcu	48(%rdi), %bnd0
+	bndcu	48(%rsi), %bnd1
+# endif
 	movdqu	48(%rdi), %xmm5
 	pxor	48(%rsi), %xmm5
 	por	%xmm5, %xmm1
@@ -365,18 +528,34 @@ L(L2_L3_cache_unaglined):
 L(L2_L3_unaligned_128bytes_loop):
 	prefetchnta 0x1c0(%rdi)
 	prefetchnta 0x1c0(%rsi)
+# ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rsi), %bnd1
+# endif
 	movdqu	(%rdi), %xmm2
 	pxor	(%rsi), %xmm2
 	movdqa	%xmm2, %xmm1
 
+# ifdef __CHKP__
+	bndcu	16(%rdi), %bnd0
+	bndcu	16(%rsi), %bnd1
+# endif
 	movdqu	16(%rdi), %xmm3
 	pxor	16(%rsi), %xmm3
 	por	%xmm3, %xmm1
 
+# ifdef __CHKP__
+	bndcu	32(%rdi), %bnd0
+	bndcu	32(%rsi), %bnd1
+# endif
 	movdqu	32(%rdi), %xmm4
 	pxor	32(%rsi), %xmm4
 	por	%xmm4, %xmm1
 
+# ifdef __CHKP__
+	bndcu	48(%rdi), %bnd0
+	bndcu	48(%rsi), %bnd1
+# endif
 	movdqu	48(%rdi), %xmm5
 	pxor	48(%rsi), %xmm5
 	por	%xmm5, %xmm1
@@ -403,21 +582,37 @@ L(2aligned):
 L(less128bytesin2aligned):
 	sub	$64, %rdx
 
+# ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rsi), %bnd1
+# endif
 	movdqa	(%rdi), %xmm2
 	pxor	(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(16bytesin256)
 
+# ifdef __CHKP__
+	bndcu	16(%rdi), %bnd0
+	bndcu	16(%rsi), %bnd1
+# endif
 	movdqa	16(%rdi), %xmm2
 	pxor	16(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(32bytesin256)
 
+# ifdef __CHKP__
+	bndcu	32(%rdi), %bnd0
+	bndcu	32(%rsi), %bnd1
+# endif
 	movdqa	32(%rdi), %xmm2
 	pxor	32(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(48bytesin256)
 
+# ifdef __CHKP__
+	bndcu	48(%rdi), %bnd0
+	bndcu	48(%rsi), %bnd1
+# endif
 	movdqa	48(%rdi), %xmm2
 	pxor	48(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
@@ -425,11 +620,19 @@ L(less128bytesin2aligned):
 	cmp	$32, %rdx
 	jb	L(less32bytesin64in2alinged)
 
+# ifdef __CHKP__
+	bndcu	64(%rdi), %bnd0
+	bndcu	64(%rsi), %bnd1
+# endif
 	movdqa	64(%rdi), %xmm2
 	pxor	64(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(80bytesin256)
 
+# ifdef __CHKP__
+	bndcu	80(%rdi), %bnd0
+	bndcu	80(%rsi), %bnd1
+# endif
 	movdqa	80(%rdi), %xmm2
 	pxor	80(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
@@ -453,41 +656,73 @@ L(128bytesormorein2aligned):
 L(less256bytesin2alinged):
 	sub	$128, %rdx
 
+# ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rsi), %bnd1
+# endif
 	movdqa	(%rdi), %xmm2
 	pxor	(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(16bytesin256)
 
+# ifdef __CHKP__
+	bndcu	16(%rdi), %bnd0
+	bndcu	16(%rsi), %bnd1
+# endif
 	movdqa	16(%rdi), %xmm2
 	pxor	16(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(32bytesin256)
 
+# ifdef __CHKP__
+	bndcu	32(%rdi), %bnd0
+	bndcu	32(%rsi), %bnd1
+# endif
 	movdqa	32(%rdi), %xmm2
 	pxor	32(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(48bytesin256)
 
+# ifdef __CHKP__
+	bndcu	48(%rdi), %bnd0
+	bndcu	48(%rsi), %bnd1
+# endif
 	movdqa	48(%rdi), %xmm2
 	pxor	48(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(64bytesin256)
 
+# ifdef __CHKP__
+	bndcu	64(%rdi), %bnd0
+	bndcu	64(%rsi), %bnd1
+# endif
 	movdqa	64(%rdi), %xmm2
 	pxor	64(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(80bytesin256)
 
+# ifdef __CHKP__
+	bndcu	80(%rdi), %bnd0
+	bndcu	80(%rsi), %bnd1
+# endif
 	movdqa	80(%rdi), %xmm2
 	pxor	80(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(96bytesin256)
 
+# ifdef __CHKP__
+	bndcu	96(%rdi), %bnd0
+	bndcu	96(%rsi), %bnd1
+# endif
 	movdqa	96(%rdi), %xmm2
 	pxor	96(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(112bytesin256)
 
+# ifdef __CHKP__
+	bndcu	112(%rdi), %bnd0
+	bndcu	112(%rsi), %bnd1
+# endif
 	movdqa	112(%rdi), %xmm2
 	pxor	112(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
@@ -502,11 +737,19 @@ L(less256bytesin2alinged):
 	cmp	$32, %rdx
 	jb	L(less32bytesin128in2aligned)
 
+# ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rsi), %bnd1
+# endif
 	movdqu	(%rdi), %xmm2
 	pxor	(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(16bytesin256)
 
+# ifdef __CHKP__
+	bndcu	16(%rdi), %bnd0
+	bndcu	16(%rsi), %bnd1
+# endif
 	movdqu	16(%rdi), %xmm2
 	pxor	16(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
@@ -523,81 +766,145 @@ L(less32bytesin128in2aligned):
 L(256bytesormorein2aligned):
 
 	sub	$256, %rdx
+# ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rsi), %bnd1
+# endif
 	movdqa	(%rdi), %xmm2
 	pxor	(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(16bytesin256)
 
+# ifdef __CHKP__
+	bndcu	16(%rdi), %bnd0
+	bndcu	16(%rsi), %bnd1
+# endif
 	movdqa	16(%rdi), %xmm2
 	pxor	16(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(32bytesin256)
 
+# ifdef __CHKP__
+	bndcu	32(%rdi), %bnd0
+	bndcu	32(%rsi), %bnd1
+# endif
 	movdqa	32(%rdi), %xmm2
 	pxor	32(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(48bytesin256)
 
+# ifdef __CHKP__
+	bndcu	48(%rdi), %bnd0
+	bndcu	48(%rsi), %bnd1
+# endif
 	movdqa	48(%rdi), %xmm2
 	pxor	48(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(64bytesin256)
 
+# ifdef __CHKP__
+	bndcu	64(%rdi), %bnd0
+	bndcu	64(%rsi), %bnd1
+# endif
 	movdqa	64(%rdi), %xmm2
 	pxor	64(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(80bytesin256)
 
+# ifdef __CHKP__
+	bndcu	80(%rdi), %bnd0
+	bndcu	80(%rsi), %bnd1
+# endif
 	movdqa	80(%rdi), %xmm2
 	pxor	80(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(96bytesin256)
 
+# ifdef __CHKP__
+	bndcu	96(%rdi), %bnd0
+	bndcu	96(%rsi), %bnd1
+# endif
 	movdqa	96(%rdi), %xmm2
 	pxor	96(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(112bytesin256)
 
+# ifdef __CHKP__
+	bndcu	112(%rdi), %bnd0
+	bndcu	112(%rsi), %bnd1
+# endif
 	movdqa	112(%rdi), %xmm2
 	pxor	112(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(128bytesin256)
 
+# ifdef __CHKP__
+	bndcu	128(%rdi), %bnd0
+	bndcu	128(%rsi), %bnd1
+# endif
 	movdqa	128(%rdi), %xmm2
 	pxor	128(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(144bytesin256)
 
+# ifdef __CHKP__
+	bndcu	144(%rdi), %bnd0
+	bndcu	144(%rsi), %bnd1
+# endif
 	movdqa	144(%rdi), %xmm2
 	pxor	144(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(160bytesin256)
 
+# ifdef __CHKP__
+	bndcu	160(%rdi), %bnd0
+	bndcu	160(%rsi), %bnd1
+# endif
 	movdqa	160(%rdi), %xmm2
 	pxor	160(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(176bytesin256)
 
+# ifdef __CHKP__
+	bndcu	176(%rdi), %bnd0
+	bndcu	176(%rsi), %bnd1
+# endif
 	movdqa	176(%rdi), %xmm2
 	pxor	176(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(192bytesin256)
 
+# ifdef __CHKP__
+	bndcu	192(%rdi), %bnd0
+	bndcu	192(%rsi), %bnd1
+# endif
 	movdqa	192(%rdi), %xmm2
 	pxor	192(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(208bytesin256)
 
+# ifdef __CHKP__
+	bndcu	208(%rdi), %bnd0
+	bndcu	208(%rsi), %bnd1
+# endif
 	movdqa	208(%rdi), %xmm2
 	pxor	208(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(224bytesin256)
 
+# ifdef __CHKP__
+	bndcu	224(%rdi), %bnd0
+	bndcu	224(%rsi), %bnd1
+# endif
 	movdqa	224(%rdi), %xmm2
 	pxor	224(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
 	jnc	L(240bytesin256)
 
+# ifdef __CHKP__
+	bndcu	240(%rdi), %bnd0
+	bndcu	240(%rsi), %bnd1
+# endif
 	movdqa	240(%rdi), %xmm2
 	pxor	240(%rsi), %xmm2
 	ptest	%xmm2, %xmm0
@@ -648,18 +955,34 @@ L(512bytesormorein2aligned):
 	sub	$64, %rdx
 	ALIGN (4)
 L(64bytesormore_loopin2aligned):
+# ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rsi), %bnd1
+# endif
 	movdqa	(%rdi), %xmm2
 	pxor	(%rsi), %xmm2
 	movdqa	%xmm2, %xmm1
 
+# ifdef __CHKP__
+	bndcu	16(%rdi), %bnd0
+	bndcu	16(%rsi), %bnd1
+# endif
 	movdqa	16(%rdi), %xmm3
 	pxor	16(%rsi), %xmm3
 	por	%xmm3, %xmm1
 
+# ifdef __CHKP__
+	bndcu	32(%rdi), %bnd0
+	bndcu	32(%rsi), %bnd1
+# endif
 	movdqa	32(%rdi), %xmm4
 	pxor	32(%rsi), %xmm4
 	por	%xmm4, %xmm1
 
+# ifdef __CHKP__
+	bndcu	48(%rdi), %bnd0
+	bndcu	48(%rsi), %bnd1
+# endif
 	movdqa	48(%rdi), %xmm5
 	pxor	48(%rsi), %xmm5
 	por	%xmm5, %xmm1
@@ -682,18 +1005,34 @@ L(L2_L3_cache_aglined):
 L(L2_L3_aligned_128bytes_loop):
 	prefetchnta 0x1c0(%rdi)
 	prefetchnta 0x1c0(%rsi)
+# ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rsi), %bnd1
+# endif
 	movdqa	(%rdi), %xmm2
 	pxor	(%rsi), %xmm2
 	movdqa	%xmm2, %xmm1
 
+# ifdef __CHKP__
+	bndcu	16(%rdi), %bnd0
+	bndcu	16(%rsi), %bnd1
+# endif
 	movdqa	16(%rdi), %xmm3
 	pxor	16(%rsi), %xmm3
 	por	%xmm3, %xmm1
 
+# ifdef __CHKP__
+	bndcu	32(%rdi), %bnd0
+	bndcu	32(%rsi), %bnd1
+# endif
 	movdqa	32(%rdi), %xmm4
 	pxor	32(%rsi), %xmm4
 	por	%xmm4, %xmm1
 
+# ifdef __CHKP__
+	bndcu	48(%rdi), %bnd0
+	bndcu	48(%rsi), %bnd1
+# endif
 	movdqa	48(%rdi), %xmm5
 	pxor	48(%rsi), %xmm5
 	por	%xmm5, %xmm1
diff --git a/sysdeps/x86_64/multiarch/memcpy-c.c b/sysdeps/x86_64/multiarch/memcpy-c.c
new file mode 100644
index 0000000000..6fa50eada1
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcpy-c.c
@@ -0,0 +1,80 @@
+/* C-version of memcpy for use when Intel MPX is on
+   in order to process a buffer of pointers correctly.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <stddef.h>
+
+void *
+__memcpy (void *dst, const void *src, size_t n)
+{
+  if (!n) return dst;
+
+  __bnd_chk_ptr_lbounds(dst);
+  __bnd_chk_ptr_ubounds(dst+n-1);
+#ifndef __CHKWR__
+  __bnd_chk_ptr_lbounds(src);
+  __bnd_chk_ptr_ubounds(src+n-1);
+#endif
+
+  return chkp_memcpy_nochk(dst, src, n);
+}
+
+void *
+chkp_memcpy_nochk (void *dst, const void *src, size_t n)
+{
+  const char *s = src;
+  char *d = dst;
+  void *ret = dst;
+  size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1);
+  size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1);
+
+  if (offset_src != offset_dst)
+  {
+    while (n--)
+      *d++ = *s++;
+  }
+  else
+  {
+    if (offset_src) offset_src = sizeof(size_t) - offset_src;
+    while (n-- && offset_src--)
+      *d++ = *s++;
+    n++;
+    if (!n) return ret;
+    void **d1 = (void **)d;
+    void **s1 = (void **)s;
+    while (n >= sizeof(void *))
+    {
+      n -= sizeof(void *);
+      *d1++ = *s1++;
+    }
+    s = (char *)s1;
+    d = (char *)d1;
+    while (n--)
+      *d++ = *s++;
+  }
+  return ret;
+}
+
+weak_alias (__memcpy, __GI_memcpy)
+
+# if defined SHARED && !defined NOT_IN_libc && !defined IA32
+#  include <shlib-compat.h>
+versioned_symbol (libc, __memcpy, memcpy, GLIBC_2_14);
+# else
+weak_alias (__memcpy, memcpy)
+# endif
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back-1.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back-1.S
new file mode 100644
index 0000000000..7fedbeef8e
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back-1.S
@@ -0,0 +1,5 @@
+/* optimized version of memcpy without any checks or copying bounds.  */
+#define MEMCPY chkp_memcpy_nobnd_nochk
+#undef __CHKP__
+#undef __CHKWR__
+#include "memcpy-ssse3-back.S"
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
index fc9fcef27d..16b4e680a1 100644
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
@@ -27,7 +27,11 @@
 #include "asm-syntax.h"
 
 #ifndef MEMCPY
-# define MEMCPY		__memcpy_ssse3_back
+# if defined  __CHKP__ || defined __CHKWR__
+#  define MEMCPY		chkp_memcpy_nobnd
+# else
+#  define MEMCPY		__memcpy_ssse3_back
+# endif
 # define MEMCPY_CHK	__memcpy_chk_ssse3_back
 #endif
 
@@ -48,7 +52,7 @@
   ud2
 
 	.section .text.ssse3,"ax",@progbits
-#if !defined USE_AS_BCOPY
+#if !defined USE_AS_BCOPY && defined MEMCPY_CHK
 ENTRY (MEMCPY_CHK)
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
@@ -56,6 +60,15 @@ END (MEMCPY_CHK)
 #endif
 
 ENTRY (MEMCPY)
+#ifdef __CHKP__
+	testq	%rdx, %rdx
+	jz	L(NoEntryCheck)
+	bndcl	(%rdi), %bnd0
+	bndcu	-1(%rdi, %rdx), %bnd0
+	bndcl	(%rsi), %bnd1
+	bndcu	-1(%rsi, %rdx), %bnd1
+#endif
+
 	mov	%rdi, %rax
 #ifdef USE_AS_MEMPCPY
 	add	%rdx, %rax
@@ -87,6 +100,15 @@ L(bk_write):
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 #endif
 
+#ifdef __CHKP__
+L(NoEntryCheck):
+	mov	%rdi, %rax
+# ifdef USE_AS_MEMPCPY
+	add	%rdx, %rax
+# endif
+	ret
+#endif
+
 	ALIGN (4)
 L(144bytesormore):
 
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S
index a1e5031376..fc5ab2da03 100644
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ b/sysdeps/x86_64/multiarch/memcpy.S
@@ -18,14 +18,15 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <sysdep.h>
-#include <shlib-compat.h>
-#include <init-arch.h>
+#if !defined  __CHKP__ && !defined __CHKWR__
+# include <sysdep.h>
+# include <shlib-compat.h>
+# include <init-arch.h>
 
 /* Define multiple versions only for the definition in lib and for
    DSO.  In static binaries we need memcpy before the initialization
    happened.  */
-#if defined SHARED && !defined NOT_IN_libc
+# if defined SHARED && !defined NOT_IN_libc
 	.text
 ENTRY(__new_memcpy)
 	.type	__new_memcpy, @gnu_indirect_function
@@ -43,37 +44,39 @@ ENTRY(__new_memcpy)
 3:	ret
 END(__new_memcpy)
 
-# undef ENTRY
-# define ENTRY(name) \
+#  undef ENTRY
+#  define ENTRY(name) \
 	.type __memcpy_sse2, @function; \
 	.globl __memcpy_sse2; \
 	.hidden __memcpy_sse2; \
 	.p2align 4; \
 	__memcpy_sse2: cfi_startproc; \
 	CALL_MCOUNT
-# undef END
-# define END(name) \
+#  undef END
+#  define END(name) \
 	cfi_endproc; .size __memcpy_sse2, .-__memcpy_sse2
 
-# undef ENTRY_CHK
-# define ENTRY_CHK(name) \
+#  undef ENTRY_CHK
+#  define ENTRY_CHK(name) \
 	.type __memcpy_chk_sse2, @function; \
 	.globl __memcpy_chk_sse2; \
 	.p2align 4; \
 	__memcpy_chk_sse2: cfi_startproc; \
 	CALL_MCOUNT
-# undef END_CHK
-# define END_CHK(name) \
+#  undef END_CHK
+#  define END_CHK(name) \
 	cfi_endproc; .size __memcpy_chk_sse2, .-__memcpy_chk_sse2
 
-# undef libc_hidden_builtin_def
+#  undef libc_hidden_builtin_def
 /* It doesn't make sense to send libc-internal memcpy calls through a PLT.
    The speedup we get from using SSSE3 instruction is likely eaten away
    by the indirect call in the PLT.  */
-# define libc_hidden_builtin_def(name) \
+#  define libc_hidden_builtin_def(name) \
 	.globl __GI_memcpy; __GI_memcpy = __memcpy_sse2
 
 versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14);
-#endif
+# endif
+
+# include "../memcpy.S"
 
-#include "../memcpy.S"
+#endif
diff --git a/sysdeps/x86_64/multiarch/memcpy_chk-c.c b/sysdeps/x86_64/multiarch/memcpy_chk-c.c
new file mode 100644
index 0000000000..1eee86c639
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcpy_chk-c.c
@@ -0,0 +1 @@
+#include <debug/memcpy_chk.c>
diff --git a/sysdeps/x86_64/multiarch/memcpy_chk.S b/sysdeps/x86_64/multiarch/memcpy_chk.S
index ad01d8cd9f..6f87f2686d 100644
--- a/sysdeps/x86_64/multiarch/memcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/memcpy_chk.S
@@ -18,14 +18,15 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <sysdep.h>
-#include <init-arch.h>
+#if !defined  __CHKP__ && !defined __CHKWR__
+# include <sysdep.h>
+# include <init-arch.h>
 
 /* Define multiple versions only for the definition in lib and for
    DSO.  There are no multiarch memcpy functions for static binaries.
  */
-#ifndef NOT_IN_libc
-# ifdef SHARED
+# ifndef NOT_IN_libc
+#  ifdef SHARED
 	.text
 ENTRY(__memcpy_chk)
 	.type	__memcpy_chk, @gnu_indirect_function
@@ -41,7 +42,8 @@ ENTRY(__memcpy_chk)
 	leaq	__memcpy_chk_ssse3_back(%rip), %rax
 2:	ret
 END(__memcpy_chk)
-# else
-#  include "../memcpy_chk.S"
+#  else
+#   include "../memcpy_chk.S"
+#  endif
 # endif
 #endif
diff --git a/sysdeps/x86_64/multiarch/memmove-c.c b/sysdeps/x86_64/multiarch/memmove-c.c
new file mode 100644
index 0000000000..7111128e75
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove-c.c
@@ -0,0 +1,118 @@
+/* C-version of memmove for use when Intel MPX is enabled
+   in order to process a buffer of pointers correctly.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <stddef.h>
+
+void *
+__memmove (void *dst, const void *src, size_t n)
+{
+  if (n == 0) return dst;
+
+  __bnd_chk_ptr_lbounds(dst);
+  __bnd_chk_ptr_ubounds(dst+n-1);
+#ifndef __CHKWR__
+  __bnd_chk_ptr_lbounds(src);
+  __bnd_chk_ptr_ubounds(src+n-1);
+#endif
+  return chkp_memmove_nochk(dst, src, n);
+}
+
+
+void *
+chkp_memmove_nochk (void *dst, const void *src, size_t n)
+{
+  const char *s = src;
+  char *d = dst;
+  void *ret = dst;
+  size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1);
+  size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1);
+
+  if (offset_src != offset_dst)
+  {
+    if (s < d)
+    {
+      /* backward copying */
+      d += n;
+      s += n;
+      while (n--)
+        *--d = *--s;
+    }
+    else
+      /* forward copying */
+      while (n--)
+        *d++ = *s++;
+  }
+  else
+  {
+    if (s < d)
+    {
+      offset_src = (offset_src + (size_t)src) & (sizeof(size_t) - 1);
+      /* backward copying */
+      d += n;
+      s += n;
+      while (n-- && offset_src--)
+        *--d = *--s;
+      n++;
+      if (!n) return ret;
+      void **d1 = (void **)d;
+      void **s1 = (void **)s;
+      while (n >= sizeof(void *))
+      {
+        n -= sizeof(void *);
+        *--d1 = *--s1;
+      }
+      s = (char *)s1;
+      d = (char *)d1;
+      while (n--)
+        *--d = *--s;
+    }
+    else
+    {
+      if (offset_src) offset_src = sizeof(size_t) - offset_src;
+      /* forward copying */
+      while (n-- && offset_src--)
+        *d++ = *s++;
+      n++;
+      if (!n) return ret;
+      void **d1 = (void **)d;
+      void **s1 = (void **)s;
+      while (n >= sizeof(void *))
+      {
+        n -= sizeof(void *);
+        *d1++ = *s1++;
+      }
+      s = (char *)s1;
+      d = (char *)d1;
+      while (n--)
+        *d++ = *s++;
+    }
+  }
+  return ret;
+}
+
+weak_alias (__memmove, __libc_memmove)
+weak_alias (__memmove, __GI_memmove)
+weak_alias (__memmove, memmove)
+
+# if defined SHARED && !defined NOT_IN_libc
+#  include <shlib-compat.h>
+#  if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14)
+compat_symbol (libc, memmove, memcpy, GLIBC_2_2_5);
+#  endif
+# endif
diff --git a/sysdeps/x86_64/multiarch/memmove-ssse3-back-1.S b/sysdeps/x86_64/multiarch/memmove-ssse3-back-1.S
new file mode 100644
index 0000000000..2a1f3e67b7
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove-ssse3-back-1.S
@@ -0,0 +1,6 @@
+/* optimized version of memmove without any checks or copying bounds.  */
+#define USE_AS_MEMMOVE
+#define MEMCPY chkp_memmove_nobnd_nochk
+#undef __CHKP__
+#undef __CHKWR__
+#include "memcpy-ssse3-back.S"
diff --git a/sysdeps/x86_64/multiarch/memmove-ssse3-back.S b/sysdeps/x86_64/multiarch/memmove-ssse3-back.S
index f9a4e9aff9..478141b14a 100644
--- a/sysdeps/x86_64/multiarch/memmove-ssse3-back.S
+++ b/sysdeps/x86_64/multiarch/memmove-ssse3-back.S
@@ -1,4 +1,10 @@
 #define USE_AS_MEMMOVE
-#define MEMCPY		__memmove_ssse3_back
+#if defined __CHKP__ || defined __CHKWR__
+/* Version of memmove that does not copy bounds along with the data,
+   even if there are pointers in the source buffer. */
+# define MEMCPY	   chkp_memmove_nobnd
+# else
+# define MEMCPY		__memmove_ssse3_back
+#endif
 #define MEMCPY_CHK	__memmove_chk_ssse3_back
 #include "memcpy-ssse3-back.S"
diff --git a/sysdeps/x86_64/multiarch/memmove.c b/sysdeps/x86_64/multiarch/memmove.c
index 8149c487d5..0d2c6f0266 100644
--- a/sysdeps/x86_64/multiarch/memmove.c
+++ b/sysdeps/x86_64/multiarch/memmove.c
@@ -17,31 +17,32 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#ifndef NOT_IN_libc
-# define MEMMOVE __memmove_sse2
-# ifdef SHARED
-#  undef libc_hidden_builtin_def
-#  define libc_hidden_builtin_def(name) \
+#ifndef __CHKP__
+# ifndef NOT_IN_libc
+#  define MEMMOVE __memmove_sse2
+#  ifdef SHARED
+#   undef libc_hidden_builtin_def
+#   define libc_hidden_builtin_def(name) \
   __hidden_ver1 (__memmove_sse2, __GI_memmove, __memmove_sse2);
-# endif
+#  endif
 
 /* Redefine memmove so that the compiler won't complain about the type
    mismatch with the IFUNC selector in strong_alias, below.  */
-# undef memmove
-# define memmove __redirect_memmove
-# include <string.h>
-# undef memmove
+#  undef memmove
+#  define memmove __redirect_memmove
+#  include <string.h>
+#  undef memmove
 
 extern __typeof (__redirect_memmove) __memmove_sse2 attribute_hidden;
 extern __typeof (__redirect_memmove) __memmove_ssse3 attribute_hidden;
 extern __typeof (__redirect_memmove) __memmove_ssse3_back attribute_hidden;
-#endif
+# endif
 
-#include "string/memmove.c"
+# include "string/memmove.c"
 
-#ifndef NOT_IN_libc
-# include <shlib-compat.h>
-# include "init-arch.h"
+# ifndef NOT_IN_libc
+#  include <shlib-compat.h>
+#  include "init-arch.h"
 
 /* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
    ifunc symbol properly.  */
@@ -54,7 +55,8 @@ libc_ifunc (__libc_memmove,
 
 strong_alias (__libc_memmove, memmove)
 
-# if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14)
+#  if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14)
 compat_symbol (libc, memmove, memcpy, GLIBC_2_2_5);
+#  endif
 # endif
 #endif
diff --git a/sysdeps/x86_64/multiarch/memmove_chk-c.c b/sysdeps/x86_64/multiarch/memmove_chk-c.c
new file mode 100644
index 0000000000..bbf53d00d3
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove_chk-c.c
@@ -0,0 +1 @@
+#include <debug/memmove_chk.c>
diff --git a/sysdeps/x86_64/multiarch/memmove_chk.c b/sysdeps/x86_64/multiarch/memmove_chk.c
index 17ed460324..c1b0b9304b 100644
--- a/sysdeps/x86_64/multiarch/memmove_chk.c
+++ b/sysdeps/x86_64/multiarch/memmove_chk.c
@@ -17,19 +17,21 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <string.h>
-#include "init-arch.h"
+#ifndef __CHKP__
+# include <string.h>
+# include "init-arch.h"
 
-#define MEMMOVE_CHK __memmove_chk_sse2
+# define MEMMOVE_CHK __memmove_chk_sse2
 
 extern __typeof (__memmove_chk) __memmove_chk_sse2 attribute_hidden;
 extern __typeof (__memmove_chk) __memmove_chk_ssse3 attribute_hidden;
 extern __typeof (__memmove_chk) __memmove_chk_ssse3_back attribute_hidden;
 
-#include "debug/memmove_chk.c"
+# include "debug/memmove_chk.c"
 
 libc_ifunc (__memmove_chk,
 	    HAS_SSSE3
 	    ? (HAS_FAST_COPY_BACKWARD
 	       ? __memmove_chk_ssse3_back : __memmove_chk_ssse3)
 	    : __memmove_chk_sse2);
+#endif
diff --git a/sysdeps/x86_64/multiarch/mempcpy-c.c b/sysdeps/x86_64/multiarch/mempcpy-c.c
new file mode 100644
index 0000000000..522fb86e3e
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/mempcpy-c.c
@@ -0,0 +1,36 @@
+/* C-version of mempcpy for use when Intel MPX is enabled
+   in order to process an array of pointers correctly.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <stddef.h>
+
+void *
+mempcpy (void *dst, const void *src, size_t n)
+{
+  return memcpy(dst, src, n) + n;
+}
+
+void *
+chkp_mempcpy_nochk (void *dst, const void *src, size_t n)
+{
+  return chkp_memcpy_nochk(dst, src, n) + n;
+}
+
+weak_alias (mempcpy, __GI_mempcpy)
+weak_alias (mempcpy, __GI___mempcpy)
+weak_alias (mempcpy, __mempcpy)
diff --git a/sysdeps/x86_64/multiarch/mempcpy-ssse3-back-1.S b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back-1.S
new file mode 100644
index 0000000000..eb929f4182
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back-1.S
@@ -0,0 +1,6 @@
+/* optimized version of mempcpy without any checks or copying bounds.  */
+#define USE_AS_MEMPCPY
+#define MEMCPY chkp_mempcpy_nobnd_nochk
+#undef __CHKP__
+#undef __CHKWR__
+#include "memcpy-ssse3-back.S"
diff --git a/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S
index 82ffacb8fb..f32ecfc76e 100644
--- a/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S
+++ b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S
@@ -1,4 +1,12 @@
 #define USE_AS_MEMPCPY
-#define MEMCPY		__mempcpy_ssse3_back
-#define MEMCPY_CHK	__mempcpy_chk_ssse3_back
+
+#if defined __CHKP__ || defined __CHKWR__
+/* Version of mempcpy that does not copy bounds along with the data,
+   even if there are pointers in the source buffer. */
+# define MEMCPY  chkp_mempcpy_nobnd
+#else
+# define MEMCPY	__mempcpy_ssse3_back
+#endif
+
+#define MEMCPY_CHK __mempcpy_chk_ssse3_back
 #include "memcpy-ssse3-back.S"
diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S
index b8b7fcd121..4ec5825989 100644
--- a/sysdeps/x86_64/multiarch/mempcpy.S
+++ b/sysdeps/x86_64/multiarch/mempcpy.S
@@ -18,13 +18,14 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <sysdep.h>
-#include <init-arch.h>
+#if !defined  __CHKP__ && !defined __CHKWR__
+# include <sysdep.h>
+# include <init-arch.h>
 
 /* Define multiple versions only for the definition in lib and for
    DSO.  In static binaries we need mempcpy before the initialization
    happened.  */
-#if defined SHARED && !defined NOT_IN_libc
+# if defined SHARED && !defined NOT_IN_libc
 ENTRY(__mempcpy)
 	.type	__mempcpy, @gnu_indirect_function
 	cmpl	$0, KIND_OFFSET+__cpu_features(%rip)
@@ -40,38 +41,40 @@ ENTRY(__mempcpy)
 2:	ret
 END(__mempcpy)
 
-# undef ENTRY
-# define ENTRY(name) \
+#  undef ENTRY
+#  define ENTRY(name) \
 	.type __mempcpy_sse2, @function; \
 	.p2align 4; \
 	.globl __mempcpy_sse2; \
 	.hidden __mempcpy_sse2; \
 	__mempcpy_sse2: cfi_startproc; \
 	CALL_MCOUNT
-# undef END
-# define END(name) \
+#  undef END
+#  define END(name) \
 	cfi_endproc; .size __mempcpy_sse2, .-__mempcpy_sse2
 
-# undef ENTRY_CHK
-# define ENTRY_CHK(name) \
+#  undef ENTRY_CHK
+#  define ENTRY_CHK(name) \
 	.type __mempcpy_chk_sse2, @function; \
 	.globl __mempcpy_chk_sse2; \
 	.p2align 4; \
 	__mempcpy_chk_sse2: cfi_startproc; \
 	CALL_MCOUNT
-# undef END_CHK
-# define END_CHK(name) \
+#  undef END_CHK
+#  define END_CHK(name) \
 	cfi_endproc; .size __mempcpy_chk_sse2, .-__mempcpy_chk_sse2
 
-# undef libc_hidden_def
-# undef libc_hidden_builtin_def
+#  undef libc_hidden_def
+#  undef libc_hidden_builtin_def
 /* It doesn't make sense to send libc-internal mempcpy calls through a PLT.
    The speedup we get from using SSSE3 instruction is likely eaten away
    by the indirect call in the PLT.  */
-# define libc_hidden_def(name) \
+#  define libc_hidden_def(name) \
 	.globl __GI_mempcpy; __GI_mempcpy = __mempcpy_sse2
-# define libc_hidden_builtin_def(name) \
+#  define libc_hidden_builtin_def(name) \
 	.globl __GI___mempcpy; __GI___mempcpy = __mempcpy_sse2
-#endif
+# endif
+
+# include "../mempcpy.S"
 
-#include "../mempcpy.S"
+#endif
diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk-c.c b/sysdeps/x86_64/multiarch/mempcpy_chk-c.c
new file mode 100644
index 0000000000..ba170784c3
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/mempcpy_chk-c.c
@@ -0,0 +1 @@
+#include <debug/mempcpy_chk.c>
diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk.S b/sysdeps/x86_64/multiarch/mempcpy_chk.S
index 3801db399b..98acf9691c 100644
--- a/sysdeps/x86_64/multiarch/mempcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/mempcpy_chk.S
@@ -18,14 +18,15 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <sysdep.h>
-#include <init-arch.h>
+#if !defined  __CHKP__ && !defined __CHKWR__
+# include <sysdep.h>
+# include <init-arch.h>
 
 /* Define multiple versions only for the definition in lib and for
    DSO.  There are no multiarch mempcpy functions for static binaries.
  */
-#ifndef NOT_IN_libc
-# ifdef SHARED
+# ifndef NOT_IN_libc
+#  ifdef SHARED
 	.text
 ENTRY(__mempcpy_chk)
 	.type	__mempcpy_chk, @gnu_indirect_function
@@ -41,7 +42,8 @@ ENTRY(__mempcpy_chk)
 	leaq	__mempcpy_chk_ssse3_back(%rip), %rax
 2:	ret
 END(__mempcpy_chk)
-# else
-#  include "../mempcpy_chk.S"
+#  else
+#   include "../mempcpy_chk.S"
+#  endif
 # endif
 #endif
diff --git a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
index 028c6d3d74..a3535ad500 100644
--- a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
@@ -25,6 +25,14 @@
 #  define STRCAT  __strcat_sse2_unaligned
 # endif
 
+# ifdef __CHKP__
+#  define RETURN \
+	bndcu	-1(%rdi, %rax), %bnd0; \
+	ret
+# else
+#  define RETURN ret
+# endif
+
 # define USE_AS_STRCAT
 
 .text
@@ -37,6 +45,10 @@ ENTRY (STRCAT)
 /* Inline corresponding strlen file, temporary until new strcpy
    implementation gets merged.  */
 
+# ifdef __CHKP__
+	bndcl  (%rdi), %bnd0
+	bndcu	(%rdi), %bnd0
+# endif
 	xor	%rax, %rax
 	mov	%edi, %ecx
 	and	$0x3f, %ecx
@@ -67,84 +79,132 @@ L(align16_start):
 	pxor	%xmm1, %xmm1
 	pxor	%xmm2, %xmm2
 	pxor	%xmm3, %xmm3
+# ifdef __CHKP__
+	bndcu	16(%rax), %bnd0
+# endif
 	pcmpeqb	16(%rax), %xmm0
 	pmovmskb %xmm0, %edx
 	test	%edx, %edx
 	jnz	L(exit16)
 
+# ifdef __CHKP__
+	bndcu	32(%rax), %bnd0
+# endif
 	pcmpeqb	32(%rax), %xmm1
 	pmovmskb %xmm1, %edx
 	test	%edx, %edx
 	jnz	L(exit32)
 
+# ifdef __CHKP__
+	bndcu	48(%rax), %bnd0
+# endif
 	pcmpeqb	48(%rax), %xmm2
 	pmovmskb %xmm2, %edx
 	test	%edx, %edx
 	jnz	L(exit48)
 
+# ifdef __CHKP__
+	bndcu	64(%rax), %bnd0
+# endif
 	pcmpeqb	64(%rax), %xmm3
 	pmovmskb %xmm3, %edx
 	test	%edx, %edx
 	jnz	L(exit64)
 
+# ifdef __CHKP__
+	bndcu	80(%rax), %bnd0
+# endif
 	pcmpeqb	80(%rax), %xmm0
 	add	$64, %rax
 	pmovmskb %xmm0, %edx
 	test	%edx, %edx
 	jnz	L(exit16)
 
+# ifdef __CHKP__
+	bndcu	32(%rax), %bnd0
+# endif
 	pcmpeqb	32(%rax), %xmm1
 	pmovmskb %xmm1, %edx
 	test	%edx, %edx
 	jnz	L(exit32)
 
+# ifdef __CHKP__
+	bndcu	48(%rax), %bnd0
+# endif
 	pcmpeqb	48(%rax), %xmm2
 	pmovmskb %xmm2, %edx
 	test	%edx, %edx
 	jnz	L(exit48)
 
+# ifdef __CHKP__
+	bndcu	64(%rax), %bnd0
+# endif
 	pcmpeqb	64(%rax), %xmm3
 	pmovmskb %xmm3, %edx
 	test	%edx, %edx
 	jnz	L(exit64)
 
+# ifdef __CHKP__
+	bndcu	80(%rax), %bnd0
+# endif
 	pcmpeqb	80(%rax), %xmm0
 	add	$64, %rax
 	pmovmskb %xmm0, %edx
 	test	%edx, %edx
 	jnz	L(exit16)
 
+# ifdef __CHKP__
+	bndcu	32(%rax), %bnd0
+# endif
 	pcmpeqb	32(%rax), %xmm1
 	pmovmskb %xmm1, %edx
 	test	%edx, %edx
 	jnz	L(exit32)
 
+# ifdef __CHKP__
+	bndcu	48(%rax), %bnd0
+# endif
 	pcmpeqb	48(%rax), %xmm2
 	pmovmskb %xmm2, %edx
 	test	%edx, %edx
 	jnz	L(exit48)
 
+# ifdef __CHKP__
+	bndcu	64(%rax), %bnd0
+# endif
 	pcmpeqb	64(%rax), %xmm3
 	pmovmskb %xmm3, %edx
 	test	%edx, %edx
 	jnz	L(exit64)
 
+# ifdef __CHKP__
+	bndcu	80(%rax), %bnd0
+# endif
 	pcmpeqb	80(%rax), %xmm0
 	add	$64, %rax
 	pmovmskb %xmm0, %edx
 	test	%edx, %edx
 	jnz	L(exit16)
 
+# ifdef __CHKP__
+	bndcu	32(%rax), %bnd0
+# endif
 	pcmpeqb	32(%rax), %xmm1
 	pmovmskb %xmm1, %edx
 	test	%edx, %edx
 	jnz	L(exit32)
 
+# ifdef __CHKP__
+	bndcu	48(%rax), %bnd0
+# endif
 	pcmpeqb	48(%rax), %xmm2
 	pmovmskb %xmm2, %edx
 	test	%edx, %edx
 	jnz	L(exit48)
 
+# ifdef __CHKP__
+	bndcu	64(%rax), %bnd0
+# endif
 	pcmpeqb	64(%rax), %xmm3
 	pmovmskb %xmm3, %edx
 	test	%edx, %edx
@@ -153,6 +213,9 @@ L(align16_start):
 	test	$0x3f, %rax
 	jz	L(align64_loop)
 
+# ifdef __CHKP__
+	bndcu	80(%rax), %bnd0
+# endif
 	pcmpeqb	80(%rax), %xmm0
 	add	$80, %rax
 	pmovmskb %xmm0, %edx
@@ -162,6 +225,9 @@ L(align16_start):
 	test	$0x3f, %rax
 	jz	L(align64_loop)
 
+# ifdef __CHKP__
+	bndcu	16(%rax), %bnd0
+# endif
 	pcmpeqb	16(%rax), %xmm1
 	add	$16, %rax
 	pmovmskb %xmm1, %edx
@@ -171,6 +237,9 @@ L(align16_start):
 	test	$0x3f, %rax
 	jz	L(align64_loop)
 
+# ifdef __CHKP__
+	bndcu	16(%rax), %bnd0
+# endif
 	pcmpeqb	16(%rax), %xmm2
 	add	$16, %rax
 	pmovmskb %xmm2, %edx
@@ -180,6 +249,9 @@ L(align16_start):
 	test	$0x3f, %rax
 	jz	L(align64_loop)
 
+# ifdef __CHKP__
+	bndcu	16(%rax), %bnd0
+# endif
 	pcmpeqb	16(%rax), %xmm3
 	add	$16, %rax
 	pmovmskb %xmm3, %edx
@@ -187,8 +259,12 @@ L(align16_start):
 	jnz	L(exit)
 
 	add	$16, %rax
+
 	.p2align 4
 	L(align64_loop):
+# ifdef __CHKP__
+	bndcu	(%rax), %bnd0
+# endif
 	movaps	(%rax),	%xmm4
 	pminub	16(%rax),	%xmm4
 	movaps	32(%rax),	%xmm5
diff --git a/sysdeps/x86_64/multiarch/strchr.S b/sysdeps/x86_64/multiarch/strchr.S
index f170238b55..4311e8689c 100644
--- a/sysdeps/x86_64/multiarch/strchr.S
+++ b/sysdeps/x86_64/multiarch/strchr.S
@@ -91,6 +91,10 @@ __strchr_sse42:
 	CALL_MCOUNT
 	testb	%sil, %sil
 	je	__strend_sse4
+# ifdef __CHKP__
+	bndcl  (%rdi), %bnd0
+	bndcu  (%rdi), %bnd0
+# endif
 	pxor	%xmm2, %xmm2
 	movd	%esi, %xmm1
 	movl	%edi, %ecx
@@ -124,6 +128,9 @@ __strchr_sse42:
 	ja	L(return_null)
 L(unaligned_match):
 	addq	%rdi, %rax
+# ifdef __CHKP__
+	bndcu 	(%rax), %bnd0
+# endif
 	ret
 
 	.p2align 4
@@ -135,15 +142,27 @@ L(unaligned_no_match):
 L(loop):
 	addq	$16, %r8
 L(aligned_start):
+# ifdef __CHKP__
+	bndcu 	(%r8), %bnd0
+# endif
 	pcmpistri	$0x2, (%r8), %xmm1
 	jbe	L(wrap)
 	addq	$16, %r8
+# ifdef __CHKP__
+	bndcu 	(%r8), %bnd0
+# endif
 	pcmpistri	$0x2, (%r8), %xmm1
 	jbe	L(wrap)
 	addq	$16, %r8
+# ifdef __CHKP__
+	bndcu 	(%r8), %bnd0
+# endif
 	pcmpistri       $0x2, (%r8), %xmm1
 	jbe     L(wrap)
 	addq	$16, %r8
+# ifdef __CHKP__
+	bndcu 	(%r8), %bnd0
+# endif
 	pcmpistri	$0x2, (%r8), %xmm1
 	jbe	L(wrap)
 	jmp	L(loop)
@@ -159,6 +178,9 @@ L(return_null):
 	.p2align 4
 L(loop_exit):
 	leaq	(%r8,%rcx), %rax
+# ifdef __CHKP__
+	bndcu 	(%rax), %bnd0
+# endif
 	ret
 	cfi_endproc
 	.size	__strchr_sse42, .-__strchr_sse42
diff --git a/sysdeps/x86_64/multiarch/strcmp-sse42.S b/sysdeps/x86_64/multiarch/strcmp-sse42.S
index c84f1c2b31..edfa915707 100644
--- a/sysdeps/x86_64/multiarch/strcmp-sse42.S
+++ b/sysdeps/x86_64/multiarch/strcmp-sse42.S
@@ -127,6 +127,14 @@ STRCMP_SSE42:
 	je	LABEL(Byte0)
 	mov	%rdx, %r11
 #endif
+
+#ifdef __CHKP__
+	bndcl 	(%rdi), %bnd0
+	bndcu 	(%rdi), %bnd0
+	bndcl 	(%rsi), %bnd1
+	bndcu 	(%rsi), %bnd1
+#endif
+
 	mov	%esi, %ecx
 	mov	%edi, %eax
 /* Use 64bit AND here to avoid long NOP padding.  */
@@ -210,6 +218,10 @@ LABEL(touppermask):
 #endif
 	add	$16, %rsi		/* prepare to search next 16 bytes */
 	add	$16, %rdi		/* prepare to search next 16 bytes */
+#ifdef __CHKP__
+	bndcu 	(%rdi), %bnd0
+	bndcu 	(%rsi), %bnd1
+#endif
 
 	/*
 	 * Determine source and destination string offsets from 16-byte
@@ -231,6 +243,11 @@ LABEL(crosscache):
 	mov	%edx, %r8d		/* r8d is offset flag for exit tail */
 	xchg	%ecx, %eax
 	xchg	%rsi, %rdi
+#ifdef __CHKP__
+	bndmov	%bnd0, %bnd2
+	bndmov	%bnd1, %bnd0
+	bndmov	%bnd2, %bnd1
+#endif
 LABEL(bigger):
 	movdqa	(%rdi), %xmm2
 	movdqa	(%rsi), %xmm1
@@ -280,6 +297,10 @@ LABEL(ashr_0):
 	mov	%rcx, %rdx	/* only for offset of sse4 instruction loop*/
 	.p2align 4
 LABEL(ashr_0_use):
+#ifdef __CHKP__
+	bndcu	-1(%rdi, %rdx), %bnd0
+	bndcu	-1(%rsi, %rdx), %bnd1
+#endif
 	movdqa	(%rdi,%rdx), %xmm0
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri      $0x1a,(%rsi,%rdx), %xmm0
@@ -295,6 +316,10 @@ LABEL(ashr_0_use):
 	jbe	LABEL(strcmp_exitz)
 #endif
 
+#ifdef __CHKP__
+	bndcu	-1(%rdi, %rdx), %bnd0
+	bndcu	-1(%rsi, %rdx), %bnd1
+#endif
 	movdqa	(%rdi,%rdx), %xmm0
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri      $0x1a,(%rsi,%rdx), %xmm0
@@ -320,6 +345,10 @@ LABEL(ashr_0_exit_use):
 	jbe	LABEL(strcmp_exitz)
 #endif
 	lea	-16(%rdx, %rcx), %rcx
+#ifdef __CHKP__
+	bndcu	-1(%rdi, %rcx), %bnd0
+	bndcu	-1(%rsi, %rcx), %bnd1
+#endif
 	movzbl	(%rdi, %rcx), %eax
 	movzbl	(%rsi, %rcx), %edx
 #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
@@ -362,6 +391,15 @@ LABEL(ashr_1):
 	and	$0xfff, %r10		/* offset into 4K page */
 	sub	$0x1000, %r10		/* subtract 4K pagesize */
 	mov	%rcx, %rdx	/* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+	bndcu	-16(%rdi, %rdx), %bnd0
+	bndcu	-16(%rsi, %rdx), %bnd1
+	jmp	LABEL(loop_ashr_1_use)
+LABEL(ashr_1_check):
+	bndcu	(%rdi, %rdx), %bnd0
+	bndcu	(%rsi, %rdx), %bnd1
+	jmp	LABEL(nibble_ashr_1_restart_use)
+#endif
 
 	.p2align 4
 LABEL(loop_ashr_1_use):
@@ -416,7 +454,11 @@ LABEL(nibble_ashr_1_use):
 	jae	LABEL(nibble_ashr_exit_use)
 #endif
 	cmp	$14, %ecx
+#ifdef __CHKP__
+	ja	LABEL(ashr_1_check)
+#else
 	ja	LABEL(nibble_ashr_1_restart_use)
+#endif
 
 	jmp	LABEL(nibble_ashr_exit_use)
 
@@ -450,6 +492,15 @@ LABEL(ashr_2):
 	and	$0xfff, %r10	/* offset into 4K page */
 	sub	$0x1000, %r10	/* subtract 4K pagesize */
 	mov	%rcx, %rdx	/* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+	bndcu	-16(%rdi, %rdx), %bnd0
+	bndcu	-16(%rsi, %rdx), %bnd1
+	jmp	LABEL(loop_ashr_2_use)
+LABEL(ashr_2_check):
+	bndcu	(%rdi, %rdx), %bnd0
+	bndcu	(%rsi, %rdx), %bnd1
+	jmp	LABEL(nibble_ashr_2_restart_use)
+#endif
 
 	.p2align 4
 LABEL(loop_ashr_2_use):
@@ -504,7 +555,11 @@ LABEL(nibble_ashr_2_use):
 	jae	LABEL(nibble_ashr_exit_use)
 #endif
 	cmp	$13, %ecx
+#ifdef __CHKP__
+	ja	LABEL(ashr_2_check)
+#else
 	ja	LABEL(nibble_ashr_2_restart_use)
+#endif
 
 	jmp	LABEL(nibble_ashr_exit_use)
 
@@ -539,6 +594,15 @@ LABEL(ashr_3):
 	and	$0xfff, %r10	/* offset into 4K page */
 	sub	$0x1000, %r10	/* subtract 4K pagesize */
 	mov	%rcx, %rdx	/* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+	bndcu	-16(%rdi, %rdx), %bnd0
+	bndcu	-16(%rsi, %rdx), %bnd1
+	jmp	LABEL(loop_ashr_3_use)
+LABEL(ashr_3_check):
+	bndcu	(%rdi, %rdx), %bnd0
+	bndcu	(%rsi, %rdx), %bnd1
+	jmp	LABEL(nibble_ashr_3_restart_use)
+#endif
 
 LABEL(loop_ashr_3_use):
 	add	$16, %r10
@@ -592,7 +656,11 @@ LABEL(nibble_ashr_3_use):
 	jae	LABEL(nibble_ashr_exit_use)
 #endif
 	cmp	$12, %ecx
+#ifdef __CHKP__
+	ja	LABEL(ashr_3_check)
+#else
 	ja	LABEL(nibble_ashr_3_restart_use)
+#endif
 
 	jmp	LABEL(nibble_ashr_exit_use)
 
@@ -627,6 +695,15 @@ LABEL(ashr_4):
 	and	$0xfff, %r10	/* offset into 4K page */
 	sub	$0x1000, %r10	/* subtract 4K pagesize */
 	mov	%rcx, %rdx	/* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+	bndcu	-16(%rdi, %rdx), %bnd0
+	bndcu	-16(%rsi, %rdx), %bnd1
+	jmp	LABEL(loop_ashr_4_use)
+LABEL(ashr_4_check):
+	bndcu	(%rdi, %rdx), %bnd0
+	bndcu	(%rsi, %rdx), %bnd1
+	jmp	LABEL(nibble_ashr_4_restart_use)
+#endif
 
 	.p2align 4
 LABEL(loop_ashr_4_use):
@@ -681,7 +758,11 @@ LABEL(nibble_ashr_4_use):
 	jae	LABEL(nibble_ashr_exit_use)
 #endif
 	cmp	$11, %ecx
+#ifdef __CHKP__
+	ja	LABEL(ashr_4_check)
+#else
 	ja	LABEL(nibble_ashr_4_restart_use)
+#endif
 
 	jmp	LABEL(nibble_ashr_exit_use)
 
@@ -716,6 +797,15 @@ LABEL(ashr_5):
 	and	$0xfff, %r10	/* offset into 4K page */
 	sub	$0x1000, %r10	/* subtract 4K pagesize */
 	mov	%rcx, %rdx	/* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+	bndcu	-16(%rdi, %rdx), %bnd0
+	bndcu	-16(%rsi, %rdx), %bnd1
+	jmp	LABEL(loop_ashr_5_use)
+LABEL(ashr_5_check):
+	bndcu	(%rdi, %rdx), %bnd0
+	bndcu	(%rsi, %rdx), %bnd1
+	jmp	LABEL(nibble_ashr_5_restart_use)
+#endif
 
 	.p2align 4
 LABEL(loop_ashr_5_use):
@@ -771,7 +861,11 @@ LABEL(nibble_ashr_5_use):
 	jae	LABEL(nibble_ashr_exit_use)
 #endif
 	cmp	$10, %ecx
+#ifdef __CHKP__
+	ja	LABEL(ashr_5_check)
+#else
 	ja	LABEL(nibble_ashr_5_restart_use)
+#endif
 
 	jmp	LABEL(nibble_ashr_exit_use)
 
@@ -806,6 +900,15 @@ LABEL(ashr_6):
 	and	$0xfff, %r10	/* offset into 4K page */
 	sub	$0x1000, %r10	/* subtract 4K pagesize */
 	mov	%rcx, %rdx	/* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+	bndcu	-16(%rdi, %rdx), %bnd0
+	bndcu	-16(%rsi, %rdx), %bnd1
+	jmp	LABEL(loop_ashr_6_use)
+LABEL(ashr_6_check):
+	bndcu	(%rdi, %rdx), %bnd0
+	bndcu	(%rsi, %rdx), %bnd1
+	jmp	LABEL(nibble_ashr_6_restart_use)
+#endif
 
 	.p2align 4
 LABEL(loop_ashr_6_use):
@@ -860,7 +963,11 @@ LABEL(nibble_ashr_6_use):
 	jae	LABEL(nibble_ashr_exit_use)
 #endif
 	cmp	$9, %ecx
+#ifdef __CHKP__
+	ja	LABEL(ashr_6_check)
+#else
 	ja	LABEL(nibble_ashr_6_restart_use)
+#endif
 
 	jmp	LABEL(nibble_ashr_exit_use)
 
@@ -895,6 +1002,15 @@ LABEL(ashr_7):
 	and	$0xfff, %r10	/* offset into 4K page */
 	sub	$0x1000, %r10	/* subtract 4K pagesize */
 	mov	%rcx, %rdx	/* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+	bndcu	-16(%rdi, %rdx), %bnd0
+	bndcu	-16(%rsi, %rdx), %bnd1
+	jmp	LABEL(loop_ashr_7_use)
+LABEL(ashr_7_check):
+	bndcu	(%rdi, %rdx), %bnd0
+	bndcu	(%rsi, %rdx), %bnd1
+	jmp	LABEL(nibble_ashr_7_restart_use)
+#endif
 
 	.p2align 4
 LABEL(loop_ashr_7_use):
@@ -949,7 +1065,11 @@ LABEL(nibble_ashr_7_use):
 	jae	LABEL(nibble_ashr_exit_use)
 #endif
 	cmp	$8, %ecx
+#ifdef __CHKP__
+	ja	LABEL(ashr_7_check)
+#else
 	ja	LABEL(nibble_ashr_7_restart_use)
+#endif
 
 	jmp	LABEL(nibble_ashr_exit_use)
 
@@ -984,6 +1104,15 @@ LABEL(ashr_8):
 	and	$0xfff, %r10	/* offset into 4K page */
 	sub	$0x1000, %r10	/* subtract 4K pagesize */
 	mov	%rcx, %rdx	/* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+	bndcu	-16(%rdi, %rdx), %bnd0
+	bndcu	-16(%rsi, %rdx), %bnd1
+	jmp	LABEL(loop_ashr_8_use)
+LABEL(ashr_8_check):
+	bndcu	(%rdi, %rdx), %bnd0
+	bndcu	(%rsi, %rdx), %bnd1
+	jmp	LABEL(nibble_ashr_8_restart_use)
+#endif
 
 	.p2align 4
 LABEL(loop_ashr_8_use):
@@ -1038,7 +1167,11 @@ LABEL(nibble_ashr_8_use):
 	jae	LABEL(nibble_ashr_exit_use)
 #endif
 	cmp	$7, %ecx
+#ifdef __CHKP__
+	ja	LABEL(ashr_8_check)
+#else
 	ja	LABEL(nibble_ashr_8_restart_use)
+#endif
 
 	jmp	LABEL(nibble_ashr_exit_use)
 
@@ -1073,6 +1206,15 @@ LABEL(ashr_9):
 	and	$0xfff, %r10	/* offset into 4K page */
 	sub	$0x1000, %r10	/* subtract 4K pagesize */
 	mov	%rcx, %rdx	/* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+	bndcu	-16(%rdi, %rdx), %bnd0
+	bndcu	-16(%rsi, %rdx), %bnd1
+	jmp	LABEL(loop_ashr_9_use)
+LABEL(ashr_9_check):
+	bndcu	(%rdi, %rdx), %bnd0
+	bndcu	(%rsi, %rdx), %bnd1
+	jmp	LABEL(nibble_ashr_9_restart_use)
+#endif
 
 	.p2align 4
 LABEL(loop_ashr_9_use):
@@ -1128,7 +1270,11 @@ LABEL(nibble_ashr_9_use):
 	jae	LABEL(nibble_ashr_exit_use)
 #endif
 	cmp	$6, %ecx
+#ifdef __CHKP__
+	ja	LABEL(ashr_9_check)
+#else
 	ja	LABEL(nibble_ashr_9_restart_use)
+#endif
 
 	jmp	LABEL(nibble_ashr_exit_use)
 
@@ -1163,6 +1309,15 @@ LABEL(ashr_10):
 	and	$0xfff, %r10	/* offset into 4K page */
 	sub	$0x1000, %r10	/* subtract 4K pagesize */
 	mov	%rcx, %rdx	/* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+	bndcu	-16(%rdi, %rdx), %bnd0
+	bndcu	-16(%rsi, %rdx), %bnd1
+	jmp	LABEL(loop_ashr_10_use)
+LABEL(ashr_10_check):
+	bndcu	(%rdi, %rdx), %bnd0
+	bndcu	(%rsi, %rdx), %bnd1
+	jmp	LABEL(nibble_ashr_10_restart_use)
+#endif
 
 	.p2align 4
 LABEL(loop_ashr_10_use):
@@ -1217,7 +1372,11 @@ LABEL(nibble_ashr_10_use):
 	jae	LABEL(nibble_ashr_exit_use)
 #endif
 	cmp	$5, %ecx
+#ifdef __CHKP__
+	ja	LABEL(ashr_10_check)
+#else
 	ja	LABEL(nibble_ashr_10_restart_use)
+#endif
 
 	jmp	LABEL(nibble_ashr_exit_use)
 
@@ -1252,6 +1411,15 @@ LABEL(ashr_11):
 	and	$0xfff, %r10	/* offset into 4K page */
 	sub	$0x1000, %r10	/* subtract 4K pagesize */
 	mov	%rcx, %rdx	/* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+	bndcu	-16(%rdi, %rdx), %bnd0
+	bndcu	-16(%rsi, %rdx), %bnd1
+	jmp	LABEL(loop_ashr_11_use)
+LABEL(ashr_11_check):
+	bndcu	(%rdi, %rdx), %bnd0
+	bndcu	(%rsi, %rdx), %bnd1
+	jmp	LABEL(nibble_ashr_11_restart_use)
+#endif
 
 	.p2align 4
 LABEL(loop_ashr_11_use):
@@ -1306,7 +1474,11 @@ LABEL(nibble_ashr_11_use):
 	jae	LABEL(nibble_ashr_exit_use)
 #endif
 	cmp	$4, %ecx
+#ifdef __CHKP__
+	ja	LABEL(ashr_11_check)
+#else
 	ja	LABEL(nibble_ashr_11_restart_use)
+#endif
 
 	jmp	LABEL(nibble_ashr_exit_use)
 
@@ -1341,6 +1513,15 @@ LABEL(ashr_12):
 	and	$0xfff, %r10	/* offset into 4K page */
 	sub	$0x1000, %r10	/* subtract 4K pagesize */
 	mov	%rcx, %rdx	/* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+	bndcu	-16(%rdi, %rdx), %bnd0
+	bndcu	-16(%rsi, %rdx), %bnd1
+	jmp	LABEL(loop_ashr_12_use)
+LABEL(ashr_12_check):
+	bndcu	(%rdi, %rdx), %bnd0
+	bndcu	(%rsi, %rdx), %bnd1
+	jmp	LABEL(nibble_ashr_12_restart_use)
+#endif
 
 	.p2align 4
 LABEL(loop_ashr_12_use):
@@ -1395,7 +1576,11 @@ LABEL(nibble_ashr_12_use):
 	jae	LABEL(nibble_ashr_exit_use)
 #endif
 	cmp	$3, %ecx
+#ifdef __CHKP__
+	ja	LABEL(ashr_12_check)
+#else
 	ja	LABEL(nibble_ashr_12_restart_use)
+#endif
 
 	jmp	LABEL(nibble_ashr_exit_use)
 
@@ -1431,6 +1616,15 @@ LABEL(ashr_13):
 	sub	$0x1000, %r10	/* subtract 4K pagesize */
 
 	mov	%rcx, %rdx	/* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+	bndcu	-16(%rdi, %rdx), %bnd0
+	bndcu	-16(%rsi, %rdx), %bnd1
+	jmp	LABEL(loop_ashr_13_use)
+LABEL(ashr_13_check):
+	bndcu	(%rdi, %rdx), %bnd0
+	bndcu	(%rsi, %rdx), %bnd1
+	jmp	LABEL(nibble_ashr_13_restart_use)
+#endif
 
 	.p2align 4
 LABEL(loop_ashr_13_use):
@@ -1485,7 +1679,11 @@ LABEL(nibble_ashr_13_use):
 	jae	LABEL(nibble_ashr_exit_use)
 #endif
 	cmp	$2, %ecx
+#ifdef __CHKP__
+	ja	LABEL(ashr_13_check)
+#else
 	ja	LABEL(nibble_ashr_13_restart_use)
+#endif
 
 	jmp	LABEL(nibble_ashr_exit_use)
 
@@ -1521,6 +1719,15 @@ LABEL(ashr_14):
 	sub	$0x1000, %r10	/* subtract 4K pagesize */
 
 	mov	%rcx, %rdx	/* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+	bndcu	-16(%rdi, %rdx), %bnd0
+	bndcu	-16(%rsi, %rdx), %bnd1
+	jmp	LABEL(loop_ashr_14_use)
+LABEL(ashr_14_check):
+	bndcu	(%rdi, %rdx), %bnd0
+	bndcu	(%rsi, %rdx), %bnd1
+	jmp	LABEL(nibble_ashr_14_restart_use)
+#endif
 
 	.p2align 4
 LABEL(loop_ashr_14_use):
@@ -1575,7 +1782,11 @@ LABEL(nibble_ashr_14_use):
 	jae	LABEL(nibble_ashr_exit_use)
 #endif
 	cmp	$1, %ecx
+#ifdef __CHKP__
+	ja	LABEL(ashr_14_check)
+#else
 	ja	LABEL(nibble_ashr_14_restart_use)
+#endif
 
 	jmp	LABEL(nibble_ashr_exit_use)
 
@@ -1613,6 +1824,15 @@ LABEL(ashr_15):
 	sub	$0x1000, %r10	/* subtract 4K pagesize */
 
 	mov	%rcx, %rdx	/* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+	bndcu	-16(%rdi, %rdx), %bnd0
+	bndcu	-16(%rsi, %rdx), %bnd1
+	jmp	LABEL(loop_ashr_15_use)
+LABEL(ashr_15_check):
+	bndcu	(%rdi, %rdx), %bnd0
+	bndcu	(%rsi, %rdx), %bnd1
+	jmp	LABEL(nibble_ashr_15_restart_use)
+#endif
 
 	.p2align 4
 LABEL(loop_ashr_15_use):
@@ -1667,7 +1887,11 @@ LABEL(nibble_ashr_15_use):
 	jae	LABEL(nibble_ashr_exit_use)
 #endif
 	cmp	$0, %ecx
+#ifdef __CHKP__
+	ja	LABEL(ashr_15_check)
+#else
 	ja	LABEL(nibble_ashr_15_restart_use)
+#endif
 
 LABEL(nibble_ashr_exit_use):
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
@@ -1691,6 +1915,11 @@ LABEL(exit_use):
 	test	%r8d, %r8d
 	jz	LABEL(ret_use)
 	xchg	%eax, %edx
+#ifdef __CHKP__
+	bndmov	%bnd0, %bnd2
+	bndmov	%bnd1, %bnd0
+	bndmov	%bnd2, %bnd1
+#endif
 LABEL(ret_use):
 #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
 	leaq	_nl_C_LC_CTYPE_tolower+128*4(%rip), %rcx
@@ -1707,6 +1936,11 @@ LABEL(less32bytes):
 	test	%r8d, %r8d
 	jz	LABEL(ret)
 	xchg	%rsi, %rdi		/* recover original order according to flag(%r8d) */
+#ifdef __CHKP__
+	bndmov	%bnd0, %bnd2
+	bndmov	%bnd1, %bnd0
+	bndmov	%bnd2, %bnd1
+#endif
 
 	.p2align 4
 LABEL(ret):
@@ -1717,6 +1951,10 @@ LABEL(less16bytes):
 	sub	%rdx, %r11
 	jbe	LABEL(strcmp_exitz)
 #endif
+#ifdef __CHKP__
+	bndcu	(%rdi, %rdx), %bnd0
+	bndcu	(%rsi, %rdx), %bnd1
+#endif
 	movzbl	(%rsi, %rdx), %ecx
 	movzbl	(%rdi, %rdx), %eax
 
diff --git a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
index 7710173c68..e6baee92db 100644
--- a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
@@ -33,7 +33,7 @@
 	lea	TABLE(%rip), %r11;                              \
 	movslq	(%r11, INDEX, SCALE), %rcx;                     \
 	lea	(%r11, %rcx), %rcx;                             \
-	jmp	*%rcx
+	jmp *%rcx
 
 # ifndef USE_AS_STRCAT
 
@@ -51,6 +51,16 @@ ENTRY (STRCPY)
 
 # endif
 
+# ifdef __CHKP__
+	bndcl	(%rdi), %bnd0
+	bndcu	(%rdi), %bnd0
+	bndcl	(%rsi), %bnd1
+	bndcu	(%rsi), %bnd1
+#  if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
+	bndcu	-1(%rdi, %rdx), %bnd0
+#  endif
+# endif
+
 	and	$63, %rcx
 	cmp	$32, %rcx
 	jbe	L(SourceStringAlignmentLess32)
@@ -79,6 +89,9 @@ ENTRY (STRCPY)
 	test	%rdx, %rdx
 	jnz	L(CopyFrom1To16BytesTail)
 
+# ifdef __CHKP__
+	bndcu	16(%rsi), %bnd1
+# endif
 	pcmpeqb	16(%rsi), %xmm0
 	pmovmskb %xmm0, %rdx
 
@@ -91,6 +104,9 @@ ENTRY (STRCPY)
 	jnz	L(CopyFrom1To32Bytes)
 
 	movdqu	(%rsi, %rcx), %xmm1   /* copy 16 bytes */
+# ifdef __CHKP__
+	bndcu	15(%rdi), %bnd0
+# endif
 	movdqu	%xmm1, (%rdi)
 
 /* If source address alignment != destination address alignment */
@@ -101,6 +117,10 @@ L(Unalign16Both):
 	add	%rcx, %r8
 # endif
 	mov	$16, %rcx
+# ifdef __CHKP__
+	bndcu	16(%rsi, %rcx), %bnd1
+	bndcu	15(%rdi, %rcx), %bnd0
+# endif
 	movdqa	(%rsi, %rcx), %xmm1
 	movaps	16(%rsi, %rcx), %xmm2
 	movdqu	%xmm1, (%rdi, %rcx)
@@ -118,6 +138,10 @@ L(Unalign16Both):
 	jnz	L(CopyFrom1To16Bytes)
 # endif
 
+# ifdef __CHKP__
+	bndcu	16(%rsi, %rcx), %bnd1
+	bndcu	15(%rdi, %rcx), %bnd0
+# endif
 	movaps	16(%rsi, %rcx), %xmm3
 	movdqu	%xmm2, (%rdi, %rcx)
 	pcmpeqb	%xmm3, %xmm0
@@ -134,6 +158,10 @@ L(Unalign16Both):
 	jnz	L(CopyFrom1To16Bytes)
 # endif
 
+# ifdef __CHKP__
+	bndcu	16(%rsi, %rcx), %bnd1
+	bndcu	15(%rdi, %rcx), %bnd0
+# endif
 	movaps	16(%rsi, %rcx), %xmm4
 	movdqu	%xmm3, (%rdi, %rcx)
 	pcmpeqb	%xmm4, %xmm0
@@ -150,6 +178,10 @@ L(Unalign16Both):
 	jnz	L(CopyFrom1To16Bytes)
 # endif
 
+# ifdef __CHKP__
+	bndcu	16(%rsi, %rcx), %bnd1
+	bndcu	15(%rdi, %rcx), %bnd0
+# endif
 	movaps	16(%rsi, %rcx), %xmm1
 	movdqu	%xmm4, (%rdi, %rcx)
 	pcmpeqb	%xmm1, %xmm0
@@ -166,6 +198,10 @@ L(Unalign16Both):
 	jnz	L(CopyFrom1To16Bytes)
 # endif
 
+# ifdef __CHKP__
+	bndcu	16(%rsi, %rcx), %bnd1
+	bndcu	15(%rdi, %rcx), %bnd0
+# endif
 	movaps	16(%rsi, %rcx), %xmm2
 	movdqu	%xmm1, (%rdi, %rcx)
 	pcmpeqb	%xmm2, %xmm0
@@ -182,6 +218,10 @@ L(Unalign16Both):
 	jnz	L(CopyFrom1To16Bytes)
 # endif
 
+# ifdef __CHKP__
+	bndcu	16(%rsi, %rcx), %bnd1
+	bndcu	15(%rdi, %rcx), %bnd0
+# endif
 	movaps	16(%rsi, %rcx), %xmm3
 	movdqu	%xmm2, (%rdi, %rcx)
 	pcmpeqb	%xmm3, %xmm0
@@ -198,6 +238,10 @@ L(Unalign16Both):
 	jnz	L(CopyFrom1To16Bytes)
 # endif
 
+# ifdef __CHKP__
+	bndcu	16(%rsi, %rcx), %bnd1
+	bndcu	15(%rdi, %rcx), %bnd0
+# endif
 	movdqu	%xmm3, (%rdi, %rcx)
 	mov	%rsi, %rdx
 	lea	16(%rsi, %rcx), %rsi
@@ -208,6 +252,9 @@ L(Unalign16Both):
 	lea	128(%r8, %rdx), %r8
 # endif
 L(Unaligned64Loop):
+# ifdef __CHKP__
+	bndcu	48(%rsi), %bnd1
+# endif
 	movaps	(%rsi), %xmm2
 	movaps	%xmm2, %xmm4
 	movaps	16(%rsi), %xmm5
@@ -229,6 +276,10 @@ L(Unaligned64Loop):
 L(Unaligned64Loop_start):
 	add	$64, %rdi
 	add	$64, %rsi
+# ifdef __CHKP__
+	bndcu	(%rsi), %bnd1
+	bndcu	(%rdi), %bnd0
+# endif
 	movdqu	%xmm4, -64(%rdi)
 	movaps	(%rsi), %xmm2
 	movdqa	%xmm2, %xmm4
@@ -271,16 +322,28 @@ L(Unaligned64Leave):
 	jnz	L(CopyFrom1To16BytesUnaligned_32)
 
 	bsf	%rcx, %rdx
+# ifdef __CHKP__
+	bndcu	47(%rdi), %bnd0
+# endif
 	movdqu	%xmm4, (%rdi)
 	movdqu	%xmm5, 16(%rdi)
 	movdqu	%xmm6, 32(%rdi)
 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
 # ifdef USE_AS_STPCPY
+#  ifdef __CHKP__
+	bndcu	48(%rdi, %rdx), %bnd0
+#  endif
 	lea	48(%rdi, %rdx), %rax
 # endif
+#  ifdef __CHKP__
+	bndcu	63(%rdi), %bnd0
+#  endif
 	movdqu	%xmm7, 48(%rdi)
 	add	$15, %r8
 	sub	%rdx, %r8
+#  ifdef __CHKP__
+	bndcu	49(%rdi, %rdx), %bnd0
+#  endif
 	lea	49(%rdi, %rdx), %rdi
 	jmp	L(StrncpyFillTailWithZero)
 # else
@@ -309,6 +372,10 @@ L(SourceStringAlignmentLess32):
 	test	%rdx, %rdx
 	jnz	L(CopyFrom1To16BytesTail1)
 
+# ifdef __CHKP__
+	bndcu	16(%rsi), %bnd1
+	bndcu	15(%rdi), %bnd0
+# endif
 	pcmpeqb	%xmm2, %xmm0
 	movdqu	%xmm1, (%rdi)
 	pmovmskb %xmm0, %rdx
@@ -372,6 +439,9 @@ L(CopyFrom1To16BytesUnaligned_0):
 # ifdef USE_AS_STPCPY
 	lea	(%rdi, %rdx), %rax
 # endif
+#  ifdef __CHKP__
+	bndcu	15(%rdi), %bnd0
+#  endif
 	movdqu	%xmm4, (%rdi)
 	add	$63, %r8
 	sub	%rdx, %r8
@@ -384,6 +454,9 @@ L(CopyFrom1To16BytesUnaligned_0):
 	.p2align 4
 L(CopyFrom1To16BytesUnaligned_16):
 	bsf	%rcx, %rdx
+#  ifdef __CHKP__
+	bndcu	31(%rdi), %bnd0
+#  endif
 	movdqu	%xmm4, (%rdi)
 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
 # ifdef USE_AS_STPCPY
@@ -403,6 +476,9 @@ L(CopyFrom1To16BytesUnaligned_16):
 	.p2align 4
 L(CopyFrom1To16BytesUnaligned_32):
 	bsf	%rdx, %rdx
+#  ifdef __CHKP__
+	bndcu	47(%rdi), %bnd0
+#  endif
 	movdqu	%xmm4, (%rdi)
 	movdqu	%xmm5, 16(%rdi)
 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
@@ -529,6 +605,9 @@ L(CopyFrom1To16BytesTail1Case2OrCase3):
 
 	.p2align 4
 L(Exit1):
+# ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+# endif
 	mov	%dh, (%rdi)
 # ifdef USE_AS_STPCPY
 	lea	(%rdi), %rax
@@ -543,6 +622,9 @@ L(Exit1):
 	.p2align 4
 L(Exit2):
 	mov	(%rsi), %dx
+# ifdef __CHKP__
+	bndcu	1(%rdi), %bnd0
+# endif
 	mov	%dx, (%rdi)
 # ifdef USE_AS_STPCPY
 	lea	1(%rdi), %rax
@@ -557,6 +639,9 @@ L(Exit2):
 	.p2align 4
 L(Exit3):
 	mov	(%rsi), %cx
+# ifdef __CHKP__
+	bndcu	2(%rdi), %bnd0
+# endif
 	mov	%cx, (%rdi)
 	mov	%dh, 2(%rdi)
 # ifdef USE_AS_STPCPY
@@ -572,6 +657,9 @@ L(Exit3):
 	.p2align 4
 L(Exit4):
 	mov	(%rsi), %edx
+# ifdef __CHKP__
+	bndcu	3(%rdi), %bnd0
+# endif
 	mov	%edx, (%rdi)
 # ifdef USE_AS_STPCPY
 	lea	3(%rdi), %rax
@@ -586,6 +674,9 @@ L(Exit4):
 	.p2align 4
 L(Exit5):
 	mov	(%rsi), %ecx
+# ifdef __CHKP__
+	bndcu	4(%rdi), %bnd0
+# endif
 	mov	%dh, 4(%rdi)
 	mov	%ecx, (%rdi)
 # ifdef USE_AS_STPCPY
@@ -602,6 +693,9 @@ L(Exit5):
 L(Exit6):
 	mov	(%rsi), %ecx
 	mov	4(%rsi), %dx
+# ifdef __CHKP__
+	bndcu	5(%rdi), %bnd0
+# endif
 	mov	%ecx, (%rdi)
 	mov	%dx, 4(%rdi)
 # ifdef USE_AS_STPCPY
@@ -618,6 +712,9 @@ L(Exit6):
 L(Exit7):
 	mov	(%rsi), %ecx
 	mov	3(%rsi), %edx
+# ifdef __CHKP__
+	bndcu	6(%rdi), %bnd0
+# endif
 	mov	%ecx, (%rdi)
 	mov	%edx, 3(%rdi)
 # ifdef USE_AS_STPCPY
@@ -633,6 +730,9 @@ L(Exit7):
 	.p2align 4
 L(Exit8):
 	mov	(%rsi), %rdx
+# ifdef __CHKP__
+	bndcu	7(%rdi), %bnd0
+# endif
 	mov	%rdx, (%rdi)
 # ifdef USE_AS_STPCPY
 	lea	7(%rdi), %rax
@@ -647,6 +747,9 @@ L(Exit8):
 	.p2align 4
 L(Exit9):
 	mov	(%rsi), %rcx
+# ifdef __CHKP__
+	bndcu	8(%rdi), %bnd0
+# endif
 	mov	%dh, 8(%rdi)
 	mov	%rcx, (%rdi)
 # ifdef USE_AS_STPCPY
@@ -663,6 +766,9 @@ L(Exit9):
 L(Exit10):
 	mov	(%rsi), %rcx
 	mov	8(%rsi), %dx
+# ifdef __CHKP__
+	bndcu	9(%rdi), %bnd0
+# endif
 	mov	%rcx, (%rdi)
 	mov	%dx, 8(%rdi)
 # ifdef USE_AS_STPCPY
@@ -679,6 +785,9 @@ L(Exit10):
 L(Exit11):
 	mov	(%rsi), %rcx
 	mov	7(%rsi), %edx
+# ifdef __CHKP__
+	bndcu	10(%rdi), %bnd0
+# endif
 	mov	%rcx, (%rdi)
 	mov	%edx, 7(%rdi)
 # ifdef USE_AS_STPCPY
@@ -695,6 +804,9 @@ L(Exit11):
 L(Exit12):
 	mov	(%rsi), %rcx
 	mov	8(%rsi), %edx
+# ifdef __CHKP__
+	bndcu	11(%rdi), %bnd0
+# endif
 	mov	%rcx, (%rdi)
 	mov	%edx, 8(%rdi)
 # ifdef USE_AS_STPCPY
@@ -711,6 +823,9 @@ L(Exit12):
 L(Exit13):
 	mov	(%rsi), %rcx
 	mov	5(%rsi), %rdx
+# ifdef __CHKP__
+	bndcu	12(%rdi), %bnd0
+# endif
 	mov	%rcx, (%rdi)
 	mov	%rdx, 5(%rdi)
 # ifdef USE_AS_STPCPY
@@ -727,6 +842,9 @@ L(Exit13):
 L(Exit14):
 	mov	(%rsi), %rcx
 	mov	6(%rsi), %rdx
+# ifdef __CHKP__
+	bndcu	13(%rdi), %bnd0
+# endif
 	mov	%rcx, (%rdi)
 	mov	%rdx, 6(%rdi)
 # ifdef USE_AS_STPCPY
@@ -743,6 +861,9 @@ L(Exit14):
 L(Exit15):
 	mov	(%rsi), %rcx
 	mov	7(%rsi), %rdx
+# ifdef __CHKP__
+	bndcu	14(%rdi), %bnd0
+# endif
 	mov	%rcx, (%rdi)
 	mov	%rdx, 7(%rdi)
 # ifdef USE_AS_STPCPY
@@ -758,6 +879,9 @@ L(Exit15):
 	.p2align 4
 L(Exit16):
 	movdqu	(%rsi), %xmm0
+# ifdef __CHKP__
+	bndcu	15(%rdi), %bnd0
+# endif
 	movdqu	%xmm0, (%rdi)
 # ifdef USE_AS_STPCPY
 	lea	15(%rdi), %rax
@@ -772,6 +896,9 @@ L(Exit16):
 	.p2align 4
 L(Exit17):
 	movdqu	(%rsi), %xmm0
+# ifdef __CHKP__
+	bndcu	16(%rdi), %bnd0
+# endif
 	movdqu	%xmm0, (%rdi)
 	mov	%dh, 16(%rdi)
 # ifdef USE_AS_STPCPY
@@ -788,6 +915,9 @@ L(Exit17):
 L(Exit18):
 	movdqu	(%rsi), %xmm0
 	mov	16(%rsi), %cx
+# ifdef __CHKP__
+	bndcu	17(%rdi), %bnd0
+# endif
 	movdqu	%xmm0, (%rdi)
 	mov	%cx, 16(%rdi)
 # ifdef USE_AS_STPCPY
@@ -804,6 +934,9 @@ L(Exit18):
 L(Exit19):
 	movdqu	(%rsi), %xmm0
 	mov	15(%rsi), %ecx
+# ifdef __CHKP__
+	bndcu	18(%rdi), %bnd0
+# endif
 	movdqu	%xmm0, (%rdi)
 	mov	%ecx, 15(%rdi)
 # ifdef USE_AS_STPCPY
@@ -820,6 +953,9 @@ L(Exit19):
 L(Exit20):
 	movdqu	(%rsi), %xmm0
 	mov	16(%rsi), %ecx
+# ifdef __CHKP__
+	bndcu	19(%rdi), %bnd0
+# endif
 	movdqu	%xmm0, (%rdi)
 	mov	%ecx, 16(%rdi)
 # ifdef USE_AS_STPCPY
@@ -836,6 +972,9 @@ L(Exit20):
 L(Exit21):
 	movdqu	(%rsi), %xmm0
 	mov	16(%rsi), %ecx
+# ifdef __CHKP__
+	bndcu	20(%rdi), %bnd0
+# endif
 	movdqu	%xmm0, (%rdi)
 	mov	%ecx, 16(%rdi)
 	mov	%dh, 20(%rdi)
@@ -853,6 +992,9 @@ L(Exit21):
 L(Exit22):
 	movdqu	(%rsi), %xmm0
 	mov	14(%rsi), %rcx
+# ifdef __CHKP__
+	bndcu	21(%rdi), %bnd0
+# endif
 	movdqu	%xmm0, (%rdi)
 	mov	%rcx, 14(%rdi)
 # ifdef USE_AS_STPCPY
@@ -869,6 +1011,9 @@ L(Exit22):
 L(Exit23):
 	movdqu	(%rsi), %xmm0
 	mov	15(%rsi), %rcx
+# ifdef __CHKP__
+	bndcu	22(%rdi), %bnd0
+# endif
 	movdqu	%xmm0, (%rdi)
 	mov	%rcx, 15(%rdi)
 # ifdef USE_AS_STPCPY
@@ -885,6 +1030,9 @@ L(Exit23):
 L(Exit24):
 	movdqu	(%rsi), %xmm0
 	mov	16(%rsi), %rcx
+# ifdef __CHKP__
+	bndcu	23(%rdi), %bnd0
+# endif
 	movdqu	%xmm0, (%rdi)
 	mov	%rcx, 16(%rdi)
 # ifdef USE_AS_STPCPY
@@ -901,6 +1049,9 @@ L(Exit24):
 L(Exit25):
 	movdqu	(%rsi), %xmm0
 	mov	16(%rsi), %rcx
+# ifdef __CHKP__
+	bndcu	24(%rdi), %bnd0
+# endif
 	movdqu	%xmm0, (%rdi)
 	mov	%rcx, 16(%rdi)
 	mov	%dh, 24(%rdi)
@@ -919,6 +1070,9 @@ L(Exit26):
 	movdqu	(%rsi), %xmm0
 	mov	16(%rsi), %rdx
 	mov	24(%rsi), %cx
+# ifdef __CHKP__
+	bndcu	25(%rdi), %bnd0
+# endif
 	movdqu	%xmm0, (%rdi)
 	mov	%rdx, 16(%rdi)
 	mov	%cx, 24(%rdi)
@@ -937,6 +1091,9 @@ L(Exit27):
 	movdqu	(%rsi), %xmm0
 	mov	16(%rsi), %rdx
 	mov	23(%rsi), %ecx
+# ifdef __CHKP__
+	bndcu	26(%rdi), %bnd0
+# endif
 	movdqu	%xmm0, (%rdi)
 	mov	%rdx, 16(%rdi)
 	mov	%ecx, 23(%rdi)
@@ -955,6 +1112,9 @@ L(Exit28):
 	movdqu	(%rsi), %xmm0
 	mov	16(%rsi), %rdx
 	mov	24(%rsi), %ecx
+# ifdef __CHKP__
+	bndcu	27(%rdi), %bnd0
+# endif
 	movdqu	%xmm0, (%rdi)
 	mov	%rdx, 16(%rdi)
 	mov	%ecx, 24(%rdi)
@@ -972,6 +1132,9 @@ L(Exit28):
 L(Exit29):
 	movdqu	(%rsi), %xmm0
 	movdqu	13(%rsi), %xmm2
+# ifdef __CHKP__
+	bndcu	28(%rdi), %bnd0
+# endif
 	movdqu	%xmm0, (%rdi)
 	movdqu	%xmm2, 13(%rdi)
 # ifdef USE_AS_STPCPY
@@ -988,6 +1151,9 @@ L(Exit29):
 L(Exit30):
 	movdqu	(%rsi), %xmm0
 	movdqu	14(%rsi), %xmm2
+# ifdef __CHKP__
+	bndcu	29(%rdi), %bnd0
+# endif
 	movdqu	%xmm0, (%rdi)
 	movdqu	%xmm2, 14(%rdi)
 # ifdef USE_AS_STPCPY
@@ -1004,6 +1170,9 @@ L(Exit30):
 L(Exit31):
 	movdqu	(%rsi), %xmm0
 	movdqu	15(%rsi), %xmm2
+# ifdef __CHKP__
+	bndcu	30(%rdi), %bnd0
+# endif
 	movdqu	%xmm0, (%rdi)
 	movdqu	%xmm2, 15(%rdi)
 # ifdef USE_AS_STPCPY
@@ -1020,6 +1189,9 @@ L(Exit31):
 L(Exit32):
 	movdqu	(%rsi), %xmm0
 	movdqu	16(%rsi), %xmm2
+# ifdef __CHKP__
+	bndcu	31(%rdi), %bnd0
+# endif
 	movdqu	%xmm0, (%rdi)
 	movdqu	%xmm2, 16(%rdi)
 # ifdef USE_AS_STPCPY
diff --git a/sysdeps/x86_64/multiarch/strrchr.S b/sysdeps/x86_64/multiarch/strrchr.S
index 3f92a41ef9..1fed105bf0 100644
--- a/sysdeps/x86_64/multiarch/strrchr.S
+++ b/sysdeps/x86_64/multiarch/strrchr.S
@@ -97,6 +97,10 @@ __strrchr_sse42:
 	CALL_MCOUNT
 	testb	%sil, %sil
 	je	__strend_sse4
+# ifdef __CHKP__
+	bndcl  (%rdi), %bnd0
+	bndcu  (%rdi), %bnd0
+# endif
 	xor	%eax,%eax	/* RAX has the last occurrence of s.  */
 	movd	%esi, %xmm1
 	punpcklbw	%xmm1, %xmm1
@@ -135,6 +139,9 @@ L(unaligned_no_byte):
 	   contain the NULL terminator.  */
 	jg	L(exit)
 	addq	$16, %r8
+# ifdef __CHKP__
+	bndcu 	(%r8), %bnd0
+# endif
 
 /* Loop start on aligned string.  */
 	.p2align 4
@@ -142,6 +149,9 @@ L(loop):
 	pcmpistri	$0x4a, (%r8), %xmm1
 	jbe	L(match_or_eos)
 	addq	$16, %r8
+# ifdef __CHKP__
+	bndcu 	(%r8), %bnd0
+# endif
 	jmp	L(loop)
 	.p2align 4
 L(match_or_eos):
@@ -149,11 +159,17 @@ L(match_or_eos):
 L(match_no_eos):
 	leaq	(%r8,%rcx), %rax
 	addq	$16, %r8
+# ifdef __CHKP__
+	bndcu 	(%r8), %bnd0
+# endif
 	jmp     L(loop)
 	.p2align 4
 L(had_eos):
 	jnc     L(exit)
 	leaq	(%r8,%rcx), %rax
+# ifdef __CHKP__
+	bndcu  	(%rax), %bnd0
+# endif
 	.p2align 4
 L(exit):
 	ret
diff --git a/sysdeps/x86_64/multiarch/wcscpy-ssse3.S b/sysdeps/x86_64/multiarch/wcscpy-ssse3.S
index b7de092228..77889dd555 100644
--- a/sysdeps/x86_64/multiarch/wcscpy-ssse3.S
+++ b/sysdeps/x86_64/multiarch/wcscpy-ssse3.S
@@ -25,13 +25,27 @@ ENTRY (__wcscpy_ssse3)
 
 	mov	%rsi, %rcx
 	mov	%rdi, %rdx
+# ifdef __CHKP__
+	bndcl  	(%rdi), %bnd0
+	bndcl  	(%rsi), %bnd1
+	bndcu  	(%rsi), %bnd1
+# endif
 
 	cmpl	$0, (%rcx)
 	jz	L(Exit4)
+# ifdef __CHKP__
+	bndcu  	4(%rcx), %bnd1
+# endif
 	cmpl	$0, 4(%rcx)
 	jz	L(Exit8)
+# ifdef __CHKP__
+	bndcu  	8(%rcx), %bnd1
+# endif
 	cmpl	$0, 8(%rcx)
 	jz	L(Exit12)
+# ifdef __CHKP__
+	bndcu  	12(%rcx), %bnd1
+# endif
 	cmpl	$0, 12(%rcx)
 	jz	L(Exit16)
 
@@ -40,10 +54,19 @@ ENTRY (__wcscpy_ssse3)
 
 	pxor	%xmm0, %xmm0
 	mov	(%rcx), %r9
+# ifdef __CHKP__
+	bndcu  	7(%rdx), %bnd0
+# endif
 	mov	%r9, (%rdx)
 
+# ifdef __CHKP__
+	bndcu  	(%rsi), %bnd1
+# endif
 	pcmpeqd	(%rsi), %xmm0
 	mov	8(%rcx), %r9
+# ifdef __CHKP__
+	bndcu  	15(%rdx), %bnd0
+# endif
 	mov	%r9, 8(%rdx)
 
 	pmovmskb %xmm0, %rax
@@ -72,6 +95,10 @@ ENTRY (__wcscpy_ssse3)
 	jmp	L(Shl12)
 
 L(Align16Both):
+# ifdef __CHKP__
+	bndcu  	16(%rcx), %bnd1
+	bndcu  	15(%rdx), %bnd0
+# endif
 	movaps	(%rcx), %xmm1
 	movaps	16(%rcx), %xmm2
 	movaps	%xmm1, (%rdx)
@@ -82,6 +109,10 @@ L(Align16Both):
 	test	%rax, %rax
 	jnz	L(CopyFrom1To16Bytes)
 
+# ifdef __CHKP__
+	bndcu  	16(%rcx, %rsi), %bnd1
+	bndcu  	15(%rdx, %rsi), %bnd0
+# endif
 	movaps	16(%rcx, %rsi), %xmm3
 	movaps	%xmm2, (%rdx, %rsi)
 	pcmpeqd	%xmm3, %xmm0
@@ -91,6 +122,10 @@ L(Align16Both):
 	test	%rax, %rax
 	jnz	L(CopyFrom1To16Bytes)
 
+# ifdef __CHKP__
+	bndcu  	16(%rcx, %rsi), %bnd1
+	bndcu  	15(%rdx, %rsi), %bnd0
+# endif
 	movaps	16(%rcx, %rsi), %xmm4
 	movaps	%xmm3, (%rdx, %rsi)
 	pcmpeqd	%xmm4, %xmm0
@@ -100,6 +135,10 @@ L(Align16Both):
 	test	%rax, %rax
 	jnz	L(CopyFrom1To16Bytes)
 
+# ifdef __CHKP__
+	bndcu  	16(%rcx, %rsi), %bnd1
+	bndcu  	15(%rdx, %rsi), %bnd0
+# endif
 	movaps	16(%rcx, %rsi), %xmm1
 	movaps	%xmm4, (%rdx, %rsi)
 	pcmpeqd	%xmm1, %xmm0
@@ -109,6 +148,10 @@ L(Align16Both):
 	test	%rax, %rax
 	jnz	L(CopyFrom1To16Bytes)
 
+# ifdef __CHKP__
+	bndcu  	16(%rcx, %rsi), %bnd1
+	bndcu  	15(%rdx, %rsi), %bnd0
+# endif
 	movaps	16(%rcx, %rsi), %xmm2
 	movaps	%xmm1, (%rdx, %rsi)
 	pcmpeqd	%xmm2, %xmm0
@@ -118,6 +161,10 @@ L(Align16Both):
 	test	%rax, %rax
 	jnz	L(CopyFrom1To16Bytes)
 
+# ifdef __CHKP__
+	bndcu  	16(%rcx, %rsi), %bnd1
+	bndcu  	15(%rdx, %rsi), %bnd0
+# endif
 	movaps	16(%rcx, %rsi), %xmm3
 	movaps	%xmm2, (%rdx, %rsi)
 	pcmpeqd	%xmm3, %xmm0
@@ -127,6 +174,10 @@ L(Align16Both):
 	test	%rax, %rax
 	jnz	L(CopyFrom1To16Bytes)
 
+# ifdef __CHKP__
+	bndcu  	16(%rcx, %rsi), %bnd1
+	bndcu  	15(%rdx, %rsi), %bnd0
+# endif
 	movaps	%xmm3, (%rdx, %rsi)
 	mov	%rcx, %rax
 	lea	16(%rcx, %rsi), %rcx
@@ -138,6 +189,10 @@ L(Align16Both):
 
 	.p2align 4
 L(Aligned64Loop):
+# ifdef __CHKP__
+	bndcu  	(%rcx), %bnd1
+	bndcu  	63(%rdx), %bnd0
+# endif
 	movaps	(%rcx), %xmm2
 	movaps	%xmm2, %xmm4
 	movaps	16(%rcx), %xmm5
@@ -168,6 +223,9 @@ L(Aligned64Leave):
 	pcmpeqd	%xmm5, %xmm0
 
 	pmovmskb %xmm0, %rax
+# ifdef __CHKP__
+	bndcu  	-49(%rdx), %bnd0
+# endif
 	movaps	%xmm4, -64(%rdx)
 	test	%rax, %rax
 	lea	16(%rsi), %rsi
@@ -176,11 +234,17 @@ L(Aligned64Leave):
 	pcmpeqd	%xmm6, %xmm0
 
 	pmovmskb %xmm0, %rax
+# ifdef __CHKP__
+	bndcu  	-33(%rdx), %bnd0
+# endif
 	movaps	%xmm5, -48(%rdx)
 	test	%rax, %rax
 	lea	16(%rsi), %rsi
 	jnz	L(CopyFrom1To16Bytes)
 
+# ifdef __CHKP__
+	bndcu  	-17(%rdx), %bnd0
+# endif
 	movaps	%xmm6, -32(%rdx)
 	pcmpeqd	%xmm7, %xmm0
 
@@ -190,11 +254,17 @@ L(Aligned64Leave):
 	jnz	L(CopyFrom1To16Bytes)
 
 	mov	$-0x40, %rsi
+# ifdef __CHKP__
+	bndcu  	-1(%rdx), %bnd0
+# endif
 	movaps	%xmm7, -16(%rdx)
 	jmp	L(Aligned64Loop)
 
 	.p2align 4
 L(Shl4):
+# ifdef __CHKP__
+	bndcu  	12(%rcx), %bnd1
+# endif
 	movaps	-4(%rcx), %xmm1
 	movaps	12(%rcx), %xmm2
 L(Shl4Start):
@@ -206,6 +276,10 @@ L(Shl4Start):
 	jnz	L(Shl4LoopExit)
 
 	palignr	$4, %xmm1, %xmm2
+# ifdef __CHKP__
+	bndcu  	28(%rcx), %bnd1
+	bndcu  	15(%rdx), %bnd0
+# endif
 	movaps	%xmm2, (%rdx)
 	movaps	28(%rcx), %xmm2
 
@@ -219,6 +293,10 @@ L(Shl4Start):
 	jnz	L(Shl4LoopExit)
 
 	palignr	$4, %xmm3, %xmm2
+# ifdef __CHKP__
+	bndcu  	28(%rcx), %bnd1
+	bndcu  	15(%rdx), %bnd0
+# endif
 	movaps	%xmm2, (%rdx)
 	movaps	28(%rcx), %xmm2
 
@@ -232,6 +310,10 @@ L(Shl4Start):
 	jnz	L(Shl4LoopExit)
 
 	palignr	$4, %xmm1, %xmm2
+# ifdef __CHKP__
+	bndcu  	28(%rcx), %bnd1
+	bndcu  	15(%rdx), %bnd0
+# endif
 	movaps	%xmm2, (%rdx)
 	movaps	28(%rcx), %xmm2
 
@@ -244,6 +326,9 @@ L(Shl4Start):
 	jnz	L(Shl4LoopExit)
 
 	palignr	$4, %xmm3, %xmm2
+# ifdef __CHKP__
+	bndcu  	15(%rdx), %bnd0
+# endif
 	movaps	%xmm2, (%rdx)
 	lea	28(%rcx), %rcx
 	lea	16(%rdx), %rdx
@@ -258,6 +343,9 @@ L(Shl4Start):
 
 	.p2align 4
 L(Shl4LoopStart):
+# ifdef __CHKP__
+	bndcu  	12(%rcx), %bnd1
+# endif
 	movaps	12(%rcx), %xmm2
 	movaps	28(%rcx), %xmm3
 	movaps	%xmm3, %xmm6
@@ -279,6 +367,9 @@ L(Shl4LoopStart):
 	lea	64(%rcx), %rcx
 	palignr	$4, %xmm1, %xmm2
 	movaps	%xmm7, %xmm1
+# ifdef __CHKP__
+	bndcu  	63(%rdx), %bnd0
+# endif
 	movaps	%xmm5, 48(%rdx)
 	movaps	%xmm4, 32(%rdx)
 	movaps	%xmm3, 16(%rdx)
@@ -287,6 +378,10 @@ L(Shl4LoopStart):
 	jmp	L(Shl4LoopStart)
 
 L(Shl4LoopExit):
+# ifdef __CHKP__
+	bndcu  	-4(%rcx), %bnd1
+	bndcu  	11(%rdx), %bnd0
+# endif
 	movdqu	-4(%rcx), %xmm1
 	mov	$12, %rsi
 	movdqu	%xmm1, -4(%rdx)
@@ -294,6 +389,9 @@ L(Shl4LoopExit):
 
 	.p2align 4
 L(Shl8):
+# ifdef __CHKP__
+	bndcu  	8(%rcx), %bnd1
+# endif
 	movaps	-8(%rcx), %xmm1
 	movaps	8(%rcx), %xmm2
 L(Shl8Start):
@@ -305,6 +403,10 @@ L(Shl8Start):
 	jnz	L(Shl8LoopExit)
 
 	palignr	$8, %xmm1, %xmm2
+# ifdef __CHKP__
+	bndcu  	24(%rcx), %bnd1
+	bndcu  	15(%rdx), %bnd0
+# endif
 	movaps	%xmm2, (%rdx)
 	movaps	24(%rcx), %xmm2
 
@@ -318,6 +420,10 @@ L(Shl8Start):
 	jnz	L(Shl8LoopExit)
 
 	palignr	$8, %xmm3, %xmm2
+# ifdef __CHKP__
+	bndcu  	24(%rcx), %bnd1
+	bndcu  	15(%rdx), %bnd0
+# endif
 	movaps	%xmm2, (%rdx)
 	movaps	24(%rcx), %xmm2
 
@@ -331,6 +437,10 @@ L(Shl8Start):
 	jnz	L(Shl8LoopExit)
 
 	palignr	$8, %xmm1, %xmm2
+# ifdef __CHKP__
+	bndcu  	24(%rcx), %bnd1
+	bndcu  	15(%rdx), %bnd0
+# endif
 	movaps	%xmm2, (%rdx)
 	movaps	24(%rcx), %xmm2
 
@@ -343,6 +453,10 @@ L(Shl8Start):
 	jnz	L(Shl8LoopExit)
 
 	palignr	$8, %xmm3, %xmm2
+# ifdef __CHKP__
+	bndcu  	24(%rcx), %bnd1
+	bndcu  	15(%rdx), %bnd0
+# endif
 	movaps	%xmm2, (%rdx)
 	lea	24(%rcx), %rcx
 	lea	16(%rdx), %rdx
@@ -357,6 +471,9 @@ L(Shl8Start):
 
 	.p2align 4
 L(Shl8LoopStart):
+# ifdef __CHKP__
+	bndcu  	8(%rcx), %bnd1
+# endif
 	movaps	8(%rcx), %xmm2
 	movaps	24(%rcx), %xmm3
 	movaps	%xmm3, %xmm6
@@ -378,6 +495,9 @@ L(Shl8LoopStart):
 	lea	64(%rcx), %rcx
 	palignr	$8, %xmm1, %xmm2
 	movaps	%xmm7, %xmm1
+# ifdef __CHKP__
+	bndcu  	63(%rdx), %bnd0
+# endif
 	movaps	%xmm5, 48(%rdx)
 	movaps	%xmm4, 32(%rdx)
 	movaps	%xmm3, 16(%rdx)
@@ -386,6 +506,10 @@ L(Shl8LoopStart):
 	jmp	L(Shl8LoopStart)
 
 L(Shl8LoopExit):
+# ifdef __CHKP__
+	bndcu  	(%rcx), %bnd1
+	bndcu  	7(%rdx), %bnd0
+# endif
 	mov	(%rcx), %r9
 	mov	$8, %rsi
 	mov	%r9, (%rdx)
@@ -393,6 +517,9 @@ L(Shl8LoopExit):
 
 	.p2align 4
 L(Shl12):
+# ifdef __CHKP__
+	bndcu  	4(%rcx), %bnd1
+# endif
 	movaps	-12(%rcx), %xmm1
 	movaps	4(%rcx), %xmm2
 L(Shl12Start):
@@ -404,6 +531,10 @@ L(Shl12Start):
 	jnz	L(Shl12LoopExit)
 
 	palignr	$12, %xmm1, %xmm2
+# ifdef __CHKP__
+	bndcu  	20(%rcx), %bnd1
+	bndcu  	15(%rdx), %bnd0
+# endif
 	movaps	%xmm2, (%rdx)
 	movaps	20(%rcx), %xmm2
 
@@ -417,6 +548,10 @@ L(Shl12Start):
 	jnz	L(Shl12LoopExit)
 
 	palignr	$12, %xmm3, %xmm2
+# ifdef __CHKP__
+	bndcu  	20(%rcx), %bnd1
+	bndcu  	15(%rdx), %bnd0
+# endif
 	movaps	%xmm2, (%rdx)
 	movaps	20(%rcx), %xmm2
 
@@ -430,6 +565,10 @@ L(Shl12Start):
 	jnz	L(Shl12LoopExit)
 
 	palignr	$12, %xmm1, %xmm2
+# ifdef __CHKP__
+	bndcu  	20(%rcx), %bnd1
+	bndcu  	15(%rdx), %bnd0
+# endif
 	movaps	%xmm2, (%rdx)
 	movaps	20(%rcx), %xmm2
 
@@ -442,6 +581,10 @@ L(Shl12Start):
 	jnz	L(Shl12LoopExit)
 
 	palignr	$12, %xmm3, %xmm2
+# ifdef __CHKP__
+	bndcu  	20(%rcx), %bnd1
+	bndcu  	15(%rdx), %bnd0
+# endif
 	movaps	%xmm2, (%rdx)
 	lea	20(%rcx), %rcx
 	lea	16(%rdx), %rdx
@@ -456,6 +599,9 @@ L(Shl12Start):
 
 	.p2align 4
 L(Shl12LoopStart):
+# ifdef __CHKP__
+	bndcu  	4(%rcx), %bnd1
+# endif
 	movaps	4(%rcx), %xmm2
 	movaps	20(%rcx), %xmm3
 	movaps	%xmm3, %xmm6
@@ -476,6 +622,9 @@ L(Shl12LoopStart):
 	lea	64(%rcx), %rcx
 	palignr	$12, %xmm1, %xmm2
 	movaps	%xmm7, %xmm1
+# ifdef __CHKP__
+	bndcu  	63(%rdx), %bnd0
+# endif
 	movaps	%xmm5, 48(%rdx)
 	movaps	%xmm4, 32(%rdx)
 	movaps	%xmm3, 16(%rdx)
@@ -484,6 +633,10 @@ L(Shl12LoopStart):
 	jmp	L(Shl12LoopStart)
 
 L(Shl12LoopExit):
+# ifdef __CHKP__
+	bndcu  	(%rcx), %bnd1
+	bndcu  	3(%rdx), %bnd0
+# endif
 	mov	(%rcx), %r9d
 	mov	$4, %rsi
 	mov	%r9d, (%rdx)
@@ -500,6 +653,9 @@ L(CopyFrom1To16Bytes):
 	jnz	L(Exit4)
 
 	mov	(%rcx), %rax
+# ifdef __CHKP__
+	bndcu  	7(%rdx), %bnd0
+# endif
 	mov	%rax, (%rdx)
 	mov	%rdi, %rax
 	ret
@@ -510,6 +666,9 @@ L(ExitHigh):
 	jnz	L(Exit12)
 
 	mov	(%rcx), %rax
+# ifdef __CHKP__
+	bndcu  	15(%rdx), %bnd0
+# endif
 	mov	%rax, (%rdx)
 	mov	8(%rcx), %rax
 	mov	%rax, 8(%rdx)
@@ -519,6 +678,9 @@ L(ExitHigh):
 	.p2align 4
 L(Exit4):
 	movl	(%rcx), %eax
+# ifdef __CHKP__
+	bndcu  	3(%rdx), %bnd0
+# endif
 	movl	%eax, (%rdx)
 	mov	%rdi, %rax
 	ret
@@ -526,6 +688,9 @@ L(Exit4):
 	.p2align 4
 L(Exit8):
 	mov	(%rcx), %rax
+# ifdef __CHKP__
+	bndcu  	7(%rdx), %bnd0
+# endif
 	mov	%rax, (%rdx)
 	mov	%rdi, %rax
 	ret
@@ -533,6 +698,9 @@ L(Exit8):
 	.p2align 4
 L(Exit12):
 	mov	(%rcx), %rax
+# ifdef __CHKP__
+	bndcu  	11(%rdx), %bnd0
+# endif
 	mov	%rax, (%rdx)
 	mov	8(%rcx), %eax
 	mov	%eax, 8(%rdx)
@@ -542,6 +710,9 @@ L(Exit12):
 	.p2align 4
 L(Exit16):
 	mov	(%rcx), %rax
+# ifdef __CHKP__
+	bndcu  	15(%rdx), %bnd0
+# endif
 	mov	%rax, (%rdx)
 	mov	8(%rcx), %rax
 	mov	%rax, 8(%rdx)
diff --git a/sysdeps/x86_64/rawmemchr.S b/sysdeps/x86_64/rawmemchr.S
index f4d559155c..2f4cb25e00 100644
--- a/sysdeps/x86_64/rawmemchr.S
+++ b/sysdeps/x86_64/rawmemchr.S
@@ -20,11 +20,23 @@
 
 #include <sysdep.h>
 
+#ifdef __CHKP__
+# define RETURN \
+        bndcu  (%rax), %bnd0; \
+        ret
+#else
+# define RETURN ret
+#endif
+
 	.text
 ENTRY (rawmemchr)
 	movd	%rsi, %xmm1
 	mov	%rdi, %rcx
 
+#ifdef __CHKP__
+	bndcl  (%rdi), %bnd0
+#endif
+
 	punpcklbw %xmm1, %xmm1
 	punpcklbw %xmm1, %xmm1
 
@@ -63,7 +75,7 @@ L(crosscache):
 
 	add	%rdi, %rax
 	add	%rcx, %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(unaligned_no_match):
@@ -71,24 +83,36 @@ L(unaligned_no_match):
 
 	.p2align 4
 L(loop_prolog):
+#ifdef __CHKP__
+	bndcu 	(%rdi), %bnd0
+#endif
 	movdqa	(%rdi), %xmm0
 	pcmpeqb	%xmm1, %xmm0
 	pmovmskb %xmm0, %eax
 	test	%eax, %eax
 	jnz	L(matches)
 
+#ifdef __CHKP__
+	bndcu 	16(%rdi), %bnd0
+#endif
 	movdqa	16(%rdi), %xmm2
 	pcmpeqb	%xmm1, %xmm2
 	pmovmskb %xmm2, %eax
 	test	%eax, %eax
 	jnz	L(matches16)
 
+#ifdef __CHKP__
+	bndcu 	32(%rdi), %bnd0
+#endif
 	movdqa	32(%rdi), %xmm3
 	pcmpeqb	%xmm1, %xmm3
 	pmovmskb %xmm3, %eax
 	test	%eax, %eax
 	jnz	L(matches32)
 
+#ifdef __CHKP__
+	bndcu 	48(%rdi), %bnd0
+#endif
 	movdqa	48(%rdi), %xmm4
 	pcmpeqb	%xmm1, %xmm4
 	add	$64, %rdi
@@ -99,24 +123,36 @@ L(loop_prolog):
 	test	$0x3f, %rdi
 	jz	L(align64_loop)
 
+#ifdef __CHKP__
+	bndcu 	(%rdi), %bnd0
+#endif
 	movdqa	(%rdi), %xmm0
 	pcmpeqb	%xmm1, %xmm0
 	pmovmskb %xmm0, %eax
 	test	%eax, %eax
 	jnz	L(matches)
 
+#ifdef __CHKP__
+	bndcu 	16(%rdi), %bnd0
+#endif
 	movdqa	16(%rdi), %xmm2
 	pcmpeqb	%xmm1, %xmm2
 	pmovmskb %xmm2, %eax
 	test	%eax, %eax
 	jnz	L(matches16)
 
+#ifdef __CHKP__
+	bndcu 	32(%rdi), %bnd0
+#endif
 	movdqa	32(%rdi), %xmm3
 	pcmpeqb	%xmm1, %xmm3
 	pmovmskb %xmm3, %eax
 	test	%eax, %eax
 	jnz	L(matches32)
 
+#ifdef __CHKP__
+	bndcu 	48(%rdi), %bnd0
+#endif
 	movdqa	48(%rdi), %xmm3
 	pcmpeqb	%xmm1, %xmm3
 	pmovmskb %xmm3, %eax
@@ -129,6 +165,9 @@ L(loop_prolog):
 
 	.p2align 4
 L(align64_loop):
+#ifdef __CHKP__
+	bndcu 	(%rdi), %bnd0
+#endif
 	movdqa	(%rdi), %xmm0
 	movdqa	16(%rdi), %xmm2
 	movdqa	32(%rdi), %xmm3
@@ -170,36 +209,36 @@ L(align64_loop):
 	pmovmskb %xmm1, %eax
 	bsf	%eax, %eax
 	lea	48(%rdi, %rax), %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(matches0):
 	bsf	%eax, %eax
 	lea	-16(%rax, %rdi), %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(matches):
 	bsf	%eax, %eax
 	add	%rdi, %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(matches16):
 	bsf	%eax, %eax
 	lea	16(%rax, %rdi), %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(matches32):
 	bsf	%eax, %eax
 	lea	32(%rax, %rdi), %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(return_null):
 	xor	%rax, %rax
-	ret
+	RETURN
 
 END (rawmemchr)
 
diff --git a/sysdeps/x86_64/stpcpy_chk-c.c b/sysdeps/x86_64/stpcpy_chk-c.c
new file mode 100644
index 0000000000..900fa761cc
--- /dev/null
+++ b/sysdeps/x86_64/stpcpy_chk-c.c
@@ -0,0 +1 @@
+#include <debug/stpcpy_chk.c>
diff --git a/sysdeps/x86_64/strcat.S b/sysdeps/x86_64/strcat.S
index 8bea6fb5db..783237937d 100644
--- a/sysdeps/x86_64/strcat.S
+++ b/sysdeps/x86_64/strcat.S
@@ -25,6 +25,11 @@
 
 	.text
 ENTRY (strcat)
+#ifdef __CHKP__
+	bndcl (%rdi), %bnd0
+	bndcl (%rsi), %bnd1
+#endif
+
 	movq %rdi, %rcx		/* Dest. register. */
 	andl $7, %ecx		/* mask alignment bits */
 	movq %rdi, %rax		/* Duplicate destination pointer.  */
@@ -36,7 +41,11 @@ ENTRY (strcat)
 	neg %ecx		/* We need to align to 8 bytes.  */
 	addl $8,%ecx
 	/* Search the first bytes directly.  */
-0:	cmpb $0x0,(%rax)	/* is byte NUL? */
+0:
+#ifdef __CHKP__
+	bndcu (%rax), %bnd0
+#endif
+	cmpb $0x0,(%rax)	/* is byte NUL? */
 	je 2f			/* yes => start copy */
 	incq %rax		/* increment pointer */
 	decl %ecx
@@ -48,6 +57,9 @@ ENTRY (strcat)
 	.p2align 4
 4:
 	/* First unroll.  */
+#ifdef __CHKP__
+	bndcu (%rax), %bnd0
+#endif
 	movq (%rax), %rcx	/* get double word (= 8 bytes) in question */
 	addq $8,%rax		/* adjust pointer for next word */
 	movq %r8, %rdx		/* magic value */
@@ -62,6 +74,9 @@ ENTRY (strcat)
 	jnz 3f			/* found NUL => return pointer */
 
 	/* Second unroll.  */
+#ifdef __CHKP__
+	bndcu (%rax), %bnd0
+#endif
 	movq (%rax), %rcx	/* get double word (= 8 bytes) in question */
 	addq $8,%rax		/* adjust pointer for next word */
 	movq %r8, %rdx		/* magic value */
@@ -76,6 +91,9 @@ ENTRY (strcat)
 	jnz 3f			/* found NUL => return pointer */
 
 	/* Third unroll.  */
+#ifdef __CHKP__
+	bndcu (%rax), %bnd0
+#endif
 	movq (%rax), %rcx	/* get double word (= 8 bytes) in question */
 	addq $8,%rax		/* adjust pointer for next word */
 	movq %r8, %rdx		/* magic value */
@@ -90,6 +108,9 @@ ENTRY (strcat)
 	jnz 3f			/* found NUL => return pointer */
 
 	/* Fourth unroll.  */
+#ifdef __CHKP__
+	bndcu (%rax), %bnd0
+#endif
 	movq (%rax), %rcx	/* get double word (= 8 bytes) in question */
 	addq $8,%rax		/* adjust pointer for next word */
 	movq %r8, %rdx		/* magic value */
@@ -163,6 +184,9 @@ ENTRY (strcat)
 	.p2align 4
 22:
 	/* 1st unroll.  */
+#ifdef __CHKP__
+	bndcu	(%rsi), %bnd1
+#endif
 	movq	(%rsi), %rax	/* Read double word (8 bytes).  */
 	addq	$8, %rsi	/* Adjust pointer for next word.  */
 	movq	%rax, %r9	/* Save a copy for NUL finding.  */
@@ -177,10 +201,16 @@ ENTRY (strcat)
 
 	jnz	23f		/* found NUL => return pointer */
 
+#ifdef __CHKP__
+	bndcu	(%rdx), %bnd0
+#endif
 	movq	%rax, (%rdx)	/* Write value to destination.  */
 	addq	$8, %rdx	/* Adjust pointer.  */
 
 	/* 2nd unroll.  */
+#ifdef __CHKP__
+	bndcu	(%rsi), %bnd1
+#endif
 	movq	(%rsi), %rax	/* Read double word (8 bytes).  */
 	addq	$8, %rsi	/* Adjust pointer for next word.  */
 	movq	%rax, %r9	/* Save a copy for NUL finding.  */
@@ -195,10 +225,16 @@ ENTRY (strcat)
 
 	jnz	23f		/* found NUL => return pointer */
 
+#ifdef __CHKP__
+	bndcu	(%rdx), %bnd0
+#endif
 	movq	%rax, (%rdx)	/* Write value to destination.  */
 	addq	$8, %rdx	/* Adjust pointer.  */
 
 	/* 3rd unroll.  */
+#ifdef __CHKP__
+	bndcu	(%rsi), %bnd1
+#endif
 	movq	(%rsi), %rax	/* Read double word (8 bytes).  */
 	addq	$8, %rsi	/* Adjust pointer for next word.  */
 	movq	%rax, %r9	/* Save a copy for NUL finding.  */
@@ -213,10 +249,16 @@ ENTRY (strcat)
 
 	jnz	23f		/* found NUL => return pointer */
 
+#ifdef __CHKP__
+	bndcu	(%rdx), %bnd0
+#endif
 	movq	%rax, (%rdx)	/* Write value to destination.  */
 	addq	$8, %rdx	/* Adjust pointer.  */
 
 	/* 4th unroll.  */
+#ifdef __CHKP__
+	bndcu	(%rsi), %bnd1
+#endif
 	movq	(%rsi), %rax	/* Read double word (8 bytes).  */
 	addq	$8, %rsi	/* Adjust pointer for next word.  */
 	movq	%rax, %r9	/* Save a copy for NUL finding.  */
@@ -231,6 +273,9 @@ ENTRY (strcat)
 
 	jnz	23f		/* found NUL => return pointer */
 
+#ifdef __CHKP__
+	bndcu	(%rdx), %bnd0
+#endif
 	movq	%rax, (%rdx)	/* Write value to destination.  */
 	addq	$8, %rdx	/* Adjust pointer.  */
 	jmp	22b		/* Next iteration.  */
@@ -239,10 +284,16 @@ ENTRY (strcat)
 	   The loop is unrolled twice.  */
 	.p2align 4
 23:
+#ifdef __CHKP__
+	bndcu	(%rdx), %bnd0
+#endif
 	movb	%al, (%rdx)	/* 1st byte.  */
 	testb	%al, %al	/* Is it NUL.  */
 	jz	24f		/* yes, finish.  */
 	incq	%rdx		/* Increment destination.  */
+#ifdef __CHKP__
+	bndcu	(%rdx), %bnd0
+#endif
 	movb	%ah, (%rdx)	/* 2nd byte.  */
 	testb	%ah, %ah	/* Is it NUL?.  */
 	jz	24f		/* yes, finish.  */
diff --git a/sysdeps/x86_64/strchr.S b/sysdeps/x86_64/strchr.S
index d89f1eba8f..8519a81117 100644
--- a/sysdeps/x86_64/strchr.S
+++ b/sysdeps/x86_64/strchr.S
@@ -22,6 +22,10 @@
 
 	.text
 ENTRY (strchr)
+#ifdef __CHKP__
+	bndcl	(%rdi), %bnd0
+	bndcu	(%rdi), %bnd0
+#endif
 	movd	%esi, %xmm1
 	movq	%rdi, %rcx
 	punpcklbw %xmm1, %xmm1
@@ -29,6 +33,9 @@ ENTRY (strchr)
 	pxor	%xmm2, %xmm2
 	punpcklbw %xmm1, %xmm1
 	orl	$0xffffffff, %esi
+#ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+#endif
 	movdqa	(%rdi), %xmm0
 	pshufd	$0, %xmm1, %xmm1
 	subq	%rdi, %rcx
@@ -44,7 +51,11 @@ ENTRY (strchr)
 	orl	%edx, %ecx
 	jnz	1f
 
-2:	movdqa	(%rdi), %xmm0
+2:
+#ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+#endif
+	movdqa	(%rdi), %xmm0
 	leaq	16(%rdi), %rdi
 	movdqa	%xmm0, %xmm3
 	pcmpeqb	%xmm1, %xmm0
diff --git a/sysdeps/x86_64/strchrnul.S b/sysdeps/x86_64/strchrnul.S
index d8c345ba7d..3e4abfa217 100644
--- a/sysdeps/x86_64/strchrnul.S
+++ b/sysdeps/x86_64/strchrnul.S
@@ -23,6 +23,10 @@
 
 	.text
 ENTRY (__strchrnul)
+#ifdef __CHKP__
+	bndcl 	(%rdi), %bnd0
+	bndcu 	(%rdi), %bnd0
+#endif
 	movd	%esi, %xmm1
 	movq	%rdi, %rcx
 	punpcklbw %xmm1, %xmm1
@@ -44,7 +48,11 @@ ENTRY (__strchrnul)
 	andl	%esi, %ecx
 	jnz	1f
 
-2:	movdqa	(%rdi), %xmm0
+2:
+#ifdef __CHKP__
+	bndcu 	(%rdi), %bnd0
+#endif
+	movdqa	(%rdi), %xmm0
 	leaq	16(%rdi), %rdi
 	movdqa	%xmm0, %xmm3
 	pcmpeqb	%xmm1, %xmm0
@@ -56,6 +64,9 @@ ENTRY (__strchrnul)
 
 1:	bsfl	%ecx, %edx
 	leaq	-16(%rdi,%rdx), %rax
+#ifdef __CHKP__
+	bndcu 	(%rax), %bnd0
+#endif
 	ret
 END (__strchrnul)
 
diff --git a/sysdeps/x86_64/strcmp.S b/sysdeps/x86_64/strcmp.S
index 76809373e8..89597841ad 100644
--- a/sysdeps/x86_64/strcmp.S
+++ b/sysdeps/x86_64/strcmp.S
@@ -128,7 +128,16 @@ libc_hidden_def (__strncasecmp)
 ENTRY (STRCMP)
 #ifdef NOT_IN_libc
 /* Simple version since we can't use SSE registers in ld.so.  */
-L(oop):	movb	(%rdi), %al
+#ifdef __CHKP__
+	bndcl	(%rdi), %bnd0
+	bndcl	(%rsi), %bnd1
+#endif
+L(oop):
+#ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+	bndcu	(%rsi), %bnd1
+#endif
+	movb	(%rdi), %al
 	cmpb	(%rsi), %al
 	jne	L(neq)
 	incq	%rdi
@@ -177,6 +186,12 @@ END (STRCMP)
 	je	LABEL(Byte0)
 	mov	%rdx, %r11
 # endif
+#ifdef __CHKP__
+	bndcl  	(%rdi), %bnd0
+	bndcu  	(%rdi), %bnd0
+	bndcl  	(%rsi), %bnd1
+	bndcu  	(%rsi), %bnd1
+#endif
 	mov	%esi, %ecx
 	mov	%edi, %eax
 /* Use 64bit AND here to avoid long NOP padding.  */
@@ -243,6 +258,10 @@ END (STRCMP)
 # endif
 	add	$16, %rsi		/* prepare to search next 16 bytes */
 	add	$16, %rdi		/* prepare to search next 16 bytes */
+#ifdef __CHKP__
+	bndcu  	(%rdi), %bnd0
+	bndcu  	(%rsi), %bnd1
+#endif
 
 	/*
 	 * Determine source and destination string offsets from 16-byte alignment.
@@ -263,6 +282,11 @@ LABEL(crosscache):
 	mov	%edx, %r8d			/* r8d is offset flag for exit tail */
 	xchg	%ecx, %eax
 	xchg	%rsi, %rdi
+#ifdef __CHKP__
+	bndmov 	%bnd0, %bnd2
+	bndmov 	%bnd1, %bnd0
+	bndmov 	%bnd2, %bnd1
+#endif
 LABEL(bigger):
 	lea	15(%rax), %r9
 	sub	%rcx, %r9
@@ -310,6 +334,10 @@ LABEL(ashr_0):
 	 */
 	.p2align 4
 LABEL(loop_ashr_0):
+#ifdef __CHKP__
+	bndcu	-1(%rdi, %rcx), %bnd0
+	bndcu	-1(%rsi, %rcx), %bnd1
+#endif
 	movdqa	(%rsi, %rcx), %xmm1
 	movdqa	(%rdi, %rcx), %xmm2
 	TOLOWER (%xmm1, %xmm2)
@@ -326,6 +354,10 @@ LABEL(loop_ashr_0):
 	jbe	LABEL(strcmp_exitz)
 # endif
 	add	$16, %rcx
+#ifdef __CHKP__
+	bndcu	-1(%rdi, %rcx), %bnd0
+	bndcu	-1(%rsi, %rcx), %bnd1
+#endif
 	movdqa	(%rsi, %rcx), %xmm1
 	movdqa	(%rdi, %rcx), %xmm2
 	TOLOWER (%xmm1, %xmm2)
@@ -377,6 +409,15 @@ LABEL(ashr_1):
 	lea	1(%rdi), %r10
 	and	$0xfff, %r10		/* offset into 4K page */
 	sub	$0x1000, %r10		/* subtract 4K pagesize */
+# ifdef __CHKP__
+	bndcu	-16(%rdi, %rcx), %bnd0
+	bndcu	-16(%rsi, %rcx), %bnd1
+	jmp 	LABEL(loop_ashr_1)
+LABEL(ashr_1_check):
+	bndcu	(%rdi, %rcx), %bnd0
+	bndcu	(%rsi, %rcx), %bnd1
+	jmp	LABEL(gobble_ashr_1)
+# endif
 
 	.p2align 4
 LABEL(loop_ashr_1):
@@ -460,7 +501,11 @@ LABEL(nibble_ashr_1):
 
 	pxor	%xmm0, %xmm0
 	sub	$0x1000, %r10		/* substract 4K from %r10 */
+# ifdef __CHKP__
+	jmp	LABEL(ashr_1_check)
+# else
 	jmp	LABEL(gobble_ashr_1)
+# endif
 
 	/*
 	 * Once find null char, determine if there is a string mismatch
@@ -507,6 +552,15 @@ LABEL(ashr_2):
 	lea	2(%rdi), %r10
 	and	$0xfff, %r10	/* offset into 4K page */
 	sub	$0x1000, %r10	/* subtract 4K pagesize */
+# ifdef __CHKP__
+	bndcu	-16(%rdi, %rcx), %bnd0
+	bndcu	-16(%rsi, %rcx), %bnd1
+	jmp 	LABEL(loop_ashr_2)
+LABEL(ashr_2_check):
+	bndcu	(%rdi, %rcx), %bnd0
+	bndcu	(%rsi, %rcx), %bnd1
+	jmp	LABEL(gobble_ashr_2)
+# endif
 
 	.p2align 4
 LABEL(loop_ashr_2):
@@ -588,7 +642,11 @@ LABEL(nibble_ashr_2):
 
 	pxor	%xmm0, %xmm0
 	sub	$0x1000, %r10
+# ifdef __CHKP__
+	jmp	LABEL(ashr_2_check)
+# else
 	jmp	LABEL(gobble_ashr_2)
+# endif
 
 	.p2align 4
 LABEL(ashr_2_exittail):
@@ -632,6 +690,15 @@ LABEL(ashr_3):
 	lea	3(%rdi), %r10
 	and	$0xfff, %r10	/* offset into 4K page */
 	sub	$0x1000, %r10	/* subtract 4K pagesize */
+# ifdef __CHKP__
+	bndcu	-16(%rdi, %rcx), %bnd0
+	bndcu	-16(%rsi, %rcx), %bnd1
+	jmp 	LABEL(loop_ashr_3)
+LABEL(ashr_3_check):
+	bndcu	(%rdi, %rcx), %bnd0
+	bndcu	(%rsi, %rcx), %bnd1
+	jmp	LABEL(gobble_ashr_3)
+# endif
 
 	.p2align 4
 LABEL(loop_ashr_3):
@@ -713,7 +780,11 @@ LABEL(nibble_ashr_3):
 
 	pxor	%xmm0, %xmm0
 	sub	$0x1000, %r10
+# ifdef __CHKP__
+	jmp	LABEL(ashr_3_check)
+# else
 	jmp	LABEL(gobble_ashr_3)
+# endif
 
 	.p2align 4
 LABEL(ashr_3_exittail):
@@ -757,6 +828,15 @@ LABEL(ashr_4):
 	lea	4(%rdi), %r10
 	and	$0xfff, %r10	/* offset into 4K page */
 	sub	$0x1000, %r10	/* subtract 4K pagesize */
+# ifdef __CHKP__
+	bndcu	-16(%rdi, %rcx), %bnd0
+	bndcu	-16(%rsi, %rcx), %bnd1
+	jmp 	LABEL(loop_ashr_4)
+LABEL(ashr_4_check):
+	bndcu	(%rdi, %rcx), %bnd0
+	bndcu	(%rsi, %rcx), %bnd1
+	jmp	LABEL(gobble_ashr_4)
+# endif
 
 	.p2align 4
 LABEL(loop_ashr_4):
@@ -838,7 +918,11 @@ LABEL(nibble_ashr_4):
 
 	pxor	%xmm0, %xmm0
 	sub	$0x1000, %r10
+# ifdef __CHKP__
+	jmp	LABEL(ashr_4_check)
+# else
 	jmp	LABEL(gobble_ashr_4)
+# endif
 
 	.p2align 4
 LABEL(ashr_4_exittail):
@@ -882,6 +966,15 @@ LABEL(ashr_5):
 	lea	5(%rdi), %r10
 	and	$0xfff, %r10	/* offset into 4K page */
 	sub	$0x1000, %r10	/* subtract 4K pagesize */
+# ifdef __CHKP__
+	bndcu	-16(%rdi, %rcx), %bnd0
+	bndcu	-16(%rsi, %rcx), %bnd1
+	jmp 	LABEL(loop_ashr_5)
+LABEL(ashr_5_check):
+	bndcu	(%rdi, %rcx), %bnd0
+	bndcu	(%rsi, %rcx), %bnd1
+	jmp	LABEL(gobble_ashr_5)
+# endif
 
 	.p2align 4
 LABEL(loop_ashr_5):
@@ -963,7 +1056,11 @@ LABEL(nibble_ashr_5):
 
 	pxor	%xmm0, %xmm0
 	sub	$0x1000, %r10
+# ifdef __CHKP__
+	jmp	LABEL(ashr_5_check)
+# else
 	jmp	LABEL(gobble_ashr_5)
+# endif
 
 	.p2align 4
 LABEL(ashr_5_exittail):
@@ -1007,6 +1104,15 @@ LABEL(ashr_6):
 	lea	6(%rdi), %r10
 	and	$0xfff, %r10	/* offset into 4K page */
 	sub	$0x1000, %r10	/* subtract 4K pagesize */
+# ifdef __CHKP__
+	bndcu	-16(%rdi, %rcx), %bnd0
+	bndcu	-16(%rsi, %rcx), %bnd1
+	jmp 	LABEL(loop_ashr_6)
+LABEL(ashr_6_check):
+	bndcu	(%rdi, %rcx), %bnd0
+	bndcu	(%rsi, %rcx), %bnd1
+	jmp	LABEL(gobble_ashr_6)
+# endif
 
 	.p2align 4
 LABEL(loop_ashr_6):
@@ -1088,7 +1194,11 @@ LABEL(nibble_ashr_6):
 
 	pxor	%xmm0, %xmm0
 	sub	$0x1000, %r10
+# ifdef __CHKP__
+	jmp	LABEL(ashr_6_check)
+# else
 	jmp	LABEL(gobble_ashr_6)
+# endif
 
 	.p2align 4
 LABEL(ashr_6_exittail):
@@ -1132,6 +1242,15 @@ LABEL(ashr_7):
 	lea	7(%rdi), %r10
 	and	$0xfff, %r10	/* offset into 4K page */
 	sub	$0x1000, %r10	/* subtract 4K pagesize */
+# ifdef __CHKP__
+	bndcu	-16(%rdi, %rcx), %bnd0
+	bndcu	-16(%rsi, %rcx), %bnd1
+	jmp 	LABEL(loop_ashr_7)
+LABEL(ashr_7_check):
+	bndcu	(%rdi, %rcx), %bnd0
+	bndcu	(%rsi, %rcx), %bnd1
+	jmp	LABEL(gobble_ashr_7)
+# endif
 
 	.p2align 4
 LABEL(loop_ashr_7):
@@ -1213,7 +1332,11 @@ LABEL(nibble_ashr_7):
 
 	pxor	%xmm0, %xmm0
 	sub	$0x1000, %r10
+# ifdef __CHKP__
+	jmp	LABEL(ashr_7_check)
+# else
 	jmp	LABEL(gobble_ashr_7)
+# endif
 
 	.p2align 4
 LABEL(ashr_7_exittail):
@@ -1257,6 +1380,15 @@ LABEL(ashr_8):
 	lea	8(%rdi), %r10
 	and	$0xfff, %r10	/* offset into 4K page */
 	sub	$0x1000, %r10	/* subtract 4K pagesize */
+# ifdef __CHKP__
+	bndcu	-16(%rdi, %rcx), %bnd0
+	bndcu	-16(%rsi, %rcx), %bnd1
+	jmp 	LABEL(loop_ashr_8)
+LABEL(ashr_8_check):
+	bndcu	(%rdi, %rcx), %bnd0
+	bndcu	(%rsi, %rcx), %bnd1
+	jmp	LABEL(gobble_ashr_8)
+# endif
 
 	.p2align 4
 LABEL(loop_ashr_8):
@@ -1338,7 +1470,11 @@ LABEL(nibble_ashr_8):
 
 	pxor	%xmm0, %xmm0
 	sub	$0x1000, %r10
+# ifdef __CHKP__
+	jmp	LABEL(ashr_8_check)
+# else
 	jmp	LABEL(gobble_ashr_8)
+# endif
 
 	.p2align 4
 LABEL(ashr_8_exittail):
@@ -1382,6 +1518,15 @@ LABEL(ashr_9):
 	lea	9(%rdi), %r10
 	and	$0xfff, %r10	/* offset into 4K page */
 	sub	$0x1000, %r10	/* subtract 4K pagesize */
+# ifdef __CHKP__
+	bndcu	-16(%rdi, %rcx), %bnd0
+	bndcu	-16(%rsi, %rcx), %bnd1
+	jmp 	LABEL(loop_ashr_9)
+LABEL(ashr_9_check):
+	bndcu	(%rdi, %rcx), %bnd0
+	bndcu	(%rsi, %rcx), %bnd1
+	jmp	LABEL(gobble_ashr_9)
+# endif
 
 	.p2align 4
 LABEL(loop_ashr_9):
@@ -1463,7 +1608,11 @@ LABEL(nibble_ashr_9):
 
 	pxor	%xmm0, %xmm0
 	sub	$0x1000, %r10
+# ifdef __CHKP__
+	jmp	LABEL(ashr_9_check)
+# else
 	jmp	LABEL(gobble_ashr_9)
+# endif
 
 	.p2align 4
 LABEL(ashr_9_exittail):
@@ -1507,6 +1656,15 @@ LABEL(ashr_10):
 	lea	10(%rdi), %r10
 	and	$0xfff, %r10	/* offset into 4K page */
 	sub	$0x1000, %r10	/* subtract 4K pagesize */
+# ifdef __CHKP__
+	bndcu	-16(%rdi, %rcx), %bnd0
+	bndcu	-16(%rsi, %rcx), %bnd1
+	jmp 	LABEL(loop_ashr_10)
+LABEL(ashr_10_check):
+	bndcu	(%rdi, %rcx), %bnd0
+	bndcu	(%rsi, %rcx), %bnd1
+	jmp	LABEL(gobble_ashr_10)
+# endif
 
 	.p2align 4
 LABEL(loop_ashr_10):
@@ -1588,7 +1746,11 @@ LABEL(nibble_ashr_10):
 
 	pxor	%xmm0, %xmm0
 	sub	$0x1000, %r10
+# ifdef __CHKP__
+	jmp	LABEL(ashr_10_check)
+# else
 	jmp	LABEL(gobble_ashr_10)
+# endif
 
 	.p2align 4
 LABEL(ashr_10_exittail):
@@ -1632,6 +1794,15 @@ LABEL(ashr_11):
 	lea	11(%rdi), %r10
 	and	$0xfff, %r10	/* offset into 4K page */
 	sub	$0x1000, %r10	/* subtract 4K pagesize */
+# ifdef __CHKP__
+	bndcu	-16(%rdi, %rcx), %bnd0
+	bndcu	-16(%rsi, %rcx), %bnd1
+	jmp 	LABEL(loop_ashr_11)
+LABEL(ashr_11_check):
+	bndcu	(%rdi, %rcx), %bnd0
+	bndcu	(%rsi, %rcx), %bnd1
+	jmp	LABEL(gobble_ashr_11)
+# endif
 
 	.p2align 4
 LABEL(loop_ashr_11):
@@ -1713,7 +1884,11 @@ LABEL(nibble_ashr_11):
 
 	pxor	%xmm0, %xmm0
 	sub	$0x1000, %r10
+# ifdef __CHKP__
+	jmp	LABEL(ashr_11_check)
+# else
 	jmp	LABEL(gobble_ashr_11)
+# endif
 
 	.p2align 4
 LABEL(ashr_11_exittail):
@@ -1757,6 +1932,15 @@ LABEL(ashr_12):
 	lea	12(%rdi), %r10
 	and	$0xfff, %r10	/* offset into 4K page */
 	sub	$0x1000, %r10	/* subtract 4K pagesize */
+# ifdef __CHKP__
+	bndcu	-16(%rdi, %rcx), %bnd0
+	bndcu	-16(%rsi, %rcx), %bnd1
+	jmp 	LABEL(loop_ashr_12)
+LABEL(ashr_12_check):
+	bndcu	(%rdi, %rcx), %bnd0
+	bndcu	(%rsi, %rcx), %bnd1
+	jmp	LABEL(gobble_ashr_12)
+# endif
 
 	.p2align 4
 LABEL(loop_ashr_12):
@@ -1838,7 +2022,11 @@ LABEL(nibble_ashr_12):
 
 	pxor	%xmm0, %xmm0
 	sub	$0x1000, %r10
+# ifdef __CHKP__
+	jmp	LABEL(ashr_12_check)
+# else
 	jmp	LABEL(gobble_ashr_12)
+# endif
 
 	.p2align 4
 LABEL(ashr_12_exittail):
@@ -1882,6 +2070,15 @@ LABEL(ashr_13):
 	lea	13(%rdi), %r10
 	and	$0xfff, %r10	/* offset into 4K page */
 	sub	$0x1000, %r10	/* subtract 4K pagesize */
+# ifdef __CHKP__
+	bndcu	-16(%rdi, %rcx), %bnd0
+	bndcu	-16(%rsi, %rcx), %bnd1
+	jmp 	LABEL(loop_ashr_13)
+LABEL(ashr_13_check):
+	bndcu	(%rdi, %rcx), %bnd0
+	bndcu	(%rsi, %rcx), %bnd1
+	jmp	LABEL(gobble_ashr_13)
+# endif
 
 	.p2align 4
 LABEL(loop_ashr_13):
@@ -1963,7 +2160,11 @@ LABEL(nibble_ashr_13):
 
 	pxor	%xmm0, %xmm0
 	sub	$0x1000, %r10
+# ifdef __CHKP__
+	jmp	LABEL(ashr_13_check)
+# else
 	jmp	LABEL(gobble_ashr_13)
+# endif
 
 	.p2align 4
 LABEL(ashr_13_exittail):
@@ -2007,6 +2208,15 @@ LABEL(ashr_14):
 	lea	14(%rdi), %r10
 	and	$0xfff, %r10	/* offset into 4K page */
 	sub	$0x1000, %r10	/* subtract 4K pagesize */
+# ifdef __CHKP__
+	bndcu	-16(%rdi, %rcx), %bnd0
+	bndcu	-16(%rsi, %rcx), %bnd1
+	jmp 	LABEL(loop_ashr_14)
+LABEL(ashr_14_check):
+	bndcu	(%rdi, %rcx), %bnd0
+	bndcu	(%rsi, %rcx), %bnd1
+	jmp	LABEL(gobble_ashr_14)
+# endif
 
 	.p2align 4
 LABEL(loop_ashr_14):
@@ -2088,7 +2298,11 @@ LABEL(nibble_ashr_14):
 
 	pxor	%xmm0, %xmm0
 	sub	$0x1000, %r10
+# ifdef __CHKP__
+	jmp	LABEL(ashr_14_check)
+# else
 	jmp	LABEL(gobble_ashr_14)
+# endif
 
 	.p2align 4
 LABEL(ashr_14_exittail):
@@ -2134,6 +2348,15 @@ LABEL(ashr_15):
 	and	$0xfff, %r10	/* offset into 4K page */
 
 	sub	$0x1000, %r10	/* subtract 4K pagesize */
+# ifdef __CHKP__
+	bndcu	-16(%rdi, %rcx), %bnd0
+	bndcu	-16(%rsi, %rcx), %bnd1
+	jmp 	LABEL(loop_ashr_15)
+LABEL(ashr_15_check):
+	bndcu	(%rdi, %rcx), %bnd0
+	bndcu	(%rsi, %rcx), %bnd1
+	jmp	LABEL(gobble_ashr_15)
+# endif
 
 	.p2align 4
 LABEL(loop_ashr_15):
@@ -2215,7 +2438,11 @@ LABEL(nibble_ashr_15):
 
 	pxor	%xmm0, %xmm0
 	sub	$0x1000, %r10
+# ifdef __CHKP__
+	jmp	LABEL(ashr_15_check)
+# else
 	jmp	LABEL(gobble_ashr_15)
+# endif
 
 	.p2align 4
 LABEL(ashr_15_exittail):
@@ -2240,6 +2467,11 @@ LABEL(less32bytes):
 	test	%r8d, %r8d
 	jz	LABEL(ret)
 	xchg	%rsi, %rdi		/* recover original order according to flag(%r8d) */
+#ifdef __CHKP__
+	bndmov	%bnd0, %bnd2
+	bndmov	%bnd1, %bnd0
+	bndmov	%bnd2, %bnd1
+#endif
 
 	.p2align 4
 LABEL(ret):
@@ -2250,6 +2482,10 @@ LABEL(less16bytes):
 	sub	%rdx, %r11
 	jbe	LABEL(strcmp_exitz)
 # endif
+/*#ifdef __CHKP__
+	bndcu	(%rdi, %rdx), %bnd0
+	bndcu	(%rsi, %rdx), %bnd1
+#endif*/
 	movzbl	(%rsi, %rdx), %ecx
 	movzbl	(%rdi, %rdx), %eax
 
diff --git a/sysdeps/x86_64/strcpy.S b/sysdeps/x86_64/strcpy.S
index 612824700a..2b78e95427 100644
--- a/sysdeps/x86_64/strcpy.S
+++ b/sysdeps/x86_64/strcpy.S
@@ -26,6 +26,10 @@
 
 	.text
 ENTRY (STRCPY)
+#ifdef __CHKP__
+	bndcl (%rdi), %bnd0
+	bndcl (%rsi), %bnd1
+#endif
 	movq %rsi, %rcx		/* Source register. */
 	andl $7, %ecx		/* mask alignment bits */
 	movq %rdi, %rdx		/* Duplicate destination pointer.  */
@@ -36,8 +40,14 @@ ENTRY (STRCPY)
 	addl $8,%ecx
 	/* Search the first bytes directly.  */
 0:
+#ifdef __CHKP__
+	bndcu	(%rsi), %bnd1
+#endif
 	movb	(%rsi), %al	/* Fetch a byte */
 	testb	%al, %al	/* Is it NUL? */
+#ifdef __CHKP__
+	bndcu	(%rdx), %bnd0
+#endif
 	movb	%al, (%rdx)	/* Store it */
 	jz	4f		/* If it was NUL, done! */
 	incq	%rsi
@@ -54,6 +64,9 @@ ENTRY (STRCPY)
 	.p2align 4
 1:
 	/* 1st unroll.  */
+#ifdef __CHKP__
+	bndcu	(%rsi), %bnd1
+#endif
 	movq	(%rsi), %rax	/* Read double word (8 bytes).  */
 	addq	$8, %rsi	/* Adjust pointer for next word.  */
 	movq	%rax, %r9	/* Save a copy for NUL finding.  */
@@ -68,10 +81,16 @@ ENTRY (STRCPY)
 
 	jnz	3f		/* found NUL => return pointer */
 
+#ifdef __CHKP__
+	bndcu	(%rdx), %bnd0
+#endif
 	movq	%rax, (%rdx)	/* Write value to destination.  */
 	addq	$8, %rdx	/* Adjust pointer.  */
 
 	/* 2nd unroll.  */
+#ifdef __CHKP__
+	bndcu	(%rsi), %bnd1
+#endif
 	movq	(%rsi), %rax	/* Read double word (8 bytes).  */
 	addq	$8, %rsi	/* Adjust pointer for next word.  */
 	movq	%rax, %r9	/* Save a copy for NUL finding.  */
@@ -86,10 +105,16 @@ ENTRY (STRCPY)
 
 	jnz	3f		/* found NUL => return pointer */
 
+#ifdef __CHKP__
+	bndcu	(%rdx), %bnd0
+#endif
 	movq	%rax, (%rdx)	/* Write value to destination.  */
 	addq	$8, %rdx	/* Adjust pointer.  */
 
 	/* 3rd unroll.  */
+#ifdef __CHKP__
+	bndcu	(%rsi), %bnd1
+#endif
 	movq	(%rsi), %rax	/* Read double word (8 bytes).  */
 	addq	$8, %rsi	/* Adjust pointer for next word.  */
 	movq	%rax, %r9	/* Save a copy for NUL finding.  */
@@ -104,10 +129,16 @@ ENTRY (STRCPY)
 
 	jnz	3f		/* found NUL => return pointer */
 
+#ifdef __CHKP__
+	bndcu	(%rdx), %bnd0
+#endif
 	movq	%rax, (%rdx)	/* Write value to destination.  */
 	addq	$8, %rdx	/* Adjust pointer.  */
 
 	/* 4th unroll.  */
+#ifdef __CHKP__
+	bndcu	(%rsi), %bnd1
+#endif
 	movq	(%rsi), %rax	/* Read double word (8 bytes).  */
 	addq	$8, %rsi	/* Adjust pointer for next word.  */
 	movq	%rax, %r9	/* Save a copy for NUL finding.  */
@@ -122,6 +153,9 @@ ENTRY (STRCPY)
 
 	jnz	3f		/* found NUL => return pointer */
 
+#ifdef __CHKP__
+	bndcu	(%rdx), %bnd0
+#endif
 	movq	%rax, (%rdx)	/* Write value to destination.  */
 	addq	$8, %rdx	/* Adjust pointer.  */
 	jmp	1b		/* Next iteration.  */
@@ -132,10 +166,16 @@ ENTRY (STRCPY)
 3:
 	/* Note that stpcpy needs to return with the value of the NUL
 	   byte.  */
+#ifdef __CHKP__
+	bndcu	(%rdx), %bnd0
+#endif
 	movb	%al, (%rdx)	/* 1st byte.  */
 	testb	%al, %al	/* Is it NUL.  */
 	jz	4f		/* yes, finish.  */
 	incq	%rdx		/* Increment destination.  */
+#ifdef __CHKP__
+	bndcu	(%rdx), %bnd0
+#endif
 	movb	%ah, (%rdx)	/* 2nd byte.  */
 	testb	%ah, %ah	/* Is it NUL?.  */
 	jz	4f		/* yes, finish.  */
diff --git a/sysdeps/x86_64/strcpy_chk-c.c b/sysdeps/x86_64/strcpy_chk-c.c
new file mode 100644
index 0000000000..b48704cec4
--- /dev/null
+++ b/sysdeps/x86_64/strcpy_chk-c.c
@@ -0,0 +1 @@
+#include <debug/strcpy_chk.c>
diff --git a/sysdeps/x86_64/strcpy_chk.S b/sysdeps/x86_64/strcpy_chk.S
index 7e171deefc..a1028204c5 100644
--- a/sysdeps/x86_64/strcpy_chk.S
+++ b/sysdeps/x86_64/strcpy_chk.S
@@ -18,6 +18,7 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
+#if !defined  __CHKP__ && !defined __CHKWR__
 #include <sysdep.h>
 #include "asm-syntax.h"
 
@@ -206,3 +207,4 @@ ENTRY (STRCPY_CHK)
 	jmp	HIDDEN_JUMPTARGET (__chk_fail)
 
 END (STRCPY_CHK)
+#endif
diff --git a/sysdeps/x86_64/strcspn.S b/sysdeps/x86_64/strcspn.S
index 65f8a9e96e..0acca21399 100644
--- a/sysdeps/x86_64/strcspn.S
+++ b/sysdeps/x86_64/strcspn.S
@@ -29,6 +29,12 @@
 
 	.text
 ENTRY (strcspn)
+# ifdef __CHKP__
+	bndcl (%rdi), %bnd0
+	bndcu (%rdi), %bnd0
+	bndcl (%rsi), %bnd1
+	bndcu (%rsi), %bnd1
+# endif
 
 	movq %rdi, %rdx		/* Save SRC.  */
 
@@ -54,21 +60,34 @@ ENTRY (strcspn)
    have a correct zero-extended 64-bit value in %rcx.  */
 
 	.p2align 4
-L(2):	movb (%rax), %cl	/* get byte from skipset */
+L(2):
+# ifdef __CHKP__
+	bndcu (%rax), %bnd1
+# endif
+	movb (%rax), %cl	/* get byte from skipset */
 	testb %cl, %cl		/* is NUL char? */
 	jz L(1)			/* yes => start compare loop */
 	movb %cl, (%rsp,%rcx)	/* set corresponding byte in skipset table */
 
+# ifdef __CHKP__
+	bndcu 1(%rax), %bnd1
+# endif
 	movb 1(%rax), %cl	/* get byte from skipset */
 	testb $0xff, %cl	/* is NUL char? */
 	jz L(1)			/* yes => start compare loop */
 	movb %cl, (%rsp,%rcx)	/* set corresponding byte in skipset table */
 
+# ifdef __CHKP__
+	bndcu 2(%rax), %bnd1
+# endif
 	movb 2(%rax), %cl	/* get byte from skipset */
 	testb $0xff, %cl	/* is NUL char? */
 	jz L(1)			/* yes => start compare loop */
 	movb %cl, (%rsp,%rcx)	/* set corresponding byte in skipset table */
 
+# ifdef __CHKP__
+	bndcu 3(%rax), %bnd1
+# endif
 	movb 3(%rax), %cl	/* get byte from skipset */
 	addq $4, %rax		/* increment skipset pointer */
 	movb %cl, (%rsp,%rcx)	/* set corresponding byte in skipset table */
@@ -89,18 +108,30 @@ L(1):	leaq -4(%rdx), %rax	/* prepare loop */
 	.p2align 4
 L(3):	addq $4, %rax		/* adjust pointer for full loop round */
 
+# ifdef __CHKP__
+	bndcu (%rax), %bnd0
+# endif
 	movb (%rax), %cl	/* get byte from string */
 	cmpb %cl, (%rsp,%rcx)	/* is it contained in skipset? */
 	je L(4)			/* yes => return */
 
+# ifdef __CHKP__
+	bndcu 1(%rax), %bnd0
+# endif
 	movb 1(%rax), %cl	/* get byte from string */
 	cmpb %cl, (%rsp,%rcx)	/* is it contained in skipset? */
 	je L(5)			/* yes => return */
 
+# ifdef __CHKP__
+	bndcu 2(%rax), %bnd0
+# endif
 	movb 2(%rax), %cl	/* get byte from string */
 	cmpb %cl, (%rsp,%rcx)	/* is it contained in skipset? */
 	jz L(6)			/* yes => return */
 
+# ifdef __CHKP__
+	bndcu 3(%rax), %bnd0
+# endif
 	movb 3(%rax), %cl	/* get byte from string */
 	cmpb %cl, (%rsp,%rcx)	/* is it contained in skipset? */
 	jne L(3)		/* no => start loop again */
diff --git a/sysdeps/x86_64/strlen.S b/sysdeps/x86_64/strlen.S
index eeb1092218..065f0e6d56 100644
--- a/sysdeps/x86_64/strlen.S
+++ b/sysdeps/x86_64/strlen.S
@@ -63,6 +63,10 @@ L(n_nonzero):
 	mov	%rsi, %r11
 #endif
 
+#ifdef __CHKP__
+	bndcl	(%rdi), %bnd0
+	bndcu	(%rdi), %bnd0
+#endif
 	pxor	%xmm8, %xmm8
 	pxor	%xmm9, %xmm9
 	pxor	%xmm10, %xmm10
@@ -157,6 +161,9 @@ L(loop_init):
 L(loop):
 
 	addq	$64, %rax
+# ifdef __CHKP__
+	bndcu	(%rax), %bnd0
+# endif
 	cmpq	%rax, %r10
 	je	L(exit_end)
 
@@ -182,6 +189,9 @@ L(first):
 	bsfq	%rdx, %rdx
 	addq	%rdx, %rax
 	subq	%rdi, %rax
+# ifdef __CHKP__
+	bndcu	-1(%rdi, %rax), %bnd0
+# endif
 	ret
 
 	.p2align 4
@@ -192,6 +202,9 @@ L(exit):
 	bsfq	%rdx, %rdx
 	addq	%rdx, %rax
 	subq	%rdi, %rax
+# ifdef __CHKP__
+	bndcu	-1(%rdi, %rax), %bnd0
+# endif
 	ret
 
 #else
@@ -199,6 +212,9 @@ L(exit):
 	/* Main loop.  Unrolled twice to improve L2 cache performance on core2.  */
 	.p2align 4
 L(loop):
+# ifdef __CHKP__
+	bndcu	64(%rax), %bnd0
+# endif
 
 	movdqa	64(%rax), %xmm8
 	pminub	80(%rax), %xmm8
@@ -231,6 +247,9 @@ L(exit0):
 	bsfq	%rdx, %rdx
 	addq	%rdx, %rax
 	subq	%rdi, %rax
+# ifdef __CHKP__
+	bndcu	-1(%rdi, %rax), %bnd0
+# endif
 	ret
 
 #endif
diff --git a/sysdeps/x86_64/strrchr.S b/sysdeps/x86_64/strrchr.S
index e413b07438..0bd3405b6b 100644
--- a/sysdeps/x86_64/strrchr.S
+++ b/sysdeps/x86_64/strrchr.S
@@ -22,6 +22,10 @@
 
 	.text
 ENTRY (strrchr)
+# ifdef __CHKP__
+	bndcl	(%rdi), %bnd0
+	bndcu	(%rdi), %bnd0
+# endif
 	movd	%esi, %xmm1
 	movq	%rdi, %rcx
 	punpcklbw %xmm1, %xmm1
@@ -46,7 +50,11 @@ ENTRY (strrchr)
 	orl	%ecx, %esi
 	jnz	1f
 
-2:	movdqa	(%rdi), %xmm0
+2:
+# ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+# endif
+	movdqa	(%rdi), %xmm0
 	leaq	16(%rdi), %rdi
 	movdqa	%xmm0, %xmm3
 	pcmpeqb	%xmm1, %xmm0
@@ -73,6 +81,9 @@ ENTRY (strrchr)
 	bsrl	%edx, %edx
 	jz	4f
 	leaq	-16(%rdi,%rdx), %rax
+# ifdef __CHKP__
+	bndcu	(%rax), %bnd0
+# endif
 4:	ret
 END (strrchr)
 
diff --git a/sysdeps/x86_64/strspn.S b/sysdeps/x86_64/strspn.S
index 2911da2aa5..bd3be8ad6a 100644
--- a/sysdeps/x86_64/strspn.S
+++ b/sysdeps/x86_64/strspn.S
@@ -25,6 +25,12 @@
 
 	.text
 ENTRY (strspn)
+#ifdef __CHKP__
+	bndcl (%rdi), %bnd0
+	bndcu (%rdi), %bnd0
+	bndcl (%rsi), %bnd1
+	bndcu (%rsi), %bnd1
+#endif
 
 	movq %rdi, %rdx		/* Save SRC.  */
 
@@ -50,21 +56,34 @@ ENTRY (strspn)
    have a correct zero-extended 64-bit value in %rcx.  */
 
 	.p2align 4
-L(2):	movb (%rax), %cl	/* get byte from stopset */
+L(2):
+#ifdef __CHKP__
+	bndcu (%rax), %bnd1
+#endif
+	movb (%rax), %cl	/* get byte from stopset */
 	testb %cl, %cl		/* is NUL char? */
 	jz L(1)			/* yes => start compare loop */
 	movb %cl, (%rsp,%rcx)	/* set corresponding byte in stopset table */
 
+#ifdef __CHKP__
+	bndcu 1(%rax), %bnd1
+#endif
 	movb 1(%rax), %cl	/* get byte from stopset */
 	testb $0xff, %cl	/* is NUL char? */
 	jz L(1)			/* yes => start compare loop */
 	movb %cl, (%rsp,%rcx)	/* set corresponding byte in stopset table */
 
+#ifdef __CHKP__
+	bndcu 2(%rax), %bnd1
+#endif
 	movb 2(%rax), %cl	/* get byte from stopset */
 	testb $0xff, %cl	/* is NUL char? */
 	jz L(1)			/* yes => start compare loop */
 	movb %cl, (%rsp,%rcx)	/* set corresponding byte in stopset table */
 
+#ifdef __CHKP__
+	bndcu 3(%rax), %bnd1
+#endif
 	movb 3(%rax), %cl	/* get byte from stopset */
 	addq $4, %rax		/* increment stopset pointer */
 	movb %cl, (%rsp,%rcx)	/* set corresponding byte in stopset table */
@@ -85,18 +104,30 @@ L(1):	leaq -4(%rdx), %rax	/* prepare loop */
 	.p2align 4
 L(3):	addq $4, %rax		/* adjust pointer for full loop round */
 
+#ifdef __CHKP__
+	bndcu (%rax), %bnd0
+#endif
 	movb (%rax), %cl	/* get byte from string */
 	testb %cl, (%rsp,%rcx)	/* is it contained in skipset? */
 	jz L(4)			/* no => return */
 
+#ifdef __CHKP__
+	bndcu 1(%rax), %bnd0
+#endif
 	movb 1(%rax), %cl	/* get byte from string */
 	testb %cl, (%rsp,%rcx)	/* is it contained in skipset? */
 	jz L(5)			/* no => return */
 
+#ifdef __CHKP__
+	bndcu 2(%rax), %bnd0
+#endif
 	movb 2(%rax), %cl	/* get byte from string */
 	testb %cl, (%rsp,%rcx)	/* is it contained in skipset? */
 	jz L(6)			/* no => return */
 
+#ifdef __CHKP__
+	bndcu 3(%rax), %bnd0
+#endif
 	movb 3(%rax), %cl	/* get byte from string */
 	testb %cl, (%rsp,%rcx)	/* is it contained in skipset? */
 	jnz L(3)		/* yes => start loop again */
diff --git a/sysdeps/x86_64/strtok.S b/sysdeps/x86_64/strtok.S
index 5636d9a625..17e2521608 100644
--- a/sysdeps/x86_64/strtok.S
+++ b/sysdeps/x86_64/strtok.S
@@ -90,6 +90,9 @@ ENTRY (FUNCTION)
 	   the last run.  */
 	cmpq $0, %rdx
 	cmove %rax, %rdx
+#ifdef __CHKP__
+	bndldx (,%rax,1),%bnd0
+#endif
 	testq %rdx, %rdx
 	jz L(returnNULL)
 	movq %rsi, %rax		/* Get start of delimiter set.  */
diff --git a/sysdeps/x86_64/wcschr.S b/sysdeps/x86_64/wcschr.S
index 3f098dc51c..3ab1e47f82 100644
--- a/sysdeps/x86_64/wcschr.S
+++ b/sysdeps/x86_64/wcschr.S
@@ -22,6 +22,11 @@
 	.text
 ENTRY (wcschr)
 
+#ifdef __CHKP__
+	bndcl	(%rdi), %bnd0
+	bndcu	(%rdi), %bnd0
+#endif
+
 	movd	%rsi, %xmm1
 	pxor	%xmm2, %xmm2
 	mov	%rdi, %rcx
@@ -43,6 +48,9 @@ ENTRY (wcschr)
 
 	and	$-16, %rdi
 
+#ifdef __CHKP__
+	bndcu  	(%rdi), %bnd0
+#endif
 	movdqa	(%rdi), %xmm0
 	pcmpeqd	%xmm0, %xmm2
 	add	$16, %rdi
@@ -78,6 +86,9 @@ L(cross_cache):
 L(unaligned_match):
 	add	%rdi, %rax
 	add	%rcx, %rax
+#ifdef __CHKP__
+	bndcu  	(%rax), %bnd0
+#endif
 	ret
 
 	.p2align 4
@@ -91,6 +102,9 @@ L(unaligned_no_match):
 	.p2align 4
 /* Loop start on aligned string.  */
 L(loop):
+#ifdef __CHKP__
+	bndcu  	(%rdi), %bnd0
+#endif
 	movdqa	(%rdi), %xmm0
 	pcmpeqd	%xmm0, %xmm2
 	add	$16, %rdi
@@ -100,6 +114,9 @@ L(loop):
 	or	%rax, %rdx
 	jnz	L(matches)
 
+#ifdef __CHKP__
+	bndcu  	(%rdi), %bnd0
+#endif
 	movdqa	(%rdi), %xmm0
 	pcmpeqd	%xmm0, %xmm2
 	add	$16, %rdi
@@ -109,6 +126,9 @@ L(loop):
 	or	%rax, %rdx
 	jnz	L(matches)
 
+#ifdef __CHKP__
+	bndcu  	(%rdi), %bnd0
+#endif
 	movdqa	(%rdi), %xmm0
 	pcmpeqd	%xmm0, %xmm2
 	add	$16, %rdi
@@ -118,6 +138,9 @@ L(loop):
 	or	%rax, %rdx
 	jnz	L(matches)
 
+#ifdef __CHKP__
+	bndcu  	(%rdi), %bnd0
+#endif
 	movdqa	(%rdi), %xmm0
 	pcmpeqd	%xmm0, %xmm2
 	add	$16, %rdi
@@ -142,6 +165,9 @@ L(matches):
 L(match):
 	sub	$16, %rdi
 	add	%rdi, %rax
+#ifdef __CHKP__
+	bndcu  	(%rax), %bnd0
+#endif
 	ret
 
 	.p2align 4
diff --git a/sysdeps/x86_64/wcscmp.S b/sysdeps/x86_64/wcscmp.S
index d6b516bce1..38e2849d5c 100644
--- a/sysdeps/x86_64/wcscmp.S
+++ b/sysdeps/x86_64/wcscmp.S
@@ -28,6 +28,14 @@ ENTRY (wcscmp)
 */
 	mov	%esi, %eax
 	mov	%edi, %edx
+
+#ifdef __CHKP__
+	bndcl	(%rdi), %bnd0
+	bndcu	(%rdi), %bnd0
+	bndcl	(%rsi), %bnd1
+	bndcu	(%rsi), %bnd1
+#endif
+
 	pxor	%xmm0, %xmm0		/* clear %xmm0 for null char checks */
 	mov	%al, %ch
 	mov	%dl, %cl
diff --git a/sysdeps/x86_64/wcslen.S b/sysdeps/x86_64/wcslen.S
index 5927352437..a7d944ff75 100644
--- a/sysdeps/x86_64/wcslen.S
+++ b/sysdeps/x86_64/wcslen.S
@@ -21,20 +21,45 @@
 
 	.text
 ENTRY (__wcslen)
+#ifdef __CHKP__
+	bndcl	(%rdi), %bnd0
+	bndcu	(%rdi), %bnd0
+#endif
 	cmpl	$0, (%rdi)
 	jz	L(exit_tail0)
+#ifdef __CHKP__
+	bndcu	4(%rdi), %bnd0
+#endif
 	cmpl	$0, 4(%rdi)
 	jz	L(exit_tail1)
+#ifdef __CHKP__
+	bndcu	8(%rdi), %bnd0
+#endif
 	cmpl	$0, 8(%rdi)
 	jz	L(exit_tail2)
+#ifdef __CHKP__
+	bndcu	12(%rdi), %bnd0
+#endif
 	cmpl	$0, 12(%rdi)
 	jz	L(exit_tail3)
+#ifdef __CHKP__
+	bndcu	16(%rdi), %bnd0
+#endif
 	cmpl	$0, 16(%rdi)
 	jz	L(exit_tail4)
+#ifdef __CHKP__
+	bndcu	20(%rdi), %bnd0
+#endif
 	cmpl	$0, 20(%rdi)
 	jz	L(exit_tail5)
+#ifdef __CHKP__
+	bndcu	24(%rdi), %bnd0
+#endif
 	cmpl	$0, 24(%rdi)
 	jz	L(exit_tail6)
+#ifdef __CHKP__
+	bndcu	28(%rdi), %bnd0
+#endif
 	cmpl	$0, 28(%rdi)
 	jz	L(exit_tail7)
 
@@ -44,6 +69,9 @@ ENTRY (__wcslen)
 	lea	16(%rdi), %rcx
 	and	$-16, %rax
 
+#ifdef __CHKP__
+	bndcu	(%rax), %bnd0
+#endif
 	pcmpeqd	(%rax), %xmm0
 	pmovmskb %xmm0, %edx
 	pxor	%xmm1, %xmm1
@@ -51,6 +79,9 @@ ENTRY (__wcslen)
 	lea	16(%rax), %rax
 	jnz	L(exit)
 
+#ifdef __CHKP__
+	bndcu	(%rax), %bnd0
+#endif
 	pcmpeqd	(%rax), %xmm1
 	pmovmskb %xmm1, %edx
 	pxor	%xmm2, %xmm2
@@ -58,6 +89,9 @@ ENTRY (__wcslen)
 	lea	16(%rax), %rax
 	jnz	L(exit)
 
+#ifdef __CHKP__
+	bndcu	(%rax), %bnd0
+#endif
 	pcmpeqd	(%rax), %xmm2
 	pmovmskb %xmm2, %edx
 	pxor	%xmm3, %xmm3
@@ -65,54 +99,81 @@ ENTRY (__wcslen)
 	lea	16(%rax), %rax
 	jnz	L(exit)
 
+#ifdef __CHKP__
+	bndcu	(%rax), %bnd0
+#endif
 	pcmpeqd	(%rax), %xmm3
 	pmovmskb %xmm3, %edx
 	test	%edx, %edx
 	lea	16(%rax), %rax
 	jnz	L(exit)
 
+#ifdef __CHKP__
+	bndcu	(%rax), %bnd0
+#endif
 	pcmpeqd	(%rax), %xmm0
 	pmovmskb %xmm0, %edx
 	test	%edx, %edx
 	lea	16(%rax), %rax
 	jnz	L(exit)
 
+#ifdef __CHKP__
+	bndcu	(%rax), %bnd0
+#endif
 	pcmpeqd	(%rax), %xmm1
 	pmovmskb %xmm1, %edx
 	test	%edx, %edx
 	lea	16(%rax), %rax
 	jnz	L(exit)
 
+#ifdef __CHKP__
+	bndcu	(%rax), %bnd0
+#endif
 	pcmpeqd	(%rax), %xmm2
 	pmovmskb %xmm2, %edx
 	test	%edx, %edx
 	lea	16(%rax), %rax
 	jnz	L(exit)
 
+#ifdef __CHKP__
+	bndcu	(%rax), %bnd0
+#endif
 	pcmpeqd	(%rax), %xmm3
 	pmovmskb %xmm3, %edx
 	test	%edx, %edx
 	lea	16(%rax), %rax
 	jnz	L(exit)
 
+#ifdef __CHKP__
+	bndcu	(%rax), %bnd0
+#endif
 	pcmpeqd	(%rax), %xmm0
 	pmovmskb %xmm0, %edx
 	test	%edx, %edx
 	lea	16(%rax), %rax
 	jnz	L(exit)
 
+#ifdef __CHKP__
+	bndcu	(%rax), %bnd0
+#endif
 	pcmpeqd	(%rax), %xmm1
 	pmovmskb %xmm1, %edx
 	test	%edx, %edx
 	lea	16(%rax), %rax
 	jnz	L(exit)
 
+#ifdef __CHKP__
+	bndcu	(%rax), %bnd0
+#endif
 	pcmpeqd	(%rax), %xmm2
 	pmovmskb %xmm2, %edx
 	test	%edx, %edx
 	lea	16(%rax), %rax
 	jnz	L(exit)
 
+#ifdef __CHKP__
+	bndcu	(%rax), %bnd0
+#endif
 	pcmpeqd	(%rax), %xmm3
 	pmovmskb %xmm3, %edx
 	test	%edx, %edx
@@ -123,6 +184,9 @@ ENTRY (__wcslen)
 
 	.p2align 4
 L(aligned_64_loop):
+#ifdef __CHKP__
+	bndcu	(%rax), %bnd0
+#endif
 	movaps	(%rax), %xmm0
 	movaps	16(%rax), %xmm1
 	movaps	32(%rax), %xmm2
@@ -173,6 +237,9 @@ L(exit):
 	mov	%dl, %cl
 	and	$15, %cl
 	jz	L(exit_1)
+#ifdef __CHKP__
+	bndcu	-1(%rdi, %rax, 4), %bnd0
+#endif
 	ret
 
 	.p2align 4
@@ -181,11 +248,17 @@ L(exit_high):
 	and	$15, %ch
 	jz	L(exit_3)
 	add	$2, %rax
+#ifdef __CHKP__
+	bndcu	-1(%rdi, %rax, 4), %bnd0
+#endif
 	ret
 
 	.p2align 4
 L(exit_1):
 	add	$1, %rax
+#ifdef __CHKP__
+	bndcu	-1(%rdi, %rax, 4), %bnd0
+#endif
 	ret
 
 	.p2align 4
diff --git a/sysdeps/x86_64/wcsrchr.S b/sysdeps/x86_64/wcsrchr.S
index ea1e2e55da..8edfc468b8 100644
--- a/sysdeps/x86_64/wcsrchr.S
+++ b/sysdeps/x86_64/wcsrchr.S
@@ -19,9 +19,22 @@
 
 #include <sysdep.h>
 
+#ifdef __CHKP__
+# define RETURN \
+      bndcu  (%rax), %bnd0; \
+      ret
+#else
+# define RETURN ret
+#endif
+
+
 	.text
 ENTRY (wcsrchr)
 
+#ifdef __CHKP__
+	bndcl	(%rdi), %bnd0
+	bndcu	(%rdi), %bnd0
+#endif
 	movd	%rsi, %xmm1
 	mov	%rdi, %rcx
 	punpckldq %xmm1, %xmm1
@@ -92,6 +105,9 @@ L(unaligned_match):
 /* Loop start on aligned string.  */
 	.p2align 4
 L(loop):
+#ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+#endif
 	movdqa	(%rdi), %xmm0
 	pcmpeqd	%xmm0, %xmm2
 	add	$16, %rdi
@@ -101,6 +117,9 @@ L(loop):
 	or	%rax, %rcx
 	jnz	L(matches)
 
+#ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+#endif
 	movdqa	(%rdi), %xmm3
 	pcmpeqd	%xmm3, %xmm2
 	add	$16, %rdi
@@ -110,6 +129,9 @@ L(loop):
 	or	%rax, %rcx
 	jnz	L(matches)
 
+#ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+#endif
 	movdqa	(%rdi), %xmm4
 	pcmpeqd	%xmm4, %xmm2
 	add	$16, %rdi
@@ -119,6 +141,9 @@ L(loop):
 	or	%rax, %rcx
 	jnz	L(matches)
 
+#ifdef __CHKP__
+	bndcu	(%rdi), %bnd0
+#endif
 	movdqa	(%rdi), %xmm5
 	pcmpeqd	%xmm5, %xmm2
 	add	$16, %rdi
@@ -145,7 +170,7 @@ L(return_value):
 	test	$15 << 4, %al
 	jnz	L(match_second_wchar)
 	lea	-16(%rdi), %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(match):
@@ -175,14 +200,14 @@ L(find_zero):
 	test	$15 << 4, %al
 	jnz	L(match_second_wchar)
 	lea	-16(%rdi), %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(find_zero_in_first_wchar):
 	test	$1, %rax
 	jz	L(return_value)
 	lea	-16(%rdi), %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(find_zero_in_second_wchar):
@@ -192,7 +217,7 @@ L(find_zero_in_second_wchar):
 	test	$15 << 4, %al
 	jnz	L(match_second_wchar)
 	lea	-16(%rdi), %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(find_zero_in_third_wchar):
@@ -204,12 +229,12 @@ L(find_zero_in_third_wchar):
 	test	$15 << 4, %al
 	jnz	L(match_second_wchar)
 	lea	-16(%rdi), %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(prolog_find_zero):
 	add	%rcx, %rdi
-	mov     %rdx, %rcx
+	mov	%rdx, %rcx
 L(prolog_find_zero_1):
 	test	$15, %cl
 	jnz	L(prolog_find_zero_in_first_wchar)
@@ -228,14 +253,14 @@ L(prolog_find_zero_1):
 	test	$15 << 4, %al
 	jnz	L(match_second_wchar)
 	lea	-16(%rdi), %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(prolog_find_zero_in_first_wchar):
 	test	$1, %rax
 	jz	L(return_null)
 	lea	-16(%rdi), %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(prolog_find_zero_in_second_wchar):
@@ -245,7 +270,7 @@ L(prolog_find_zero_in_second_wchar):
 	test	$15 << 4, %al
 	jnz	L(match_second_wchar)
 	lea	-16(%rdi), %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(prolog_find_zero_in_third_wchar):
@@ -257,22 +282,22 @@ L(prolog_find_zero_in_third_wchar):
 	test	$15 << 4, %al
 	jnz	L(match_second_wchar)
 	lea	-16(%rdi), %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(match_second_wchar):
 	lea	-12(%rdi), %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(match_third_wchar):
 	lea	-8(%rdi), %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(match_fourth_wchar):
 	lea	-4(%rdi), %rax
-	ret
+	RETURN
 
 	.p2align 4
 L(return_null):