about summary refs log tree commit diff
path: root/sysdeps
diff options
context:
space:
mode:
author: Ulrich Drepper <drepper@gmail.com> 2011-07-23 00:04:15 -0400
committer: Ulrich Drepper <drepper@gmail.com> 2011-07-23 00:04:15 -0400
commit: 2ee5518515103e5370b47e6170e8782d7fd2fcba (patch)
tree: b184b118951bcb5e81052a889c8d896eeb86a4d0 /sysdeps
parent: 7a03a9c8c4b37b88ac5e82b557d974f3161ddaf9 (diff)
parent: 21137f89c574de2cadda332ad874b2e6b624f950 (diff)
download: glibc-2ee5518515103e5370b47e6170e8782d7fd2fcba.tar.gz
glibc-2ee5518515103e5370b47e6170e8782d7fd2fcba.tar.xz
glibc-2ee5518515103e5370b47e6170e8782d7fd2fcba.zip
Merge branch 'master' of ssh://sourceware.org/git/glibc
Conflicts:
	ChangeLog
Diffstat (limited to 'sysdeps')
-rw-r--r-- sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S | 52
-rw-r--r-- sysdeps/x86_64/multiarch/strcpy-ssse3.S | 4
2 files changed, 27 insertions, 29 deletions
diff --git a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
index e73778ae1a..72642d3e81 100644
--- a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
@@ -52,24 +52,28 @@ ENTRY (STRCPY)
 
 # endif
 
-	and	$15, %rcx
-	jz	L(SourceStringAlignmentZero)
+	and	$63, %rcx
+	cmp	$32, %rcx
+	jbe	L(SourceStringAlignmentLess32)
 
 	and	$-16, %rsi
+	and	$15, %rcx
 	pxor	%xmm0, %xmm0
 	pxor	%xmm1, %xmm1
 
 	pcmpeqb	(%rsi), %xmm1
-# ifdef USE_AS_STRNCPY
-	add	%rcx, %r8
-# endif
 	pmovmskb %xmm1, %rdx
 	shr	%cl, %rdx
+
 # ifdef USE_AS_STRNCPY
 #  if defined USE_AS_STPCPY || defined USE_AS_STRCAT
-	cmp	$16, %r8
+	mov	$16, %r10
+	sub	%rcx, %r10
+	cmp	%r10, %r8
 #  else
-	cmp	$17, %r8
+	mov	$17, %r10
+	sub	%rcx, %r10
+	cmp	%r10, %r8
 #  endif
 	jbe	L(CopyFrom1To16BytesTailCase2OrCase3)
 # endif
@@ -78,12 +82,10 @@ ENTRY (STRCPY)
 
 	pcmpeqb	16(%rsi), %xmm0
 	pmovmskb %xmm0, %rdx
+
 # ifdef USE_AS_STRNCPY
-#  if defined USE_AS_STPCPY || defined USE_AS_STRCAT
-	cmp	$32, %r8
-#  else
-	cmp	$33, %r8
-#  endif
+	add	$16, %r10
+	cmp	%r10, %r8
 	jbe	L(CopyFrom1To32BytesCase2OrCase3)
 # endif
 	test	%rdx, %rdx
@@ -92,11 +94,13 @@ ENTRY (STRCPY)
 	movdqu	(%rsi, %rcx), %xmm1   /* copy 16 bytes */
 	movdqu	%xmm1, (%rdi)
 
-	sub	%rcx, %rdi
-
 /* If source adress alignment != destination adress alignment */
 	.p2align 4
 L(Unalign16Both):
+	sub	%rcx, %rdi
+# ifdef USE_AS_STRNCPY
+	add	%rcx, %r8
+# endif
 	mov	$16, %rcx
 	movdqa	(%rsi, %rcx), %xmm1
 	movaps	16(%rsi, %rcx), %xmm2
@@ -288,9 +292,10 @@ L(Unaligned64Leave):
 
 /* If source adress alignment == destination adress alignment */
 
-L(SourceStringAlignmentZero):
+L(SourceStringAlignmentLess32):
 	pxor	%xmm0, %xmm0
-	movdqa	(%rsi), %xmm1
+	movdqu	(%rsi), %xmm1
+	movdqu	16(%rsi), %xmm2
 	pcmpeqb	%xmm1, %xmm0
 	pmovmskb %xmm0, %rdx
 
@@ -305,7 +310,7 @@ L(SourceStringAlignmentZero):
 	test	%rdx, %rdx
 	jnz	L(CopyFrom1To16BytesTail1)
 
-	pcmpeqb	16(%rsi), %xmm0
+	pcmpeqb	%xmm2, %xmm0
 	movdqu	%xmm1, (%rdi)
 	pmovmskb %xmm0, %rdx
 
@@ -319,6 +324,9 @@ L(SourceStringAlignmentZero):
 # endif
 	test	%rdx, %rdx
 	jnz	L(CopyFrom1To32Bytes1)
+
+	and	$-16, %rsi
+	and	$15, %rcx
 	jmp	L(Unalign16Both)
 
 /*------End of main part with loops---------------------*/
@@ -335,9 +343,6 @@ L(CopyFrom1To16Bytes):
 # endif
 	.p2align 4
 L(CopyFrom1To16BytesTail):
-# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
-	sub	%rcx, %r8
-# endif
 	add	%rcx, %rsi
 	bsf	%rdx, %rdx
 	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
@@ -355,9 +360,6 @@ L(CopyFrom1To16BytesTail1):
 
 	.p2align 4
 L(CopyFrom1To32Bytes):
-# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
-	sub	%rcx, %r8
-# endif
 	bsf	%rdx, %rdx
 	add	%rcx, %rsi
 	add	$16, %rdx
@@ -465,7 +467,6 @@ L(CopyFrom1To16BytesCase2):
 
 	.p2align 4
 L(CopyFrom1To32BytesCase2):
-	sub	%rcx, %r8
 	add	%rcx, %rsi
 	bsf	%rdx, %rdx
 	add	$16, %rdx
@@ -475,7 +476,6 @@ L(CopyFrom1To32BytesCase2):
 	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
 
 L(CopyFrom1To16BytesTailCase2):
-	sub	%rcx, %r8
 	add	%rcx, %rsi
 	bsf	%rdx, %rdx
 	cmp	%r8, %rdx
@@ -504,7 +504,6 @@ L(CopyFrom1To16BytesCase3):
 L(CopyFrom1To32BytesCase2OrCase3):
 	test	%rdx, %rdx
 	jnz	L(CopyFrom1To32BytesCase2)
-	sub	%rcx, %r8
 	add	%rcx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
 
@@ -512,7 +511,6 @@ L(CopyFrom1To32BytesCase2OrCase3):
 L(CopyFrom1To16BytesTailCase2OrCase3):
 	test	%rdx, %rdx
 	jnz	L(CopyFrom1To16BytesTailCase2)
-	sub	%rcx, %r8
 	add	%rcx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
 
diff --git a/sysdeps/x86_64/multiarch/strcpy-ssse3.S b/sysdeps/x86_64/multiarch/strcpy-ssse3.S
index 05faf0dfc2..c4ec54cd21 100644
--- a/sysdeps/x86_64/multiarch/strcpy-ssse3.S
+++ b/sysdeps/x86_64/multiarch/strcpy-ssse3.S
@@ -84,10 +84,10 @@ ENTRY (STRCPY)
 
 # ifdef USE_AS_STRNCPY
 	mov	%rcx, %rsi
+	sub	$16, %r8
 	and	$0xf, %rsi
 
 /* add 16 bytes rcx_shift to r8 */
-
 	add	%rsi, %r8
 # endif
 	lea	16(%rcx), %rsi
@@ -120,7 +120,7 @@ ENTRY (STRCPY)
 /* rax = 0: there isn't end of string from position rsi to rsi+15 */
 
 # ifdef USE_AS_STRNCPY
-	sub	$32, %r8
+	sub	$16, %r8
 	jbe	L(CopyFrom1To16BytesCase2OrCase3)
 # endif
 	test	%rax, %rax