diff options
author | Ulrich Drepper <drepper@gmail.com> | 2011-07-23 00:04:15 -0400 |
---|---|---|
committer | Ulrich Drepper <drepper@gmail.com> | 2011-07-23 00:04:15 -0400 |
commit | 2ee5518515103e5370b47e6170e8782d7fd2fcba (patch) | |
tree | b184b118951bcb5e81052a889c8d896eeb86a4d0 /sysdeps | |
parent | 7a03a9c8c4b37b88ac5e82b557d974f3161ddaf9 (diff) | |
parent | 21137f89c574de2cadda332ad874b2e6b624f950 (diff) | |
download | glibc-2ee5518515103e5370b47e6170e8782d7fd2fcba.tar.gz glibc-2ee5518515103e5370b47e6170e8782d7fd2fcba.tar.xz glibc-2ee5518515103e5370b47e6170e8782d7fd2fcba.zip |
Merge branch 'master' of ssh://sourceware.org/git/glibc
Conflicts: ChangeLog
Diffstat (limited to 'sysdeps')
-rw-r--r-- | sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S | 52 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/strcpy-ssse3.S | 4 |
2 files changed, 27 insertions, 29 deletions
diff --git a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S index e73778ae1a..72642d3e81 100644 --- a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S +++ b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S @@ -52,24 +52,28 @@ ENTRY (STRCPY) # endif - and $15, %rcx - jz L(SourceStringAlignmentZero) + and $63, %rcx + cmp $32, %rcx + jbe L(SourceStringAlignmentLess32) and $-16, %rsi + and $15, %rcx pxor %xmm0, %xmm0 pxor %xmm1, %xmm1 pcmpeqb (%rsi), %xmm1 -# ifdef USE_AS_STRNCPY - add %rcx, %r8 -# endif pmovmskb %xmm1, %rdx shr %cl, %rdx + # ifdef USE_AS_STRNCPY # if defined USE_AS_STPCPY || defined USE_AS_STRCAT - cmp $16, %r8 + mov $16, %r10 + sub %rcx, %r10 + cmp %r10, %r8 # else - cmp $17, %r8 + mov $17, %r10 + sub %rcx, %r10 + cmp %r10, %r8 # endif jbe L(CopyFrom1To16BytesTailCase2OrCase3) # endif @@ -78,12 +82,10 @@ ENTRY (STRCPY) pcmpeqb 16(%rsi), %xmm0 pmovmskb %xmm0, %rdx + # ifdef USE_AS_STRNCPY -# if defined USE_AS_STPCPY || defined USE_AS_STRCAT - cmp $32, %r8 -# else - cmp $33, %r8 -# endif + add $16, %r10 + cmp %r10, %r8 jbe L(CopyFrom1To32BytesCase2OrCase3) # endif test %rdx, %rdx @@ -92,11 +94,13 @@ ENTRY (STRCPY) movdqu (%rsi, %rcx), %xmm1 /* copy 16 bytes */ movdqu %xmm1, (%rdi) - sub %rcx, %rdi - /* If source adress alignment != destination adress alignment */ .p2align 4 L(Unalign16Both): + sub %rcx, %rdi +# ifdef USE_AS_STRNCPY + add %rcx, %r8 +# endif mov $16, %rcx movdqa (%rsi, %rcx), %xmm1 movaps 16(%rsi, %rcx), %xmm2 @@ -288,9 +292,10 @@ L(Unaligned64Leave): /* If source adress alignment == destination adress alignment */ -L(SourceStringAlignmentZero): +L(SourceStringAlignmentLess32): pxor %xmm0, %xmm0 - movdqa (%rsi), %xmm1 + movdqu (%rsi), %xmm1 + movdqu 16(%rsi), %xmm2 pcmpeqb %xmm1, %xmm0 pmovmskb %xmm0, %rdx @@ -305,7 +310,7 @@ L(SourceStringAlignmentZero): test %rdx, %rdx jnz L(CopyFrom1To16BytesTail1) - pcmpeqb 16(%rsi), %xmm0 + pcmpeqb %xmm2, %xmm0 movdqu %xmm1, (%rdi) pmovmskb %xmm0, %rdx @@ -319,6 +324,9 @@ L(SourceStringAlignmentZero): # endif test %rdx, %rdx jnz L(CopyFrom1To32Bytes1) + + and $-16, %rsi + and $15, %rcx jmp L(Unalign16Both) /*------End of main part with loops---------------------*/ @@ -335,9 +343,6 @@ L(CopyFrom1To16Bytes): # endif .p2align 4 L(CopyFrom1To16BytesTail): -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT - sub %rcx, %r8 -# endif add %rcx, %rsi bsf %rdx, %rdx BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4) @@ -355,9 +360,6 @@ L(CopyFrom1To16BytesTail1): .p2align 4 L(CopyFrom1To32Bytes): -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT - sub %rcx, %r8 -# endif bsf %rdx, %rdx add %rcx, %rsi add $16, %rdx @@ -465,7 +467,6 @@ L(CopyFrom1To16BytesCase2): .p2align 4 L(CopyFrom1To32BytesCase2): - sub %rcx, %r8 add %rcx, %rsi bsf %rdx, %rdx add $16, %rdx @@ -475,7 +476,6 @@ L(CopyFrom1To32BytesCase2): BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4) L(CopyFrom1To16BytesTailCase2): - sub %rcx, %r8 add %rcx, %rsi bsf %rdx, %rdx cmp %r8, %rdx @@ -504,7 +504,6 @@ L(CopyFrom1To16BytesCase3): L(CopyFrom1To32BytesCase2OrCase3): test %rdx, %rdx jnz L(CopyFrom1To32BytesCase2) - sub %rcx, %r8 add %rcx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4) @@ -512,7 +511,6 @@ L(CopyFrom1To32BytesCase2OrCase3): L(CopyFrom1To16BytesTailCase2OrCase3): test %rdx, %rdx jnz L(CopyFrom1To16BytesTailCase2) - sub %rcx, %r8 add %rcx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4) diff --git a/sysdeps/x86_64/multiarch/strcpy-ssse3.S b/sysdeps/x86_64/multiarch/strcpy-ssse3.S index 05faf0dfc2..c4ec54cd21 100644 --- a/sysdeps/x86_64/multiarch/strcpy-ssse3.S +++ b/sysdeps/x86_64/multiarch/strcpy-ssse3.S @@ -84,10 +84,10 @@ ENTRY (STRCPY) # ifdef USE_AS_STRNCPY mov %rcx, %rsi + sub $16, %r8 and $0xf, %rsi /* add 16 bytes rcx_shift to r8 */ - add %rsi, %r8 # endif lea 16(%rcx), %rsi @@ -120,7 +120,7 @@ ENTRY (STRCPY) /* rax = 0: there isn't end of string from position rsi to rsi+15 */ # ifdef USE_AS_STRNCPY - sub $32, %r8 + sub $16, %r8 jbe L(CopyFrom1To16BytesCase2OrCase3) # endif test %rax, %rax |