diff options
author | H.J. Lu <hongjiu.lu@intel.com> | 2009-07-25 19:15:14 -0700 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 2009-07-25 19:15:14 -0700 |
commit | 4e5b5821bf58ddc30d455ee4968623f3334fbe28 (patch) | |
tree | 1e3ceb13e48e5210c126639e37ff90a1e7a104cc | |
parent | 657317537c09b82a2feb1194fda045f63e3a1222 (diff) | |
download | glibc-4e5b5821bf58ddc30d455ee4968623f3334fbe28.tar.gz glibc-4e5b5821bf58ddc30d455ee4968623f3334fbe28.tar.xz glibc-4e5b5821bf58ddc30d455ee4968623f3334fbe28.zip |
Some more optimizations for x86-64 strcmp.
-rw-r--r-- | ChangeLog | 3 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/strcmp.S | 13 |
2 files changed, 7 insertions, 9 deletions
diff --git a/ChangeLog b/ChangeLog index f47b0897a4..abfe7dbfbc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,8 @@ 2009-07-25 Ulrich Drepper <drepper@redhat.com> + * sysdeps/x86_64/multiarch/strcmp.S: Some more optimizations for + modern processor versions. Patch by H.J. Lu <hongjiu.lu@intel.com>. + [BZ #10448] * sysdeps/posix/getaddrinfo.c (gaih_inet): If NSS module contains no callback we must touch the status to avoid using stale value. diff --git a/sysdeps/x86_64/multiarch/strcmp.S b/sysdeps/x86_64/multiarch/strcmp.S index f9cf943e32..15148e4f7f 100644 --- a/sysdeps/x86_64/multiarch/strcmp.S +++ b/sysdeps/x86_64/multiarch/strcmp.S @@ -120,10 +120,8 @@ STRCMP_SSE42: ja LABEL(crosscache) /* rsi: 16-byte load will cross cache line */ cmp $0x30, %eax ja LABEL(crosscache) /* rdi: 16-byte load will cross cache line */ - movlpd (%rdi), %xmm1 - movlpd (%rsi), %xmm2 - movhpd 8(%rdi), %xmm1 - movhpd 8(%rsi), %xmm2 + movdqu (%rdi), %xmm1 + movdqu (%rsi), %xmm2 pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */ pcmpeqb %xmm1, %xmm0 /* Any null chars? */ pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality */ @@ -1492,11 +1490,8 @@ LABEL(less16bytes): sub %rdx, %r11 jbe LABEL(strcmp_exitz) #endif - xor %ecx, %ecx /* clear %ecx */ - xor %eax, %eax /* clear %eax */ - - movb (%rsi, %rdx), %cl - movb (%rdi, %rdx), %al + movzbl (%rsi, %rdx), %ecx + movzbl (%rdi, %rdx), %eax sub %ecx, %eax ret |