diff options
author | Will Schmidt <will_schmidt@vnet.ibm.com> | 2011-08-18 11:01:44 -0500 |
---|---|---|
committer | Ryan S. Arnold <rsa@us.ibm.com> | 2011-08-18 11:01:44 -0500 |
commit | a7e0baec8c61a6bdf3b8fcb4ccb725477254f1d3 (patch) | |
tree | 313fa7049fd3e214a72a554a1cee7ae9b5b01b32 | |
parent | 0c6ff7937931f1171a6aba3970c4aea7978a1b86 (diff) | |
download | glibc-a7e0baec8c61a6bdf3b8fcb4ccb725477254f1d3.tar.gz glibc-a7e0baec8c61a6bdf3b8fcb4ccb725477254f1d3.tar.xz glibc-a7e0baec8c61a6bdf3b8fcb4ccb725477254f1d3.zip |
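Provide a throughput boost of approximately 15% to the 64-bit POWER7 strncmp code by increasing the function entry alignment and adding nop instructions after the initial dcbt prefetches.
The 32-bit throughput is not notably affected by this change; the same change is applied to the 32-bit code only to keep the two files in sync with each other.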
-rw-r--r-- | ChangeLog | 7 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/power7/strncmp.S | 4 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/power7/strncmp.S | 4 |
3 files changed, 13 insertions, 2 deletions
diff --git a/ChangeLog b/ChangeLog index f1a40044b7..0638748a64 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2011-08-04 Will Schmidt <will_schmidt@vnet.ibm.com> + + * sysdeps/powerpc/powerpc32/power7/strncmp.S: Adjust the alignment + and add nop instructions for throughput optimization. + * sysdeps/powerpc/powerpc64/power7/strncmp.S: Adjust the alignment + and nop instructions for throughput optimization. + 2011-08-01 Adhemerval Zanella <azanella@linux.vnet.ibm.com> * sysdeps/powerpc/fpu/e_hypot.c: New file: hypot optimized for POWER. diff --git a/sysdeps/powerpc/powerpc32/power7/strncmp.S b/sysdeps/powerpc/powerpc32/power7/strncmp.S index 7ee9e03e7a..db466f0793 100644 --- a/sysdeps/powerpc/powerpc32/power7/strncmp.S +++ b/sysdeps/powerpc/powerpc32/power7/strncmp.S @@ -27,7 +27,7 @@ const char *s2 [r4], size_t size [r5]) */ -EALIGN (BP_SYM(strncmp),4,0) +EALIGN (BP_SYM(strncmp),5,0) #define rTMP r0 #define rRTN r3 @@ -47,9 +47,11 @@ EALIGN (BP_SYM(strncmp),4,0) #define rBITDIF r11 /* bits that differ in s1 & s2 words */ dcbt 0,rSTR1 + nop or rTMP,rSTR2,rSTR1 lis r7F7F,0x7f7f dcbt 0,rSTR2 + nop clrlwi. rTMP,rTMP,30 cmplwi cr1,rN,0 lis rFEFE,-0x101 diff --git a/sysdeps/powerpc/powerpc64/power7/strncmp.S b/sysdeps/powerpc/powerpc64/power7/strncmp.S index 5ee5e2eafa..eace179a6c 100644 --- a/sysdeps/powerpc/powerpc64/power7/strncmp.S +++ b/sysdeps/powerpc/powerpc64/power7/strncmp.S @@ -27,7 +27,7 @@ const char *s2 [r4], size_t size [r5]) */ -EALIGN (BP_SYM(strncmp),4,0) +EALIGN (BP_SYM(strncmp),5,0) CALL_MCOUNT 3 #define rTMP r0 @@ -48,9 +48,11 @@ EALIGN (BP_SYM(strncmp),4,0) #define rBITDIF r11 /* bits that differ in s1 & s2 words */ dcbt 0,rSTR1 + nop or rTMP,rSTR2,rSTR1 lis r7F7F,0x7f7f dcbt 0,rSTR2 + nop clrldi. rTMP,rTMP,61 cmpldi cr1,rN,0 lis rFEFE,-0x101 |