diff options
author | Alan Modra <amodra@gmail.com> | 2013-08-17 18:40:11 +0930 |
---|---|---|
committer | Alan Modra <amodra@gmail.com> | 2013-10-04 10:39:32 +0930 |
commit | db9b4570c5dc550074140ac1d1677077fba29a26 (patch) | |
tree | c6469b8ce6b7ec28cc9f7c27484e67d351ce3349 /sysdeps/powerpc/powerpc32/power7/strlen.S | |
parent | f7c399cff5bd04ee9dc117fb6b0f39597dc047c6 (diff) | |
download | glibc-db9b4570c5dc550074140ac1d1677077fba29a26.tar.gz glibc-db9b4570c5dc550074140ac1d1677077fba29a26.tar.xz glibc-db9b4570c5dc550074140ac1d1677077fba29a26.zip |
PowerPC LE strlen
http://sourceware.org/ml/libc-alpha/2013-08/msg00097.html This is the first of nine patches adding little-endian support to the existing optimised string and memory functions. I did spend some time with a power7 simulator looking at cycle-by-cycle behaviour for memchr, but most of these patches have not been run on CPU simulators to check that we are going as fast as possible. I'm sure PowerPC can do better. However, the little-endian support mostly leaves main loops unchanged, so I'm banking on previous authors having done a good job on big-endian. As with most code you stare at long enough, I found some improvements for big-endian too. Little-endian support for strlen. Like most of the string functions, I leave the main word or multiple-word loops substantially unchanged, just needing to modify the tail. Removing the branch in the power7 functions is just a tidy-up; .align produces a branch anyway. Modifying regs in the non-power7 functions is to suit the new little-endian tail. * sysdeps/powerpc/powerpc64/power7/strlen.S (strlen): Add little-endian support. Don't branch over align. * sysdeps/powerpc/powerpc32/power7/strlen.S: Likewise. * sysdeps/powerpc/powerpc64/strlen.S (strlen): Add little-endian support. Rearrange tmp reg use to suit. Comment. * sysdeps/powerpc/powerpc32/strlen.S: Likewise.
Diffstat (limited to 'sysdeps/powerpc/powerpc32/power7/strlen.S')
-rw-r--r-- | sysdeps/powerpc/powerpc32/power7/strlen.S | 17 |
1 files changed, 12 insertions, 5 deletions
diff --git a/sysdeps/powerpc/powerpc32/power7/strlen.S b/sysdeps/powerpc/powerpc32/power7/strlen.S index b71a10f5c7..b08d6c028c 100644 --- a/sysdeps/powerpc/powerpc32/power7/strlen.S +++ b/sysdeps/powerpc/powerpc32/power7/strlen.S @@ -29,7 +29,11 @@ ENTRY (strlen) li r0,0 /* Word with null chars to use with cmpb. */ li r5,-1 /* MASK = 0xffffffffffffffff. */ lwz r12,0(r4) /* Load word from memory. */ +#ifdef __LITTLE_ENDIAN__ + slw r5,r5,r6 +#else srw r5,r5,r6 /* MASK = MASK >> padding. */ +#endif orc r9,r12,r5 /* Mask bits that are not part of the string. */ cmpb r10,r9,r0 /* Check for null bytes in WORD1. */ cmpwi cr7,r10,0 /* If r10 == 0, no null's have been found. */ @@ -47,9 +51,6 @@ ENTRY (strlen) cmpb r10,r12,r0 cmpwi cr7,r10,0 bne cr7,L(done) - b L(loop) /* We branch here (rather than falling through) - to skip the nops due to heavy alignment - of the loop below. */ /* Main loop to look for the end of the string. Since it's a small loop (< 8 instructions), align it to 32-bytes. */ @@ -86,9 +87,15 @@ L(loop): 0xff in the same position as the null byte in the original word from the string. Use that to calculate the length. */ L(done): - cntlzw r0,r10 /* Count leading zeroes before the match. */ +#ifdef __LITTLE_ENDIAN__ + addi r9, r10, -1 /* Form a mask from trailing zeros. */ + andc r9, r9, r10 + popcntw r0, r9 /* Count the bits in the mask. */ +#else + cntlzw r0,r10 /* Count leading zeros before the match. */ +#endif subf r5,r3,r4 - srwi r0,r0,3 /* Convert leading zeroes to bytes. */ + srwi r0,r0,3 /* Convert leading zeros to bytes. */ add r3,r5,r0 /* Compute final length. */ blr END (strlen) |