diff options
author | Adhemerval Zanella <azanella@linux.vnet.ibm.com> | 2014-11-19 14:24:18 -0500 |
---|---|---|
committer | Adhemerval Zanella <azanella@linux.vnet.ibm.com> | 2014-12-02 07:15:58 -0500 |
commit | 2e8a2de2dafa3238b5b58eecb407af6825a780cd (patch) | |
tree | 0a89b6df52343efcbd359cb0c0c07b2f0a8bcbe7 /sysdeps/powerpc/powerpc64/strspn.S | |
parent | 08f1e1d2bca9ef087813357780ec0bafe71c7d29 (diff) | |
download | glibc-2e8a2de2dafa3238b5b58eecb407af6825a780cd.tar.gz glibc-2e8a2de2dafa3238b5b58eecb407af6825a780cd.tar.xz glibc-2e8a2de2dafa3238b5b58eecb407af6825a780cd.zip |
powerpc: Add powerpc64 strspn optimization
This patch makes the POWER7 optimized strspn generic by using default doubleword stores to zero the hash, instead of VSX instructions. Performance on POWER7/POWER8 machines does not changed.
Diffstat (limited to 'sysdeps/powerpc/powerpc64/strspn.S')
-rw-r--r-- | sysdeps/powerpc/powerpc64/strspn.S | 144 |
1 files changed, 144 insertions, 0 deletions
diff --git a/sysdeps/powerpc/powerpc64/strspn.S b/sysdeps/powerpc/powerpc64/strspn.S new file mode 100644 index 0000000000..daf5d5d747 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/strspn.S @@ -0,0 +1,144 @@ +/* Optimized strspn implementation for PowerPC64. + + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* size_t [r3] strspn (const char *string [r3], + const char *needleAccept [r4] */ + +/* Performance gains are grabbed through following techniques: + + > hashing of needle. + > hashing avoids scanning of duplicate entries in needle + across the string. + > unrolling when scanning for character in string + across hash table. */ + +/* Algorithm is as below: + 1. A empty hash table/dictionary is created comprising of + 256 ascii character set + 2. When hash entry is found in needle , the hash index + is initialized to 1 + 3. The string is scanned until end and for every character, + its corresponding hash index is compared. + 4. initial length of string (count) until first hit of + accept needle to be found is set to 0 + 4. If hash index is set to 1 for the index of string, + count is returned. + 5. Otherwise count is incremented and scanning continues + until end of string. */ + +#include <sysdep.h> + +EALIGN(strspn, 4, 0) + CALL_MCOUNT 3 + + /* PPC64 ELF ABI stack is aligned to 16 bytes. */ + addi r9,r1,-256 + /* Clear the table with 0 values */ + li r6, 0 + li r8, 4 + mtctr r8 + mr r10, r9 + .align 4 +L(zerohash): + std r6, 0(r10) + std r6, 8(r10) + std r6, 16(r10) + std r6, 24(r10) + std r6, 32(r10) + std r6, 40(r10) + std r6, 48(r10) + std r6, 56(r10) + addi r10, r10, 64 + bdnz L(zerohash) + + lbz r10,0(r4) + li r8, 1 /* r8=1, marker into hash if found in + needle */ + cmpdi cr7, r10, 0 /* accept needle is NULL */ + beq cr7, L(skipHashing) /* if needle is NULL, skip hashing */ + + .align 4 /* align section to 16 byte boundary */ +L(hashing): + stbx r8, r9, r10 /* update hash with marker for the pivot of + the needle */ + lbzu r10, 1(r4) /* load needle into r10 and update to next */ + cmpdi cr7, r10, 0 /* if needle is has reached NULL, continue */ + bne cr7, L(hashing) /* loop to hash the needle */ + +L(skipHashing): + li r10, 0 /* load counter = 0 */ + b L(beginScan) + + .align 4 /* align section to 16 byte boundary */ +L(scanUnroll): + lbzx r8, r9, r8 /* load r8 with hash value at index */ + cmpwi cr7, r8, 0 /* if we hit marker in hash, we have found + accept needle */ + beq cr7, L(ret1stIndex) /* we have hit accept needle, return the + count */ + + lbz r8, 1(r3) /* load string[1] into r8 */ + addi r10, r10, 4 /* increment counter */ + lbzx r8, r9, r8 /* load r8 with hash value at index */ + cmpwi cr7, r8, 0 /* if we hit marker in hash, we have found + accept needle */ + beq cr7, L(ret2ndIndex) /* we have hit accept needle, return the + count */ + + lbz r8, 2(r3) /* load string[2] into r8 */ + lbzx r8, r9, r8 /* load r8 with hash value at index */ + cmpwi cr7, r8, 0 /* if we hit marker in hash, we have found + accept needle */ + beq cr7, L(ret3rdIndex) /* we have hit accept needle, return the + count */ + + lbz r8, 3(r3) /* load string[3] into r8 */ + lbzx r8, r9, r8 /* load r8 with hash value at index */ + addi r3, r3, 4 /* unroll factor , increment string by 4 */ + cmpwi cr7, r8, 0 /* if we hit marker in hash, we have found + accept needle */ + beq cr7,L(ret4thIndex) /* we have hit accept needle, return the + count */ + +L(beginScan): + lbz r8, 0(r3) /* load string[0] into r8 */ + addi r6, r10, 1 /* place holder for counter + 1 */ + addi r5, r10, 2 /* place holder for counter + 2 */ + addi r4, r10, 3 /* place holder for counter + 3 */ + cmpdi cr7, r8, 0 /* if we hit marker in hash, we have found + accept needle */ + bne cr7, L(scanUnroll) /* continue scanning */ + +L(ret1stIndex): + mr r3, r10 /* update r3 for return */ + blr /* return */ + +L(ret2ndIndex): + mr r3, r6 /* update r3 for return */ + blr /* return */ + +L(ret3rdIndex): + mr r3, r5 /* update r3 for return */ + blr /* return */ + +L(ret4thIndex): + mr r3, r4 /* update r3 for return */ + blr /* done */ +END(strspn) +libc_hidden_builtin_def (strspn) |