diff options
author | Adhemerval Zanella <azanella@linux.vnet.ibm.com> | 2014-11-19 14:24:18 -0500 |
---|---|---|
committer | Adhemerval Zanella <azanella@linux.vnet.ibm.com> | 2014-12-02 07:15:58 -0500 |
commit | 2e8a2de2dafa3238b5b58eecb407af6825a780cd (patch) | |
tree | 0a89b6df52343efcbd359cb0c0c07b2f0a8bcbe7 /sysdeps/powerpc/powerpc64 | |
parent | 08f1e1d2bca9ef087813357780ec0bafe71c7d29 (diff) | |
download | glibc-2e8a2de2dafa3238b5b58eecb407af6825a780cd.tar.gz glibc-2e8a2de2dafa3238b5b58eecb407af6825a780cd.tar.xz glibc-2e8a2de2dafa3238b5b58eecb407af6825a780cd.zip |
powerpc: Add powerpc64 strspn optimization
This patch makes the POWER7-optimized strspn generic by using default doubleword stores to zero the hash, instead of VSX instructions. Performance on POWER7/POWER8 machines does not change.
Diffstat (limited to 'sysdeps/powerpc/powerpc64')
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/Makefile | 2 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c | 8 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/strspn-power7.S | 40 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c | 33 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/strspn.c | 31 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/strspn.S (renamed from sysdeps/powerpc/powerpc64/power7/strspn.S) | 73 |
6 files changed, 27 insertions, 160 deletions
diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile index abc9d2e973..d6de5a539a 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/Makefile +++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile @@ -15,7 +15,7 @@ sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \ wordcopy-power7 wordcopy-power6 wordcopy-ppc64 \ strcpy-power7 strcpy-ppc64 stpcpy-power7 stpcpy-ppc64 \ strrchr-power7 strrchr-ppc64 strncat-power7 strncat-ppc64 \ - strspn-power7 strspn-ppc64 strcspn-power7 strcspn-ppc64 \ + strcspn-power7 strcspn-ppc64 \ strpbrk-power7 strpbrk-ppc64 strncpy-power7 strncpy-ppc64 \ stpncpy-power7 stpncpy-ppc64 strcmp-power7 strcmp-ppc64 \ strcat-power7 strcat-ppc64 memmove-power7 memmove-ppc64 \ diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c index 06d5be9efb..4a9e5238dd 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c @@ -272,14 +272,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_ppc)) - /* Support sysdeps/powerpc/powerpc64/multiarch/strspn.c. */ - IFUNC_IMPL (i, name, strspn, - IFUNC_IMPL_ADD (array, i, strspn, - hwcap & PPC_FEATURE_HAS_VSX, - __strspn_power7) - IFUNC_IMPL_ADD (array, i, strspn, 1, - __strspn_ppc)) - /* Support sysdeps/powerpc/powerpc64/multiarch/strcspn.c. */ IFUNC_IMPL (i, name, strcspn, IFUNC_IMPL_ADD (array, i, strcspn, diff --git a/sysdeps/powerpc/powerpc64/multiarch/strspn-power7.S b/sysdeps/powerpc/powerpc64/multiarch/strspn-power7.S deleted file mode 100644 index 889dfeea8e..0000000000 --- a/sysdeps/powerpc/powerpc64/multiarch/strspn-power7.S +++ /dev/null @@ -1,40 +0,0 @@ -/* Optimized strspn implementation for POWER7. - Copyright (C) 2014 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - -#undef EALIGN -#define EALIGN(name, alignt, words) \ - .section ".text"; \ - ENTRY_2(__strspn_power7) \ - .align ALIGNARG(alignt); \ - EALIGN_W_##words; \ - BODY_LABEL(__strspn_power7): \ - cfi_startproc; \ - LOCALENTRY(__strspn_power7) - -#undef END -#define END(name) \ - cfi_endproc; \ - TRACEBACK(__strspn_power7) \ - END_2(__strspn_power7) - -#undef libc_hidden_builtin_def -#define libc_hidden_builtin_def(name) - -#include <sysdeps/powerpc/powerpc64/power7/strspn.S> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c deleted file mode 100644 index b35af3e016..0000000000 --- a/sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c +++ /dev/null @@ -1,33 +0,0 @@ -/* Copyright (C) 2014 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <string.h> - -#define STRSPN __strspn_ppc -#undef weak_alias -#define weak_alias(name, aliasname) \ - extern __typeof (__strspn_ppc) aliasname \ - __attribute__ ((weak, alias ("__strspn_ppc"))); -#if IS_IN (libc) && defined(SHARED) -# undef libc_hidden_builtin_def -# define libc_hidden_builtin_def(name) \ - __hidden_ver1(__strspn_ppc, __GI_strspn, __strspn_ppc); -#endif - -extern __typeof (strspn) __strspn_ppc attribute_hidden; - -#include <string/strspn.c> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strspn.c b/sysdeps/powerpc/powerpc64/multiarch/strspn.c deleted file mode 100644 index dd9ec18ba3..0000000000 --- a/sysdeps/powerpc/powerpc64/multiarch/strspn.c +++ /dev/null @@ -1,31 +0,0 @@ -/* Multiple versions of strspn. PowerPC64 version. - Copyright (C) 2014 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#if IS_IN (libc) -# include <string.h> -# include <shlib-compat.h> -# include "init-arch.h" - -extern __typeof (strspn) __strspn_ppc attribute_hidden; -extern __typeof (strspn) __strspn_power7 attribute_hidden; - -libc_ifunc (strspn, - (hwcap & PPC_FEATURE_HAS_VSX) - ? __strspn_power7 - : __strspn_ppc); -#endif diff --git a/sysdeps/powerpc/powerpc64/power7/strspn.S b/sysdeps/powerpc/powerpc64/strspn.S index d587a673f2..daf5d5d747 100644 --- a/sysdeps/powerpc/powerpc64/power7/strspn.S +++ b/sysdeps/powerpc/powerpc64/strspn.S @@ -1,4 +1,4 @@ -/* Optimized strspn implementation for PowerPC64/POWER7. +/* Optimized strspn implementation for PowerPC64. Copyright (C) 2014 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -25,8 +25,6 @@ > hashing of needle. > hashing avoids scanning of duplicate entries in needle across the string. - > initializing the hash table with Vector instructions - by quadword access. > unrolling when scanning for character in string across hash table. 
*/ @@ -46,55 +44,36 @@ #include <sysdep.h> -#undef strspn - - .machine power7 EALIGN(strspn, 4, 0) - CALL_MCOUNT 2 - - lbz r10, 0(r4) /* load r10 with needle (r4) */ - addi r9, r1, -256 /* r9 is a hash of 256 bytes */ - - li r5, 16 /* set r5 = 16 as offset */ - li r6, 32 /* set r6 = 32 as offset */ - li r8, 48 /* set r8 = 48 as offset */ - -/*Iniatliaze hash table with Zeroes in double indexed quadword accesses */ - xxlxor v0, v0, v0 /* prepare for initializing hash */ - - stxvd2x v0, r0, r9 /* initialize 1st quadword */ - stxvd2x v0, r9, r5 - stxvd2x v0, r9, r6 - stxvd2x v0, r9, r8 /* initialize 4th quadword */ - - addi r11, r9, 64 /* r11 is index to hash */ - - stxvd2x v0, r0, r11 /* initialize 5th quadword */ - stxvd2x v0, r11, r5 - stxvd2x v0, r11, r6 - stxvd2x v0, r11, r8 /* initialize 8th quadword */ - - addi r11, r9, 128 /* r11 is index to hash */ - - stxvd2x v0, r0, r11 /* initialize 9th quadword */ - stxvd2x v0, r11, r5 - stxvd2x v0, r11, r6 - stxvd2x v0, r11, r8 /* initialize 12th quadword */ - - addi r11, r9, 192 /* r11 is index to hash */ - - stxvd2x v0, r0, r11 /* initialize 13th quadword */ - stxvd2x v0, r11, r5 - stxvd2x v0, r11, r6 - stxvd2x v0, r11, r8 /* initialize 16th quadword */ - + CALL_MCOUNT 3 + + /* PPC64 ELF ABI stack is aligned to 16 bytes. 
*/ + addi r9,r1,-256 + /* Clear the table with 0 values */ + li r6, 0 + li r8, 4 + mtctr r8 + mr r10, r9 + .align 4 +L(zerohash): + std r6, 0(r10) + std r6, 8(r10) + std r6, 16(r10) + std r6, 24(r10) + std r6, 32(r10) + std r6, 40(r10) + std r6, 48(r10) + std r6, 56(r10) + addi r10, r10, 64 + bdnz L(zerohash) + + lbz r10,0(r4) li r8, 1 /* r8=1, marker into hash if found in needle */ - cmpdi cr7, r10, 0 /* accept needle is NULL */ beq cr7, L(skipHashing) /* if needle is NULL, skip hashing */ - .p2align 4 /* align section to 16 byte boundary */ + .align 4 /* align section to 16 byte boundary */ L(hashing): stbx r8, r9, r10 /* update hash with marker for the pivot of the needle */ @@ -106,7 +85,7 @@ L(skipHashing): li r10, 0 /* load counter = 0 */ b L(beginScan) - .p2align 4 /* align section to 16 byte boundary */ + .align 4 /* align section to 16 byte boundary */ L(scanUnroll): lbzx r8, r9, r8 /* load r8 with hash value at index */ cmpwi cr7, r8, 0 /* if we hit marker in hash, we have found |