diff options
author | Adhemerval Zanella <azanella@linux.vnet.ibm.com> | 2014-07-15 12:19:09 -0400 |
---|---|---|
committer | Adhemerval Zanella <azanella@linux.vnet.ibm.com> | 2014-09-10 07:39:46 -0400 |
commit | 71ae86478edc7b21872464f43fb29ff650c1681a (patch) | |
tree | a75679fa464a1d19543020ef0c4f4f982d099d99 /sysdeps/powerpc/powerpc64/multiarch | |
parent | 3b473fecdf4c52989cd915b649bb6d26c042d048 (diff) | |
download | glibc-71ae86478edc7b21872464f43fb29ff650c1681a.tar.gz glibc-71ae86478edc7b21872464f43fb29ff650c1681a.tar.xz glibc-71ae86478edc7b21872464f43fb29ff650c1681a.zip |
PowerPC: memset optimization for POWER8/PPC64
This patch adds an optimized memset implementation for POWER8. For sizes from 0 to 255 bytes, a word/doubleword algorithm similar to POWER7 optimized one is used. For size higher than 255 two strategies are used: 1. If the constant is different than 0, the memory is written with altivec vector instruction; 2. If constant is 0, dbcz instructions are used. The loop is unrolled to clear 512 byte at time. Using vector instructions increases throughput considerable, with a double performance for sizes larger than 1024. The dcbz loops unrolls also shows performance improvement, by doubling throughput for sizes larger than 8192 bytes.
Diffstat (limited to 'sysdeps/powerpc/powerpc64/multiarch')
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/Makefile | 2 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/bzero.c | 11 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c | 6 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/memset-power8.S | 43 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/memset.c | 11 |
5 files changed, 64 insertions, 9 deletions
diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile index 0de3804c0c..abc9d2e973 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/Makefile +++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile @@ -2,7 +2,7 @@ ifeq ($(subdir),string) sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \ memcpy-power4 memcpy-ppc64 memcmp-power7 memcmp-power4 \ memcmp-ppc64 memset-power7 memset-power6 memset-power4 \ - memset-ppc64 \ + memset-ppc64 memset-power8 \ mempcpy-power7 mempcpy-ppc64 memchr-power7 memchr-ppc64 \ memrchr-power7 memrchr-ppc64 rawmemchr-power7 \ rawmemchr-ppc64 strlen-power7 strlen-ppc64 strnlen-power7 \ diff --git a/sysdeps/powerpc/powerpc64/multiarch/bzero.c b/sysdeps/powerpc/powerpc64/multiarch/bzero.c index ed83541fa5..298cf005a1 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/bzero.c +++ b/sysdeps/powerpc/powerpc64/multiarch/bzero.c @@ -26,14 +26,17 @@ extern __typeof (bzero) __bzero_ppc attribute_hidden; extern __typeof (bzero) __bzero_power4 attribute_hidden; extern __typeof (bzero) __bzero_power6 attribute_hidden; extern __typeof (bzero) __bzero_power7 attribute_hidden; +extern __typeof (bzero) __bzero_power8 attribute_hidden; libc_ifunc (__bzero, - (hwcap & PPC_FEATURE_HAS_VSX) - ? __bzero_power7 : - (hwcap & PPC_FEATURE_ARCH_2_05) + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __bzero_power8 : + (hwcap & PPC_FEATURE_HAS_VSX) + ? __bzero_power7 : + (hwcap & PPC_FEATURE_ARCH_2_05) ? __bzero_power6 : (hwcap & PPC_FEATURE_POWER4) - ? __bzero_power4 + ? __bzero_power4 : __bzero_ppc); weak_alias (__bzero, bzero) diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c index a574487f2f..06d5be9efb 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c @@ -34,6 +34,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, size_t i = 0; unsigned long int hwcap = GLRO(dl_hwcap); + unsigned long int hwcap2 = GLRO(dl_hwcap2); + /* hwcap contains only the latest supported ISA, the code checks which is and fills the previous supported ones. */ if (hwcap & PPC_FEATURE_ARCH_2_06) @@ -69,6 +71,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/powerpc/powerpc64/multiarch/memset.c. */ IFUNC_IMPL (i, name, memset, + IFUNC_IMPL_ADD (array, i, memset, hwcap2 & PPC_FEATURE2_ARCH_2_07, + __memset_power8) IFUNC_IMPL_ADD (array, i, memset, hwcap & PPC_FEATURE_HAS_VSX, __memset_power7) IFUNC_IMPL_ADD (array, i, memset, hwcap & PPC_FEATURE_ARCH_2_05, @@ -134,6 +138,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/powerpc/powerpc64/multiarch/bzero.c. */ IFUNC_IMPL (i, name, bzero, + IFUNC_IMPL_ADD (array, i, bzero, hwcap2 & PPC_FEATURE2_ARCH_2_07, + __bzero_power8) IFUNC_IMPL_ADD (array, i, bzero, hwcap & PPC_FEATURE_HAS_VSX, __bzero_power7) IFUNC_IMPL_ADD (array, i, bzero, hwcap & PPC_FEATURE_ARCH_2_05, diff --git a/sysdeps/powerpc/powerpc64/multiarch/memset-power8.S b/sysdeps/powerpc/powerpc64/multiarch/memset-power8.S new file mode 100644 index 0000000000..e8a604b000 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/memset-power8.S @@ -0,0 +1,43 @@ +/* Optimized memset implementation for PowerPC64/POWER8. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef EALIGN +#define EALIGN(name, alignt, words) \ + .section ".text"; \ + ENTRY_2(__memset_power8) \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + BODY_LABEL(__memset_power8): \ + cfi_startproc; \ + LOCALENTRY(__memset_power8) + +#undef END_GEN_TB +#define END_GEN_TB(name, mask) \ + cfi_endproc; \ + TRACEBACK_MASK(__memset_power8,mask) \ + END_2(__memset_power8) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#undef __bzero +#define __bzero __bzero_power8 + +#include <sysdeps/powerpc/powerpc64/power8/memset.S> diff --git a/sysdeps/powerpc/powerpc64/multiarch/memset.c b/sysdeps/powerpc/powerpc64/multiarch/memset.c index aa2ae7056e..9c7ed10c87 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/memset.c +++ b/sysdeps/powerpc/powerpc64/multiarch/memset.c @@ -32,16 +32,19 @@ extern __typeof (__redirect_memset) __memset_ppc attribute_hidden; extern __typeof (__redirect_memset) __memset_power4 attribute_hidden; extern __typeof (__redirect_memset) __memset_power6 attribute_hidden; extern __typeof (__redirect_memset) __memset_power7 attribute_hidden; +extern __typeof (__redirect_memset) __memset_power8 attribute_hidden; /* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle ifunc symbol properly. */ libc_ifunc (__libc_memset, - (hwcap & PPC_FEATURE_HAS_VSX) - ? __memset_power7 : - (hwcap & PPC_FEATURE_ARCH_2_05) + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __memset_power8 : + (hwcap & PPC_FEATURE_HAS_VSX) + ? __memset_power7 : + (hwcap & PPC_FEATURE_ARCH_2_05) ? __memset_power6 : (hwcap & PPC_FEATURE_POWER4) - ? __memset_power4 + ? __memset_power4 : __memset_ppc); #undef memset |