diff options
author | Adhemerval Zanella <azanella@linux.vnet.ibm.com> | 2013-03-11 17:29:43 -0300 |
---|---|---|
committer | Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com> | 2013-03-15 09:48:31 -0300 |
commit | a2f7665683ca5913883db08ee78dc9dd7bd6dda5 (patch) | |
tree | 136ded6fee18aa5c0899654f616ab8663446f3e0 | |
parent | ca7a6adf22bfc375452aba1f0606b97e89659edc (diff) | |
download | glibc-a2f7665683ca5913883db08ee78dc9dd7bd6dda5.tar.gz glibc-a2f7665683ca5913883db08ee78dc9dd7bd6dda5.tar.xz glibc-a2f7665683ca5913883db08ee78dc9dd7bd6dda5.zip |
PowerPC: Add 64-bit multiarch implementation of memcpy
Move and rename the specialized memcpy implementations to the multiarch folder and add the IFUNC memcpy source.
-rw-r--r-- | ChangeLog | 21 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/Makefile | 4 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c | 62 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/memcpy-a2.S (renamed from sysdeps/powerpc/powerpc64/a2/memcpy.S) | 5 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/memcpy-cell.S (renamed from sysdeps/powerpc/powerpc64/cell/memcpy.S) | 5 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/memcpy-power4.S (renamed from sysdeps/powerpc/powerpc64/power4/memcpy.S) | 7 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/memcpy-power6.S (renamed from sysdeps/powerpc/powerpc64/power6/memcpy.S) | 5 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/memcpy-power7.S (renamed from sysdeps/powerpc/powerpc64/power7/memcpy.S) | 5 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/memcpy.S | 92 |
9 files changed, 190 insertions, 16 deletions
diff --git a/ChangeLog b/ChangeLog index d4cb678638..098973b4c5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,24 @@ +2013-03-07 Adhemerval Zanella <azanella@linux.vnet.ibm.com> + Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com> + + * sysdeps/powerpc/powerpc64/a2/memcpy.S: Moved to multiarch folder. + * sysdeps/powerpc/powerpc64/cell/memcpy.S: Moved to multiarch folder. + * sysdeps/powerpc/powerpc64/power4/memcpy.S: Moved to multiarch folder. + * sysdeps/powerpc/powerpc64/power6/memcpy.S: Moved to multiarch folder. + * sysdeps/powerpc/powerpc64/power7/memcpy.S: Moved to multiarch folder. + * sysdeps/powerpc/powerpc64/multiarch/Makefile: Multiarch makefile. + * sysdeps/powerpc/powerpc64/multiarch/memcpy-a2.S: Moved from a2 folder. + * sysdeps/powerpc/powerpc64/multiarch/memcpy-cell.S: Moved from cell + folder. + * sysdeps/powerpc/powerpc64/multiarch/memcpy-power4.S: Moved from power4 + folder. + * sysdeps/powerpc/powerpc64/multiarch/memcpy-power6.S: Moved from power6 + folder. + * sysdeps/powerpc/powerpc64/multiarch/memcpy-power7.S: Moved from power7 + folder. + * sysdeps/powerpc/powerpc64/multiarch/memcpy.S: Multiarch implementation + using IFUNC extension. 
+ 2013-03-07 Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com> * sysdeps/powerpc/powerpc32/multiarch/Makefile (sysdep_routines): diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile new file mode 100644 index 0000000000..5ba03ca84b --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile @@ -0,0 +1,4 @@ +ifeq ($(subdir),string) +sysdep_routines += memcpy-a2 memcpy-cell memcpy-power4 memcpy-power6 \ + memcpy-power7 +endif diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c new file mode 100644 index 0000000000..760f46c764 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c @@ -0,0 +1,62 @@ +/* Enumerate available IFUNC implementations of a function. PowerPC64 version. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <assert.h> +#include <string.h> +#include <wchar.h> +#include <ldsodefs.h> +#include <ifunc-impl-list.h> + +/* Maximum number of IFUNC implementations. */ +#define MAX_IFUNC 6 + +/* Composite hwcap masks: each one also matches the newer ISA levels it lists. 
*/ +#define PPC_POWER4 (PPC_FEATURE_POWER4|PPC_FEATURE_ARCH_2_05| \ + PPC_FEATURE_ARCH_2_06) +#define PPC_POWER6 (PPC_FEATURE_ARCH_2_05|PPC_FEATURE_ARCH_2_06) + +size_t +__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + size_t max) +{ + assert (max >= MAX_IFUNC); /* ARRAY must have room for the six memcpy entries listed below. */ + + size_t i = 0; + + uint32_t hwcap; + + hwcap = GLRO(dl_hwcap); + +#ifdef SHARED + IFUNC_IMPL (i, name, memcpy, + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_HAS_VSX, + __memcpy_power7) + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_ARCH_2_06, + __memcpy_power_a2) + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_POWER6, + __memcpy_power6) + IFUNC_IMPL_ADD (array, i, memcpy, + hwcap & PPC_FEATURE_CELL_BE, /* Unshifted mask: the memcpy.S selector tests hwcap bit 16. */ + __memcpy_cell) + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_POWER4, + __memcpy_power4) + IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ppc64)) +#endif + + return i; +} diff --git a/sysdeps/powerpc/powerpc64/a2/memcpy.S b/sysdeps/powerpc/powerpc64/multiarch/memcpy-a2.S index 84c82bb768..a2834f3345 100644 --- a/sysdeps/powerpc/powerpc64/a2/memcpy.S +++ b/sysdeps/powerpc/powerpc64/multiarch/memcpy-a2.S @@ -30,7 +30,7 @@ .machine a2 -EALIGN (memcpy, 5, 0) +EALIGN (__memcpy_power_a2, 5, 0) CALL_MCOUNT 3 dcbt 0,r4 /* Prefetch ONE SRC cacheline */ @@ -520,5 +520,4 @@ L(endloop2_128): b L(lessthancacheline) -END_GEN_TB (memcpy,TB_TOCLESS) -libc_hidden_builtin_def (memcpy) +END_GEN_TB (__memcpy_power_a2,TB_TOCLESS) diff --git a/sysdeps/powerpc/powerpc64/cell/memcpy.S b/sysdeps/powerpc/powerpc64/multiarch/memcpy-cell.S index a271965dd7..c8dd87fa03 100644 --- a/sysdeps/powerpc/powerpc64/cell/memcpy.S +++ b/sysdeps/powerpc/powerpc64/multiarch/memcpy-cell.S @@ -39,7 +39,7 @@ .align 7 -EALIGN (memcpy, 5, 0) +EALIGN (__memcpy_cell, 5, 0) CALL_MCOUNT 3 dcbt 0,r4 /* Prefetch ONE SRC cacheline */ @@ -238,5 +238,4 @@ EALIGN (memcpy, 5, 0) stb r0,0(r6) 1: blr -END_GEN_TB (memcpy,TB_TOCLESS) -libc_hidden_builtin_def (memcpy) +END_GEN_TB (__memcpy_cell,TB_TOCLESS) diff 
--git a/sysdeps/powerpc/powerpc64/power4/memcpy.S b/sysdeps/powerpc/powerpc64/multiarch/memcpy-power4.S index c43d1d2e4e..cd167e253c 100644 --- a/sysdeps/powerpc/powerpc64/power4/memcpy.S +++ b/sysdeps/powerpc/powerpc64/multiarch/memcpy-power4.S @@ -34,7 +34,7 @@ Each case has a optimized unrolled loop. */ .machine power4 -EALIGN (memcpy, 5, 0) +EALIGN (__memcpy_power4, 5, 0) CALL_MCOUNT 3 cmpldi cr1,5,31 @@ -93,7 +93,7 @@ EALIGN (memcpy, 5, 0) /* Move doublewords where destination and source are DW aligned. Use a unrolled loop to copy 4 doubleword (32-bytes) per iteration. - If the copy is not an exact multiple of 32 bytes, 1-3 + If the copy is not an exact multiple of 32 bytes, 1-3 doublewords are copied as needed to set up the main loop. After the main loop exits there may be a tail of 1-7 bytes. These byte are copied a word/halfword/byte at a time as needed to preserve alignment. */ @@ -411,5 +411,4 @@ EALIGN (memcpy, 5, 0) ld 31,-8(1) ld 3,-16(1) blr -END_GEN_TB (memcpy,TB_TOCLESS) -libc_hidden_builtin_def (memcpy) +END_GEN_TB (__memcpy_power4,TB_TOCLESS) diff --git a/sysdeps/powerpc/powerpc64/power6/memcpy.S b/sysdeps/powerpc/powerpc64/multiarch/memcpy-power6.S index 55c0d71184..e0eba0ac27 100644 --- a/sysdeps/powerpc/powerpc64/power6/memcpy.S +++ b/sysdeps/powerpc/powerpc64/multiarch/memcpy-power6.S @@ -41,7 +41,7 @@ for the destination. */ .machine "power6" -EALIGN (memcpy, 7, 0) +EALIGN (__memcpy_power6, 7, 0) CALL_MCOUNT 3 cmpldi cr1,5,31 @@ -1163,5 +1163,4 @@ L(du_done): ld 31,-8(1) ld 3,-16(1) blr -END_GEN_TB (memcpy,TB_TOCLESS) -libc_hidden_builtin_def (memcpy) +END_GEN_TB (__memcpy_power6,TB_TOCLESS) diff --git a/sysdeps/powerpc/powerpc64/power7/memcpy.S b/sysdeps/powerpc/powerpc64/multiarch/memcpy-power7.S index 800a9f1bb1..e6ee4e2570 100644 --- a/sysdeps/powerpc/powerpc64/power7/memcpy.S +++ b/sysdeps/powerpc/powerpc64/multiarch/memcpy-power7.S @@ -24,7 +24,7 @@ Returns 'dst'. 
*/ .machine power7 -EALIGN (memcpy, 5, 0) +EALIGN (__memcpy_power7, 5, 0) CALL_MCOUNT 3 cmpldi cr1,5,31 @@ -500,5 +500,4 @@ L(end_unaligned_loop): ld 3,-16(1) blr -END_GEN_TB (memcpy,TB_TOCLESS) -libc_hidden_builtin_def (memcpy) +END_GEN_TB (__memcpy_power7,TB_TOCLESS) diff --git a/sysdeps/powerpc/powerpc64/multiarch/memcpy.S b/sysdeps/powerpc/powerpc64/multiarch/memcpy.S new file mode 100644 index 0000000000..92c2e2a3b4 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/memcpy.S @@ -0,0 +1,92 @@ +/* Multiple versions of memcpy, selected through a GNU indirect function. PowerPC64 version. + Copyright (C) 2012-2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <rtld-global-offsets.h> + +/* Define multiple versions only for the definition in libc and for + the DSO. In static binaries memcpy is needed before the initialization + has happened. */ +#if defined SHARED && !defined NOT_IN_libc + .section ".toc","aw" +.LC__dl_hwcap: +# ifdef SHARED + .tc _rtld_global_ro[TC],_rtld_global_ro +# else + .tc _dl_hwcap[TC],_dl_hwcap +# endif + .section ".text" + +ENTRY(memcpy) + .type memcpy, @gnu_indirect_function + ld r5,.LC__dl_hwcap@toc(r2) +# ifdef SHARED + /* Load _rtld_global_ro._dl_hwcap. 
*/ + ld r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r5) +# else + ld r5,0(r5) /* Load extern _dl_hwcap. */ +# endif + rldicl. r0,r5,57,63 /* r0 = (hwcap >> 7) & 1; presumably PPC_FEATURE_HAS_VSX. */ + bne- L(power7) + rldicl. r0,r5,56,63 /* r0 = (hwcap >> 8) & 1; presumably PPC_FEATURE_ARCH_2_06. */ + bne- L(powerA2) + rldicl. r0,r5,52,63 /* r0 = (hwcap >> 12) & 1; presumably PPC_FEATURE_ARCH_2_05. */ + bne- L(power6) + rldicl. r0,r5,48,63 /* r0 = (hwcap >> 16) & 1; presumably PPC_FEATURE_CELL_BE. */ + bne- L(powercell) + rldicl. r0,r5,45,63 /* r0 = (hwcap >> 19) & 1; presumably PPC_FEATURE_POWER4. */ + bne- L(power4) + ld r3,__memcpy_ppc64@got(r2) /* No tested bit was set: hand back the generic version in r3. */ + blr +L(power7): + ld r3,__memcpy_power7@got(r2) + blr +L(powerA2): + ld r3,__memcpy_power_a2@got(r2) + blr +L(power6): + ld r3,__memcpy_power6@got(r2) + blr +L(powercell): + ld r3,__memcpy_cell@got(r2) + blr +L(power4): + ld r3,__memcpy_power4@got(r2) + blr +END(memcpy) + +# undef EALIGN /* The overrides below ignore NAME and emit __memcpy_ppc64; presumably so the ../memcpy.S included at the end defines that symbol. */ +# define EALIGN(name, alignt, words) \ + ENTRY_2(__memcpy_ppc64) \ +BODY_LABEL(__memcpy_ppc64): \ + cfi_startproc; + +# undef END_GEN_TB +# define END_GEN_TB(name, mask) \ + cfi_endproc; \ + TRACEBACK_MASK(__memcpy_ppc64,mask) \ + END_2(__memcpy_ppc64) + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + .globl __GI_memcpy; __GI_memcpy = __memcpy_ppc64 + +#endif + +#include "../memcpy.S" |