diff options
-rw-r--r-- | ChangeLog | 11 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/power7/stpcpy.S | 24 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/power7/strcpy.S | 274 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/stpcpy.S | 99 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/strcpy.S | 144 |
5 files changed, 418 insertions, 134 deletions
diff --git a/ChangeLog b/ChangeLog index c2e5261643..23f6b709a0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +2013-10-04 Adhemerval Zanella <azanella@linux.vnet.ibm.com> + + * sysdeps/powerpc/powerpc64/strcpy.S (strcpy): Add word load/store + to provide a boost for large inputs with word alignment. + * sysdeps/powerpc/powerpc64/stpcpy.S (__stpcpy): Rewrite + implementation based on optimized PPC64 strcpy. + * sysdeps/powerpc/powerpc64/power7/strcpy.S: New file: optimized + strcpy for PPC64/POWER7 based on both doubleword and word load/store. + * sysdeps/powerpc/powerpc64/power7/stpcpy.S: New file: optimized + stpcpy for PPC64/POWER7 based on PPC64/POWER7 strcpy. + 2013-10-25 Ondřej Bílka <neleai@seznam.cz> [BZ 2801] diff --git a/sysdeps/powerpc/powerpc64/power7/stpcpy.S b/sysdeps/powerpc/powerpc64/power7/stpcpy.S new file mode 100644 index 0000000000..727dd06e74 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/power7/stpcpy.S @@ -0,0 +1,24 @@ +/* Optimized stpcpy implementation for PowerPC64/POWER7. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define USE_AS_STPCPY +#include <sysdeps/powerpc/powerpc64/power7/strcpy.S> + +weak_alias (__stpcpy, stpcpy) +libc_hidden_def (__stpcpy) +libc_hidden_builtin_def (stpcpy) diff --git a/sysdeps/powerpc/powerpc64/power7/strcpy.S b/sysdeps/powerpc/powerpc64/power7/strcpy.S new file mode 100644 index 0000000000..5c341a1483 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/power7/strcpy.S @@ -0,0 +1,274 @@ +/* Optimized strcpy/stpcpy implementation for PowerPC64/POWER7. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* Implements the function + + char * [r3] strcpy (char *dest [r3], const char *src [r4]) + + or + + char * [r3] stpcpy (char *dest [r3], const char *src [r4]) + + if USE_AS_STPCPY is defined. It tries to use aligned memory accesses + when possible using the following algorithm: + + if (((((uintptr_t)dst & 0x7UL) == 0) && ((uintptr_t)src & 0x7UL) == 0)) + goto aligned_doubleword_copy; + if (((((uintptr_t)dst & 0x3UL) == 0) && ((uintptr_t)src & 0x3UL) == 0)) + goto aligned_word_copy; + if (((uintptr_t)dst & 0x7UL) == ((uintptr_t)src & 0x7UL)) + goto same_alignment; + goto unaligned; + + The aligned comparisons are made using cmpb instructions. 
*/ + +#ifdef USE_AS_STPCPY +# define FUNC_NAME __stpcpy +#else +# define FUNC_NAME strcpy +#endif + + .machine power7 +EALIGN (FUNC_NAME, 4, 0) + CALL_MCOUNT 2 + +#define rTMP r0 +#ifdef USE_AS_STPCPY +#define rRTN r3 /* pointer to previous word/doubleword in dest */ +#else +#define rRTN r12 /* pointer to previous word/doubleword in dest */ +#endif +#define rSRC r4 /* pointer to previous word/doubleword in src */ +#define rMASK r5 /* mask 0xffffffff | 0xffffffffffffffff */ +#define rWORD r6 /* current word from src */ +#define rALT r7 /* alternate word from src */ +#define rRTNAL r8 /* alignment of return pointer */ +#define rSRCAL r9 /* alignment of source pointer */ +#define rALCNT r10 /* bytes to read to reach 8 bytes alignment */ +#define rSUBAL r11 /* doubleword minus unaligned displacement */ + +#ifndef USE_AS_STPCPY +/* Save the dst pointer to use as return value. */ + mr rRTN, r3 +#endif + or rTMP, rSRC, rRTN + clrldi. rTMP, rTMP, 61 + bne L(check_word_alignment) + b L(aligned_doubleword_copy) + +L(same_alignment): +/* Src and dst with same alignment: align both to doubleword. */ + mr rALCNT, rRTN + lbz rWORD, 0(rSRC) + subfic rSUBAL, rRTNAL, 8 + addi rRTN, rRTN, 1 + addi rSRC, rSRC, 1 + cmpdi cr7, rWORD, 0 + stb rWORD, 0(rALCNT) + beq cr7, L(s2) + + add rALCNT, rALCNT, rSUBAL + subf rALCNT, rRTN, rALCNT + addi rALCNT, rALCNT, 1 + mtctr rALCNT + b L(s1) + + .align 4 +L(s0): + addi rSRC, rSRC, 1 + lbz rWORD, -1(rSRC) + cmpdi cr7, rWORD, 0 + stb rWORD, -1(rALCNT) + beqlr cr7 + mr rRTN, rALCNT +L(s1): + addi rALCNT, rRTN,1 + bdnz L(s0) + b L(aligned_doubleword_copy) + .align 4 +L(s2): + mr rRTN, rALCNT + blr + +/* For doubleword aligned memory, operate using doubleword load and stores. 
*/ + .align 4 +L(aligned_doubleword_copy): + li rMASK, 0 + addi rRTN, rRTN, -8 + ld rWORD, 0(rSRC) + b L(g2) + + .align 4 +L(g0): ldu rALT, 8(rSRC) + stdu rWORD, 8(rRTN) + cmpb rTMP, rALT, rMASK + cmpdi rTMP, 0 + bne L(g1) + ldu rWORD, 8(rSRC) + stdu rALT, 8(rRTN) +L(g2): cmpb rTMP, rWORD, rMASK + cmpdi rTMP, 0 /* If rTMP is 0, no null's have been found. */ + beq L(g0) + + mr rALT, rWORD +/* We've hit the end of the string. Do the rest byte-by-byte. */ +L(g1): +#ifdef __LITTLE_ENDIAN__ + extrdi. rTMP, rALT, 8, 56 + stbu rALT, 8(rRTN) + beqlr- + extrdi. rTMP, rALT, 8, 48 + stbu rTMP, 1(rRTN) + beqlr- + extrdi. rTMP, rALT, 8, 40 + stbu rTMP, 1(rRTN) + beqlr- + extrdi. rTMP, rALT, 8, 32 + stbu rTMP, 1(rRTN) + beqlr- + extrdi. rTMP, rALT, 8, 24 + stbu rTMP, 1(rRTN) + beqlr- + extrdi. rTMP, rALT, 8, 16 + stbu rTMP, 1(rRTN) + beqlr- + extrdi. rTMP, rALT, 8, 8 + stbu rTMP, 1(rRTN) + beqlr- + extrdi rTMP, rALT, 8, 0 + stbu rTMP, 1(rRTN) +#else + extrdi. rTMP, rALT, 8, 0 + stbu rTMP, 8(rRTN) + beqlr + extrdi. rTMP, rALT, 8, 8 + stbu rTMP, 1(rRTN) + beqlr + extrdi. rTMP, rALT, 8, 16 + stbu rTMP, 1(rRTN) + beqlr + extrdi. rTMP, rALT, 8, 24 + stbu rTMP, 1(rRTN) + beqlr + extrdi. rTMP, rALT, 8, 32 + stbu rTMP, 1(rRTN) + beqlr + extrdi. rTMP, rALT, 8, 40 + stbu rTMP, 1(rRTN) + beqlr + extrdi. rTMP, rALT, 8, 48 + stbu rTMP, 1(rRTN) + beqlr + stbu rALT, 1(rRTN) +#endif + blr + +L(check_word_alignment): + clrldi. rTMP, rTMP, 62 + beq L(aligned_word_copy) + rldicl rRTNAL, rRTN, 0, 61 + rldicl rSRCAL, rSRC, 0, 61 + cmpld cr7, rSRCAL, rRTNAL + beq cr7, L(same_alignment) + b L(unaligned) + +/* For word aligned memory, operate using word load and stores. 
*/ + .align 4 +L(aligned_word_copy): + li rMASK, 0 + addi rRTN, rRTN, -4 + lwz rWORD, 0(rSRC) + b L(g5) + + .align 4 +L(g3): lwzu rALT, 4(rSRC) + stwu rWORD, 4(rRTN) + cmpb rTMP, rALT, rMASK + cmpwi rTMP, 0 + bne L(g4) + lwzu rWORD, 4(rSRC) + stwu rALT, 4(rRTN) +L(g5): cmpb rTMP, rWORD, rMASK + cmpwi rTMP, 0 /* If rTMP is 0, no null in word. */ + beq L(g3) + + mr rALT, rWORD +/* We've hit the end of the string. Do the rest byte-by-byte. */ +L(g4): +#ifdef __LITTLE_ENDIAN__ + rlwinm. rTMP, rALT, 0, 24, 31 + stbu rALT, 4(rRTN) + beqlr- + rlwinm. rTMP, rALT, 24, 24, 31 + stbu rTMP, 1(rRTN) + beqlr- + rlwinm. rTMP, rALT, 16, 24, 31 + stbu rTMP, 1(rRTN) + beqlr- + rlwinm rTMP, rALT, 8, 24, 31 + stbu rTMP, 1(rRTN) +#else + rlwinm. rTMP, rALT, 8, 24, 31 + stbu rTMP, 4(rRTN) + beqlr + rlwinm. rTMP, rALT, 16, 24, 31 + stbu rTMP, 1(rRTN) + beqlr + rlwinm. rTMP, rALT, 24, 24, 31 + stbu rTMP, 1(rRTN) + beqlr + stbu rALT, 1(rRTN) +#endif + blr + +/* Oh well. In this case, we just do a byte-by-byte copy. */ + .align 4 +L(unaligned): + lbz rWORD, 0(rSRC) + addi rRTN, rRTN, -1 + cmpdi rWORD, 0 + beq L(u2) + + .align 5 +L(u0): lbzu rALT, 1(rSRC) + stbu rWORD, 1(rRTN) + cmpdi rALT, 0 + beq L(u1) + lbzu rWORD, 1(rSRC) + stbu rALT, 1(rRTN) + cmpdi rWORD, 0 + beq L(u2) + lbzu rALT, 1(rSRC) + stbu rWORD, 1(rRTN) + cmpdi rALT, 0 + beq L(u1) + lbzu rWORD, 1(rSRC) + stbu rALT, 1(rRTN) + cmpdi rWORD, 0 + bne L(u0) +L(u2): stbu rWORD, 1(rRTN) + blr +L(u1): stbu rALT, 1(rRTN) + blr +END (FUNC_NAME) + +#ifndef USE_AS_STPCPY +libc_hidden_builtin_def (strcpy) +#endif diff --git a/sysdeps/powerpc/powerpc64/stpcpy.S b/sysdeps/powerpc/powerpc64/stpcpy.S index c0b39729e2..09aa3be6b5 100644 --- a/sysdeps/powerpc/powerpc64/stpcpy.S +++ b/sysdeps/powerpc/powerpc64/stpcpy.S @@ -16,103 +16,8 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> - -/* See strlen.s for comments on how the end-of-string testing works. 
*/ - -/* char * [r3] stpcpy (char *dest [r3], const char *src [r4]) */ - -EALIGN (__stpcpy, 4, 0) - CALL_MCOUNT 2 - -#define rTMP r0 -#define rRTN r3 -#define rDEST r3 /* pointer to previous word in dest */ -#define rSRC r4 /* pointer to previous word in src */ -#define rWORD r6 /* current word from src */ -#define rFEFE r7 /* 0xfefefeff */ -#define r7F7F r8 /* 0x7f7f7f7f */ -#define rNEG r9 /* ~(word in src | 0x7f7f7f7f) */ -#define rALT r10 /* alternate word from src */ - - or rTMP, rSRC, rDEST - clrldi. rTMP, rTMP, 62 - addi rDEST, rDEST, -4 - bne L(unaligned) - - lis rFEFE, -0x101 - lis r7F7F, 0x7f7f - lwz rWORD, 0(rSRC) - addi rFEFE, rFEFE, -0x101 - addi r7F7F, r7F7F, 0x7f7f - b L(g2) - -L(g0): lwzu rALT, 4(rSRC) - stwu rWORD, 4(rDEST) - add rTMP, rFEFE, rALT - nor rNEG, r7F7F, rALT - and. rTMP, rTMP, rNEG - bne- L(g1) - lwzu rWORD, 4(rSRC) - stwu rALT, 4(rDEST) -L(g2): add rTMP, rFEFE, rWORD - nor rNEG, r7F7F, rWORD - and. rTMP, rTMP, rNEG - beq+ L(g0) - - mr rALT, rWORD -/* We've hit the end of the string. Do the rest byte-by-byte. */ -L(g1): -#ifdef __LITTLE_ENDIAN__ - rlwinm. rTMP, rALT, 0, 24, 31 - stbu rALT, 4(rDEST) - beqlr- - rlwinm. rTMP, rALT, 24, 24, 31 - stbu rTMP, 1(rDEST) - beqlr- - rlwinm. rTMP, rALT, 16, 24, 31 - stbu rTMP, 1(rDEST) - beqlr- - rlwinm rTMP, rALT, 8, 24, 31 - stbu rTMP, 1(rDEST) - blr -#else - rlwinm. rTMP, rALT, 8, 24, 31 - stbu rTMP, 4(rDEST) - beqlr- - rlwinm. rTMP, rALT, 16, 24, 31 - stbu rTMP, 1(rDEST) - beqlr- - rlwinm. rTMP, rALT, 24, 24, 31 - stbu rTMP, 1(rDEST) - beqlr- - stbu rALT, 1(rDEST) - blr -#endif - -/* Oh well. In this case, we just do a byte-by-byte copy. */ - .align 4 - nop -L(unaligned): - lbz rWORD, 0(rSRC) - addi rDEST, rDEST, 3 - cmpwi rWORD, 0 - beq- L(u2) - -L(u0): lbzu rALT, 1(rSRC) - stbu rWORD, 1(rDEST) - cmpwi rALT, 0 - beq- L(u1) - nop /* Let 601 load start of loop. 
*/ - lbzu rWORD, 1(rSRC) - stbu rALT, 1(rDEST) - cmpwi rWORD, 0 - bne+ L(u0) -L(u2): stbu rWORD, 1(rDEST) - blr -L(u1): stbu rALT, 1(rDEST) - blr -END (__stpcpy) +#define USE_AS_STPCPY +#include <sysdeps/powerpc/powerpc64/strcpy.S> weak_alias (__stpcpy, stpcpy) libc_hidden_def (__stpcpy) diff --git a/sysdeps/powerpc/powerpc64/strcpy.S b/sysdeps/powerpc/powerpc64/strcpy.S index a7fd85bad4..793325d7be 100644 --- a/sysdeps/powerpc/powerpc64/strcpy.S +++ b/sysdeps/powerpc/powerpc64/strcpy.S @@ -22,25 +22,38 @@ /* char * [r3] strcpy (char *dest [r3], const char *src [r4]) */ -EALIGN (strcpy, 4, 0) +#ifdef USE_AS_STPCPY +# define FUNC_NAME __stpcpy +#else +# define FUNC_NAME strcpy +#endif + +EALIGN (FUNC_NAME, 4, 0) CALL_MCOUNT 2 #define rTMP r0 -#define rRTN r3 /* incoming DEST arg preserved as result */ -#define rSRC r4 /* pointer to previous word in src */ -#define rDEST r5 /* pointer to previous word in dest */ +#ifdef USE_AS_STPCPY +#define rRTN r3 /* pointer to previous word/doubleword in dest */ +#else +#define rRTN r12 /* pointer to previous word/doubleword in dest */ +#endif +#define rSRC r4 /* pointer to previous word/doubleword in src */ #define rWORD r6 /* current word from src */ -#define rFEFE r7 /* constant 0xfefefefefefefeff (-0x0101010101010101) */ -#define r7F7F r8 /* constant 0x7f7f7f7f7f7f7f7f */ -#define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */ +#define rFEFE r7 /* constant 0xfefefeff | 0xfefefefefefefeff */ +#define r7F7F r8 /* constant 0x7f7f7f7f | 0x7f7f7f7f7f7f7f7f */ +#define rNEG r9 /* ~(word in s1 | r7F7F) */ #define rALT r10 /* alternate word from src */ - dcbt 0,rSRC +#ifndef USE_AS_STPCPY +/* Save the dst pointer to use as return value. */ + mr rRTN, r3 +#endif or rTMP, rSRC, rRTN clrldi. rTMP, rTMP, 61 - addi rDEST, rRTN, -8 - dcbtst 0,rRTN - bne L(unaligned) + bne L(check_word_alignment) + +/* For doubleword aligned memory, operate using doubleword load and stores. 
*/ + addi rRTN, rRTN, -8 lis rFEFE, -0x101 lis r7F7F, 0x7f7f @@ -53,13 +66,13 @@ EALIGN (strcpy, 4, 0) b L(g2) L(g0): ldu rALT, 8(rSRC) - stdu rWORD, 8(rDEST) + stdu rWORD, 8(rRTN) add rTMP, rFEFE, rALT nor rNEG, r7F7F, rALT and. rTMP, rTMP, rNEG bne- L(g1) ldu rWORD, 8(rSRC) - stdu rALT, 8(rDEST) + stdu rALT, 8(rRTN) L(g2): add rTMP, rFEFE, rWORD nor rNEG, r7F7F, rWORD and. rTMP, rTMP, rNEG @@ -70,77 +83,134 @@ L(g2): add rTMP, rFEFE, rWORD L(g1): #ifdef __LITTLE_ENDIAN__ extrdi. rTMP, rALT, 8, 56 - stb rALT, 8(rDEST) + stbu rALT, 8(rRTN) beqlr- extrdi. rTMP, rALT, 8, 48 - stb rTMP, 9(rDEST) + stbu rTMP, 1(rRTN) beqlr- extrdi. rTMP, rALT, 8, 40 - stb rTMP, 10(rDEST) + stbu rTMP, 1(rRTN) beqlr- extrdi. rTMP, rALT, 8, 32 - stb rTMP, 11(rDEST) + stbu rTMP, 1(rRTN) beqlr- extrdi. rTMP, rALT, 8, 24 - stb rTMP, 12(rDEST) + stbu rTMP, 1(rRTN) beqlr- extrdi. rTMP, rALT, 8, 16 - stb rTMP, 13(rDEST) + stbu rTMP, 1(rRTN) beqlr- extrdi. rTMP, rALT, 8, 8 - stb rTMP, 14(rDEST) + stbu rTMP, 1(rRTN) beqlr- extrdi rTMP, rALT, 8, 0 - stb rTMP, 15(rDEST) - blr + stbu rTMP, 1(rRTN) #else extrdi. rTMP, rALT, 8, 0 - stb rTMP, 8(rDEST) + stbu rTMP, 8(rRTN) beqlr- extrdi. rTMP, rALT, 8, 8 - stb rTMP, 9(rDEST) + stbu rTMP, 1(rRTN) beqlr- extrdi. rTMP, rALT, 8, 16 - stb rTMP, 10(rDEST) + stbu rTMP, 1(rRTN) beqlr- extrdi. rTMP, rALT, 8, 24 - stb rTMP, 11(rDEST) + stbu rTMP, 1(rRTN) beqlr- extrdi. rTMP, rALT, 8, 32 - stb rTMP, 12(rDEST) - beqlr- + stbu rTMP, 1(rRTN) + beqlr extrdi. rTMP, rALT, 8, 40 - stb rTMP, 13(rDEST) + stbu rTMP, 1(rRTN) beqlr- extrdi. rTMP, rALT, 8, 48 - stb rTMP, 14(rDEST) + stbu rTMP, 1(rRTN) beqlr- - stb rALT, 15(rDEST) + stbu rALT, 1(rRTN) +#endif blr + +L(check_word_alignment): + clrldi. rTMP, rTMP, 62 + bne L(unaligned) + +/* For word aligned memory, operate using word load and stores. 
*/ + addi rRTN, rRTN, -4 + + lis rFEFE, -0x101 + lis r7F7F, 0x7f7f + lwz rWORD, 0(rSRC) + addi rFEFE, rFEFE, -0x101 + addi r7F7F, r7F7F, 0x7f7f + b L(g5) + +L(g3): lwzu rALT, 4(rSRC) + stwu rWORD, 4(rRTN) + add rTMP, rFEFE, rALT + nor rNEG, r7F7F, rALT + and. rTMP, rTMP, rNEG + bne- L(g4) + lwzu rWORD, 4(rSRC) + stwu rALT, 4(rRTN) +L(g5): add rTMP, rFEFE, rWORD + nor rNEG, r7F7F, rWORD + and. rTMP, rTMP, rNEG + beq+ L(g3) + + mr rALT, rWORD +/* We've hit the end of the string. Do the rest byte-by-byte. */ +L(g4): +#ifdef __LITTLE_ENDIAN__ + rlwinm. rTMP, rALT, 0, 24, 31 + stbu rALT, 4(rRTN) + beqlr- + rlwinm. rTMP, rALT, 24, 24, 31 + stbu rTMP, 1(rRTN) + beqlr- + rlwinm. rTMP, rALT, 16, 24, 31 + stbu rTMP, 1(rRTN) + beqlr- + rlwinm rTMP, rALT, 8, 24, 31 + stbu rTMP, 1(rRTN) +#else + rlwinm. rTMP, rALT, 8, 24, 31 + stbu rTMP, 4(rRTN) + beqlr- + rlwinm. rTMP, rALT, 16, 24, 31 + stbu rTMP, 1(rRTN) + beqlr- + rlwinm. rTMP, rALT, 24, 24, 31 + stbu rTMP, 1(rRTN) + beqlr- + stbu rALT, 1(rRTN) #endif + blr /* Oh well. In this case, we just do a byte-by-byte copy. */ .align 4 nop L(unaligned): lbz rWORD, 0(rSRC) - addi rDEST, rRTN, -1 + addi rRTN, rRTN, -1 cmpwi rWORD, 0 beq- L(u2) L(u0): lbzu rALT, 1(rSRC) - stbu rWORD, 1(rDEST) + stbu rWORD, 1(rRTN) cmpwi rALT, 0 beq- L(u1) nop /* Let 601 load start of loop. */ lbzu rWORD, 1(rSRC) - stbu rALT, 1(rDEST) + stbu rALT, 1(rRTN) cmpwi rWORD, 0 bne+ L(u0) -L(u2): stb rWORD, 1(rDEST) +L(u2): stbu rWORD, 1(rRTN) blr -L(u1): stb rALT, 1(rDEST) +L(u1): stbu rALT, 1(rRTN) blr +END (FUNC_NAME) -END (strcpy) +#ifndef USE_AS_STPCPY libc_hidden_builtin_def (strcpy) +#endif |