From 69f13dbf06c6195de0ada8632271d58ca3cf55da Mon Sep 17 00:00:00 2001 From: Adhemerval Zanella Date: Thu, 26 Sep 2013 09:29:19 -0500 Subject: PowerPC: strcpy/stpcpy optimization for PPC64/POWER7 This patch intends to unify both strcpy and stpcpy implementationsi for PPC64 and PPC64/POWER7. The idead default powerpc64 implementation is to provide both doubleword and word aligned memory access. For PPC64/POWER7 is also provide doubleword and word memory access, remove the branch hints, use the cmpb instruction for compare doubleword/words, and add an optimization for inputs of same alignment. --- sysdeps/powerpc/powerpc64/stpcpy.S | 99 +------------------------------------- 1 file changed, 2 insertions(+), 97 deletions(-) (limited to 'sysdeps/powerpc/powerpc64/stpcpy.S') diff --git a/sysdeps/powerpc/powerpc64/stpcpy.S b/sysdeps/powerpc/powerpc64/stpcpy.S index c0b39729e2..09aa3be6b5 100644 --- a/sysdeps/powerpc/powerpc64/stpcpy.S +++ b/sysdeps/powerpc/powerpc64/stpcpy.S @@ -16,103 +16,8 @@ License along with the GNU C Library; if not, see . */ -#include - -/* See strlen.s for comments on how the end-of-string testing works. */ - -/* char * [r3] stpcpy (char *dest [r3], const char *src [r4]) */ - -EALIGN (__stpcpy, 4, 0) - CALL_MCOUNT 2 - -#define rTMP r0 -#define rRTN r3 -#define rDEST r3 /* pointer to previous word in dest */ -#define rSRC r4 /* pointer to previous word in src */ -#define rWORD r6 /* current word from src */ -#define rFEFE r7 /* 0xfefefeff */ -#define r7F7F r8 /* 0x7f7f7f7f */ -#define rNEG r9 /* ~(word in src | 0x7f7f7f7f) */ -#define rALT r10 /* alternate word from src */ - - or rTMP, rSRC, rDEST - clrldi. rTMP, rTMP, 62 - addi rDEST, rDEST, -4 - bne L(unaligned) - - lis rFEFE, -0x101 - lis r7F7F, 0x7f7f - lwz rWORD, 0(rSRC) - addi rFEFE, rFEFE, -0x101 - addi r7F7F, r7F7F, 0x7f7f - b L(g2) - -L(g0): lwzu rALT, 4(rSRC) - stwu rWORD, 4(rDEST) - add rTMP, rFEFE, rALT - nor rNEG, r7F7F, rALT - and. rTMP, rTMP, rNEG - bne- L(g1) - lwzu rWORD, 4(rSRC) - stwu rALT, 4(rDEST) -L(g2): add rTMP, rFEFE, rWORD - nor rNEG, r7F7F, rWORD - and. rTMP, rTMP, rNEG - beq+ L(g0) - - mr rALT, rWORD -/* We've hit the end of the string. Do the rest byte-by-byte. */ -L(g1): -#ifdef __LITTLE_ENDIAN__ - rlwinm. rTMP, rALT, 0, 24, 31 - stbu rALT, 4(rDEST) - beqlr- - rlwinm. rTMP, rALT, 24, 24, 31 - stbu rTMP, 1(rDEST) - beqlr- - rlwinm. rTMP, rALT, 16, 24, 31 - stbu rTMP, 1(rDEST) - beqlr- - rlwinm rTMP, rALT, 8, 24, 31 - stbu rTMP, 1(rDEST) - blr -#else - rlwinm. rTMP, rALT, 8, 24, 31 - stbu rTMP, 4(rDEST) - beqlr- - rlwinm. rTMP, rALT, 16, 24, 31 - stbu rTMP, 1(rDEST) - beqlr- - rlwinm. rTMP, rALT, 24, 24, 31 - stbu rTMP, 1(rDEST) - beqlr- - stbu rALT, 1(rDEST) - blr -#endif - -/* Oh well. In this case, we just do a byte-by-byte copy. */ - .align 4 - nop -L(unaligned): - lbz rWORD, 0(rSRC) - addi rDEST, rDEST, 3 - cmpwi rWORD, 0 - beq- L(u2) - -L(u0): lbzu rALT, 1(rSRC) - stbu rWORD, 1(rDEST) - cmpwi rALT, 0 - beq- L(u1) - nop /* Let 601 load start of loop. */ - lbzu rWORD, 1(rSRC) - stbu rALT, 1(rDEST) - cmpwi rWORD, 0 - bne+ L(u0) -L(u2): stbu rWORD, 1(rDEST) - blr -L(u1): stbu rALT, 1(rDEST) - blr -END (__stpcpy) +#define USE_AS_STPCPY +#include weak_alias (__stpcpy, stpcpy) libc_hidden_def (__stpcpy) -- cgit 1.4.1