about summary refs log tree commit diff
path: root/sysdeps/powerpc/powerpc64/stpcpy.S
diff options
context:
space:
mode:
authorAdhemerval Zanella <azanella@linux.vnet.ibm.com>2013-09-26 09:29:19 -0500
committerAdhemerval Zanella <azanella@linux.vnet.ibm.com>2013-10-25 13:28:24 -0500
commit69f13dbf06c6195de0ada8632271d58ca3cf55da (patch)
treec2611636127f33fdc43bb51a6ac0f2f97795136c /sysdeps/powerpc/powerpc64/stpcpy.S
parent151659f6371ce39a488fd132a5c8ce5e3bba983c (diff)
downloadglibc-69f13dbf06c6195de0ada8632271d58ca3cf55da.tar.gz
glibc-69f13dbf06c6195de0ada8632271d58ca3cf55da.tar.xz
glibc-69f13dbf06c6195de0ada8632271d58ca3cf55da.zip
PowerPC: strcpy/stpcpy optimization for PPC64/POWER7
This patch intends to unify both strcpy and stpcpy implementations
for PPC64 and PPC64/POWER7. The idea of the default powerpc64 implementation
is to provide both doubleword and word aligned memory access.

For PPC64/POWER7 it also provides doubleword and word memory access,
removes the branch hints, uses the cmpb instruction to compare
doublewords/words, and adds an optimization for inputs of the same alignment.
Diffstat (limited to 'sysdeps/powerpc/powerpc64/stpcpy.S')
-rw-r--r--sysdeps/powerpc/powerpc64/stpcpy.S99
1 files changed, 2 insertions, 97 deletions
diff --git a/sysdeps/powerpc/powerpc64/stpcpy.S b/sysdeps/powerpc/powerpc64/stpcpy.S
index c0b39729e2..09aa3be6b5 100644
--- a/sysdeps/powerpc/powerpc64/stpcpy.S
+++ b/sysdeps/powerpc/powerpc64/stpcpy.S
@@ -16,103 +16,8 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <sysdep.h>
-
-/* See strlen.s for comments on how the end-of-string testing works.  */
-
-/* char * [r3] stpcpy (char *dest [r3], const char *src [r4])  */
-
-EALIGN (__stpcpy, 4, 0)
-	CALL_MCOUNT 2
-
-#define rTMP	r0
-#define rRTN	r3
-#define rDEST	r3		/* pointer to previous word in dest */
-#define rSRC	r4		/* pointer to previous word in src */
-#define rWORD	r6		/* current word from src */
-#define rFEFE	r7		/* 0xfefefeff */
-#define r7F7F	r8		/* 0x7f7f7f7f */
-#define rNEG	r9		/* ~(word in src | 0x7f7f7f7f) */
-#define rALT	r10		/* alternate word from src */
-
-	or	rTMP, rSRC, rDEST
-	clrldi.	rTMP, rTMP, 62
-	addi	rDEST, rDEST, -4
-	bne	L(unaligned)
-
-	lis	rFEFE, -0x101
-	lis	r7F7F, 0x7f7f
-	lwz	rWORD, 0(rSRC)
-	addi	rFEFE, rFEFE, -0x101
-	addi	r7F7F, r7F7F, 0x7f7f
-	b	L(g2)
-
-L(g0):	lwzu	rALT, 4(rSRC)
-	stwu	rWORD, 4(rDEST)
-	add	rTMP, rFEFE, rALT
-	nor	rNEG, r7F7F, rALT
-	and.	rTMP, rTMP, rNEG
-	bne-	L(g1)
-	lwzu	rWORD, 4(rSRC)
-	stwu	rALT, 4(rDEST)
-L(g2):	add	rTMP, rFEFE, rWORD
-	nor	rNEG, r7F7F, rWORD
-	and.	rTMP, rTMP, rNEG
-	beq+	L(g0)
-
-	mr	rALT, rWORD
-/* We've hit the end of the string.  Do the rest byte-by-byte.  */
-L(g1):
-#ifdef __LITTLE_ENDIAN__
-	rlwinm.	rTMP, rALT, 0, 24, 31
-	stbu	rALT, 4(rDEST)
-	beqlr-
-	rlwinm.	rTMP, rALT, 24, 24, 31
-	stbu	rTMP, 1(rDEST)
-	beqlr-
-	rlwinm.	rTMP, rALT, 16, 24, 31
-	stbu	rTMP, 1(rDEST)
-	beqlr-
-	rlwinm	rTMP, rALT, 8, 24, 31
-	stbu	rTMP, 1(rDEST)
-	blr
-#else
-	rlwinm.	rTMP, rALT, 8, 24, 31
-	stbu	rTMP, 4(rDEST)
-	beqlr-
-	rlwinm.	rTMP, rALT, 16, 24, 31
-	stbu	rTMP, 1(rDEST)
-	beqlr-
-	rlwinm.	rTMP, rALT, 24, 24, 31
-	stbu	rTMP, 1(rDEST)
-	beqlr-
-	stbu	rALT, 1(rDEST)
-	blr
-#endif
-
-/* Oh well.  In this case, we just do a byte-by-byte copy.  */
-	.align 4
-	nop
-L(unaligned):
-	lbz	rWORD, 0(rSRC)
-	addi	rDEST, rDEST, 3
-	cmpwi	rWORD, 0
-	beq-	L(u2)
-
-L(u0):	lbzu	rALT, 1(rSRC)
-	stbu	rWORD, 1(rDEST)
-	cmpwi	rALT, 0
-	beq-	L(u1)
-	nop		/* Let 601 load start of loop.  */
-	lbzu	rWORD, 1(rSRC)
-	stbu	rALT, 1(rDEST)
-	cmpwi	rWORD, 0
-	bne+	L(u0)
-L(u2):	stbu	rWORD, 1(rDEST)
-	blr
-L(u1):	stbu	rALT, 1(rDEST)
-	blr
-END (__stpcpy)
+#define USE_AS_STPCPY
+#include <sysdeps/powerpc/powerpc64/strcpy.S>
 
 weak_alias (__stpcpy, stpcpy)
 libc_hidden_def (__stpcpy)