summary refs log tree commit diff
path: root/sysdeps/powerpc/powerpc64/power7
diff options
context:
space:
mode:
authorAlan Modra <amodra@gmail.com>2013-08-17 18:40:11 +0930
committerAlan Modra <amodra@gmail.com>2013-10-04 10:39:32 +0930
commitdb9b4570c5dc550074140ac1d1677077fba29a26 (patch)
treec6469b8ce6b7ec28cc9f7c27484e67d351ce3349 /sysdeps/powerpc/powerpc64/power7
parentf7c399cff5bd04ee9dc117fb6b0f39597dc047c6 (diff)
downloadglibc-db9b4570c5dc550074140ac1d1677077fba29a26.tar.gz
glibc-db9b4570c5dc550074140ac1d1677077fba29a26.tar.xz
glibc-db9b4570c5dc550074140ac1d1677077fba29a26.zip
PowerPC LE strlen
http://sourceware.org/ml/libc-alpha/2013-08/msg00097.html

This is the first of nine patches adding little-endian support to the
existing optimised string and memory functions.  I did spend some
time with a power7 simulator looking at cycle by cycle behaviour for
memchr, but most of these patches have not been run on cpu simulators
to check that we are going as fast as possible.  I'm sure PowerPC can
do better.  However, the little-endian support mostly leaves main
loops unchanged, so I'm banking on previous authors having done a
good job on big-endian..  As with most code you stare at long enough,
I found some improvements for big-endian too.

Little-endian support for strlen.  Like most of the string functions,
I leave the main word or multiple-word loops substantially unchanged,
just needing to modify the tail.

Removing the branch in the power7 functions is just a tidy.  .align
produces a branch anyway.  Modifying regs in the non-power7 functions
is to suit the new little-endian tail.

	* sysdeps/powerpc/powerpc64/power7/strlen.S (strlen): Add little-endian
	support.  Don't branch over align.
	* sysdeps/powerpc/powerpc32/power7/strlen.S: Likewise.
	* sysdeps/powerpc/powerpc64/strlen.S (strlen): Add little-endian support.
	Rearrange tmp reg use to suit.  Comment.
	* sysdeps/powerpc/powerpc32/strlen.S: Likewise.
Diffstat (limited to 'sysdeps/powerpc/powerpc64/power7')
-rw-r--r--sysdeps/powerpc/powerpc64/power7/strlen.S17
1 files changed, 12 insertions, 5 deletions
diff --git a/sysdeps/powerpc/powerpc64/power7/strlen.S b/sysdeps/powerpc/powerpc64/power7/strlen.S
index 343216952e..807ef1082e 100644
--- a/sysdeps/powerpc/powerpc64/power7/strlen.S
+++ b/sysdeps/powerpc/powerpc64/power7/strlen.S
@@ -30,7 +30,11 @@ ENTRY (strlen)
 				 with cmpb.  */
 	li	r5,-1	      /* MASK = 0xffffffffffffffff.  */
 	ld	r12,0(r4)     /* Load doubleword from memory.  */
+#ifdef __LITTLE_ENDIAN__
+	sld	r5,r5,r6
+#else
 	srd	r5,r5,r6      /* MASK = MASK >> padding.  */
+#endif
 	orc	r9,r12,r5     /* Mask bits that are not part of the string.  */
 	cmpb	r10,r9,r0     /* Check for null bytes in DWORD1.  */
 	cmpdi	cr7,r10,0     /* If r10 == 0, no null's have been found.  */
@@ -48,9 +52,6 @@ ENTRY (strlen)
 	cmpb	r10,r12,r0
 	cmpdi	cr7,r10,0
 	bne	cr7,L(done)
-	b	L(loop)	      /* We branch here (rather than falling through)
-				 to skip the nops due to heavy alignment
-				 of the loop below.  */
 
 	/* Main loop to look for the end of the string.  Since it's a
 	   small loop (< 8 instructions), align it to 32-bytes.  */
@@ -87,9 +88,15 @@ L(loop):
 	   0xff in the same position as the null byte in the original
 	   doubleword from the string.  Use that to calculate the length.  */
 L(done):
-	cntlzd	r0,r10	      /* Count leading zeroes before the match.  */
+#ifdef __LITTLE_ENDIAN__
+	addi	r9, r10, -1   /* Form a mask from trailing zeros.  */
+	andc	r9, r9, r10
+	popcntd r0, r9	      /* Count the bits in the mask.  */
+#else
+	cntlzd	r0,r10	      /* Count leading zeros before the match.  */
+#endif
 	subf	r5,r3,r4
-	srdi	r0,r0,3	      /* Convert leading zeroes to bytes.  */
+	srdi	r0,r0,3	      /* Convert leading/trailing zeros to bytes.  */
 	add	r3,r5,r0      /* Compute final length.  */
 	blr
 END (strlen)