about summary refs log tree commit diff
path: root/sysdeps/powerpc/powerpc64/power7
diff options
context:
space:
mode:
authorAlan Modra <amodra@gmail.com>2013-08-17 18:46:05 +0930
committerAlan Modra <amodra@gmail.com>2013-10-04 10:40:22 +0930
commit664318c3eb07032e2bfcf47cb2aa3c89280c19e7 (patch)
tree338e8a4e5b1215319560caa795ce5830f2f46685 /sysdeps/powerpc/powerpc64/power7
parent43b84013714c46e6dcae4a5564c5527777ad5e08 (diff)
downloadglibc-664318c3eb07032e2bfcf47cb2aa3c89280c19e7.tar.gz
glibc-664318c3eb07032e2bfcf47cb2aa3c89280c19e7.tar.xz
glibc-664318c3eb07032e2bfcf47cb2aa3c89280c19e7.zip
PowerPC LE strchr
http://sourceware.org/ml/libc-alpha/2013-08/msg00101.html

Adds little-endian support to optimised strchr assembly.  I've also
tweaked the big-endian code a little.  In power7/strchr.S there's a
check in the tail of the function that we didn't match 0 before
finding a c match, done by comparing leading zero counts.  It's just
as valid, and quicker, to compare the raw output from cmpb.

Another little tweak is to use rldimi/insrdi in place of rlwimi for
the power7 strchr functions.  Since rlwimi is cracked, it is a few
cycles slower.  rldimi can be used on the 32-bit power7 functions
too.

	* sysdeps/powerpc/powerpc64/power7/strchr.S (strchr): Add little-endian
	support.  Correct typos, formatting.  Optimize tail.  Use insrdi
	rather than rlwimi.
	* sysdeps/powerpc/powerpc32/power7/strchr.S: Likewise.
	* sysdeps/powerpc/powerpc64/power7/strchrnul.S (__strchrnul): Add
	little-endian support.  Correct typos.
	* sysdeps/powerpc/powerpc32/power7/strchrnul.S: Likewise.  Use insrdi
	rather than rlwimi.
	* sysdeps/powerpc/powerpc64/strchr.S (rTMP4, rTMP5): Define.  Use
	in loop and entry code to keep "and." results.
	(strchr): Add little-endian support.  Comment.  Move cntlzd
	earlier in tail.
	* sysdeps/powerpc/powerpc32/strchr.S: Likewise.
Diffstat (limited to 'sysdeps/powerpc/powerpc64/power7')
-rw-r--r--sysdeps/powerpc/powerpc64/power7/strchr.S43
-rw-r--r--sysdeps/powerpc/powerpc64/power7/strchrnul.S19
2 files changed, 50 insertions, 12 deletions
diff --git a/sysdeps/powerpc/powerpc64/power7/strchr.S b/sysdeps/powerpc/powerpc64/power7/strchr.S
index 3ffe7a1887..4679a158f1 100644
--- a/sysdeps/powerpc/powerpc64/power7/strchr.S
+++ b/sysdeps/powerpc/powerpc64/power7/strchr.S
@@ -35,8 +35,8 @@ ENTRY (strchr)
 	beq	cr7,L(null_match)
 
 	/* Replicate byte to doubleword.  */
-	rlwimi	r4,r4,8,16,23
-	rlwimi	r4,r4,16,0,15
+	insrdi	r4,r4,8,48
+	insrdi	r4,r4,16,32
 	insrdi  r4,r4,32,0
 
 	/* Now r4 has a doubleword of c bytes and r0 has
@@ -47,11 +47,17 @@ ENTRY (strchr)
 
 	/* Move the doublewords left and right to discard the bits that are
 	   not part of the string and bring them back as zeros.  */
-
+#ifdef __LITTLE_ENDIAN__
+	srd	r10,r10,r6
+	srd	r11,r11,r6
+	sld	r10,r10,r6
+	sld	r11,r11,r6
+#else
 	sld	r10,r10,r6
 	sld	r11,r11,r6
 	srd	r10,r10,r6
 	srd	r11,r11,r6
+#endif
 	or	r5,r10,r11    /* OR the results to speed things up.  */
 	cmpdi	cr7,r5,0      /* If r5 == 0, no c or null bytes
 				 have been found.  */
@@ -108,15 +114,24 @@ L(loop):
 	mr	r11,r7
 	addi	r8,r8,8
 
-	/* r5 has the output of the cmpb instruction, that is, it contains
+	/* r10/r11 have the output of the cmpb instructions, that is,
 	   0xff in the same position as the c/null byte in the original
 	   doubleword from the string.  Use that to calculate the pointer.  */
 L(done):
-	cntlzd	r4,r10	      /* Count leading zeroes before c matches.  */
-	cntlzd	r0,r11	      /* Count leading zeroes before null matches.  */
-	cmpld	cr7,r4,r0
+#ifdef __LITTLE_ENDIAN__
+	addi    r3,r10,-1
+	andc    r3,r3,r10
+	popcntd	r0,r3
+	addi    r4,r11,-1
+	andc    r4,r4,r11
+	cmpld	cr7,r3,r4
 	bgt	cr7,L(no_match)
-	srdi	r0,r4,3	      /* Convert leading zeroes to bytes.  */
+#else
+	cntlzd	r0,r10	      /* Count leading zeros before c matches.  */
+	cmpld	cr7,r11,r10
+	bgt	cr7,L(no_match)
+#endif
+	srdi	r0,r0,3	      /* Convert leading zeros to bytes.  */
 	add	r3,r8,r0      /* Return address of the matching c byte
 				 or null in case c was not found.  */
 	blr
@@ -135,9 +150,13 @@ L(null_match):
 
 	/* Move the doublewords left and right to discard the bits that are
 	   not part of the string and bring them back as zeros.  */
-
+#ifdef __LITTLE_ENDIAN__
+	srd	r5,r5,r6
+	sld	r5,r5,r6
+#else
 	sld	r5,r5,r6
 	srd	r5,r5,r6
+#endif
 	cmpdi	cr7,r5,0      /* If r10 == 0, no c or null bytes
 				 have been found.  */
 	bne	cr7,L(done_null)
@@ -192,7 +211,13 @@ L(loop_null):
 	   0xff in the same position as the null byte in the original
 	   doubleword from the string.  Use that to calculate the pointer.  */
 L(done_null):
+#ifdef __LITTLE_ENDIAN__
+	addi    r0,r5,-1
+	andc    r0,r0,r5
+	popcntd	r0,r0
+#else
 	cntlzd	r0,r5	      /* Count leading zeros before the match.  */
+#endif
 	srdi	r0,r0,3	      /* Convert leading zeros to bytes.  */
 	add	r3,r8,r0      /* Return address of the matching null byte.  */
 	blr
diff --git a/sysdeps/powerpc/powerpc64/power7/strchrnul.S b/sysdeps/powerpc/powerpc64/power7/strchrnul.S
index 9dbc51b0d1..df457525e2 100644
--- a/sysdeps/powerpc/powerpc64/power7/strchrnul.S
+++ b/sysdeps/powerpc/powerpc64/power7/strchrnul.S
@@ -27,8 +27,8 @@ ENTRY (__strchrnul)
 	clrrdi	r8,r3,3	      /* Align the address to doubleword boundary.  */
 
 	/* Replicate byte to doubleword.  */
-	rlwimi	r4,r4,8,16,23
-	rlwimi	r4,r4,16,0,15
+	insrdi	r4,r4,8,48
+	insrdi	r4,r4,16,32
 	insrdi	r4,r4,32,0
 
 	rlwinm	r6,r3,3,26,28 /* Calculate padding.  */
@@ -44,10 +44,17 @@ ENTRY (__strchrnul)
 
 	/* Move the doublewords left and right to discard the bits that are
 	   not part of the string and to bring them back as zeros.  */
+#ifdef __LITTLE_ENDIAN__
+	srd	r10,r10,r6
+	srd	r9,r9,r6
+	sld	r10,r10,r6
+	sld	r9,r9,r6
+#else
 	sld	r10,r10,r6
 	sld	r9,r9,r6
 	srd	r10,r10,r6
 	srd	r9,r9,r6
+#endif
 	or	r5,r9,r10     /* OR the results to speed things up.  */
 	cmpdi	cr7,r5,0      /* If r5 == 0, no c or null bytes
 				 have been found.  */
@@ -97,7 +104,7 @@ L(loop):
 	bne	cr6,L(done)
 
 	/* The c/null byte must be in the second doubleword.  Adjust the
-	   address again and move the result of cmpb to r10 so we can calculate
+	   address again and move the result of cmpb to r5 so we can calculate
 	   the pointer.  */
 	mr	r5,r10
 	addi	r8,r8,8
@@ -106,7 +113,13 @@ L(loop):
 	   0xff in the same position as the c/null byte in the original
 	   doubleword from the string.  Use that to calculate the pointer.  */
 L(done):
+#ifdef __LITTLE_ENDIAN__
+	addi    r0,r5,-1
+	andc    r0,r0,r5
+	popcntd	r0,r0
+#else
 	cntlzd	r0,r5	      /* Count leading zeros before the match.  */
+#endif
 	srdi	r0,r0,3	      /* Convert leading zeros to bytes.  */
 	add	r3,r8,r0      /* Return address of matching c/null byte.  */
 	blr