about summary refs log tree commit diff
path: root/sysdeps/powerpc/powerpc64/power7/memset.S
diff options
context:
space:
mode:
author: Alan Modra <amodra@gmail.com> 2013-08-17 18:47:59 +0930
committer: Alan Modra <amodra@gmail.com> 2013-10-04 10:41:35 +0930
commit: 3be87c77d24c4456ccca4034363b6d1814cd0c84 (patch)
tree: c81589a21d74b94d5ae20235bc6d81715eca858c /sysdeps/powerpc/powerpc64/power7/memset.S
parent: 759cfef3ac4c07dba1ece0bbc1207e099348816d (diff)
download: glibc-3be87c77d24c4456ccca4034363b6d1814cd0c84.tar.gz
glibc-3be87c77d24c4456ccca4034363b6d1814cd0c84.tar.xz
glibc-3be87c77d24c4456ccca4034363b6d1814cd0c84.zip
PowerPC LE memset
http://sourceware.org/ml/libc-alpha/2013-08/msg00104.html

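One of the things I noticed when looking at power7 timing is that rlwimi
is cracked (split by the core into two internal operations), and the two
resulting insns have a register dependency.  That makes it a little
slower than the equivalent rldimi, which is why the byte replication now
uses insrdi, the extended mnemonic for rldimi.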

	* sysdeps/powerpc/powerpc64/memset.S: Replace rlwimi with
        insrdi.  Formatting.
	* sysdeps/powerpc/powerpc64/power4/memset.S: Likewise.
	* sysdeps/powerpc/powerpc64/power6/memset.S: Likewise.
	* sysdeps/powerpc/powerpc64/power7/memset.S: Likewise.
	* sysdeps/powerpc/powerpc32/power4/memset.S: Likewise.
	* sysdeps/powerpc/powerpc32/power6/memset.S: Likewise.
	* sysdeps/powerpc/powerpc32/power7/memset.S: Likewise.
Diffstat (limited to 'sysdeps/powerpc/powerpc64/power7/memset.S')
-rw-r--r-- sysdeps/powerpc/powerpc64/power7/memset.S 6
1 files changed, 3 insertions, 3 deletions
diff --git a/sysdeps/powerpc/powerpc64/power7/memset.S b/sysdeps/powerpc/powerpc64/power7/memset.S
index b24cfa163a..8b081e87cb 100644
--- a/sysdeps/powerpc/powerpc64/power7/memset.S
+++ b/sysdeps/powerpc/powerpc64/power7/memset.S
@@ -32,8 +32,8 @@ L(_memset):
 	mr	10,3
 
 	/* Replicate byte to word.  */
-	rlwimi	4,4,8,16,23
-	rlwimi	4,4,16,0,15
+	insrdi	4,4,8,48
+	insrdi	4,4,16,32
 	ble	cr6,L(small)	/* If length <= 8, use short copy code.  */
 
 	neg	0,3
@@ -321,7 +321,7 @@ L(medium):
 	clrldi	0,0,62
 	beq	L(medium_aligned)
 
-	/* Force 4-bytes alignment for SRC.  */
+	/* Force 4-bytes alignment for DST.  */
 	mtocrf	0x01,0
 	subf	5,0,5
 1:	/* Copy 1 byte.  */