From 3be87c77d24c4456ccca4034363b6d1814cd0c84 Mon Sep 17 00:00:00 2001
From: Alan Modra
Date: Sat, 17 Aug 2013 18:47:59 +0930
Subject: PowerPC LE memset

http://sourceware.org/ml/libc-alpha/2013-08/msg00104.html

One of the things I noticed when looking at power7 timing is that rlwimi
is cracked and the two resulting insns have a register dependency.
That makes it a little slower than the equivalent rldimi.

	* sysdeps/powerpc/powerpc64/memset.S: Replace rlwimi with insrdi.
	Formatting.
	* sysdeps/powerpc/powerpc64/power4/memset.S: Likewise.
	* sysdeps/powerpc/powerpc64/power6/memset.S: Likewise.
	* sysdeps/powerpc/powerpc64/power7/memset.S: Likewise.
	* sysdeps/powerpc/powerpc32/power4/memset.S: Likewise.
	* sysdeps/powerpc/powerpc32/power6/memset.S: Likewise.
	* sysdeps/powerpc/powerpc32/power7/memset.S: Likewise.
---
 sysdeps/powerpc/powerpc64/power7/memset.S | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'sysdeps/powerpc/powerpc64/power7')

diff --git a/sysdeps/powerpc/powerpc64/power7/memset.S b/sysdeps/powerpc/powerpc64/power7/memset.S
index b24cfa163a..8b081e87cb 100644
--- a/sysdeps/powerpc/powerpc64/power7/memset.S
+++ b/sysdeps/powerpc/powerpc64/power7/memset.S
@@ -32,8 +32,8 @@ L(_memset):
 	mr 10,3
 
 	/* Replicate byte to word. */
-	rlwimi 4,4,8,16,23
-	rlwimi 4,4,16,0,15
+	insrdi 4,4,8,48
+	insrdi 4,4,16,32
 	ble cr6,L(small) /* If length <= 8, use short copy code. */
 
 	neg 0,3
@@ -321,7 +321,7 @@ L(medium):
 	clrldi 0,0,62
 	beq L(medium_aligned)
 
-	/* Force 4-bytes alignment for SRC. */
+	/* Force 4-bytes alignment for DST. */
 	mtocrf 0x01,0
 	subf 5,0,5
 1:	/* Copy 1 byte. */
--
cgit 1.4.1