diff options
author | Alan Modra <amodra@gmail.com> | 2013-08-17 18:47:22 +0930 |
---|---|---|
committer | Alan Modra <amodra@gmail.com> | 2013-10-04 10:41:24 +0930 |
commit | 759cfef3ac4c07dba1ece0bbc1207e099348816d (patch) | |
tree | a0e8cadce4426afb90d39b330dd50688b8975484 /sysdeps/powerpc/powerpc32/power4/memcpy.S | |
parent | fe6e95d7171eba5f3e07848f081676fae4e86322 (diff) | |
download | glibc-759cfef3ac4c07dba1ece0bbc1207e099348816d.tar.gz glibc-759cfef3ac4c07dba1ece0bbc1207e099348816d.tar.xz glibc-759cfef3ac4c07dba1ece0bbc1207e099348816d.zip |
PowerPC LE memcpy
http://sourceware.org/ml/libc-alpha/2013-08/msg00103.html LIttle-endian support for memcpy. I spent some time cleaning up the 64-bit power7 memcpy, in order to avoid the extra alignment traps power7 takes for little-endian. It probably would have been better to copy the linux kernel version of memcpy. * sysdeps/powerpc/powerpc32/power4/memcpy.S: Add little endian support. * sysdeps/powerpc/powerpc32/power6/memcpy.S: Likewise. * sysdeps/powerpc/powerpc32/power7/memcpy.S: Likewise. * sysdeps/powerpc/powerpc32/power7/mempcpy.S: Likewise. * sysdeps/powerpc/powerpc64/memcpy.S: Likewise. * sysdeps/powerpc/powerpc64/power4/memcpy.S: Likewise. * sysdeps/powerpc/powerpc64/power6/memcpy.S: Likewise. * sysdeps/powerpc/powerpc64/power7/memcpy.S: Likewise. * sysdeps/powerpc/powerpc64/power7/mempcpy.S: Likewise. Make better use of regs. Use power7 mtocrf. Tidy function tails.
Diffstat (limited to 'sysdeps/powerpc/powerpc32/power4/memcpy.S')
-rw-r--r-- | sysdeps/powerpc/powerpc32/power4/memcpy.S | 58 |
1 files changed, 58 insertions, 0 deletions
diff --git a/sysdeps/powerpc/powerpc32/power4/memcpy.S b/sysdeps/powerpc/powerpc32/power4/memcpy.S index d9146631e3..338d3cce30 100644 --- a/sysdeps/powerpc/powerpc32/power4/memcpy.S +++ b/sysdeps/powerpc/powerpc32/power4/memcpy.S @@ -203,15 +203,28 @@ EALIGN (memcpy, 5, 0) blt cr6,5f srwi 7,6,16 bgt cr6,3f +#ifdef __LITTLE_ENDIAN__ + sth 7,0(3) +#else sth 6,0(3) +#endif b 7f .align 4 3: +#ifdef __LITTLE_ENDIAN__ + rotlwi 6,6,24 + stb 6,0(3) + sth 7,1(3) +#else stb 7,0(3) sth 6,1(3) +#endif b 7f .align 4 5: +#ifdef __LITTLE_ENDIAN__ + rotlwi 6,6,8 +#endif stb 6,0(3) 7: cmplwi cr1,10,16 @@ -339,13 +352,23 @@ EALIGN (memcpy, 5, 0) bf 30,1f /* there are at least two words to copy, so copy them */ +#ifdef __LITTLE_ENDIAN__ + srw 0,6,10 + slw 8,7,9 +#else slw 0,6,10 /* shift 1st src word to left align it in R0 */ srw 8,7,9 /* shift 2nd src word to right align it in R8 */ +#endif or 0,0,8 /* or them to get word to store */ lwz 6,8(5) /* load the 3rd src word */ stw 0,0(4) /* store the 1st dst word */ +#ifdef __LITTLE_ENDIAN__ + srw 0,7,10 + slw 8,6,9 +#else slw 0,7,10 /* now left align 2nd src word into R0 */ srw 8,6,9 /* shift 3rd src word to right align it in R8 */ +#endif or 0,0,8 /* or them to get word to store */ lwz 7,12(5) stw 0,4(4) /* store the 2nd dst word */ @@ -353,8 +376,13 @@ EALIGN (memcpy, 5, 0) addi 5,5,16 bf 31,4f /* there is a third word to copy, so copy it */ +#ifdef __LITTLE_ENDIAN__ + srw 0,6,10 + slw 8,7,9 +#else slw 0,6,10 /* shift 3rd src word to left align it in R0 */ srw 8,7,9 /* shift 4th src word to right align it in R8 */ +#endif or 0,0,8 /* or them to get word to store */ stw 0,0(4) /* store 3rd dst word */ mr 6,7 @@ -364,8 +392,13 @@ EALIGN (memcpy, 5, 0) b 4f .align 4 1: +#ifdef __LITTLE_ENDIAN__ + srw 0,6,10 + slw 8,7,9 +#else slw 0,6,10 /* shift 1st src word to left align it in R0 */ srw 8,7,9 /* shift 2nd src word to right align it in R8 */ +#endif addi 5,5,8 or 0,0,8 /* or them to get word to store */ bf 31,4f @@ -378,23 +411,43 @@ EALIGN (memcpy, 5, 0) .align 4 4: /* copy 16 bytes at a time */ +#ifdef __LITTLE_ENDIAN__ + srw 0,6,10 + slw 8,7,9 +#else slw 0,6,10 srw 8,7,9 +#endif or 0,0,8 lwz 6,0(5) stw 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srw 0,7,10 + slw 8,6,9 +#else slw 0,7,10 srw 8,6,9 +#endif or 0,0,8 lwz 7,4(5) stw 0,4(4) +#ifdef __LITTLE_ENDIAN__ + srw 0,6,10 + slw 8,7,9 +#else slw 0,6,10 srw 8,7,9 +#endif or 0,0,8 lwz 6,8(5) stw 0,8(4) +#ifdef __LITTLE_ENDIAN__ + srw 0,7,10 + slw 8,6,9 +#else slw 0,7,10 srw 8,6,9 +#endif or 0,0,8 lwz 7,12(5) stw 0,12(4) @@ -403,8 +456,13 @@ EALIGN (memcpy, 5, 0) bdnz+ 4b 8: /* calculate and store the final word */ +#ifdef __LITTLE_ENDIAN__ + srw 0,6,10 + slw 8,7,9 +#else slw 0,6,10 srw 8,7,9 +#endif or 0,0,8 stw 0,0(4) 3: |