-rw-r--r--  ChangeLog                                 |  4 ++++
-rw-r--r--  sysdeps/powerpc/powerpc32/power7/memcpy.S |  4 ++--
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/memcpy.S | 22 ++++------------------
3 files changed, 10 insertions(+), 20 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 1c1d24b76f..0fb34048c3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,9 @@
 2014-07-07  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>
 
+	* sysdeps/powerpc/powerpc64/power7/memcpy.S: Align VSX copies to 16B
+	to avoid alignment traps in non-cacheable memory.
+	* sysdeps/powerpc/powerpc32/power7/memcpy.S: Likewise.
+
 	* sysdeps/powerpc/powerpc32/power4/multiarch/Makefile: Add memmove
 	multiarch objects.
 	* sysdeps/powerpc/powerpc32/power4/multiarch/memmove-power7.c: New
diff --git a/sysdeps/powerpc/powerpc32/power7/memcpy.S b/sysdeps/powerpc/powerpc32/power7/memcpy.S
index 52c2a6bcf4..e540fead87 100644
--- a/sysdeps/powerpc/powerpc32/power7/memcpy.S
+++ b/sysdeps/powerpc/powerpc32/power7/memcpy.S
@@ -38,8 +38,8 @@ EALIGN (memcpy, 5, 0)
 	ble	cr1, L(copy_LT_32)  /* If move < 32 bytes use short move
 				    code.  */
 
-	andi.	11,3,7	      /* Check alignment of DST.  */
-	clrlwi	10,4,29	      /* Check alignment of SRC.  */
+	andi.	11,3,15	      /* Check alignment of DST.  */
+	clrlwi	10,4,28	      /* Check alignment of SRC.  */
 	cmplw	cr6,10,11     /* SRC and DST alignments match?  */
 	mr	12,4
 	mr	31,5
diff --git a/sysdeps/powerpc/powerpc64/power7/memcpy.S b/sysdeps/powerpc/powerpc64/power7/memcpy.S
index bbfd381b1b..58d9b1276d 100644
--- a/sysdeps/powerpc/powerpc64/power7/memcpy.S
+++ b/sysdeps/powerpc/powerpc64/power7/memcpy.S
@@ -36,16 +36,11 @@ EALIGN (memcpy, 5, 0)
 	ble	cr1, L(copy_LT_32)  /* If move < 32 bytes use short move
 				    code.  */
 
-#ifdef __LITTLE_ENDIAN__
-/* In little-endian mode, power7 takes an alignment trap on any lxvd2x
-   or stxvd2x crossing a 32-byte boundary, so ensure the aligned_copy
-   loop is only used for quadword aligned copies.  */
+/* Align copies using VSX instructions to quadword. It is to avoid alignment
+   traps when memcpy is used on non-cacheable memory (for instance, memory
+   mapped I/O).  */
 	andi.	10,3,15
 	clrldi	11,4,60
-#else
-	andi.	10,3,7		/* Check alignment of DST.  */
-	clrldi	11,4,61		/* Check alignment of SRC.  */
-#endif
 	cmpld	cr6,10,11	/* SRC and DST alignments match?  */
 
 	mr	dst,3
@@ -53,13 +48,9 @@ EALIGN (memcpy, 5, 0)
 	beq	L(aligned_copy)
 
 	mtocrf	0x01,0
-#ifdef __LITTLE_ENDIAN__
 	clrldi	0,0,60
-#else
-	clrldi	0,0,61
-#endif
 
-/* Get the DST and SRC aligned to 8 bytes (16 for little-endian).  */
+/* Get the DST and SRC aligned to 16 bytes.  */
 1:
 	bf	31,2f
 	lbz	6,0(src)
@@ -79,14 +70,12 @@ EALIGN (memcpy, 5, 0)
 	stw	6,0(dst)
 	addi	dst,dst,4
 8:
-#ifdef __LITTLE_ENDIAN__
 	bf	28,16f
 	ld	6,0(src)
 	addi	src,src,8
 	std	6,0(dst)
 	addi	dst,dst,8
 16:
-#endif
 	subf	cnt,0,cnt
 
 /* Main aligned copy loop. Copies 128 bytes at a time.  */
@@ -298,9 +287,6 @@ L(copy_LE_8):
 	.align	4
 L(copy_GE_32_unaligned):
 	clrldi	0,0,60	      /* Number of bytes until the 1st dst quadword.  */
-#ifndef __LITTLE_ENDIAN__
-	andi.	10,3,15	      /* Check alignment of DST (against quadwords).  */
-#endif
 	srdi	9,cnt,4	      /* Number of full quadwords remaining.  */
 
 	beq	L(copy_GE_32_unaligned_cont)
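For readers less familiar with the POWER7 assembly, the C sketch below shows
the check the patched prologue performs. It is illustrative only, not glibc
code: the function name memcpy_align_sketch and the variable names are
hypothetical, and the real routine uses VSX quadword loads/stores and more
elaborate unrolled loops where the sketch falls back to byte copies.

#include <stddef.h>
#include <stdint.h>

/* Hypothetical sketch of the patched prologue.  "andi. 10,3,15" and
   "clrldi 11,4,60" extract the low four bits of DST and SRC; the
   16-byte VSX loop is entered only once both pointers reach a quadword
   boundary, so lxvd2x/stxvd2x never straddle one and cannot trap on
   non-cacheable (e.g. memory-mapped I/O) mappings.  */
static void *
memcpy_align_sketch (void *dst, const void *src, size_t len)
{
  unsigned char *d = dst;
  const unsigned char *s = src;
  size_t dst_align = (uintptr_t) d & 15;   /* andi.  10,3,15  */
  size_t src_align = (uintptr_t) s & 15;   /* clrldi 11,4,60  */

  if (dst_align == src_align)              /* cmpld cr6,10,11 */
    {
      /* Copy up to 15 head bytes so both pointers hit a 16-byte
	 boundary together (the lbz/lhz/lwz/ld prologue).  */
      size_t head = (16 - dst_align) & 15;
      if (head > len)
	head = len;
      len -= head;
      while (head--)
	*d++ = *s++;
      /* Here the real code runs L(aligned_copy): quadword loads and
	 stores that are now naturally aligned.  */
    }
  /* Sketch only: finish (or handle mismatched alignments) bytewise.  */
  while (len--)
    *d++ = *s++;
  return dst;
}

Enforcing the quadword invariant unconditionally is also why the
__LITTLE_ENDIAN__ conditionals could be deleted: both endiannesses now take
the same 16-byte alignment path instead of little-endian carrying its own.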