author     Adhemerval Zanella <azanella@linux.vnet.ibm.com>   2014-06-25 11:54:31 -0500
committer  Adhemerval Zanella <azanella@linux.vnet.ibm.com>   2014-08-28 09:30:09 -0400
commit     565e3d6c8230affd7089bf5ebfcebbf72f32a27c (patch)
tree       0dd1cded4c6fb24fa3773a9c7846e5233e58f90f
parent     6fae3527af330c32399e3a4cdfac3958fc440eb8 (diff)
PowerPC: Align power7 memcpy using VSX to quadword
This patch changes the power7 memcpy to use VSX instructions only when
memory is aligned to a quadword.  This avoids unaligned kernel traps on
non-cacheable memory (for instance, memory-mapped I/O).
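In C terms, the tightened gate looks roughly like the following minimal
sketch (hypothetical names, not glibc code; the real logic is the assembly
in the diff below): both pointers must now share quadword (16-byte) rather
than doubleword (8-byte) alignment before the VSX path is taken.

#include <stdint.h>

/* Hypothetical sketch of the dispatch this patch tightens; the names
   are illustrative only.  */
static int
co_aligned_for_vsx (const void *dst, const void *src)
{
  unsigned d = (uintptr_t) dst & 15;  /* was & 7 (andi. 10,3,7)   */
  unsigned s = (uintptr_t) src & 15;  /* was & 7 (clrldi 11,4,61) */

  /* Co-aligned buffers take the VSX path; a short scalar prologue then
     advances both to the next quadword boundary, so lxvd2x/stxvd2x only
     ever see 16-byte-aligned addresses.  */
  return d == s;
}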
-rw-r--r--  ChangeLog                                  |  4
-rw-r--r--  sysdeps/powerpc/powerpc32/power7/memcpy.S  |  4
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/memcpy.S  | 22
3 files changed, 10 insertions(+), 20 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index c3ee6357ad..3c12f8deab 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,9 @@
 2014-07-07  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>
 
+	* sysdeps/powerpc/powerpc64/power7/memcpy.S: Align VSX copies to 16B
+	to avoid alignment traps in non-cacheable memory.
+	* sysdeps/powerpc/powerpc32/power7/memcpy.S: Likewise.
+
 	* sysdeps/powerpc/powerpc32/power4/multiarch/Makefile: Add memmove
 	multiarch objects.
 	* sysdeps/powerpc/powerpc32/power4/multiarch/memmove-power7.c: New
diff --git a/sysdeps/powerpc/powerpc32/power7/memcpy.S b/sysdeps/powerpc/powerpc32/power7/memcpy.S
index 52c2a6bcf4..e540fead87 100644
--- a/sysdeps/powerpc/powerpc32/power7/memcpy.S
+++ b/sysdeps/powerpc/powerpc32/power7/memcpy.S
@@ -38,8 +38,8 @@ EALIGN (memcpy, 5, 0)
 	ble	cr1, L(copy_LT_32)  /* If move < 32 bytes use short move
 				    code.  */
 
-	andi.   11,3,7	      /* Check alignment of DST.  */
-	clrlwi  10,4,29	      /* Check alignment of SRC.  */
+	andi.   11,3,15	      /* Check alignment of DST.  */
+	clrlwi  10,4,28	      /* Check alignment of SRC.  */
 	cmplw   cr6,10,11     /* SRC and DST alignments match?  */
 	mr	12,4
 	mr	31,5
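For reference, clrlwi rD,rS,n clears the high n bits of a 32-bit register
and keeps the low 32 - n, so the hunk above widens the alignment mask from
3 bits (mod 8) to 4 bits (mod 16).  A small, hypothetical C model of that
equivalence:

#include <assert.h>
#include <stdint.h>

/* Hypothetical model of clrlwi rD,rS,n: keep the low (32 - n) bits.  */
static uint32_t
clrlwi (uint32_t rs, unsigned n)
{
  return rs & (0xFFFFFFFFu >> n);
}

int
main (void)
{
  uint32_t src = 0x10008c53;
  assert (clrlwi (src, 29) == (src & 7));   /* old check: alignment mod 8  */
  assert (clrlwi (src, 28) == (src & 15));  /* new check: alignment mod 16 */
  return 0;
}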
diff --git a/sysdeps/powerpc/powerpc64/power7/memcpy.S b/sysdeps/powerpc/powerpc64/power7/memcpy.S
index bbfd381b1b..58d9b1276d 100644
--- a/sysdeps/powerpc/powerpc64/power7/memcpy.S
+++ b/sysdeps/powerpc/powerpc64/power7/memcpy.S
@@ -36,16 +36,11 @@ EALIGN (memcpy, 5, 0)
 	ble	cr1, L(copy_LT_32)  /* If move < 32 bytes use short move
 				    code.  */
 
-#ifdef __LITTLE_ENDIAN__
-/* In little-endian mode, power7 takes an alignment trap on any lxvd2x
-   or stxvd2x crossing a 32-byte boundary, so ensure the aligned_copy
-   loop is only used for quadword aligned copies.  */
+/* Align copies using VSX instructions to quadword.  This avoids alignment
+   traps when memcpy is used on non-cacheable memory (for instance,
+   memory-mapped I/O).  */
 	andi.	10,3,15
 	clrldi	11,4,60
-#else
-	andi.	10,3,7		/* Check alignment of DST.  */
-	clrldi	11,4,61		/* Check alignment of SRC.  */
-#endif
 	cmpld	cr6,10,11	/* SRC and DST alignments match?  */
 
 	mr	dst,3
@@ -53,13 +48,9 @@ EALIGN (memcpy, 5, 0)
 	beq	L(aligned_copy)
 
 	mtocrf	0x01,0
-#ifdef __LITTLE_ENDIAN__
 	clrldi	0,0,60
-#else
-	clrldi	0,0,61
-#endif
 
-/* Get the DST and SRC aligned to 8 bytes (16 for little-endian).  */
+/* Get the DST and SRC aligned to 16 bytes.  */
 1:
 	bf	31,2f
 	lbz	6,0(src)
@@ -79,14 +70,12 @@ EALIGN (memcpy, 5, 0)
 	stw	6,0(dst)
 	addi	dst,dst,4
 8:
-#ifdef __LITTLE_ENDIAN__
 	bf	28,16f
 	ld	6,0(src)
 	addi	src,src,8
 	std	6,0(dst)
 	addi	dst,dst,8
 16:
-#endif
 	subf	cnt,0,cnt
 
 /* Main aligned copy loop. Copies 128 bytes at a time. */
@@ -298,9 +287,6 @@ L(copy_LE_8):
 	.align	4
 L(copy_GE_32_unaligned):
 	clrldi	0,0,60	      /* Number of bytes until the 1st dst quadword.  */
-#ifndef __LITTLE_ENDIAN__
-	andi.	10,3,15	      /* Check alignment of DST (against quadwords).  */
-#endif
 	srdi	9,cnt,4	      /* Number of full quadwords remaining.  */
 
 	beq	L(copy_GE_32_unaligned_cont)
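Taken together, the 64-bit hunks delete the __LITTLE_ENDIAN__ conditionals:
the quadword gate and the doubleword prologue step (the bf 28 case, formerly
little-endian only) become unconditional.  A hypothetical C sketch of that
prologue, assuming co-aligned SRC and DST as on this path (the gap
computation models the clrldi 0,0,60 result; helper names are invented):

#include <stdint.h>
#include <string.h>

/* Hypothetical model of the bf 31/30/29/28 ladder above: peel 1-, 2-,
   4- and 8-byte copies until DST reaches a quadword boundary.  SRC is
   co-aligned here, so it lands on the boundary too, and the VSX
   128-byte main loop then runs without alignment traps.  */
static void
align_to_quadword (unsigned char **dst, const unsigned char **src,
                   size_t *cnt)
{
  /* Bytes to the next 16-byte boundary (low 4 bits, as clrldi 0,0,60).  */
  size_t gap = (size_t) (-(uintptr_t) *dst) & 15;

  for (size_t step = 1; step <= 8; step <<= 1)  /* bits 0..3 of the gap  */
    if (gap & step)
      {
        memcpy (*dst, *src, step);  /* lbz/lhz/lwz/ld + matching store  */
        *dst += step;
        *src += step;
      }

  *cnt -= gap;  /* subf cnt,0,cnt  */
}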