about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Wilco Dijkstra <wdijkstr@arm.com>, 2020-08-28 17:51:40 +0100
committer: Wilco Dijkstra <wdijkstr@arm.com>, 2020-10-14 17:29:12 +0100
commit: b915da29dab5d8c6b9cdb1ee6fdc1e0ec6ef39e1 (patch)
tree: 5f060261fc04bcfe189333dfbc315c78285d005e
parent: 4bd28df0b0598c380f4ae63b96eaadc782c9d709 (diff)
download: glibc-b915da29dab5d8c6b9cdb1ee6fdc1e0ec6ef39e1.tar.gz
glibc-b915da29dab5d8c6b9cdb1ee6fdc1e0ec6ef39e1.tar.xz
glibc-b915da29dab5d8c6b9cdb1ee6fdc1e0ec6ef39e1.zip
AArch64: Improve backwards memmove performance
On some microarchitectures performance of the backwards memmove improves if
the stores use STR with decreasing addresses.  So change the memmove loop
in memcpy_advsimd.S to use 2x STR rather than STP.

Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
(cherry picked from commit bd394d131c10c9ec22c6424197b79410042eed99)
-rw-r--r-- sysdeps/aarch64/multiarch/memcpy_advsimd.S | 7
1 file changed, 4 insertions, 3 deletions
diff --git a/sysdeps/aarch64/multiarch/memcpy_advsimd.S b/sysdeps/aarch64/multiarch/memcpy_advsimd.S
index d4ba747777..48bb6d7ca4 100644
--- a/sysdeps/aarch64/multiarch/memcpy_advsimd.S
+++ b/sysdeps/aarch64/multiarch/memcpy_advsimd.S
@@ -223,12 +223,13 @@ L(copy_long_backwards):
 	b.ls	L(copy64_from_start)
 
 L(loop64_backwards):
-	stp	A_q, B_q, [dstend, -32]
+	str	B_q, [dstend, -16]
+	str	A_q, [dstend, -32]
 	ldp	A_q, B_q, [srcend, -96]
-	stp	C_q, D_q, [dstend, -64]
+	str	D_q, [dstend, -48]
+	str	C_q, [dstend, -64]!
 	ldp	C_q, D_q, [srcend, -128]
 	sub	srcend, srcend, 64
-	sub	dstend, dstend, 64
 	subs	count, count, 64
 	b.hi	L(loop64_backwards)