about summary refs log tree commit diff
path: root/sysdeps/aarch64/multiarch/memmove_falkor.S
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/aarch64/multiarch/memmove_falkor.S')
-rw-r--r--sysdeps/aarch64/multiarch/memmove_falkor.S46
1 files changed, 28 insertions, 18 deletions
diff --git a/sysdeps/aarch64/multiarch/memmove_falkor.S b/sysdeps/aarch64/multiarch/memmove_falkor.S
index 3375adf2de..c0d9560301 100644
--- a/sysdeps/aarch64/multiarch/memmove_falkor.S
+++ b/sysdeps/aarch64/multiarch/memmove_falkor.S
@@ -150,7 +150,6 @@ L(copy96):
 
 	.p2align 4
 L(copy_long):
-	sub	count, count, 64 + 16	/* Test and readjust count.  */
 	mov	B_l, Q_l
 	mov	B_h, Q_h
 	ldp	A_l, A_h, [src]
@@ -161,6 +160,8 @@ L(copy_long):
 	ldp	Q_l, Q_h, [src, 16]!
 	stp	A_l, A_h, [dstin]
 	ldp	A_l, A_h, [src, 16]!
+	subs	count, count, 32 + 64 + 16	/* Test and readjust count.  */
+	b.ls	L(last64)
 
 L(loop64):
 	subs	count, count, 32
@@ -170,18 +171,22 @@ L(loop64):
 	ldp	A_l, A_h, [src, 16]!
 	b.hi	L(loop64)
 
-	/* Write the last full set of 32 bytes.  The remainder is at most 32
-	   bytes, so it is safe to always copy 32 bytes from the end even if
-	   there is just 1 byte left.  */
+	/* Write the last full set of 64 bytes.  The remainder is at most 64
+	   bytes and at least 33 bytes, so it is safe to always copy 64 bytes
+	   from the end.  */
 L(last64):
-	ldp	C_l, C_h, [srcend, -32]
+	ldp	C_l, C_h, [srcend, -64]
 	stp	Q_l, Q_h, [dst, 16]
-	ldp	Q_l, Q_h, [srcend, -16]
-	stp	A_l, A_h, [dst, 32]
-	stp	C_l, C_h, [dstend, -32]
-	stp	Q_l, Q_h, [dstend, -16]
 	mov	Q_l, B_l
 	mov	Q_h, B_h
+	ldp	B_l, B_h, [srcend, -48]
+	stp	A_l, A_h, [dst, 32]
+	ldp	A_l, A_h, [srcend, -32]
+	ldp	D_l, D_h, [srcend, -16]
+	stp	C_l, C_h, [dstend, -64]
+	stp	B_l, B_h, [dstend, -48]
+	stp	A_l, A_h, [dstend, -32]
+	stp	D_l, D_h, [dstend, -16]
 	ret
 
 	.p2align 4
@@ -204,7 +209,8 @@ L(move_long):
 	sub	count, count, tmp1
 	ldp	A_l, A_h, [srcend, -16]!
 	sub	dstend, dstend, tmp1
-	sub	count, count, 64
+	subs	count, count, 32 + 64
+	b.ls	2f
 
 1:
 	subs	count, count, 32
@@ -214,18 +220,22 @@ L(move_long):
 	ldp	A_l, A_h, [srcend, -16]!
 	b.hi	1b
 
-	/* Write the last full set of 32 bytes.  The remainder is at most 32
-	   bytes, so it is safe to always copy 32 bytes from the start even if
-	   there is just 1 byte left.  */
+	/* Write the last full set of 64 bytes.  The remainder is at most 64
+	   bytes and at least 33 bytes, so it is safe to always copy 64 bytes
+	   from the start.  */
 2:
-	ldp	C_l, C_h, [src, 16]
+	ldp	C_l, C_h, [src, 48]
 	stp	Q_l, Q_h, [dstend, -16]
-	ldp	Q_l, Q_h, [src]
-	stp	A_l, A_h, [dstend, -32]
-	stp	C_l, C_h, [dstin, 16]
-	stp	Q_l, Q_h, [dstin]
 	mov	Q_l, B_l
 	mov	Q_h, B_h
+	ldp	B_l, B_h, [src, 32]
+	stp	A_l, A_h, [dstend, -32]
+	ldp	A_l, A_h, [src, 16]
+	ldp	D_l, D_h, [src]
+	stp	C_l, C_h, [dstin, 48]
+	stp	B_l, B_h, [dstin, 32]
+	stp	A_l, A_h, [dstin, 16]
+	stp	D_l, D_h, [dstin]
 3:	ret
 
 END (__memmove_falkor)