about summary refs log tree commit diff
path: root/sysdeps/aarch64/multiarch/memcpy.c
diff options
context:
space:
mode:
authorWilco Dijkstra <wdijkstr@arm.com>2020-07-15 16:55:07 +0100
committerWilco Dijkstra <wdijkstr@arm.com>2020-07-15 16:55:07 +0100
commit4a733bf375238a6a595033b5785cea7f27d61307 (patch)
treee649148bc3df2bf6ce1d256b5d9c8dc6d70b4195 /sysdeps/aarch64/multiarch/memcpy.c
parent34f0d01d5e43c7dedd002ab47f6266dfb5b79c22 (diff)
downloadglibc-4a733bf375238a6a595033b5785cea7f27d61307.tar.gz
glibc-4a733bf375238a6a595033b5785cea7f27d61307.tar.xz
glibc-4a733bf375238a6a595033b5785cea7f27d61307.zip
AArch64: Add optimized Q-register memcpy
Add a new memcpy using 128-bit Q registers - this is faster on modern
cores and reduces codesize.  Similar to the generic memcpy, small cases
include copies up to 32 bytes.  64-128 byte copies are split into two
cases to improve performance of 64-96 byte copies.  Large copies align
the source rather than the destination.

bench-memcpy-random is ~9% faster than memcpy_falkor on Neoverse N1,
so make this memcpy the default on N1 (on Centriq it is 15% faster than
memcpy_falkor).

Passes GLIBC regression tests.

Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
Diffstat (limited to 'sysdeps/aarch64/multiarch/memcpy.c')
-rw-r--r--sysdeps/aarch64/multiarch/memcpy.c5
1 files changed, 3 insertions, 2 deletions
diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c
index 2fafefd5d2..e6f3ae1167 100644
--- a/sysdeps/aarch64/multiarch/memcpy.c
+++ b/sysdeps/aarch64/multiarch/memcpy.c
@@ -29,6 +29,7 @@
 extern __typeof (__redirect_memcpy) __libc_memcpy;
 
 extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden;
+extern __typeof (__redirect_memcpy) __memcpy_simd attribute_hidden;
 extern __typeof (__redirect_memcpy) __memcpy_thunderx attribute_hidden;
 extern __typeof (__redirect_memcpy) __memcpy_thunderx2 attribute_hidden;
 extern __typeof (__redirect_memcpy) __memcpy_falkor attribute_hidden;
@@ -36,11 +37,11 @@ extern __typeof (__redirect_memcpy) __memcpy_falkor attribute_hidden;
 libc_ifunc (__libc_memcpy,
             (IS_THUNDERX (midr)
 	     ? __memcpy_thunderx
-	     : (IS_FALKOR (midr) || IS_PHECDA (midr) || IS_ARES (midr) || IS_KUNPENG920 (midr)
+	     : (IS_FALKOR (midr) || IS_PHECDA (midr) || IS_KUNPENG920 (midr)
 		? __memcpy_falkor
 		: (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr)
 		  ? __memcpy_thunderx2
-		  : __memcpy_generic))));
+		  : (IS_ARES (midr) ? __memcpy_simd : __memcpy_generic)))));
 
 # undef memcpy
 strong_alias (__libc_memcpy, memcpy);