diff options
author | Wilco Dijkstra <wdijkstr@arm.com> | 2020-07-15 16:55:07 +0100 |
---|---|---|
committer | Wilco Dijkstra <wdijkstr@arm.com> | 2020-10-14 17:29:12 +0100 |
commit | 4bd28df0b0598c380f4ae63b96eaadc782c9d709 (patch) | |
tree | fc4eb7c0cee22e40238823c890623eaa8b6f9423 /sysdeps/aarch64/multiarch/memcpy.c | |
parent | 118fbee7a0dfac0d311b8a7a8f8bd8d1fb6e205b (diff) | |
download | glibc-4bd28df0b0598c380f4ae63b96eaadc782c9d709.tar.gz glibc-4bd28df0b0598c380f4ae63b96eaadc782c9d709.tar.xz glibc-4bd28df0b0598c380f4ae63b96eaadc782c9d709.zip |
AArch64: Add optimized Q-register memcpy
Add a new memcpy using 128-bit Q registers - this is faster on modern cores and reduces code size. Similar to the generic memcpy, small cases include copies up to 32 bytes. 64-128 byte copies are split into two cases to improve performance of 64-96 byte copies. Large copies align the source rather than the destination.

bench-memcpy-random is ~9% faster than memcpy_falkor on Neoverse N1, so make this memcpy the default on N1 (on Centriq it is 15% faster than memcpy_falkor).

Passes GLIBC regression tests.

Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
(cherry picked from commit 4a733bf375238a6a595033b5785cea7f27d61307)
Diffstat (limited to 'sysdeps/aarch64/multiarch/memcpy.c')
-rw-r--r-- | sysdeps/aarch64/multiarch/memcpy.c | 5 |
1 file changed, 3 insertions, 2 deletions
diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c
index 8f5d4e7df5..a6774f0d17 100644
--- a/sysdeps/aarch64/multiarch/memcpy.c
+++ b/sysdeps/aarch64/multiarch/memcpy.c
@@ -29,6 +29,7 @@
 extern __typeof (__redirect_memcpy) __libc_memcpy;
 
 extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden;
+extern __typeof (__redirect_memcpy) __memcpy_simd attribute_hidden;
 extern __typeof (__redirect_memcpy) __memcpy_thunderx attribute_hidden;
 extern __typeof (__redirect_memcpy) __memcpy_thunderx2 attribute_hidden;
 extern __typeof (__redirect_memcpy) __memcpy_falkor attribute_hidden;
@@ -36,11 +37,11 @@ extern __typeof (__redirect_memcpy) __memcpy_falkor attribute_hidden;
 libc_ifunc (__libc_memcpy,
             (IS_THUNDERX (midr)
              ? __memcpy_thunderx
-             : (IS_FALKOR (midr) || IS_PHECDA (midr) || IS_ARES (midr)
+             : (IS_FALKOR (midr) || IS_PHECDA (midr)
                 ? __memcpy_falkor
                 : (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr)
                   ? __memcpy_thunderx2
-                  : __memcpy_generic))));
+                  : (IS_ARES (midr) ? __memcpy_simd : __memcpy_generic)))));
 
 # undef memcpy
 strong_alias (__libc_memcpy, memcpy);