diff options
author | Wilco Dijkstra <wilco.dijkstra@arm.com> | 2023-10-26 17:07:21 +0100 |
---|---|---|
committer | Wilco Dijkstra <wilco.dijkstra@arm.com> | 2024-04-08 16:51:34 +0100 |
commit | 1da017615277676054c47d99b030230e295d8583 (patch) | |
tree | 6edaf3532a3e72e67b1d6434cc652ff702e6049d /sysdeps/aarch64/memset.S | |
parent | f9ae26cbbec008eb572a2af8a743ea7d3945e78e (diff) | |
download | glibc-1da017615277676054c47d99b030230e295d8583.tar.gz glibc-1da017615277676054c47d99b030230e295d8583.tar.xz glibc-1da017615277676054c47d99b030230e295d8583.zip |
AArch64: Add memset_zva64
Add a specialized memset for the common ZVA size of 64 to avoid the overhead of reading the ZVA size. Since the code is identical to __memset_falkor, remove the latter.

Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>

(cherry picked from commit 3d7090f14b13312320e425b27dcf0fe72de026fd)
Diffstat (limited to 'sysdeps/aarch64/memset.S')
-rw-r--r-- | sysdeps/aarch64/memset.S | 10 |
1 file changed, 5 insertions, 5 deletions
```diff
diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S
index 0219f6cd6f..b76d1c3e5e 100644
--- a/sysdeps/aarch64/memset.S
+++ b/sysdeps/aarch64/memset.S
@@ -101,19 +101,19 @@ L(tail64):
 	ret
 
 L(try_zva):
-#ifdef ZVA_MACRO
-	zva_macro
-#else
+#ifndef ZVA64_ONLY
 	.p2align 3
 	mrs	tmp1, dczid_el0
 	tbnz	tmp1w, 4, L(no_zva)
 	and	tmp1w, tmp1w, 15
 	cmp	tmp1w, 4	/* ZVA size is 64 bytes. */
 	b.ne	L(zva_128)
-
+	nop
+#endif
 	/* Write the first and last 64 byte aligned block using stp rather
 	   than using DC ZVA. This is faster on some cores.
 	 */
+	.p2align 4
 L(zva_64):
 	str	q0, [dst, 16]
 	stp	q0, q0, [dst, 32]
@@ -123,7 +123,6 @@ L(zva_64):
 	sub	count, dstend, dst	/* Count is now 128 too large. */
 	sub	count, count, 128+64+64	/* Adjust count and bias for loop. */
 	add	dst, dst, 128
-	nop
 1:	dc	zva, dst
 	add	dst, dst, 64
 	subs	count, count, 64
@@ -134,6 +133,7 @@ L(zva_64):
 	stp	q0, q0, [dstend, -32]
 	ret
 
+#ifndef ZVA64_ONLY
 	.p2align 3
 L(zva_128):
 	cmp	tmp1w, 5	/* ZVA size is 128 bytes. */
```