diff options
author | Wilco Dijkstra <wdijkstr@arm.com> | 2021-08-10 13:44:27 +0100 |
---|---|---|
committer | Wilco Dijkstra <wdijkstr@arm.com> | 2021-08-10 13:44:27 +0100 |
commit | e69d9981f858a38e19304e6ff5ebdf89f2cb0ba0 (patch) | |
tree | c67d8ca88def27c97b986bbfbc1368227015057c | |
parent | 186092c6ba8825598ffdbf15dbf0823c771f560d (diff) | |
download | glibc-e69d9981f858a38e19304e6ff5ebdf89f2cb0ba0.tar.gz glibc-e69d9981f858a38e19304e6ff5ebdf89f2cb0ba0.tar.xz glibc-e69d9981f858a38e19304e6ff5ebdf89f2cb0ba0.zip |
[4/5] AArch64: Improve A64FX memset by removing unroll32
Remove unroll32 code since it doesn't improve performance.

Reviewed-by: Naohiro Tamura <naohirot@fujitsu.com>
-rw-r--r-- | sysdeps/aarch64/multiarch/memset_a64fx.S | 18 |
1 file changed, 1 insertion, 17 deletions
diff --git a/sysdeps/aarch64/multiarch/memset_a64fx.S b/sysdeps/aarch64/multiarch/memset_a64fx.S
index 337c86be6f..ef0315658a 100644
--- a/sysdeps/aarch64/multiarch/memset_a64fx.S
+++ b/sysdeps/aarch64/multiarch/memset_a64fx.S
@@ -102,22 +102,6 @@ L(vl_agnostic): // VL Agnostic
 	ccmp	vector_length, tmp1, 0, cs
 	b.eq	L(L1_prefetch)
 
-L(unroll32):
-	lsl	tmp1, vector_length, 3	// vector_length * 8
-	lsl	tmp2, vector_length, 5	// vector_length * 32
-	.p2align 3
-1:	cmp	rest, tmp2
-	b.cc	L(unroll8)
-	st1b_unroll
-	add	dst, dst, tmp1
-	st1b_unroll
-	add	dst, dst, tmp1
-	st1b_unroll
-	add	dst, dst, tmp1
-	st1b_unroll
-	add	dst, dst, tmp1
-	sub	rest, rest, tmp2
-	b	1b
 
 L(unroll8):
 	lsl	tmp1, vector_length, 3
@@ -155,7 +139,7 @@ L(L1_prefetch): // if rest >= L1_SIZE
 	sub	rest, rest, CACHE_LINE_SIZE * 2
 	cmp	rest, L1_SIZE
 	b.ge	1b
-	cbnz	rest, L(unroll32)
+	cbnz	rest, L(unroll8)
 	ret
 
 	// count >= L2_SIZE