diff options
author | dengjianbo <dengjianbo@loongson.cn> | 2023-08-28 10:08:35 +0800 |
---|---|---|
committer | caiyinyu <caiyinyu@loongson.cn> | 2023-08-29 10:35:38 +0800 |
commit | f8664fe2155eb5ddc22272bac72ab26368735718 (patch) | |
tree | aac9760625ff65d3f84561ff4ac30f63ae3e6f1b /sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S | |
parent | 3efa26749e4d28768558330353dc15c6f325ed4e (diff) | |
download | glibc-f8664fe2155eb5ddc22272bac72ab26368735718.tar.gz glibc-f8664fe2155eb5ddc22272bac72ab26368735718.tar.xz glibc-f8664fe2155eb5ddc22272bac72ab26368735718.zip |
LoongArch: Add ifunc support for rawmemchr{aligned, lsx, lasx}
According to the glibc rawmemchr microbenchmark, a few cases tested with char '\0' show a performance regression because the lasx and lsx versions do not handle '\0' separately. Overall, the rawmemchr-lasx implementation reduces runtime by about 40%-80%, the rawmemchr-lsx implementation by about 40%-66%, and the rawmemchr-aligned implementation by about 20%-40%.
Diffstat (limited to 'sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S')
-rw-r--r-- | sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S | 124 |
1 file changed, 124 insertions, 0 deletions
diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S
new file mode 100644
index 0000000000..9c7155ae82
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S
@@ -0,0 +1,124 @@
/* Optimized rawmemchr implementation using basic LoongArch instructions.
   Copyright (C) 2023 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include <sys/regdef.h>
#include <sys/asm.h>

/* Inside libc this file provides the ifunc candidate __rawmemchr_aligned;
   outside libc it is the sole implementation, __rawmemchr.  */
#if IS_IN (libc)
# define RAWMEMCHR_NAME __rawmemchr_aligned
#else
# define RAWMEMCHR_NAME __rawmemchr
#endif

/* void *rawmemchr (const void *s [a0], int c [a1])

   Scan forward from S for the first byte equal to (unsigned char) C and
   return its address.  rawmemchr has no length bound; a match is assumed
   to exist.

   Word-at-a-time search:
     a1 = C replicated into all 8 bytes
     a2 = 0x0101010101010101
     a3 = 0x8080808080808080 (a2 << 7)
   A word V contains a zero byte iff (V - a2) & ~V & a3 is nonzero
   (the classic zero-in-word bit trick); bytes equal to C are found by
   first XORing V with the replicated C, which turns matches into zero
   bytes.  C == 0 takes a separate, cheaper scan starting at
   L(find_zero).  */

LEAF(RAWMEMCHR_NAME, 6)
    /* Prologue: align a0 down to 8 bytes, build the masks, and mask off
       the bytes that precede S in the first word.  */
    andi        t1, a0, 0x7         /* t1 = misalignment of S within 8 bytes */
    bstrins.d   a0, zero, 2, 0      /* round a0 down to an 8-byte boundary */
    lu12i.w     a2, 0x01010         /* begin building the 0x01...01 mask */
    bstrins.d   a1, a1, 15, 8       /* replicate C: bits 15:8 := bits 7:0 */

    ld.d        t0, a0, 0           /* first aligned word */
    slli.d      t1, t1, 3           /* misalignment in bits */
    ori         a2, a2, 0x101       /* a2 low half = 0x01010101 */
    bstrins.d   a1, a1, 31, 16      /* replicate C into the low 32 bits */

    li.w        t8, -1
    bstrins.d   a1, a1, 63, 32      /* a1 = C in all 8 bytes */
    bstrins.d   a2, a2, 63, 32      /* a2 = 0x0101010101010101 */
    sll.d       t2, t8, t1          /* t2 = all-ones at/above the start bit */

    sll.d       t3, a1, t1          /* replicated C limited to valid bytes */
    orn         t0, t0, t2          /* bytes before S become 0xff: no false hit
                                       on them in the zero test below */
    slli.d      a3, a2, 7           /* a3 = 0x8080808080808080 */
    beqz        a1, L(find_zero)    /* C == 0: cheaper zero-scan path */

    /* Test the first (partial) word for a match.  */
    xor         t0, t0, t3          /* bytes equal to C become 0x00 */
    sub.d       t1, t0, a2
    andn        t2, a3, t0
    and         t3, t1, t2          /* nonzero iff some byte matched */

    bnez        t3, L(count_pos)
    addi.d      a0, a0, 8

/* Main loop for C != 0: two words (16 bytes) per iteration.  */
L(loop):
    ld.d        t0, a0, 0
    xor         t0, t0, a1          /* matches -> zero bytes */

    sub.d       t1, t0, a2
    andn        t2, a3, t0
    and         t3, t1, t2
    bnez        t3, L(count_pos)

    ld.d        t0, a0, 8
    addi.d      a0, a0, 16
    xor         t0, t0, a1
    sub.d       t1, t0, a2

    andn        t2, a3, t0
    and         t3, t1, t2
    beqz        t3, L(loop)
    addi.d      a0, a0, -8          /* hit was in the second word: back up */
L(count_pos):
    /* t3 has bit 7 of each matching byte set; locate the first one.  */
    ctz.d       t0, t3              /* bit index of first match marker */
    srli.d      t0, t0, 3           /* -> byte index within the word */
    add.d       a0, a0, t0          /* a0 = address of the matching byte */
    jr          ra

/* C == 0 fast path: the 2-op test (V - a2) & a3 is exact as long as all
   bytes are < 0x80 (ASCII); a byte >= 0x81 can false-positive, which is
   caught at L(more_check).  */
L(loop_7bit):
    ld.d        t0, a0, 0
L(find_zero):
    sub.d       t1, t0, a2
    and         t2, t1, a3
    bnez        t2, L(more_check)

    ld.d        t0, a0, 8
    addi.d      a0, a0, 16
    sub.d       t1, t0, a2
    and         t2, t1, a3

    beqz        t2, L(loop_7bit)
    addi.d      a0, a0, -8

L(more_check):
    /* Confirm with the full test; if it was a false positive from a
       high-bit byte, continue with the exact 3-op loop below.  */
    andn        t2, a3, t0
    and         t3, t1, t2
    bnez        t3, L(count_pos)
    addi.d      a0, a0, 8

/* Zero-scan using the full (V - a2) & ~V & a3 test, 16 bytes per
   iteration; exact for any byte values.  */
L(loop_8bit):
    ld.d        t0, a0, 0

    sub.d       t1, t0, a2
    andn        t2, a3, t0
    and         t3, t1, t2
    bnez        t3, L(count_pos)

    ld.d        t0, a0, 8
    addi.d      a0, a0, 16
    sub.d       t1, t0, a2

    andn        t2, a3, t0
    and         t3, t1, t2
    beqz        t3, L(loop_8bit)

    addi.d      a0, a0, -8          /* hit was in the second word: back up */
    b           L(count_pos)

END(RAWMEMCHR_NAME)

libc_hidden_builtin_def (__rawmemchr)