diff options
author | dengjianbo <dengjianbo@loongson.cn> | 2023-08-28 10:08:37 +0800 |
---|---|---|
committer | caiyinyu <caiyinyu@loongson.cn> | 2023-08-29 10:35:38 +0800 |
commit | 55e84dc6edc074f0f87a28bafce3249dfed4b201 (patch) | |
tree | b0d1496e0e26400dcd06cda181f73ad89cdfbbf3 /sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S | |
parent | 60bcb9acbfcb40d1b613a13b539cb75e500b4ad6 (diff) | |
download | glibc-55e84dc6edc074f0f87a28bafce3249dfed4b201.tar.gz glibc-55e84dc6edc074f0f87a28bafce3249dfed4b201.tar.xz glibc-55e84dc6edc074f0f87a28bafce3249dfed4b201.zip |
LoongArch: Add ifunc support for memrchr{lsx, lasx}
According to the glibc memrchr microbenchmark, this implementation reduces the runtime as follows:

Name | Percent of runtime reduced
memrchr-lasx | 20%-83%
memrchr-lsx | 20%-64%
Diffstat (limited to 'sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S')
-rw-r--r-- | sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S | 123 |
1 files changed, 123 insertions, 0 deletions
diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S
new file mode 100644
index 0000000000..5f3e0d06d7
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S
@@ -0,0 +1,123 @@
+/* Optimized memrchr implementation using LoongArch LASX instructions.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+#ifndef MEMRCHR
+# define MEMRCHR __memrchr_lasx
+#endif
+/* memrchr (s = a0, c = a1, n = a2): find the last byte equal to c in s[0..n-1]; a0 = its address, or 0.  */
+LEAF(MEMRCHR, 6)	/* NOTE(review): second argument looks like an alignment exponent - confirm in sysdep.h.  */
+    beqz            a2, L(ret0)	/* n == 0: nothing to search.  */
+    addi.d          a2, a2, -1	/* a2 = n - 1.  */
+    add.d           a3, a0, a2	/* a3 = s + n - 1, the last byte to examine.  */
+    andi            t1, a3, 0x3f	/* t1 = offset of that byte inside its 64-byte block.  */
+
+    bstrins.d       a3, zero, 5, 0	/* Clear bits 5..0: a3 = 64-byte aligned block holding the last byte.  */
+    addi.d          t1, t1, 1	/* t1 = count of block bytes at or below the last byte (1..64).  */
+    xvld            xr0, a3, 0	/* Load bytes 0..31 of the block.  */
+    xvld            xr1, a3, 32	/* Load bytes 32..63 of the block.  */
+
+    sub.d           t2, zero, t1	/* t2 = -t1; only its low 6 bits are used as a shift count.  */
+    li.d            t3, -1	/* t3 = all ones, source for both edge masks.  */
+    xvreplgr2vr.b   xr2, a1	/* Broadcast the low byte of c to every byte lane.  */
+    andi            t4, a0, 0x3f	/* t4 = offset of s inside its 64-byte block.  */
+
+    srl.d           t2, t3, t2	/* t2 = ~0 >> (64 - t1): keeps the low t1 bits (all 64 when t1 == 64).  */
+    xvseq.b         xr0, xr0, xr2	/* 0xff in each lane whose byte equals c, else 0.  */
+    xvseq.b         xr1, xr1, xr2
+    xvmsknz.b       xr0, xr0	/* Collapse to one bit per byte: a 16-bit mask per 128-bit half.  */
+
+
+    xvmsknz.b       xr1, xr1
+    xvpickve.w      xr3, xr0, 4	/* Fetch the upper-half mask of xr0 (word 4) into word 0 of xr3.  */
+    xvpickve.w      xr4, xr1, 4	/* Same for xr1.  */
+    vilvl.h         vr0, vr3, vr0	/* vr0 low 32 bits = match mask for bytes 0..31.  */
+
+    vilvl.h         vr1, vr4, vr1	/* vr1 low 32 bits = match mask for bytes 32..63.  */
+    vilvl.w         vr0, vr1, vr0	/* vr0 low 64 bits = match mask for the whole block.  */
+    movfr2gr.d      t0, fa0	/* t0 = 64-bit mask, bit i set when block byte i equals c.  */
+    and             t0, t0, t2	/* Drop matches above the last byte of the buffer.  */
+
+    bltu            a2, t1, L(end)	/* n <= t1: s lies in this same block, finish with the head mask.  */
+    bnez            t0, L(found)	/* A match in the final block is the answer.  */
+    bstrins.d       a0, zero, 5, 0	/* a0 = aligned block containing s, used as the loop stop address.  */
+L(loop):
+    xvld            xr0, a3, -64	/* Step one 64-byte block toward lower addresses.  */
+
+    xvld            xr1, a3, -32	/* Never reads below the aligned block of s, since a0 < a3 on every entry.  */
+    addi.d          a3, a3, -64	/* a3 = base of the block just loaded.  */
+    xvseq.b         xr0, xr0, xr2
+    xvseq.b         xr1, xr1, xr2
+
+
+    beq             a0, a3, L(out)	/* Reached the block containing s: it needs head masking.  */
+    xvmax.bu        xr3, xr0, xr1	/* Non-zero somewhere iff either half has a match.  */
+    xvseteqz.v      fcc0, xr3	/* fcc0 = 1 when xr3 is all zero (no match).  */
+    bcnez           fcc0, L(loop)	/* No match in this block: keep going.  */
+
+    xvmsknz.b       xr0, xr0	/* Match found: build the 64-bit mask as in the prologue.  */
+    xvmsknz.b       xr1, xr1
+    xvpickve.w      xr3, xr0, 4
+    xvpickve.w      xr4, xr1, 4
+
+    vilvl.h         vr0, vr3, vr0
+    vilvl.h         vr1, vr4, vr1
+    vilvl.w         vr0, vr1, vr0
+    movfr2gr.d      t0, fa0
+
+L(found):	/* t0 != 0 here; a3 = base of the block that produced t0.  */
+    addi.d          a0, a3, 63
+    clz.d           t1, t0	/* 63 - clz (t0) = index of the highest matching byte.  */
+    sub.d           a0, a0, t1	/* a0 = address of the last match.  */
+    jr              ra
+
+
+L(out):	/* Block containing s: build the 64-bit match mask, same sequence as above.  */
+    xvmsknz.b       xr0, xr0
+    xvmsknz.b       xr1, xr1
+    xvpickve.w      xr3, xr0, 4
+    xvpickve.w      xr4, xr1, 4
+
+    vilvl.h         vr0, vr3, vr0
+    vilvl.h         vr1, vr4, vr1
+    vilvl.w         vr0, vr1, vr0
+    movfr2gr.d      t0, fa0
+
+L(end):	/* a3 = base of the block containing s; t0 = its match mask.  */
+    sll.d           t2, t3, t4	/* t2 = ~0 << (s & 63): clears bits for bytes before s.  */
+    and             t0, t0, t2	/* Drop matches below the start of the buffer.  */
+    addi.d          a0, a3, 63
+    clz.d           t1, t0
+
+    sub.d           a0, a0, t1	/* Candidate address; not meaningful when t0 == 0.  */
+    maskeqz         a0, a0, t0	/* Return 0 (NULL) when no match survived the masks.  */
+    jr              ra
+L(ret0):
+    move            a0, zero	/* Return NULL.  */
+
+
+    jr              ra
+END(MEMRCHR)
+
+libc_hidden_builtin_def (MEMRCHR)
+#endif |