From 24279aecf34a830a744038cb922d94b5a2d9c1cb Mon Sep 17 00:00:00 2001 From: dengjianbo Date: Wed, 13 Sep 2023 15:35:00 +0800 Subject: LoongArch: Add ifunc support for strrchr{aligned, lsx, lasx} According to glibc strrchr microbenchmark test results, this implementation could reduce the runtime time as following: Name Percent of rutime reduced strrchr-lasx 10%-50% strrchr-lsx 0%-50% strrchr-aligned 5%-50% Generic strrchr is implemented by function strlen + memrchr, the lasx version will compare with generic strrchr implemented by strlen-lasx + memrchr-lasx, the lsx version will compare with generic strrchr implemented by strlen-lsx + memrchr-lsx, the aligned version will compare with generic strrchr implemented by strlen-aligned + memrchr-generic. --- sysdeps/loongarch/lp64/multiarch/Makefile | 3 + sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c | 8 + sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h | 41 +++++ sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S | 170 ++++++++++++++++++++ sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S | 176 +++++++++++++++++++++ sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S | 144 +++++++++++++++++ sysdeps/loongarch/lp64/multiarch/strrchr.c | 36 +++++ 7 files changed, 578 insertions(+) create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr.c diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile index 39550bea9b..fe863e1ba4 100644 --- a/sysdeps/loongarch/lp64/multiarch/Makefile +++ b/sysdeps/loongarch/lp64/multiarch/Makefile @@ -9,6 +9,9 @@ sysdep_routines += \ strchr-aligned \ strchr-lsx \ strchr-lasx \ + strrchr-aligned \ + strrchr-lsx \ + strrchr-lasx \ strchrnul-aligned \ strchrnul-lsx \ strchrnul-lasx \ diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c index 39a14f1d6e..529e236950 100644 --- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c @@ -94,6 +94,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_aligned) ) + IFUNC_IMPL (i, name, strrchr, +#if !defined __loongarch_soft_float + IFUNC_IMPL_ADD (array, i, strrchr, SUPPORT_LASX, __strrchr_lasx) + IFUNC_IMPL_ADD (array, i, strrchr, SUPPORT_LSX, __strrchr_lsx) +#endif + IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_aligned) + ) + IFUNC_IMPL (i, name, memcpy, #if !defined __loongarch_soft_float IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx) diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h new file mode 100644 index 0000000000..bbb34089ef --- /dev/null +++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h @@ -0,0 +1,41 @@ +/* Common definition for strrchr ifunc selections. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include + +#if !defined __loongarch_soft_float +extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; +#endif + +extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (void) +{ +#if !defined __loongarch_soft_float + if (SUPPORT_LASX) + return OPTIMIZE (lasx); + else if (SUPPORT_LSX) + return OPTIMIZE (lsx); + else +#endif + return OPTIMIZE (aligned); +} diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S new file mode 100644 index 0000000000..a73deb7840 --- /dev/null +++ b/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S @@ -0,0 +1,170 @@ +/* Optimized strrchr implementation using basic LoongArch instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + . */ + +#include +#include +#include + +#if IS_IN (libc) +# define STRRCHR __strrchr_aligned +#else +# define STRRCHR strrchr +#endif + +LEAF(STRRCHR, 6) + slli.d t0, a0, 3 + bstrins.d a0, zero, 2, 0 + lu12i.w a2, 0x01010 + ld.d t2, a0, 0 + + andi a1, a1, 0xff + ori a2, a2, 0x101 + li.d t3, -1 + bstrins.d a2, a2, 63, 32 + + sll.d t5, t3, t0 + slli.d a3, a2, 7 + orn t4, t2, t5 + mul.d a1, a1, a2 + + sub.d t0, t4, a2 + andn t1, a3, t4 + and t1, t0, t1 + beqz t1, L(find_tail) + + + ctz.d t0, t1 + orn t0, zero, t0 + xor t2, t4, a1 + srl.d t0, t3, t0 + + orn t2, t2, t0 + orn t2, t2, t5 + revb.d t2, t2 + sub.d t1, t2, a2 + + andn t0, a3, t2 + and t1, t0, t1 + ctz.d t0, t1 + srli.d t0, t0, 3 + + addi.d a0, a0, 7 + sub.d a0, a0, t0 + maskeqz a0, a0, t1 + jr ra + + +L(find_tail): + addi.d a4, a0, 8 + addi.d a0, a0, 8 +L(loop_ascii): + ld.d t2, a0, 0 + sub.d t1, t2, a2 + + and t0, t1, a3 + bnez t0, L(more_check) + ld.d t2, a0, 8 + sub.d t1, t2, a2 + + and t0, t1, a3 + addi.d a0, a0, 16 + beqz t0, L(loop_ascii) + addi.d a0, a0, -8 + +L(more_check): + andn t0, a3, t2 + and t1, t1, t0 + bnez t1, L(tail) + addi.d a0, a0, 8 + + +L(loop_nonascii): + ld.d t2, a0, 0 + sub.d t1, t2, a2 + andn t0, a3, t2 + and t1, t0, t1 + + bnez t1, L(tail) + ld.d t2, a0, 8 + addi.d a0, a0, 16 + sub.d t1, t2, a2 + + andn t0, a3, t2 + and t1, t0, t1 + beqz t1, L(loop_nonascii) + addi.d a0, a0, -8 + +L(tail): + ctz.d t0, t1 + orn t0, zero, t0 + xor t2, t2, a1 + srl.d t0, t3, t0 + + + orn t2, t2, t0 + revb.d t2, t2 + sub.d t1, t2, a2 + andn t0, a3, t2 + + and t1, t0, t1 + bnez t1, L(count_pos) +L(find_loop): + beq a0, a4, L(find_end) + ld.d t2, a0, -8 + + addi.d a0, a0, -8 + xor t2, t2, a1 + sub.d t1, t2, a2 + andn t0, a3, t2 + + and t1, t0, t1 + beqz t1, L(find_loop) + revb.d t2, t2 + sub.d t1, t2, a2 + + + andn t0, a3, t2 + and t1, t0, t1 +L(count_pos): + ctz.d t0, t1 + addi.d a0, a0, 7 + + srli.d t0, t0, 3 + sub.d a0, a0, t0 + jr ra + nop + +L(find_end): + xor t2, t4, a1 + orn t2, t2, t5 + revb.d t2, t2 + sub.d t1, t2, a2 + + + andn t0, a3, t2 + and t1, t0, t1 + ctz.d t0, t1 + srli.d t0, t0, 3 + + addi.d a0, a4, -1 + sub.d a0, a0, t0 + maskeqz a0, a0, t1 + jr ra +END(STRRCHR) + +libc_hidden_builtin_def(STRRCHR) diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S new file mode 100644 index 0000000000..5a6e22979a --- /dev/null +++ b/sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S @@ -0,0 +1,176 @@ +/* Optimized strrchr implementation using LoongArch LASX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + . */ + +#include +#include +#include + +#if IS_IN (libc) && !defined __loongarch_soft_float + +#define STRRCHR __strrchr_lasx + +LEAF(STRRCHR, 6) + move a2, a0 + bstrins.d a0, zero, 5, 0 + xvld xr0, a0, 0 + xvld xr1, a0, 32 + + li.d t2, -1 + xvreplgr2vr.b xr4, a1 + xvmsknz.b xr2, xr0 + xvmsknz.b xr3, xr1 + + xvpickve.w xr5, xr2, 4 + xvpickve.w xr6, xr3, 4 + vilvl.h vr2, vr5, vr2 + vilvl.h vr3, vr6, vr3 + + vilvl.w vr2, vr3, vr2 + movfr2gr.d t0, fa2 + sra.d t0, t0, a2 + beq t0, t2, L(find_tail) + + + xvseq.b xr2, xr0, xr4 + xvseq.b xr3, xr1, xr4 + xvmsknz.b xr2, xr2 + xvmsknz.b xr3, xr3 + + xvpickve.w xr4, xr2, 4 + xvpickve.w xr5, xr3, 4 + vilvl.h vr2, vr4, vr2 + vilvl.h vr3, vr5, vr3 + + vilvl.w vr1, vr3, vr2 + slli.d t3, t2, 1 + movfr2gr.d t1, fa1 + cto.d t0, t0 + + srl.d t1, t1, a2 + sll.d t3, t3, t0 + addi.d a0, a2, 63 + andn t1, t1, t3 + + + clz.d t0, t1 + sub.d a0, a0, t0 + maskeqz a0, a0, t1 + jr ra + + .align 5 +L(find_tail): + addi.d a3, a0, 64 +L(loop): + xvld xr2, a0, 64 + xvld xr3, a0, 96 + addi.d a0, a0, 64 + + xvmin.bu xr5, xr2, xr3 + xvsetanyeqz.b fcc0, xr5 + bceqz fcc0, L(loop) + xvmsknz.b xr5, xr2 + + + xvmsknz.b xr6, xr3 + xvpickve.w xr7, xr5, 4 + xvpickve.w xr8, xr6, 4 + vilvl.h vr5, vr7, vr5 + + vilvl.h vr6, vr8, vr6 + xvseq.b xr2, xr2, xr4 + xvseq.b xr3, xr3, xr4 + xvmsknz.b xr2, xr2 + + xvmsknz.b xr3, xr3 + xvpickve.w xr7, xr2, 4 + xvpickve.w xr8, xr3, 4 + vilvl.h vr2, vr7, vr2 + + vilvl.h vr3, vr8, vr3 + vilvl.w vr5, vr6, vr5 + vilvl.w vr2, vr3, vr2 + movfr2gr.d t0, fa5 + + + movfr2gr.d t1, fa2 + slli.d t3, t2, 1 + cto.d t0, t0 + sll.d t3, t3, t0 + + andn t1, t1, t3 + beqz t1, L(find_loop) + clz.d t0, t1 + addi.d a0, a0, 63 + + sub.d a0, a0, t0 + jr ra +L(find_loop): + beq a0, a3, L(find_end) + xvld xr2, a0, -64 + + xvld xr3, a0, -32 + addi.d a0, a0, -64 + xvseq.b xr2, xr2, xr4 + xvseq.b xr3, xr3, xr4 + + + xvmax.bu xr5, xr2, xr3 + xvseteqz.v fcc0, xr5 + bcnez fcc0, L(find_loop) + xvmsknz.b xr0, xr2 + + xvmsknz.b xr1, xr3 + xvpickve.w xr2, xr0, 4 + xvpickve.w xr3, xr1, 4 + vilvl.h vr0, vr2, vr0 + + vilvl.h vr1, vr3, vr1 + vilvl.w vr0, vr1, vr0 + movfr2gr.d t0, fa0 + addi.d a0, a0, 63 + + clz.d t0, t0 + sub.d a0, a0, t0 + jr ra + nop + + +L(find_end): + xvseq.b xr2, xr0, xr4 + xvseq.b xr3, xr1, xr4 + xvmsknz.b xr2, xr2 + xvmsknz.b xr3, xr3 + + xvpickve.w xr4, xr2, 4 + xvpickve.w xr5, xr3, 4 + vilvl.h vr2, vr4, vr2 + vilvl.h vr3, vr5, vr3 + + vilvl.w vr1, vr3, vr2 + movfr2gr.d t1, fa1 + addi.d a0, a2, 63 + srl.d t1, t1, a2 + + clz.d t0, t1 + sub.d a0, a0, t0 + maskeqz a0, a0, t1 + jr ra +END(STRRCHR) + +libc_hidden_builtin_def(STRRCHR) +#endif diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S new file mode 100644 index 0000000000..8f2fd22e50 --- /dev/null +++ b/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S @@ -0,0 +1,144 @@ +/* Optimized strrchr implementation using LoongArch LSX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + . */ + +#include +#include +#include + +#if IS_IN (libc) && !defined __loongarch_soft_float + +#define STRRCHR __strrchr_lsx + +LEAF(STRRCHR, 6) + move a2, a0 + bstrins.d a0, zero, 4, 0 + vld vr0, a0, 0 + vld vr1, a0, 16 + + li.d t2, -1 + vreplgr2vr.b vr4, a1 + vmsknz.b vr2, vr0 + vmsknz.b vr3, vr1 + + vilvl.h vr2, vr3, vr2 + movfr2gr.s t0, fa2 + sra.w t0, t0, a2 + beq t0, t2, L(find_tail) + + vseq.b vr2, vr0, vr4 + vseq.b vr3, vr1, vr4 + vmsknz.b vr2, vr2 + vmsknz.b vr3, vr3 + + + vilvl.h vr1, vr3, vr2 + slli.d t3, t2, 1 + movfr2gr.s t1, fa1 + cto.w t0, t0 + + srl.w t1, t1, a2 + sll.d t3, t3, t0 + addi.d a0, a2, 31 + andn t1, t1, t3 + + clz.w t0, t1 + sub.d a0, a0, t0 + maskeqz a0, a0, t1 + jr ra + + .align 5 +L(find_tail): + addi.d a3, a0, 32 +L(loop): + vld vr2, a0, 32 + vld vr3, a0, 48 + addi.d a0, a0, 32 + + vmin.bu vr5, vr2, vr3 + vsetanyeqz.b fcc0, vr5 + bceqz fcc0, L(loop) + vmsknz.b vr5, vr2 + + vmsknz.b vr6, vr3 + vilvl.h vr5, vr6, vr5 + vseq.b vr2, vr2, vr4 + vseq.b vr3, vr3, vr4 + + vmsknz.b vr2, vr2 + vmsknz.b vr3, vr3 + vilvl.h vr2, vr3, vr2 + movfr2gr.s t0, fa5 + + + movfr2gr.s t1, fa2 + slli.d t3, t2, 1 + cto.w t0, t0 + sll.d t3, t3, t0 + + andn t1, t1, t3 + beqz t1, L(find_loop) + clz.w t0, t1 + addi.d a0, a0, 31 + + sub.d a0, a0, t0 + jr ra +L(find_loop): + beq a0, a3, L(find_end) + vld vr2, a0, -32 + + vld vr3, a0, -16 + addi.d a0, a0, -32 + vseq.b vr2, vr2, vr4 + vseq.b vr3, vr3, vr4 + + + vmax.bu vr5, vr2, vr3 + vseteqz.v fcc0, vr5 + bcnez fcc0, L(find_loop) + vmsknz.b vr0, vr2 + + vmsknz.b vr1, vr3 + vilvl.h vr0, vr1, vr0 + movfr2gr.s t0, fa0 + addi.d a0, a0, 31 + + clz.w t0, t0 + sub.d a0, a0, t0 + jr ra + nop + +L(find_end): + vseq.b vr2, vr0, vr4 + vseq.b vr3, vr1, vr4 + vmsknz.b vr2, vr2 + vmsknz.b vr3, vr3 + + + vilvl.h vr1, vr3, vr2 + movfr2gr.s t1, fa1 + addi.d a0, a2, 31 + srl.w t1, t1, a2 + + clz.w t0, t1 + sub.d a0, a0, t0 + maskeqz a0, a0, t1 + jr ra +END(STRRCHR) + +libc_hidden_builtin_def(STRRCHR) +#endif diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr.c b/sysdeps/loongarch/lp64/multiarch/strrchr.c new file mode 100644 index 0000000000..d9c9f660a0 --- /dev/null +++ b/sysdeps/loongarch/lp64/multiarch/strrchr.c @@ -0,0 +1,36 @@ +/* Multiple versions of strrchr. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* Define multiple versions only for the definition in libc. */ +#if IS_IN (libc) +# define strrchr __redirect_strrchr +# include +# undef strrchr + +# define SYMBOL_NAME strrchr +# include "ifunc-strrchr.h" + +libc_ifunc_redirected (__redirect_strrchr, strrchr, IFUNC_SELECTOR ()); +weak_alias (strrchr, rindex) +# ifdef SHARED +__hidden_ver1 (strrchr, __GI_strrchr, __redirect_strrchr) + __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strrchr); +# endif + +#endif -- cgit 1.4.1