/* Optimized memrchr implementation using LoongArch LASX instructions.
   Copyright (C) 2023 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include <sys/regdef.h>
#include <sys/asm.h>

#if IS_IN (libc) && !defined __loongarch_soft_float

#ifndef MEMRCHR
# define MEMRCHR __memrchr_lasx
#endif

/* void *MEMRCHR (const void *s, int c, size_t n)

   Scan the n bytes starting at s backwards and return the address of
   the last byte equal to (unsigned char) c, or NULL when there is none.

   In:   a0 = s, a1 = c, a2 = n
   Out:  a0 = address of the last match, or 0
   Uses: a2, a3, t0-t4, xr0-xr4, fcc0 (no stack, no calls)

   Strategy: the buffer is processed in 64-byte aligned blocks, from the
   block holding the last byte down to the block holding the first byte.
   Each block is loaded as two 32-byte vectors and reduced to a 64-bit
   mask in t0 where bit i is set when byte i of the block equals c.
   Because whole aligned blocks are loaded, bytes outside [s, s + n) in
   the first and last block are read as well; their mask bits are
   cleared before the mask is used.

   The second LEAF argument is presumably the log2 entry alignment
   (64 bytes); the macro is defined in the included headers.  */
LEAF(MEMRCHR, 6)
    beqz            a2, L(ret0)         /* n == 0: nothing to search.  */
    addi.d          a2, a2, -1          /* a2 = n - 1.  */
    add.d           a3, a0, a2          /* a3 = address of the last byte.  */
    andi            t1, a3, 0x3f        /* t1 = offset of last byte in its block.  */

    bstrins.d       a3, zero, 5, 0      /* a3 = 64-byte aligned block of last byte.  */
    addi.d          t1, t1, 1           /* t1 = valid bytes in that block (1..64).  */
    xvld            xr0, a3, 0          /* Low 32 bytes of the block.  */
    xvld            xr1, a3, 32         /* High 32 bytes of the block.  */

    sub.d           t2, zero, t1        /* Shift count -t1; srl.d uses its low 6 bits.  */
    li.d            t3, -1              /* t3 = all ones, source for both edge masks.  */
    xvreplgr2vr.b   xr2, a1             /* xr2 = c broadcast to every byte.  */
    andi            t4, a0, 0x3f        /* t4 = offset of first byte in its block.  */

    srl.d           t2, t3, t2          /* t2 = tail mask: low t1 bits set.  */
    xvseq.b         xr0, xr0, xr2       /* 0xff in every byte equal to c.  */
    xvseq.b         xr1, xr1, xr2
    xvmsknz.b       xr0, xr0            /* One bit per non-zero byte, 16 bits per
                                           128-bit half.  */

    /* Gather the four 16-bit half masks into one 64-bit mask: move the
       upper-half mask next to the lower-half one, interleave them into
       32 bits per vector, then join both vectors into 64 bits.  */
    xvmsknz.b       xr1, xr1
    xvpickve.w      xr3, xr0, 4
    xvpickve.w      xr4, xr1, 4
    vilvl.h         vr0, vr3, vr0

    vilvl.h         vr1, vr4, vr1
    vilvl.w         vr0, vr1, vr0
    movfr2gr.d      t0, fa0             /* t0 = match mask of the last block.  */
    and             t0, t0, t2          /* Drop matches past the last byte.  */

    bltu            a2, t1, L(end)      /* n <= t1: s lies in this block too.  */
    bnez            t0, L(found)        /* Match in the last block.  */
    bstrins.d       a0, zero, 5, 0      /* a0 = aligned block holding the first byte.  */

    /* Walk down one 64-byte block per iteration.  a3 is the block that
       was examined last; a0 is the lowest block to examine.  */
L(loop):
    xvld            xr0, a3, -64

    xvld            xr1, a3, -32
    addi.d          a3, a3, -64         /* a3 = block just loaded.  */
    xvseq.b         xr0, xr0, xr2
    xvseq.b         xr1, xr1, xr2

    beq             a0, a3, L(out)      /* Lowest block: needs the head mask.  */
    xvmax.bu        xr3, xr0, xr1       /* Non-zero iff either half has a match.  */
    xvseteqz.v      fcc0, xr3           /* fcc0 = 1 when no byte matched.  */
    bcnez           fcc0, L(loop)

    /* A full interior block contains a match: build its 64-bit mask.  */
    xvmsknz.b       xr0, xr0
    xvmsknz.b       xr1, xr1
    xvpickve.w      xr3, xr0, 4
    xvpickve.w      xr4, xr1, 4

    vilvl.h         vr0, vr3, vr0
    vilvl.h         vr1, vr4, vr1
    vilvl.w         vr0, vr1, vr0
    movfr2gr.d      t0, fa0

    /* t0 != 0 here.  The highest set bit is the last match:
       result = a3 + 63 - clz (t0).  */
L(found):
    addi.d          a0, a3, 63
    clz.d           t1, t0
    sub.d           a0, a0, t1
    jr              ra

    /* Lowest block reached by the loop: build its 64-bit mask.  */
L(out):
    xvmsknz.b       xr0, xr0
    xvmsknz.b       xr1, xr1
    xvpickve.w      xr3, xr0, 4
    xvpickve.w      xr4, xr1, 4

    vilvl.h         vr0, vr3, vr0
    vilvl.h         vr1, vr4, vr1
    vilvl.w         vr0, vr1, vr0
    movfr2gr.d      t0, fa0

    /* Block containing the first byte (also reached directly when the
       whole buffer sits in one block): discard matches below s, then
       return the highest remaining match or NULL.  */
L(end):
    sll.d           t2, t3, t4          /* t2 = head mask: bits t4..63 set.  */
    and             t0, t0, t2
    addi.d          a0, a3, 63
    clz.d           t1, t0

    sub.d           a0, a0, t1
    maskeqz         a0, a0, t0          /* a0 = 0 when no match is left.  */
    jr              ra

L(ret0):
    move            a0, zero            /* Return NULL.  */
    jr              ra
END(MEMRCHR)

libc_hidden_builtin_def (MEMRCHR)
#endif