/* Optimized strcpy unaligned implementation using basic LoongArch instructions.
   Copyright (C) 2023 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */

/* NOTE(review): the header names were missing from this copy of the file
   (bare "#include" directives).  They are restored here to the set used by
   the other LoongArch string routines; confirm against the tree.  */
#include <sysdep.h>
#include <sys/regdef.h>
#include <sys/asm.h>

#if IS_IN (libc)

# ifndef STRCPY
#  define STRCPY __strcpy_unaligned
# endif

/* dstend names the register that receives the address of the terminating
   NUL in the destination.  For stpcpy that address is the return value, so
   it is computed directly into a0; for strcpy a0 must keep the original
   destination pointer, so a scratch register is used instead.  */
# ifdef USE_AS_STPCPY
#  define dstend a0
# else
#  define dstend a4
# endif

/* char *STRCPY (char *dst, const char *src)

   In:    a0 = dst, a1 = src.
   Out:   a0 = dst, or, when USE_AS_STPCPY is defined, the address of the
          NUL byte written to dst.
   Uses:  a1-a4 and t0-t6 as scratch; no stack.

   The string is scanned eight bytes at a time.  A word X contains a zero
   byte iff (X - 0x0101010101010101) & ~X & 0x8080808080808080 is non-zero;
   the lowest set bit of that value lies in the first zero byte, so
   ctz.d / 8 gives the index of the NUL within the word (byte 0 is the
   least significant byte).  Bytes above the first zero byte may be flagged
   spuriously, which is harmless because only the lowest bit is used.

   Stores to dst and the first load from src may be unaligned; after the
   first word the source pointer is rounded up to an 8-byte boundary.
   NOTE(review): the second LEAF argument is presumably the log2 entry
   alignment; its meaning is defined by the LEAF macro, not in this file.  */
LEAF(STRCPY, 6)
    lu12i.w     t5, 0x01010             /* t5 = 0x01010000  */
    li.w        t0, 0xff8               /* Last offset in a 4 KiB block where an 8-byte load still fits.  */
    ori         t5, t5, 0x101           /* t5 = 0x01010101  */
    andi        t1, a1, 0xfff           /* t1 = src offset within its 4 KiB block.  */

    bstrins.d   t5, t5, 63, 32          /* t5 = 0x0101010101010101  */
    move        a2, a0                  /* a2 = running dst pointer; a0 is left alone.  */
    slli.d      t6, t5, 7               /* t6 = 0x8080808080808080  */
    bltu        t0, t1, L(page_cross)   /* An 8-byte load at src would cross the 4 KiB boundary.  */

L(start_entry):
    /* First word: unaligned load from src, tested for a NUL.  */
    ld.d        t0, a1, 0
    li.d        t3, 8
    andi        a3, a1, 0x7             /* a3 = src misalignment (0..7).  */
    sub.d       t1, t0, t5

    andn        t2, t6, t0              /* t2 = 0x80.. & ~t0  */
    sub.d       t3, t3, a3              /* t3 = bytes up to the next 8-byte boundary (1..8).  */
    and         t1, t1, t2              /* t1 != 0 iff t0 contains a zero byte.  */
    bnez        t1, L(end)              /* String (with NUL) fits in the first 8 bytes.  */

    /* No NUL in the first 8 bytes: store all of them, but advance both
       pointers only by t3 so that src becomes 8-byte aligned.  Any bytes
       stored beyond dst + t3 are rewritten with the same values by the
       following stores.  */
    add.d       a1, a1, t3
    st.d        t0, a2, 0
    add.d       a2, a2, t3
    ld.d        t0, a1, 0               /* First aligned source word.  */

    sub.d       t1, t0, t5
    andn        t2, t6, t0
    and         t1, t1, t2
    bnez        t1, L(long_end)

    /* Main loop.  On entry and at the bottom of every iteration t0 holds
       the word at a1 and a2 is where that word belongs in dst.  */
L(loop):
    st.d        t0, a2, 0
    ld.d        t0, a1, 8
    addi.d      a2, a2, 8
    addi.d      a1, a1, 8

    sub.d       t1, t0, t5
    andn        t2, t6, t0
    and         t1, t1, t2
    beqz        t1, L(loop)

    /* NUL found after at least 8 bytes have been seen: copy the 8 bytes
       that end with the NUL using one (possibly overlapping) load/store
       pair.  The load cannot start before the original src because the
       string is at least 8 bytes long here.  */
L(long_end):
    ctz.d       t1, t1
    srli.d      t1, t1, 3               /* t1 = index of the NUL within the word.  */
    add.d       a1, a1, t1              /* a1 = address of the NUL in src.  */
    ld.d        t0, a1, -7

    add.d       dstend, a2, t1          /* dstend = address of the NUL in dst.  */
    st.d        t0, dstend, -7
    jr          ra
    nop                                 /* Padding; presumably keeps the following code aligned -- confirm.  */

    /* NUL found within the first 8 bytes starting at a1.  */
L(end):
    ctz.d       t1, t1
    srli.d      t1, t1, 3               /* t1 = string length (0..7).  */
    add.d       a3, a1, t1              /* a3 = address of the NUL in src.  */
    add.d       dstend, a2, t1          /* dstend = address of the NUL in dst.  */

    /* L(less_8) is not the target of any branch in this file; it is reached
       by falling through from L(end).  */
L(less_8):
    li.d        t0, 3
    bltu        t1, t0, L(less_3)
    /* Length 3..7: copy the first four bytes and the four bytes ending
       with the NUL; the two ranges may overlap.  */
    ld.w        t1, a1, 0
    ld.w        t2, a3, -3

    st.w        t1, a2, 0
    st.w        t2, dstend, -3
    jr          ra
L(less_3):
    beqz        t1, L(zero_bytes)       /* Empty string: only the NUL is written.  */

    /* Length 1 or 2: copy two bytes, then write the NUL at dstend (for
       length 1 the NUL has already been copied and is simply rewritten).  */
    ld.h        t1, a1, 0
    st.h        t1, a2, 0
L(zero_bytes):
    st.b        zero, dstend, 0
    jr          ra

    /* src lies in the last 7 bytes of a 4 KiB block.  Read the enclosing
       aligned word instead (which cannot cross the boundary) and examine
       only the bytes from src onwards.  a4 is free to use as a temporary
       here even when it is also dstend, because dstend is always written
       before it is read.  */
L(page_cross):
    move        a4, a1
    bstrins.d   a4, zero, 2, 0          /* a4 = src rounded down to 8 bytes.  */
    ld.d        t0, a4, 0
    li.d        t3, -1

    slli.d      t4, a1, 3               /* srl.d uses only bits 5:0, i.e. (src & 7) * 8.  */
    srl.d       t3, t3, t4              /* t3 = mask of the bytes that remain after the shift.  */
    srl.d       t0, t0, t4              /* Drop the bytes that precede src.  */
    orn         t0, t0, t3              /* Force the vacated high bytes to 0xff so they never look like a NUL.  */

    sub.d       t1, t0, t5
    andn        t2, t6, t0
    and         t1, t1, t2
    /* No NUL before the boundary means the string continues past it, so
       the unaligned 8-byte load in L(start_entry) is safe.  */
    beqz        t1, L(start_entry)

    b           L(end)                  /* t1 already indexes bytes relative to src.  */
END(STRCPY)

libc_hidden_builtin_def (STRCPY)
#endif