Diffstat (limited to 'sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S')
-rw-r--r--  sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S  160
1 file changed, 160 insertions, 0 deletions
diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S b/sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S
new file mode 100644
index 0000000000..9e31883bb1
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S
@@ -0,0 +1,160 @@
+/* Optimized unaligned strcpy implementation using basic LoongArch
+   instructions.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc)
+
+# ifndef STRCPY
+#  define STRCPY __strcpy_unaligned
+# endif
+
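+/* For stpcpy, the address of the copied NUL terminator is computed
+   directly in a0, the return-value register.  */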
+# ifdef USE_AS_STPCPY
+#  define dstend a0
+# else
+#  define dstend a4
+# endif
+
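+/* char *STRCPY (char *dst, const char *src);
+   Copy SRC, including its terminating NUL, to DST using 8-byte
+   loads and stores that may be unaligned.  */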
+LEAF(STRCPY, 6)
+    lu12i.w     t5, 0x01010
+    li.w        t0, 0xff8
+    ori         t5, t5, 0x101
+    andi        t1, a1, 0xfff
+
+    bstrins.d   t5, t5, 63, 32
+    move        a2, a0
+    slli.d      t6, t5, 7
+    bltu        t0, t1, L(page_cross)
+
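+/* Test the first (possibly unaligned) 8 bytes for a NUL via the
+   (x - 0x0101..) & ~x & 0x8080.. trick; t3 is the advance (1..8)
+   that makes SRC 8-byte aligned.  */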
+L(start_entry):
+    ld.d        t0, a1, 0
+    li.d        t3, 8
+    andi        a3, a1, 0x7
+    sub.d       t1, t0, t5
+
+    andn        t2, t6, t0
+    sub.d       t3, t3, a3
+    and         t1, t1, t2
+    bnez        t1, L(end)
+
+
+    add.d       a1, a1, t3
+    st.d        t0, a2, 0
+    add.d       a2, a2, t3
+    ld.d        t0, a1, 0
+
+    sub.d       t1, t0, t5
+    andn        t2, t6, t0
+    and         t1, t1, t2
+    bnez        t1, L(long_end)
+
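+/* SRC is now 8-byte aligned: copy one word per iteration until a
+   word containing a NUL byte is loaded.  */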
+L(loop):
+    st.d        t0, a2, 0
+    ld.d        t0, a1, 8
+    addi.d      a2, a2, 8
+    addi.d      a1, a1, 8
+
+    sub.d       t1, t0, t5
+    andn        t2, t6, t0
+    and         t1, t1, t2
+    beqz        t1, L(loop)
+
+
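+/* t1 flags the first NUL byte; ctz.d/srli.d turn it into a byte
+   index.  Reload the 8 bytes ending at the NUL and store them so
+   the copy ends exactly at the terminator (stores may overlap).  */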
+L(long_end):
+    ctz.d       t1, t1
+    srli.d      t1, t1, 3
+    add.d       a1, a1, t1
+    ld.d        t0, a1, -7
+
+    add.d       dstend, a2, t1
+    st.d        t0, dstend, -7
+    jr          ra
+    nop
+
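+/* The NUL is in the first word: copy at most 8 bytes in total.  */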
+L(end):
+    ctz.d       t1, t1
+    srli.d      t1, t1, 3
+    add.d       a3, a1, t1
+    add.d       dstend, a2, t1
+
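+/* t1 is the index of the NUL.  For t1 >= 3, copy two overlapping
+   4-byte words, the second ending at the NUL; otherwise copy a
+   halfword if needed and store the terminating NUL byte.  */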
+L(less_8):
+    li.d        t0, 3
+    bltu        t1, t0, L(less_3)
+    ld.w        t1, a1, 0
+    ld.w        t2, a3, -3
+
+
+    st.w        t1, a2, 0
+    st.w        t2, dstend, -3
+    jr          ra
+L(less_3):
+    beqz        t1, L(zero_bytes)
+
+    ld.h        t1, a1, 0
+    st.h        t1, a2, 0
+L(zero_bytes):
+    st.b        zero, dstend, 0
+    jr          ra
+
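+/* An unaligned 8-byte load at SRC would cross a page boundary.
+   Load the aligned word containing SRC, shift the bytes before
+   SRC out, and force the vacated high bytes to 0xff so they do
+   not test as NUL.  If no NUL occurs before the page end, the
+   string continues and the unaligned load at L(start_entry) is safe.  */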
+L(page_cross):
+    move        a4, a1
+    bstrins.d   a4, zero, 2, 0
+    ld.d        t0, a4, 0
+    li.d        t3, -1
+
+    slli.d      t4, a1, 3
+    srl.d       t3, t3, t4
+    srl.d       t0, t0, t4
+    orn         t0, t0, t3
+
+
+    sub.d       t1, t0, t5
+    andn        t2, t6, t0
+    and         t1, t1, t2
+    beqz        t1, L(start_entry)
+
+    b           L(end)
+END(STRCPY)
+
+libc_hidden_builtin_def (STRCPY)
+#endif
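
For reference, the NUL test repeated throughout the routine (the
sub.d/andn/and sequence on t5 = 0x0101..01 and t6 = 0x8080..80) is the
classic has-zero-byte bit trick.  Below is a minimal C sketch of it,
for illustration only; it is not part of the patch, and has_zero is a
hypothetical helper name:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    /* Nonzero iff the 8-byte word W contains a 0x00 byte; the lowest
       set bit of the result falls inside the first zero byte when
       bytes are counted from the low end (little-endian order, as on
       LoongArch).  */
    static inline uint64_t
    has_zero (uint64_t w)
    {
      const uint64_t ones = 0x0101010101010101ULL;  /* t5 in the asm.  */
      const uint64_t high = 0x8080808080808080ULL;  /* t6 in the asm.  */
      return (w - ones) & ~w & high;
    }

    int
    main (void)
    {
      uint64_t w;
      /* Assumes a little-endian host; the NUL sits at byte index 3.  */
      memcpy (&w, "abc\0defg", 8);
      assert (has_zero (w) != 0);
      assert (__builtin_ctzll (has_zero (w)) >> 3 == 3);
      return 0;
    }

The __builtin_ctzll (...) >> 3 expression computes the same first-NUL
byte index as the ctz.d/srli.d pair at L(end) and L(long_end).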