about summary refs log tree commit diff
path: root/sysdeps/powerpc/powerpc64
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/powerpc/powerpc64')
-rw-r--r-- sysdeps/powerpc/powerpc64/le/power10/memcpy.S 198
-rw-r--r-- sysdeps/powerpc/powerpc64/multiarch/Makefile 2
-rw-r--r-- sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c 6
-rw-r--r-- sysdeps/powerpc/powerpc64/multiarch/memcpy-power10.S 26
-rw-r--r-- sysdeps/powerpc/powerpc64/multiarch/memcpy.c 7
5 files changed, 238 insertions, 1 deletions
diff --git a/sysdeps/powerpc/powerpc64/le/power10/memcpy.S b/sysdeps/powerpc/powerpc64/le/power10/memcpy.S
new file mode 100644
index 0000000000..ad1414db4a
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/power10/memcpy.S
@@ -0,0 +1,198 @@
+/* Optimized memcpy implementation for POWER10.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+
+#ifndef MEMCPY
+# define MEMCPY memcpy
+#endif
+
+/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
+	   Copies 'len' bytes from 'src' to 'dst' and returns 'dst'.  */
+
+	.machine power9
+ENTRY_TOCLESS (MEMCPY, 5)
+	CALL_MCOUNT 3
+
+	/* Copy up to 16 bytes.  */
+	sldi	r6,r5,56	/* Prepare [l|st]xvl counter (top byte of r6).  */
+	lxvl	v10,r4,r6
+	stxvl	v10,r3,r6
+	subic.	r6,r5,16	/* Return if len <= 16.  */
+	blelr
+
+	/* If len >= 256, the [l|st]xvl counter above only held the low byte of
+	   len, so assume nothing got copied and copy again.  This might cause
+	   issues with overlapped memory, which memcpy need not handle.  */
+	cmpdi	r5,256		/* NOTE(review): signed compare on a size_t.  */
+	bge	L(copy_ge_256)
+	/* 16 < len < 256 and the first 16 bytes have already been copied.  */
+	addi	r10,r3,16	/* Keep r3 intact as return value.  */
+	addi	r4,r4,16
+	subi	r5,r5,16
+	b	L(copy_lt_256)	/* Avoid the main loop if len < 256.  */
+
+	.p2align 5
+L(copy_ge_256):
+	mr	r10,r3		/* Keep r3 intact as return value.  */
+	/* Align dst to 16 bytes.  */
+	andi.	r9,r10,0xf	/* r9 = dst & 15.  */
+	beq	L(dst_is_align_16)
+	lxv	v10,0(r4)	/* Copy a full 16 bytes from the start...  */
+	subfic	r12,r9,16	/* r12 = 16 - r9, bytes up to the boundary.  */
+	subf	r5,r12,r5	/* len -= r12.  */
+	add	r4,r4,r12	/* src += r12.  */
+	stxv	v10,0(r3)	/* ...but only advance by r12.  */
+	add	r10,r3,r12	/* r10 = dst rounded up to 16 bytes.  */
+
+L(dst_is_align_16):
+	srdi	r9,r5,7		/* Divide by 128.  */
+	mtctr	r9		/* CTR = number of 128-byte blocks.  */
+	addi	r6,r4,64
+	addi	r7,r10,64
+
+
+	/* Main loop, copy 128 bytes per iteration.
+	   Use r6=src+64 and r7=dest+64 in order to reduce the dependency on
+	   r4 and r10.  */
+	.p2align 5
+L(copy_128):
+
+	lxv	v10, 0(r4)
+	lxv	v11, 16(r4)
+	lxv	v12, 32(r4)
+	lxv	v13, 48(r4)
+
+	addi	r4,r4,128
+
+	stxv	v10, 0(r10)
+	stxv	v11, 16(r10)
+	stxv	v12, 32(r10)
+	stxv	v13, 48(r10)
+
+	addi	r10,r10,128
+
+	lxv	v10, 0(r6)
+	lxv	v11, 16(r6)
+	lxv	v12, 32(r6)
+	lxv	v13, 48(r6)
+
+	addi	r6,r6,128
+
+	stxv	v10, 0(r7)
+	stxv	v11, 16(r7)
+	stxv	v12, 32(r7)
+	stxv	v13, 48(r7)
+
+	addi	r7,r7,128
+
+	bdnz	L(copy_128)	/* Decrement CTR; loop while it is nonzero.  */
+
+	clrldi.	r5,r5,64-7	/* r5 &= 127.  Have we copied everything?  */
+	beqlr
+
+	.p2align 5
+L(copy_lt_256):
+	cmpdi	r5,16
+	ble	L(copy_le_16)
+	srdi.	r9,r5,5		/* Divide by 32.  */
+	beq	L(copy_lt_32)	/* Fewer than 32 bytes left.  */
+	mtctr	r9		/* CTR = number of 32-byte chunks.  */
+	/* Use r6=src+32, r7=dest+32, r8=src+64, r9=dest+64 in order to reduce
+	   the dependency on r4 and r10.  */
+	addi	r6,r4,32
+	addi	r7,r10,32
+	addi	r8,r4,64
+	addi	r9,r10,64
+
+	.p2align 5
+	/* Copy 32 bytes at a time, unaligned.
+	   The loop is unrolled 3 times in order to reduce the dependency on
+	   r4 and r10, copying up-to 96 bytes per iteration.  */
+L(copy_32):
+	lxv	v10, 0(r4)
+	lxv	v11, 16(r4)
+	stxv	v10, 0(r10)
+	stxv	v11, 16(r10)
+	bdz	L(end_copy_32a)	/* CTR hit zero after the 1st chunk.  */
+	addi	r4,r4,96
+	addi	r10,r10,96
+
+	lxv	v10, 0(r6)
+	lxv	v11, 16(r6)
+	addi	r6,r6,96
+	stxv	v10, 0(r7)
+	stxv	v11, 16(r7)
+	bdz	L(end_copy_32b)	/* CTR hit zero after the 2nd chunk.  */
+	addi	r7,r7,96
+
+	lxv	v12, 0(r8)
+	lxv	v13, 16(r8)
+	addi	r8,r8,96
+	stxv	v12, 0(r9)
+	stxv	v13, 16(r9)
+	addi	r9,r9,96
+	bdnz	L(copy_32)
+
+	clrldi.	r5,r5,64-5	/* r5 &= 31.  Have we copied everything?  */
+	beqlr
+	cmpdi	r5,16
+	ble	L(copy_le_16)
+	b	L(copy_lt_32)
+
+	.p2align 5
+L(end_copy_32a):
+	clrldi.	r5,r5,64-5	/* r5 &= 31.  Have we copied everything?  */
+	beqlr
+	/* 32 bytes have been copied since the last update of r4 and r10.  */
+	addi	r4,r4,32
+	addi	r10,r10,32
+	cmpdi	r5,16
+	ble	L(copy_le_16)
+	b	L(copy_lt_32)
+
+	.p2align 5
+L(end_copy_32b):
+	clrldi.	r5,r5,64-5	/* r5 &= 31.  Have we copied everything?  */
+	beqlr
+	/* r4 and r10 were already advanced by 96, but the last pass of the
+	   loop only copied 64 bytes.  Step them back by 32.  */
+	addi	r4,r4,-32
+	addi	r10,r10,-32
+	cmpdi	r5,16
+	ble	L(copy_le_16)	/* Otherwise fall through to copy_lt_32.  */
+
+	.p2align 5
+L(copy_lt_32):
+	lxv	v10, 0(r4)	/* 16 < r5 < 32 here: copy 16 bytes.  */
+	stxv	v10, 0(r10)
+	addi	r4,r4,16
+	addi	r10,r10,16
+	subi	r5,r5,16	/* 0 < r5 < 16 now.  */
+
+	.p2align 5
+L(copy_le_16):
+	sldi	r6,r5,56	/* r5 <= 16 here: [l|st]xvl counter.  */
+	lxvl	v10,r4,r6
+	stxvl	v10,r10,r6
+	blr
+
+
+END_GEN_TB (MEMCPY,TB_TOCLESS)
+libc_hidden_builtin_def (memcpy)
diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile
index a82219c490..9ef12d3563 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile
@@ -32,7 +32,7 @@ sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \
 		   strncase-power8
 
 ifneq (,$(filter %le,$(config-machine)))
-sysdep_routines += memmove-power10 \
+sysdep_routines += memcpy-power10 memmove-power10 \
 		   strcmp-power9 strncmp-power9 strcpy-power9 stpcpy-power9 \
 		   rawmemchr-power9 strlen-power9 strncpy-power9 stpncpy-power9 \
 		   strlen-power10
diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
index d00bcc8178..1ab56bb2c9 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
@@ -51,6 +51,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 #ifdef SHARED
   /* Support sysdeps/powerpc/powerpc64/multiarch/memcpy.c.  */
   IFUNC_IMPL (i, name, memcpy,
+#ifdef __LITTLE_ENDIAN__
+	      IFUNC_IMPL_ADD (array, i, memcpy,
+			      hwcap2 & PPC_FEATURE2_ARCH_3_1
+			      && hwcap & PPC_FEATURE_HAS_VSX,
+			      __memcpy_power10)
+#endif
 	      IFUNC_IMPL_ADD (array, i, memcpy, hwcap2 & PPC_FEATURE2_ARCH_2_07,
 			      __memcpy_power8_cached)
 	      IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_HAS_VSX,
diff --git a/sysdeps/powerpc/powerpc64/multiarch/memcpy-power10.S b/sysdeps/powerpc/powerpc64/multiarch/memcpy-power10.S
new file mode 100644
index 0000000000..70e0fc3ed6
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/memcpy-power10.S
@@ -0,0 +1,26 @@
+/* Optimized memcpy implementation for POWER10.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#if defined __LITTLE_ENDIAN__ && IS_IN (libc)
+#define MEMCPY __memcpy_power10
+/* Make libc_hidden_builtin_def (memcpy) in the included file expand to nothing.  */
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(name)
+/* MEMCPY is already defined, so the included file keeps that name.  */
+#include <sysdeps/powerpc/powerpc64/le/power10/memcpy.S>
+#endif /* __LITTLE_ENDIAN__ && IS_IN (libc) */
diff --git a/sysdeps/powerpc/powerpc64/multiarch/memcpy.c b/sysdeps/powerpc/powerpc64/multiarch/memcpy.c
index 5733192932..53ab32ef26 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/memcpy.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/memcpy.c
@@ -36,8 +36,15 @@ extern __typeof (__redirect_memcpy) __memcpy_power6 attribute_hidden;
 extern __typeof (__redirect_memcpy) __memcpy_a2 attribute_hidden;
 extern __typeof (__redirect_memcpy) __memcpy_power7 attribute_hidden;
 extern __typeof (__redirect_memcpy) __memcpy_power8_cached attribute_hidden;
+# if defined __LITTLE_ENDIAN__
+extern __typeof (__redirect_memcpy) __memcpy_power10 attribute_hidden;
+# endif
 
 libc_ifunc (__libc_memcpy,
+# if defined __LITTLE_ENDIAN__
+	    (hwcap2 & PPC_FEATURE2_ARCH_3_1 && hwcap & PPC_FEATURE_HAS_VSX)
+	    ? __memcpy_power10 :
+# endif
 	    ((hwcap2 & PPC_FEATURE2_ARCH_2_07) && use_cached_memopt)
 	    ? __memcpy_power8_cached :
 	      (hwcap & PPC_FEATURE_HAS_VSX)