about summary refs log tree commit diff
diff options
context:
space:
mode:
authorSteve Ellcey <sellcey@caviumnetworks.com>2018-02-22 08:38:47 -0800
committerSteve Ellcey <sellcey@caviumnetworks.com>2018-02-22 08:38:47 -0800
commite9537dddc7c7c7b60b55ed845542c8d586164488 (patch)
treeae653efab8d31c8d5056f0d29fef30c19c0fd260
parentda81ae645d8ee89052f109c814a68a9489f562e6 (diff)
downloadglibc-e9537dddc7c7c7b60b55ed845542c8d586164488.tar.gz
glibc-e9537dddc7c7c7b60b55ed845542c8d586164488.tar.xz
glibc-e9537dddc7c7c7b60b55ed845542c8d586164488.zip
IFUNC for Cavium ThunderX2
	* sysdeps/aarch64/multiarch/Makefile (sysdep_routines):
	Add memcpy_thunderx2.
	* sysdeps/aarch64/multiarch/ifunc-impl-list.c (MAX_IFUNC):
	Increment to 4.
	(__libc_ifunc_impl_list): Add __memcpy_thunderx2.
	* sysdeps/aarch64/multiarch/memcpy.c (libc_ifunc): Add IS_THUNDERX2
	and IS_THUNDERX2PA checks.
	* sysdeps/aarch64/multiarch/memcpy_thunderx.S (USE_THUNDERX2):
	Use macro to set name appropriately.
	(memcpy): Use USE_THUNDERX2 macro to modify prefetches.
	* sysdeps/aarch64/multiarch/memcpy_thunderx2.S: New file.
	* sysdeps/unix/sysv/linux/aarch64/cpu-features.h (IS_THUNDERX2PA):
	New macro.
	(IS_THUNDERX2): New macro.
-rw-r--r--ChangeLog17
-rw-r--r--sysdeps/aarch64/multiarch/Makefile4
-rw-r--r--sysdeps/aarch64/multiarch/ifunc-impl-list.c3
-rw-r--r--sysdeps/aarch64/multiarch/memcpy.c5
-rw-r--r--sysdeps/aarch64/multiarch/memcpy_thunderx.S22
-rw-r--r--sysdeps/aarch64/multiarch/memcpy_thunderx2.S27
-rw-r--r--sysdeps/unix/sysv/linux/aarch64/cpu-features.h5
7 files changed, 73 insertions, 10 deletions
diff --git a/ChangeLog b/ChangeLog
index d33b47053b..b47ef63266 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+2018-02-22  Steve Ellcey  <sellcey@cavium.com>
+
+	* sysdeps/aarch64/multiarch/Makefile (sysdep_routines):
+	Add memcpy_thunderx2.
+	* sysdeps/aarch64/multiarch/ifunc-impl-list.c (MAX_IFUNC):
+	Increment to 4.
+	(__libc_ifunc_impl_list): Add __memcpy_thunderx2.
+	* sysdeps/aarch64/multiarch/memcpy.c (libc_ifunc): Add IS_THUNDERX2
+	and IS_THUNDERX2PA checks.
+	* sysdeps/aarch64/multiarch/memcpy_thunderx.S (USE_THUNDERX2):
+	Use macro to set name appropriately.
+	(memcpy): Use USE_THUNDERX2 macro to modify prefetches.
+	* sysdeps/aarch64/multiarch/memcpy_thunderx2.S: New file.
+	* sysdeps/unix/sysv/linux/aarch64/cpu-features.h (IS_THUNDERX2PA):
+	New macro.
+	(IS_THUNDERX2): New macro.
+
 2018-02-22  Stefan Liebler  <stli@linux.vnet.ibm.com>
 
 	* sysdeps/s390/fpu/libm-test-ulps: Regenerated.
diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile
index aa179c499e..57ffdf7238 100644
--- a/sysdeps/aarch64/multiarch/Makefile
+++ b/sysdeps/aarch64/multiarch/Makefile
@@ -1,4 +1,4 @@
 ifeq ($(subdir),string)
-sysdep_routines += memcpy_generic memcpy_thunderx memcpy_falkor \
-		   memmove_falkor memset_generic memset_falkor
+sysdep_routines += memcpy_generic memcpy_thunderx memcpy_thunderx2 \
+		   memcpy_falkor memmove_falkor memset_generic memset_falkor
 endif
diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
index f84956c023..e55be80103 100644
--- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
@@ -25,7 +25,7 @@
 #include <stdio.h>
 
 /* Maximum number of IFUNC implementations.  */
-#define MAX_IFUNC	3
+#define MAX_IFUNC	4
 
 size_t
 __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
@@ -40,6 +40,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/aarch64/multiarch/memcpy.c and memmove.c.  */
   IFUNC_IMPL (i, name, memcpy,
 	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_thunderx)
+	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_thunderx2)
 	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_falkor)
 	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic))
   IFUNC_IMPL (i, name, memmove,
diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c
index 3efea2c644..b94c655f9d 100644
--- a/sysdeps/aarch64/multiarch/memcpy.c
+++ b/sysdeps/aarch64/multiarch/memcpy.c
@@ -30,6 +30,7 @@ extern __typeof (__redirect_memcpy) __libc_memcpy;
 
 extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden;
 extern __typeof (__redirect_memcpy) __memcpy_thunderx attribute_hidden;
+extern __typeof (__redirect_memcpy) __memcpy_thunderx2 attribute_hidden;
 extern __typeof (__redirect_memcpy) __memcpy_falkor attribute_hidden;
 
 libc_ifunc (__libc_memcpy,
@@ -37,7 +38,9 @@ libc_ifunc (__libc_memcpy,
 	     ? __memcpy_thunderx
 	     : (IS_FALKOR (midr)
 		? __memcpy_falkor
-		: __memcpy_generic)));
+		: (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr)
+		  ? __memcpy_thunderx2
+		  : __memcpy_generic))));
 
 # undef memcpy
 strong_alias (__libc_memcpy, memcpy);
diff --git a/sysdeps/aarch64/multiarch/memcpy_thunderx.S b/sysdeps/aarch64/multiarch/memcpy_thunderx.S
index 4f6921d680..de494d933d 100644
--- a/sysdeps/aarch64/multiarch/memcpy_thunderx.S
+++ b/sysdeps/aarch64/multiarch/memcpy_thunderx.S
@@ -74,11 +74,13 @@
 
 #if IS_IN (libc)
 
-# undef MEMCPY
-# define MEMCPY __memcpy_thunderx
-# undef MEMMOVE
-# define MEMMOVE __memmove_thunderx
-# define USE_THUNDERX
+# ifndef USE_THUNDERX2
+#  undef MEMCPY
+#  define MEMCPY __memcpy_thunderx
+#  undef MEMMOVE
+#  define MEMMOVE __memmove_thunderx
+#  define USE_THUNDERX
+# endif
 
 ENTRY_ALIGN (MEMMOVE, 6)
 
@@ -180,7 +182,7 @@ L(copy96):
 	.p2align 4
 L(copy_long):
 
-# ifdef USE_THUNDERX
+# if defined(USE_THUNDERX) || defined (USE_THUNDERX2)
 
 	/* On thunderx, large memcpy's are helped by software prefetching.
 	   This loop is identical to the one below it but with prefetching
@@ -194,7 +196,11 @@ L(copy_long):
 	bic	dst, dstin, 15
 	ldp	D_l, D_h, [src]
 	sub	src, src, tmp1
+#  if defined(USE_THUNDERX)
 	prfm	pldl1strm, [src, 384]
+#  elif defined(USE_THUNDERX2)
+	prfm	pldl1strm, [src, 256]
+#  endif
 	add	count, count, tmp1	/* Count is now 16 too large.  */
 	ldp	A_l, A_h, [src, 16]
 	stp	D_l, D_h, [dstin]
@@ -204,9 +210,13 @@ L(copy_long):
 	subs	count, count, 128 + 16	/* Test and readjust count.  */
 
 L(prefetch_loop64):
+#  if defined(USE_THUNDERX)
 	tbz	src, #6, 1f
 	prfm	pldl1strm, [src, 512]
 1:
+#  elif defined(USE_THUNDERX2)
+	prfm	pldl1strm, [src, 256]
+#  endif
 	stp	A_l, A_h, [dst, 16]
 	ldp	A_l, A_h, [src, 16]
 	stp	B_l, B_h, [dst, 32]
diff --git a/sysdeps/aarch64/multiarch/memcpy_thunderx2.S b/sysdeps/aarch64/multiarch/memcpy_thunderx2.S
new file mode 100644
index 0000000000..8501abf725
--- /dev/null
+++ b/sysdeps/aarch64/multiarch/memcpy_thunderx2.S
@@ -0,0 +1,27 @@
+/* A Thunderx2 Optimized memcpy implementation for AARCH64.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* The actual code in this memcpy and memmove is in memcpy_thunderx.S.
+   The only real differences are with the prefetching instructions.  */
+
+#define MEMCPY __memcpy_thunderx2
+#define MEMMOVE __memmove_thunderx2
+#define USE_THUNDERX2
+
+#include "memcpy_thunderx.S"
diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
index c646f9dad1..cde655b9bd 100644
--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
+++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
@@ -41,6 +41,11 @@
 #define IS_THUNDERX(midr) (MIDR_IMPLEMENTOR(midr) == 'C'	\
 			   && MIDR_PARTNUM(midr) == 0x0a1)
 
+#define IS_THUNDERX2PA(midr) (MIDR_IMPLEMENTOR(midr) == 'B'     \
+			   && MIDR_PARTNUM(midr) == 0x516)
+#define IS_THUNDERX2(midr) (MIDR_IMPLEMENTOR(midr) == 'C'       \
+			   && MIDR_PARTNUM(midr) == 0xaf)
+
 #define IS_FALKOR(midr) (MIDR_IMPLEMENTOR(midr) == 'Q'			      \
                         && MIDR_PARTNUM(midr) == 0xc00)