about summary refs log tree commit diff
path: root/sysdeps/powerpc/powerpc64/multiarch
diff options
context:
space:
mode:
authorAdhemerval Zanella <azanella@linux.vnet.ibm.com>2014-12-31 11:47:41 -0500
committerAdhemerval Zanella <azanella@linux.vnet.ibm.com>2015-01-13 11:28:44 -0500
commitf06a4faf8a2b4d046eb40e94b47948cc47d79902 (patch)
tree846d1fc4c0ce0be53ef275c227b2accab263bbda /sysdeps/powerpc/powerpc64/multiarch
parent9f2f36e5a91c2ce6edba5415e176155eb1008ae1 (diff)
downloadglibc-f06a4faf8a2b4d046eb40e94b47948cc47d79902.tar.gz
glibc-f06a4faf8a2b4d046eb40e94b47948cc47d79902.tar.xz
glibc-f06a4faf8a2b4d046eb40e94b47948cc47d79902.zip
powerpc: Optimized st{r,p}ncpy for POWER8/PPC64
This patch adds an optimized POWER8 st{r,p}ncpy using unaligned accesses.
It shows 10%-80% improvement over the optimized POWER7 one that uses
only aligned accesses, specially on unaligned inputs.

The algorithm first read and check 16 bytes (if inputs do not cross a 4K
page size).  The it realign source to 16-bytes and issue a 16 bytes read
and compare loop to speedup null byte checks for large strings.  Also,
different from POWER7 optimization, the null pad is done inline in the
implementation using possible unaligned accesses, instead of realying on
a memset call.  Special case is added for page cross reads.
Diffstat (limited to 'sysdeps/powerpc/powerpc64/multiarch')
-rw-r--r--sysdeps/powerpc/powerpc64/multiarch/Makefile5
-rw-r--r--sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c6
-rw-r--r--sysdeps/powerpc/powerpc64/multiarch/stpncpy-power8.S39
-rw-r--r--sysdeps/powerpc/powerpc64/multiarch/stpncpy.c7
-rw-r--r--sysdeps/powerpc/powerpc64/multiarch/strncpy-power8.S40
-rw-r--r--sysdeps/powerpc/powerpc64/multiarch/strncpy.c7
6 files changed, 98 insertions, 6 deletions
diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile
index 74b2daac1d..18d337843c 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile
@@ -17,9 +17,10 @@ sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \
 		   stpcpy-power7 stpcpy-ppc64 \
 		   strrchr-power7 strrchr-ppc64 strncat-power7 strncat-ppc64 \
 		   strncpy-power7 strncpy-ppc64 \
-		   stpncpy-power7 stpncpy-ppc64 strcmp-power7 strcmp-ppc64 \
+		   stpncpy-power8 stpncpy-power7 stpncpy-ppc64 \
+		   strcmp-power7 strcmp-ppc64 \
 		   strcat-power8 strcat-power7 strcat-ppc64 memmove-power7 \
-		   memmove-ppc64 bcopy-ppc64
+		   memmove-ppc64 bcopy-ppc64 strncpy-power8
 
 CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops
 CFLAGS-strncase_l-power7.c += -mcpu=power7 -funroll-loops
diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
index dbb21fddb9..132cb13eac 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
@@ -279,6 +279,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/powerpc/powerpc64/multiarch/strncpy.c.  */
   IFUNC_IMPL (i, name, strncpy,
 	      IFUNC_IMPL_ADD (array, i, strncpy,
+			      hwcap2 & PPC_FEATURE2_ARCH_2_07,
+			      __strncpy_power8)
+	      IFUNC_IMPL_ADD (array, i, strncpy,
 			      hwcap & PPC_FEATURE_HAS_VSX,
 			      __strncpy_power7)
 	      IFUNC_IMPL_ADD (array, i, strncpy, 1,
@@ -287,6 +290,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/powerpc/powerpc64/multiarch/stpncpy.c.  */
   IFUNC_IMPL (i, name, stpncpy,
 	      IFUNC_IMPL_ADD (array, i, stpncpy,
+			      hwcap2 & PPC_FEATURE2_ARCH_2_07,
+			      __stpncpy_power8)
+	      IFUNC_IMPL_ADD (array, i, stpncpy,
 			      hwcap & PPC_FEATURE_HAS_VSX,
 			      __stpncpy_power7)
 	      IFUNC_IMPL_ADD (array, i, stpncpy, 1,
diff --git a/sysdeps/powerpc/powerpc64/multiarch/stpncpy-power8.S b/sysdeps/powerpc/powerpc64/multiarch/stpncpy-power8.S
new file mode 100644
index 0000000000..d5d835de91
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/stpncpy-power8.S
@@ -0,0 +1,39 @@
+/* Optimized stpncpy implementation for POWER8.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+#define USE_AS_STPNCPY
+
+#undef EALIGN
+#define EALIGN(name, alignt, words)				\
+  .section ".text";						\
+  ENTRY_2(__stpncpy_power8)					\
+  .align ALIGNARG(alignt);					\
+  EALIGN_W_##words;						\
+  BODY_LABEL(__stpncpy_power8):					\
+  cfi_startproc;						\
+  LOCALENTRY(__stpncpy_power8)
+
+#undef END
+#define END(name)						\
+  cfi_endproc;							\
+  TRACEBACK(__stpncpy_power8)					\
+  END_2(__stpncpy_power8)
+
+#include <sysdeps/powerpc/powerpc64/power8/stpncpy.S>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/stpncpy.c b/sysdeps/powerpc/powerpc64/multiarch/stpncpy.c
index 9e5a270303..0f4072f982 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/stpncpy.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/stpncpy.c
@@ -23,10 +23,13 @@
 
 extern __typeof (__stpncpy) __stpncpy_ppc attribute_hidden;
 extern __typeof (__stpncpy) __stpncpy_power7 attribute_hidden;
+extern __typeof (__stpncpy) __stpncpy_power8 attribute_hidden;
 
 libc_ifunc (__stpncpy,
-            (hwcap & PPC_FEATURE_HAS_VSX)
-            ? __stpncpy_power7
+            (hwcap2 & PPC_FEATURE2_ARCH_2_07)
+            ? __stpncpy_power8 :
+              (hwcap & PPC_FEATURE_HAS_VSX)
+              ? __stpncpy_power7
             : __stpncpy_ppc);
 
 weak_alias (__stpncpy, stpncpy)
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncpy-power8.S b/sysdeps/powerpc/powerpc64/multiarch/strncpy-power8.S
new file mode 100644
index 0000000000..ed906a4394
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/strncpy-power8.S
@@ -0,0 +1,40 @@
+/* Optimized strncpy implementation for POWER8.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+#undef EALIGN
+#define EALIGN(name, alignt, words)				\
+  .section ".text";						\
+  ENTRY_2(__strncpy_power8)					\
+  .align ALIGNARG(alignt);					\
+  EALIGN_W_##words;						\
+  BODY_LABEL(__strncpy_power8):					\
+  cfi_startproc;						\
+  LOCALENTRY(__strncpy_power8)
+
+#undef END
+#define END(name)						\
+  cfi_endproc;							\
+  TRACEBACK(__strncpy_power8)					\
+  END_2(__strncpy_power8)
+
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(name)
+
+#include <sysdeps/powerpc/powerpc64/power8/strncpy.S>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncpy.c b/sysdeps/powerpc/powerpc64/multiarch/strncpy.c
index ae4e97a265..ffb0f23643 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strncpy.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/strncpy.c
@@ -24,12 +24,15 @@
 
 extern __typeof (strncpy) __strncpy_ppc attribute_hidden;
 extern __typeof (strncpy) __strncpy_power7 attribute_hidden;
+extern __typeof (strncpy) __strncpy_power8 attribute_hidden;
 
 /* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
  ifunc symbol properly. */
 libc_ifunc (strncpy,
-            (hwcap & PPC_FEATURE_HAS_VSX)
-            ? __strncpy_power7
+            (hwcap2 & PPC_FEATURE2_ARCH_2_07)
+            ? __strncpy_power8 :
+              (hwcap & PPC_FEATURE_HAS_VSX)
+              ? __strncpy_power7
             : __strncpy_ppc);
 
 #endif