about summary refs log tree commit diff
path: root/sysdeps/powerpc
diff options
context:
space:
mode:
authorRajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>2017-10-06 10:04:52 +0530
committerRajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>2017-10-06 10:04:52 +0530
commit5a907168918805bbe3088dc4ab051e3e78ad7459 (patch)
tree3c93e0d5356b84b012b64eaeadd7d2e556d7afc2 /sysdeps/powerpc
parent0db0b931cf2bf506767be3f93519032f56723883 (diff)
downloadglibc-5a907168918805bbe3088dc4ab051e3e78ad7459.tar.gz
glibc-5a907168918805bbe3088dc4ab051e3e78ad7459.tar.xz
glibc-5a907168918805bbe3088dc4ab051e3e78ad7459.zip
powerpc: Fix IFUNC for memrchr
Recent commit 59ba2d2b5421 missed to add __memrchr_power8 in
ifunc list.  Also handled discarding unwanted bytes for
unaligned inputs in power8 optimization.

2017-10-05  Rajalakshmi Srinivasaraghavan  <raji@linux.vnet.ibm.com>

	* sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c: Revert
	back to powerpc32 file.
	* sysdeps/powerpc/powerpc64/multiarch/memrchr.c
	(memrchr): Add __memrchr_power8 to ifunc list.
	* sysdeps/powerpc/powerpc64/power8/memrchr.S: Mask
	extra bytes for unaligned inputs.
Diffstat (limited to 'sysdeps/powerpc')
-rw-r--r--sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c15
-rw-r--r--sysdeps/powerpc/powerpc64/multiarch/memrchr.c9
-rw-r--r--sysdeps/powerpc/powerpc64/power8/memrchr.S24
3 files changed, 31 insertions, 17 deletions
diff --git a/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c
index be24689336..e2d53ac0f4 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c
@@ -15,17 +15,4 @@
    You should have received a copy of the GNU Lesser General Public
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
-
-#include <string.h>
-
-#define MEMRCHR  __memrchr_ppc
-
-#undef weak_alias
-#define weak_alias(a, b)
-
-#undef libc_hidden_builtin_def
-#define libc_hidden_builtin_def(name)
-
-extern __typeof (memrchr) __memrchr_ppc attribute_hidden;
-
-#include <string/memrchr.c>
+#include <sysdeps/powerpc/powerpc32/power4/multiarch/memrchr-ppc32.c>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/memrchr.c b/sysdeps/powerpc/powerpc64/multiarch/memrchr.c
index fb09fdf89c..7d52af0548 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/memrchr.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/memrchr.c
@@ -23,13 +23,16 @@
 
 extern __typeof (__memrchr) __memrchr_ppc attribute_hidden;
 extern __typeof (__memrchr) __memrchr_power7 attribute_hidden;
+extern __typeof (__memrchr) __memrchr_power8 attribute_hidden;
 
 /* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
    ifunc symbol properly.  */
 libc_ifunc (__memrchr,
-	    (hwcap & PPC_FEATURE_HAS_VSX)
-            ? __memrchr_power7
-            : __memrchr_ppc);
+	    (hwcap2 & PPC_FEATURE2_ARCH_2_07)
+	    ? __memrchr_power8 :
+	      (hwcap & PPC_FEATURE_HAS_VSX)
+	      ? __memrchr_power7
+	    : __memrchr_ppc);
 
 weak_alias (__memrchr, memrchr)
 #else
diff --git a/sysdeps/powerpc/powerpc64/power8/memrchr.S b/sysdeps/powerpc/powerpc64/power8/memrchr.S
index 521b3c84a2..22b01ec69c 100644
--- a/sysdeps/powerpc/powerpc64/power8/memrchr.S
+++ b/sysdeps/powerpc/powerpc64/power8/memrchr.S
@@ -233,11 +233,35 @@ L(found):
 #endif
 	addi	r8, r8, 63
 	sub	r3, r8, r6	/* Compute final address.  */
+	cmpld	cr7, r3, r10
+	bgelr	cr7
+	li	r3, 0
 	blr
 
 	/* Found a match in last 16 bytes.  */
 	.align	4
 L(found_16B):
+	cmpld	r8, r10		/* Are we on the last QW?  */
+	bge	L(last)
+	/* Now discard bytes before starting address.  */
+	sub	r9, r10, r8
+	MTVRD(v9, r9)
+	vspltisb	v8, 3
+	/* Mask unwanted bytes.  */
+#ifdef __LITTLE_ENDIAN__
+	lvsr	v7, 0, r10
+	vperm   v6, v0, v6, v7
+	vsldoi	v9, v0, v9, 8
+	vsl	v9, v9, v8
+	vslo	v6, v6, v9
+#else
+	lvsl	v7, 0, r10
+	vperm   v6, v6, v0, v7
+	vsldoi	v9, v0, v9, 8
+	vsl	v9, v9, v8
+	vsro	v6, v6, v9
+#endif
+L(last):
 	/* Permute the first bit of each byte into bits 48-63.  */
 	VBPERMQ(v6, v6, v10)
 	/* Shift each component into its correct position for merging.  */