about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--  ChangeLog  4
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/strstr.S  22
2 files changed, 19 insertions, 7 deletions
diff --git a/ChangeLog b/ChangeLog
index a442ee1e12..13fc3fb98a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2015-08-25  Rajalakshmi Srinivasaraghavan  <raji@linux.vnet.ibm.com>
+
+	* sysdeps/powerpc/powerpc64/power7/strstr.S: Handle worst case.
+
 2015-08-25  H.J. Lu  <hongjiu.lu@intel.com>
 
 	* sysdeps/x86_64/strlen.S: Replace %xmm[8-12] with %xmm[0-4].
diff --git a/sysdeps/powerpc/powerpc64/power7/strstr.S b/sysdeps/powerpc/powerpc64/power7/strstr.S
index bfb0c4992a..fb3c810ac6 100644
--- a/sysdeps/powerpc/powerpc64/power7/strstr.S
+++ b/sysdeps/powerpc/powerpc64/power7/strstr.S
@@ -23,6 +23,8 @@
 /* The performance gain is obtained using aligned memory access, load
  * doubleword and usage of cmpb instruction for quicker comparison.  */
 
+#define ITERATIONS	64
+
 #ifndef STRLEN
 /* For builds with no IFUNC support, local calls should be made to internal
    GLIBC symbol (created by libc_hidden_builtin_def).  */
@@ -62,6 +64,8 @@ EALIGN (strstr, 4, 0)
 	cfi_offset(r30, -16)
 	std	r29, -24(r1)		/* Save callers register r29.  */
 	cfi_offset(r29, -24)
+	std	r28, -32(r1)		/* Save callers register r28.  */
+	cfi_offset(r28, -32)
 	std	r0, 16(r1)		/* Store the link register.  */
 	cfi_offset(lr, 16)
 	stdu	r1, -FRAMESIZE(r1)	/* Create the stack frame.  */
@@ -69,7 +73,6 @@ EALIGN (strstr, 4, 0)
 
 	dcbt	0, r3
 	dcbt	0, r4
-
 	cmpdi	cr7, r3, 0
 	beq	cr7, L(retnull)
 	cmpdi	cr7, r4, 0
@@ -84,10 +87,6 @@ EALIGN (strstr, 4, 0)
 	cmpdi	cr7, r3, 0	/* If search str is null.  */
 	beq	cr7, L(ret_r3)
 
-	/* Call __strstr_ppc if needle len > 2048 */
-	cmpdi	cr7, r3, 2048
-	bgt	cr7, L(default)
-
 	mr	r31, r3
 	mr	r4, r3
 	mr	r3, r29
@@ -105,7 +104,8 @@ EALIGN (strstr, 4, 0)
 	/* If first char of search str is not present.  */
 	cmpdi	cr7, r3, 0
 	ble	cr7, L(end)
-
+	/* Reg r28 is used to count the number of iterations.  */
+	li	r28, 0
 	rldicl	r8, r3, 0, 52	/* Page cross check.  */
 	cmpldi	cr7, r8, 4096-16
 	bgt	cr7, L(bytebybyte)
@@ -324,6 +324,10 @@ L(return4):
 	.align	4
 L(begin):
 	mr	r3, r8
+	/* Fall back to default once r28 reaches ITERATIONS.  */
+	addi	r28, r28, 1
+	cmpdi	cr7, r28, ITERATIONS
+	beq	cr7, L(default)
 	lbz	r4, 0(r30)
 	bl	STRCHR
 	nop
@@ -423,6 +427,10 @@ L(nextbyte):
 	cmpdi	cr7, r9, -1
 	beq	cr7, L(end)
 	addi	r3, r4, 1
+	/* Fall back to default once r28 reaches ITERATIONS.  */
+	addi	r28, r28, 1
+	cmpdi	cr7, r28, ITERATIONS
+	beq	cr7, L(default)
 	lbz	r4, 0(r30)
 	bl	STRCHR
 	nop
@@ -490,7 +498,6 @@ L(retnull):
 
 	.align	4
 L(default):
-	mr	r3, r29
 	mr	r4, r30
 	bl	__strstr_ppc
 	nop
@@ -500,6 +507,7 @@ L(end):
 	addi	r1, r1, FRAMESIZE	/* Restore stack pointer.  */
 	cfi_adjust_cfa_offset(-FRAMESIZE)
 	ld	r0, 16(r1)	/* Restore the saved link register.  */
+	ld	r28, -32(r1)	/* Restore callers save register r28.  */
 	ld	r29, -24(r1)	/* Restore callers save register r29.  */
 	ld	r30, -16(r1)	/* Restore callers save register r30.  */
 	ld	r31, -8(r1)	/* Restore callers save register r31.  */