about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--  ChangeLog  6
-rw-r--r--  sysdeps/powerpc/powerpc64/power8/strcmp.S  30
-rw-r--r--  sysdeps/powerpc/powerpc64/power9/strcmp.S  30
3 files changed, 22 insertions, 44 deletions
diff --git a/ChangeLog b/ChangeLog
index 6399c1f521..769e73819b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2017-02-07  Rajalakshmi Srinivasaraghavan  <raji@linux.vnet.ibm.com>
+
+	* sysdeps/powerpc/powerpc64/power8/strcmp.S: Adjust address for
+	unaligned load for shorter strings.
+	* sysdeps/powerpc/powerpc64/power9/strcmp.S: Likewise.
+
 2017-02-06  Joseph Myers  <joseph@codesourcery.com>
 
 	* math/libm-test-driver.c (flag_test_errno): New variable.
diff --git a/sysdeps/powerpc/powerpc64/power8/strcmp.S b/sysdeps/powerpc/powerpc64/power8/strcmp.S
index c34ff4a23b..d46bff80cd 100644
--- a/sysdeps/powerpc/powerpc64/power8/strcmp.S
+++ b/sysdeps/powerpc/powerpc64/power8/strcmp.S
@@ -30,21 +30,21 @@
 EALIGN (strcmp, 4, 0)
 	li	r0,0
 
-	/* Check if [s1]+32 or [s2]+32 will cross a 4K page boundary using
+	/* Check if [s1]+16 or [s2]+16 will cross a 4K page boundary using
 	   the code:
 
 	    (((size_t) s1) % PAGE_SIZE > (PAGE_SIZE - ITER_SIZE))
 
-	   with PAGE_SIZE being 4096 and ITER_SIZE begin 32.  */
+	   with PAGE_SIZE being 4096 and ITER_SIZE being 16.  */
 
 	rldicl	r7,r3,0,52
 	rldicl	r9,r4,0,52
-	cmpldi	cr7,r7,4096-32
+	cmpldi	cr7,r7,4096-16
 	bgt	cr7,L(pagecross_check)
-	cmpldi	cr5,r9,4096-32
+	cmpldi	cr5,r9,4096-16
 	bgt	cr5,L(pagecross_check)
 
-	/* For short string up to 32 bytes, load both s1 and s2 using
+	/* For short strings up to 16 bytes, load both s1 and s2 using
 	   unaligned dwords and compare.  */
 	ld	r8,0(r3)
 	ld	r10,0(r4)
@@ -60,25 +60,11 @@ EALIGN (strcmp, 4, 0)
 	orc.	r9,r12,r11
 	bne	cr0,L(different_nocmpb)
 
-	ld	r8,16(r3)
-	ld	r10,16(r4)
-	cmpb	r12,r8,r0
-	cmpb	r11,r8,r10
-	orc.	r9,r12,r11
-	bne	cr0,L(different_nocmpb)
-
-	ld	r8,24(r3)
-	ld	r10,24(r4)
-	cmpb	r12,r8,r0
-	cmpb	r11,r8,r10
-	orc.	r9,r12,r11
-	bne	cr0,L(different_nocmpb)
-
-	addi	r7,r3,32
-	addi	r4,r4,32
+	addi	r7,r3,16
+	addi	r4,r4,16
 
 L(align_8b):
-	/* Now it has checked for first 32 bytes, align source1 to doubleword
+	/* Now it has checked for first 16 bytes, align source1 to doubleword
 	   and adjust source2 address.  */
 	rldicl	r9,r7,0,61	/* source1 alignment to doubleword  */
 	subf	r4,r9,r4	/* Adjust source2 address based on source1
diff --git a/sysdeps/powerpc/powerpc64/power9/strcmp.S b/sysdeps/powerpc/powerpc64/power9/strcmp.S
index 3e32396c94..17ec8c24c3 100644
--- a/sysdeps/powerpc/powerpc64/power9/strcmp.S
+++ b/sysdeps/powerpc/powerpc64/power9/strcmp.S
@@ -65,21 +65,21 @@
 EALIGN (strcmp, 4, 0)
 	li	r0, 0
 
-	/* Check if [s1]+32 or [s2]+32 will cross a 4K page boundary using
+	/* Check if [s1]+16 or [s2]+16 will cross a 4K page boundary using
 	   the code:
 
 	    (((size_t) s1) % PAGE_SIZE > (PAGE_SIZE - ITER_SIZE))
 
-	   with PAGE_SIZE being 4096 and ITER_SIZE begin 32.  */
+	   with PAGE_SIZE being 4096 and ITER_SIZE being 16.  */
 
 	rldicl	r7, r3, 0, 52
 	rldicl	r9, r4, 0, 52
-	cmpldi	cr7, r7, 4096-32
+	cmpldi	cr7, r7, 4096-16
 	bgt	cr7, L(pagecross_check)
-	cmpldi	cr5, r9, 4096-32
+	cmpldi	cr5, r9, 4096-16
 	bgt	cr5, L(pagecross_check)
 
-	/* For short strings up to 32 bytes,  load both s1 and s2 using
+	/* For short strings up to 16 bytes,  load both s1 and s2 using
 	   unaligned dwords and compare.  */
 	ld	r8, 0(r3)
 	ld	r10, 0(r4)
@@ -95,25 +95,11 @@ EALIGN (strcmp, 4, 0)
 	orc.	r9, r12, r11
 	bne	cr0, L(different_nocmpb)
 
-	ld	r8, 16(r3)
-	ld	r10, 16(r4)
-	cmpb	r12, r8, r0
-	cmpb	r11, r8, r10
-	orc.	r9, r12, r11
-	bne	cr0, L(different_nocmpb)
-
-	ld	r8, 24(r3)
-	ld	r10, 24(r4)
-	cmpb	r12, r8, r0
-	cmpb	r11, r8, r10
-	orc.	r9, r12, r11
-	bne	cr0, L(different_nocmpb)
-
-	addi	r7, r3, 32
-	addi	r4, r4, 32
+	addi	r7, r3, 16
+	addi	r4, r4, 16
 
 L(align):
-	/* Now it has checked for first 32 bytes.  */
+	/* Now it has checked for first 16 bytes.  */
 	vspltisb	v0, 0
 	vspltisb	v2, -1
 	lvsr	v6, 0, r4   /* Compute mask.  */