about summary refs log tree commit diff
path: root/powerpc-cpu/sysdeps/powerpc/powerpc64/power4/strncmp.S
diff options
context:
space:
mode:
Diffstat (limited to 'powerpc-cpu/sysdeps/powerpc/powerpc64/power4/strncmp.S')
-rw-r--r--powerpc-cpu/sysdeps/powerpc/powerpc64/power4/strncmp.S180
1 files changed, 180 insertions, 0 deletions
diff --git a/powerpc-cpu/sysdeps/powerpc/powerpc64/power4/strncmp.S b/powerpc-cpu/sysdeps/powerpc/powerpc64/power4/strncmp.S
new file mode 100644
index 0000000000..7a1665d2bc
--- /dev/null
+++ b/powerpc-cpu/sysdeps/powerpc/powerpc64/power4/strncmp.S
@@ -0,0 +1,180 @@
+/* Optimized strcmp implementation for PowerPC64.
+   Copyright (C) 2003, 2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+   02110-1301 USA.  */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* See strlen.s for comments on how the end-of-string testing works.  */
+
+/* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5])  */
+
+EALIGN (BP_SYM(strncmp), 4, 0)
+	CALL_MCOUNT 3
+
+#define rTMP	r0
+#define rRTN	r3
+#define rSTR1	r3	/* first string arg */
+#define rSTR2	r4	/* second string arg */
+#define rN	r5	/* max string length */
+/* Note:  The Bounded pointer support in this code is broken.  This code
+   was inherited from PPC32 and and that support was never completed.  
+   Current PPC gcc does not support -fbounds-check or -fbounded-pointers.  */
+#define rWORD1	r6	/* current word in s1 */
+#define rWORD2	r7	/* current word in s2 */
+#define rWORD3  r10
+#define rWORD4  r11
+#define rFEFE	r8	/* constant 0xfefefefefefefeff (-0x0101010101010101) */
+#define r7F7F	r9	/* constant 0x7f7f7f7f7f7f7f7f */
+#define rNEG	r10	/* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
+#define rBITDIF	r11	/* bits that differ in s1 & s2 words */
+
+	dcbt	0,rSTR1
+	or	rTMP, rSTR2, rSTR1
+	lis	r7F7F, 0x7f7f
+	dcbt	0,rSTR2
+	clrldi.	rTMP, rTMP, 61
+	cmpldi	cr1, rN, 0
+	lis	rFEFE, -0x101
+	bne	L(unaligned)
+/* We are doubleword alligned so set up for two loops.  first a double word
+   loop, then fall into the byte loop if any residual.  */
+	srdi.	rTMP, rN, 3
+	clrldi	rN, rN, 61
+	addi	rFEFE, rFEFE, -0x101
+	addi	r7F7F, r7F7F, 0x7f7f
+	cmpldi	cr1, rN, 0	
+	beq	L(unaligned)
+
+	mtctr	rTMP	/* Power4 wants mtctr 1st in dispatch group.  */
+	ld	rWORD1, 0(rSTR1)
+	ld	rWORD2, 0(rSTR2)
+	sldi	rTMP, rFEFE, 32
+	insrdi	r7F7F, r7F7F, 32, 0
+	add	rFEFE, rFEFE, rTMP
+	b	L(g1)
+
+L(g0):	
+	ldu	rWORD1, 8(rSTR1)
+	bne-	cr1, L(different)
+	ldu	rWORD2, 8(rSTR2)
+L(g1):	add	rTMP, rFEFE, rWORD1
+	nor	rNEG, r7F7F, rWORD1
+	bdz	L(tail)
+	and.	rTMP, rTMP, rNEG
+	cmpd	cr1, rWORD1, rWORD2
+	beq+	L(g0)
+	
+/* OK. We've hit the end of the string. We need to be careful that
+   we don't compare two strings as different because of gunk beyond
+   the end of the strings...  */
+	
+L(endstring):
+	and	rTMP, r7F7F, rWORD1
+	beq	cr1, L(equal)
+	add	rTMP, rTMP, r7F7F
+	xor.	rBITDIF, rWORD1, rWORD2
+
+	andc	rNEG, rNEG, rTMP
+	blt-	L(highbit)
+	cntlzd	rBITDIF, rBITDIF
+	cntlzd	rNEG, rNEG
+	addi	rNEG, rNEG, 7
+	cmpd	cr1, rNEG, rBITDIF
+	sub	rRTN, rWORD1, rWORD2
+	blt-	cr1, L(equal)
+	sradi	rRTN, rRTN, 63
+	ori	rRTN, rRTN, 1
+	blr
+L(equal):
+	li	rRTN, 0
+	blr
+
+L(different):
+	ldu	rWORD1, -8(rSTR1)
+	xor.	rBITDIF, rWORD1, rWORD2
+	sub	rRTN, rWORD1, rWORD2
+	blt-	L(highbit)
+	sradi	rRTN, rRTN, 63
+	ori	rRTN, rRTN, 1
+	blr
+L(highbit):
+	srdi	rWORD2, rWORD2, 56
+	srdi	rWORD1, rWORD1, 56
+	sub	rRTN, rWORD1, rWORD2
+	blr
+
+
+/* Oh well.  In this case, we just do a byte-by-byte comparison.  */
+	.align 4
+L(tail):
+	and.	rTMP, rTMP, rNEG
+	cmpd	cr1, rWORD1, rWORD2
+	bne-	L(endstring)
+	addi	rSTR1, rSTR1, 8
+	bne-	cr1, L(different)
+	addi	rSTR2, rSTR2, 8
+	cmpldi	cr1, rN, 0
+L(unaligned):
+	mtctr   rN	/* Power4 wants mtctr 1st in dispatch group */
+	ble	cr1, L(ux)
+L(uz):
+	lbz	rWORD1, 0(rSTR1)
+	lbz	rWORD2, 0(rSTR2)
+	.align 4
+L(u1):
+	cmpdi	cr1, rWORD1, 0
+	bdz	L(u4)
+	cmpd	rWORD1, rWORD2
+	beq-	cr1, L(u4)
+	lbzu    rWORD3, 1(rSTR1)
+	lbzu	rWORD4, 1(rSTR2)
+	bne-	L(u4)
+	cmpdi	cr1, rWORD3, 0
+	bdz	L(u3)
+	cmpd	rWORD3, rWORD4
+	beq-    cr1, L(u3)
+	lbzu	rWORD1, 1(rSTR1)
+	lbzu	rWORD2, 1(rSTR2)
+	bne-    L(u3)
+	cmpdi	cr1, rWORD1, 0
+	bdz	L(u4)
+	cmpd	rWORD1, rWORD2
+	beq-	cr1, L(u4)
+	lbzu	rWORD3, 1(rSTR1)
+	lbzu	rWORD4, 1(rSTR2)
+	bne-	L(u4)
+	cmpdi	cr1, rWORD3, 0
+	bdz	L(u3)
+	cmpd	rWORD3, rWORD4
+	beq-    cr1, L(u3)
+	lbzu	rWORD1, 1(rSTR1)
+	lbzu	rWORD2, 1(rSTR2)
+	beq+    L(u1)
+
+L(u3):  sub     rRTN, rWORD3, rWORD4
+        blr
+L(u4):	sub	rRTN, rWORD1, rWORD2
+	blr
+L(ux):
+	li	rRTN, 0
+	blr
+END (BP_SYM (strncmp))
+libc_hidden_builtin_def (strncmp)
+