summary refs log tree commit diff
path: root/sysdeps/powerpc/strcmp.s
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/powerpc/strcmp.s')
-rw-r--r--sysdeps/powerpc/strcmp.s273
1 files changed, 273 insertions, 0 deletions
diff --git a/sysdeps/powerpc/strcmp.s b/sysdeps/powerpc/strcmp.s
new file mode 100644
index 0000000000..f901b82ab1
--- /dev/null
+++ b/sysdeps/powerpc/strcmp.s
@@ -0,0 +1,273 @@
+ # Optimized strcmp implementation for PowerPC.
+ # Copyright (C) 1997 Free Software Foundation, Inc.
+ # This file is part of the GNU C Library.
+ #
+ # The GNU C Library is free software; you can redistribute it and/or
+ # modify it under the terms of the GNU Library General Public License as
+ # published by the Free Software Foundation; either version 2 of the
+ # License, or (at your option) any later version.
+ #
+ # The GNU C Library is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ # Library General Public License for more details.
+ #
+ # You should have received a copy of the GNU Library General Public
+ # License along with the GNU C Library; see the file COPYING.LIB.  If not,
+ # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ # Boston, MA 02111-1307, USA.
+
+ # See strlen.s for comments on how the end-of-string testing works.
+
+	.section ".text"
+	.align 3
+	.globl strcmp
+	.type strcmp,@function
+strcmp:
+ # int [r3] strcmp (const char *p1 [r3], const char *p2 [r4])
+
+ # General register assignments:
+ # r0:	temporary
+ # r3:	pointer to previous word in s1
+ # r4:	pointer to previous word in s2
+ # r5:	current first word in s1
+ # r6:	current first word in s2 (after re-alignment)
+ # r7:	0xfefefeff
+ # r8:	0x7f7f7f7f
+ # r9:	~(word in s1 | 0x7f7f7f7f)
+	
+ # Register assignments in the prologue:
+ # r10:	low 2 bits of p2-p1
+ # r11:	mask to orc with r5/r6
+	
+	subf. %r10,%r4,%r3
+	beq-  equal
+	andi. %r10,%r10,3
+	cmpi  %cr1,%r10,2
+	beq-  %cr1,align2
+	lis   %r7,0xfeff
+	lis   %r8,0x7f7f
+	addi  %r8,%r8,0x7f7f
+	addi  %r7,%r7,0xfffffeff
+	bgt-  %cr1,align3
+strcmp3:
+	rlwinm %r0,%r3,3,27,28
+	li    %r11,-1
+	srw   %r11,%r11,%r0
+	clrrwi %r3,%r3,2
+	clrrwi %r4,%r4,2
+	lwz   %r5,0(%r3)
+	lwz   %r6,0(%r4)
+	bne-  align1
+
+ # The loop, case when both strings are aligned the same.
+ # on entry, cr1.eq must be 1.
+ # r10:	second word in s1
+ # r11:	second word in s2 OR mask to orc with first two words.
+align0:	
+	andi. %r0,%r3,4
+	orc   %r5,%r5,%r11
+	orc   %r6,%r6,%r11
+	beq+  a0start
+	add   %r0,%r7,%r5
+	nor   %r9,%r8,%r5
+	and.  %r0,%r0,%r9
+	cmplw %cr1,%r5,%r6
+	subi  %r3,%r3,4
+	bne-  endstringeq
+	subi  %r4,%r4,4
+	bne-  %cr1,difference
+
+loopalign0:
+	lwzu  %r5,8(%r3)
+	bne-  %cr1,difference2
+	lwzu  %r6,8(%r4)
+a0start:
+	add   %r0,%r7,%r5
+	nor   %r9,%r8,%r5
+	and.  %r0,%r0,%r9
+	cmplw %cr1,%r5,%r6
+	lwz   %r10,4(%r3)
+	bne-  endstringeq
+	add   %r0,%r7,%r10
+	bne-  %cr1,difference
+	nor   %r9,%r8,%r10
+	lwz   %r11,4(%r4)
+	and.  %r0,%r0,%r9
+	cmplw %cr1,%r10,%r11
+	beq+  loopalign0
+
+	mr    %r5,%r10
+	mr    %r6,%r11
+
+ # fall through to...
+
+endstringeq:
+ # (like 'endstring', but an equality code is in cr1)
+	beq  %cr1,equal
+endstring:
+ # OK. We've hit the end of the string. We need to be careful that
+ # we don't compare two strings as different because of gunk beyond
+ # the end of the strings. We do it like this...
+	and  %r0,%r8,%r5
+	add  %r0,%r0,%r8
+	xor. %r10,%r5,%r6
+	andc %r9,%r9,%r0
+	cntlzw %r10,%r10
+	cntlzw %r9,%r9
+	addi %r9,%r9,7
+	cmpw %cr1,%r9,%r10
+	blt  %cr1,equal
+	sub  %r3,%r5,%r6
+	bgelr+
+	mr   %r3,%r6
+	blr
+equal:	li   %r3,0
+	blr
+	
+ # The loop, case when s2 is aligned 1 char behind s1.
+ # r10:	current word in s2 (before re-alignment)
+
+align1:
+	cmpwi %cr1,%r0,0
+	orc   %r5,%r5,%r11
+	bne   %cr1,align1_123
+ # When s1 is aligned to a word boundary, the startup processing is special.
+	slwi. %r6,%r6,24
+	bne+  a1entry_0
+	nor   %r9,%r8,%r5
+	b     endstring
+
+align1_123:
+ # Otherwise (s1 not aligned to a word boundary):
+	mr    %r10,%r6
+	add   %r0,%r7,%r5
+	nor   %r9,%r8,%r5
+	and.  %r0,%r0,%r9
+	srwi  %r6,%r6,8
+	orc   %r6,%r6,%r11
+	cmplw %cr1,%r5,%r6
+	bne-  endstringeq
+	bne-  %cr1,difference
+
+loopalign1:
+	slwi. %r6,%r10,24
+	bne-  %cr1,a1difference
+	lwzu  %r5,4(%r3)
+	beq-  endstring1
+a1entry_0:
+	lwzu  %r10,4(%r4)
+a1entry_123:	
+	add   %r0,%r7,%r5
+	nor   %r9,%r8,%r5
+	and.  %r0,%r0,%r9
+	rlwimi %r6,%r10,24,8,31
+	cmplw %cr1,%r5,%r6
+	beq+  loopalign1
+	b     endstringeq
+
+endstring1:
+	srwi  %r3,%r5,24
+	blr
+
+a1difference:
+	lbz   %r6,-1(%r4)
+	slwi  %r6,%r6,24
+	rlwimi %r6,%r10,24,8,31
+
+ # fall through to...
+		
+difference:	
+ # The idea here is that we could just return '%r5 - %r6', except
+ # that the result might overflow. Overflow can only happen when %r5
+ # and %r6 have different signs (thus the xor), in which case we want to
+ # return negative iff %r6 has its high bit set so %r5 < %r6.
+ # A branch-free implementation of this is
+ #	xor  %r0,%r5,%r6
+ #	rlwinm %r0,%r0,1,31,31
+ #	rlwnm %r5,%r5,%r0,1,31
+ #	rlwnm %r6,%r6,%r0,1,31
+ #	sub  %r3,%r5,%r6
+ #	blr
+ # but this is usually more expensive.
+	xor. %r0,%r5,%r6
+	sub  %r3,%r5,%r6
+	bgelr+
+	mr   %r3,%r6
+	blr
+
+difference2:
+ # As for 'difference', but use registers r10 and r11 instead of r5 and r6.
+	xor. %r0,%r10,%r11
+	sub  %r3,%r10,%r11
+	bgelr+
+	mr   %r3,%r11
+	blr
+	
+ # For the case when s2 is aligned 3 chars behind s1, we switch
+ # s1 and s2...
+ # r10:	used by 'align2' (see below)
+ # r11:	used by 'align2' (see below)
+ # r12:	saved link register
+ # cr0.eq: must be left as 1.
+
+align3:	mflr %r12
+	mr   %r0,%r3
+	mr   %r3,%r4
+	mr   %r4,%r0
+	bl   strcmp3
+	mtlr %r12
+	neg  %r3,%r3
+	blr
+	
+ # The loop, case when s2 and s1's alignments differ by 2
+ # This is the ugly case...
+ # FIXME: on a 601, the loop takes 7 cycles instead of the 6 you'd expect,
+ # because there are too many branches. This loop should probably be
+ # coded like the align1 case.
+	
+a2even:	lhz   %r5,0(%r3)
+	lhz   %r6,0(%r4)
+	b     a2entry
+	
+align2:
+	andi. %r0,%r3,1
+	beq+  a2even
+	subi  %r3,%r3,1
+	subi  %r4,%r4,1
+	lbz   %r5,1(%r3)
+	lbz   %r6,1(%r4)
+	cmpwi %cr0,%r5,0
+	cmpw  %cr1,%r5,%r6
+	beq-  align2end2
+	lhzu  %r5,2(%r3)
+	beq+  %cr1,a2entry1
+	lbz   %r5,-1(%r3)
+	sub   %r3,%r5,%r6
+	blr
+
+loopalign2:
+	cmpw  %cr1,%r5,%r6
+	beq-  align2end2
+	lhzu  %r5,2(%r3)
+	bne-  %cr1,align2different
+a2entry1:
+	lhzu  %r6,2(%r4)
+a2entry:	
+	cmpwi %cr5,%r5,0x00ff
+	andi. %r0,%r5,0x00ff
+	bgt+  %cr5,loopalign2
+
+align2end:
+	andi. %r3,%r6,0xff00
+	neg   %r3,%r3
+	blr
+
+align2different:
+	lhzu  %r5,-2(%r3)
+align2end2:
+	sub   %r3,%r5,%r6
+	blr
+		
+0:
+	.size	 strcmp,0b-strcmp