about summary refs log tree commit diff
path: root/sysdeps/powerpc/strcmp.s
blob: f901b82ab1dbf06128add8787d8a66134cfc1988 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
 # Optimized strcmp implementation for PowerPC.
 # Copyright (C) 1997 Free Software Foundation, Inc.
 # This file is part of the GNU C Library.
 #
 # The GNU C Library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Library General Public License as
 # published by the Free Software Foundation; either version 2 of the
 # License, or (at your option) any later version.
 #
 # The GNU C Library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Library General Public License for more details.
 #
 # You should have received a copy of the GNU Library General Public
 # License along with the GNU C Library; see the file COPYING.LIB.  If not,
 # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 # Boston, MA 02111-1307, USA.

 # See strlen.s for comments on how the end-of-string testing works.

	.section ".text"
	.align 3
	.globl strcmp
	.type strcmp,@function
strcmp:
 # int [r3] strcmp (const char *p1 [r3], const char *p2 [r4])
 #
 # Entry point.  The low two bits of (p1 - p2) select the comparison loop:
 #   0 -> align0 (fall through after strcmp3),
 #   1 -> align1 (branched to from strcmp3),
 #   2 -> align2,
 #   3 -> align3.
 # Identical pointers branch straight to 'equal'.

 # General register assignments:
 # r0:	temporary
 # r3:	pointer to previous word in s1
 # r4:	pointer to previous word in s2
 # r5:	current first word in s1
 # r6:	current first word in s2 (after re-alignment)
 # r7:	0xfefefeff
 # r8:	0x7f7f7f7f
 # r9:	~(word in s1 | 0x7f7f7f7f)
	
 # Register assignments in the prologue:
 # r10:	low 2 bits of p1-p2 (subf below computes r3 - r4)
 # r11:	mask to orc with r5/r6
	
	subf. %r10,%r4,%r3	# r10 = p1 - p2; cr0.eq if the pointers are identical
	beq-  equal		# same pointer: the strings are trivially equal
	andi. %r10,%r10,3	# r10 = (p1 - p2) & 3; cr0 is read again by 'bne- align1' below
	cmpi  %cr1,%r10,2	# cr1 separates the cases 2 (eq) and 3 (gt) from 0 and 1 (lt)
	beq-  %cr1,align2	# taken before r7/r8 are loaded
	lis   %r7,0xfeff	# r7 = 0xfeff0000
	lis   %r8,0x7f7f	# r8 = 0x7f7f0000
	addi  %r8,%r8,0x7f7f	# r8 = 0x7f7f7f7f
	addi  %r7,%r7,0xfffffeff	# r7 = 0xfeff0000 - 0x101 = 0xfefefeff
	bgt-  %cr1,align3
 # strcmp3 is also the re-entry point used by align3 after it has swapped
 # r3 and r4; r7, r8 and cr0 are expected to be set up already.
strcmp3:
	rlwinm %r0,%r3,3,27,28	# r0 = (p1 & 3) * 8: bit offset of s1 within its word
	li    %r11,-1
	srw   %r11,%r11,%r0	# r11 = 0xffffffff >> r0: zero bits cover the bytes that
				# precede the start of s1 (presumes big-endian byte order)
	clrrwi %r3,%r3,2	# round both pointers down to a word boundary
	clrrwi %r4,%r4,2
	lwz   %r5,0(%r3)	# first (possibly partial) word of s1
	lwz   %r6,0(%r4)	# first (possibly partial) word of s2
	bne-  align1		# cr0 from the andi. above: alignments differ -> align1

 # The loop, case when both strings are aligned the same.
 # on entry, cr1.eq must be 1.
 # NOTE(review): no instruction between 'align0' and the first cmplw below
 # reads cr1, so the entry requirement above looks stale -- confirm.
 # r10:	second word in s1
 # r11:	second word in s2 OR mask to orc with first two words.
 #
 # The main loop handles two words per iteration: r5/r6 hold the first
 # pair and r10/r11 the second.  A word x contains a zero byte exactly when
 # (x + 0xfefefeff) & ~(x | 0x7f7f7f7f) is nonzero; that is the
 # add/nor/and. sequence repeated below.
align0:	
	andi. %r0,%r3,4		# is the s1 word pointer at offset 4 within a doubleword?
	orc   %r5,%r5,%r11	# force the bytes before the start of the strings to 0xff
	orc   %r6,%r6,%r11	# (same mask for both: the alignments are equal)
	beq+  a0start		# offset 0: enter the two-word loop directly
 # Otherwise handle this one word alone, so that the loop's 4(%r3) load
 # always pairs with an r3 that has bit 2 clear.
	add   %r0,%r7,%r5
	nor   %r9,%r8,%r5
	and.  %r0,%r0,%r9	# cr0.ne: r5 contains a zero byte
	cmplw %cr1,%r5,%r6	# cr1: unsigned comparison of the two words
	subi  %r3,%r3,4		# bias r3 so that 'lwzu 8' below lands on the next word
	bne-  endstringeq
	subi  %r4,%r4,4		# same bias for r4
	bne-  %cr1,difference

loopalign0:
	lwzu  %r5,8(%r3)	# advance s1 by two words; load the first word of the pair
	bne-  %cr1,difference2	# cr1: second pair (r10 vs r11) of the previous iteration
	lwzu  %r6,8(%r4)	# advance s2 likewise
a0start:
	add   %r0,%r7,%r5
	nor   %r9,%r8,%r5
	and.  %r0,%r0,%r9	# cr0.ne: r5 contains a zero byte
	cmplw %cr1,%r5,%r6
	lwz   %r10,4(%r3)	# second word of s1, loaded before the r5 result is acted on
	bne-  endstringeq	# zero byte in the first word
	add   %r0,%r7,%r10	# start the zero-byte test on the second word
	bne-  %cr1,difference	# first pair differs and holds no zero byte
	nor   %r9,%r8,%r10
	lwz   %r11,4(%r4)	# second word of s2
	and.  %r0,%r0,%r9	# cr0.ne: r10 contains a zero byte
	cmplw %cr1,%r10,%r11
	beq+  loopalign0	# no zero byte: loop; a mismatch is caught at loopalign0

 # Zero byte in the second word: move the pair into r5/r6, which is where
 # endstringeq/endstring expect it (r9 already matches r10).
	mr    %r5,%r10
	mr    %r6,%r11

 # fall through to...

endstringeq:
 # (like 'endstring', but an equality code is in cr1)
	beq  %cr1,equal
endstring:
 # OK. We've hit the end of the string. We need to be careful that
 # we don't compare two strings as different because of gunk beyond
 # the end of the strings. We do it like this...
 #
 # On entry:
 #   r5 = word from s1, r6 = matching (re-aligned) word from s2,
 #   r8 = 0x7f7f7f7f, r9 = ~(r5 | 0x7f7f7f7f).
 # On exit: r3 = 0 if the strings are equal up to the terminator,
 # otherwise a nonzero value whose sign gives the ordering.
	and  %r0,%r8,%r5	# low 7 bits of every byte of r5
	add  %r0,%r0,%r8	# bit 7 of a byte set iff its low 7 bits are nonzero
	xor. %r10,%r5,%r6	# differing bits; cr0.lt iff the top (sign) bits differ
	andc %r9,%r9,%r0	# r9 = 0x80 in exactly the bytes of r5 that are zero
	cntlzw %r10,%r10	# bit index of the first difference (32 if none)
	cntlzw %r9,%r9		# bit index of the first zero byte's top bit
	addi %r9,%r9,7		# bit index of the last bit of the first zero byte
	cmpw %cr1,%r9,%r10
	blt  %cr1,equal		# first difference lies after the terminator: equal
	sub  %r3,%r5,%r6	# safe when the top bits agree: cannot overflow
	bgelr+			# cr0 from the xor.: top bits agree, return r5 - r6
 # Top bits differ, so r5 - r6 may overflow.  Return r6 with its sign
 # instead: negative iff r6 has the top bit set (r5 < r6 unsigned).
 # The low bit is forced on so that the result is never 0 (r6 == 0 would
 # otherwise report "equal") and never 0x80000000 (which the 'neg' on the
 # align3 path could not turn positive).
	ori  %r3,%r6,1
	blr
equal:	li   %r3,0
	blr
	
 # The loop, case when s2 is aligned 1 char behind s1.
 # r10:	current word in s2 (before re-alignment)
 #
 # Every s1 word in r5 is compared with an r6 assembled from the last byte
 # of the previous s2 word (top byte) and the first three bytes of the
 # current s2 word held in r10 (low 24 bits).

align1:
	cmpwi %cr1,%r0,0	# r0 = bit offset of s1 in its word (computed at strcmp3)
	orc   %r5,%r5,%r11	# force the bytes before the start of s1 to 0xff
	bne   %cr1,align1_123
 # When s1 is aligned to a word boundary, the startup processing is special.
	slwi. %r6,%r6,24	# only the last byte of the s2 word belongs to s2;
				# move it to the top byte and test it for NUL
	bne+  a1entry_0		# non-NUL: fetch the next s2 word and compare
	nor   %r9,%r8,%r5	# s2 is empty: set up r9 the way endstring expects
	b     endstring

align1_123:
 # Otherwise (s1 not aligned to a word boundary):
	mr    %r10,%r6		# keep the unshifted s2 word for loopalign1
	add   %r0,%r7,%r5
	nor   %r9,%r8,%r5
	and.  %r0,%r0,%r9	# cr0.ne: r5 contains a zero byte
	srwi  %r6,%r6,8		# shift s2 right one byte to line it up with s1
	orc   %r6,%r6,%r11	# same 0xff padding as was applied to r5
	cmplw %cr1,%r5,%r6
	bne-  endstringeq
	bne-  %cr1,difference

loopalign1:
	slwi. %r6,%r10,24	# r6 = last byte of the current s2 word, in the top byte;
				# cr0.eq iff that byte is NUL
	bne-  %cr1,a1difference	# the previous word pair differed
	lwzu  %r5,4(%r3)	# next s1 word (lwzu leaves cr0 untouched)
	beq-  endstring1	# s2 ends at the byte matching the first byte of r5
a1entry_0:
	lwzu  %r10,4(%r4)	# next s2 word
 # NOTE(review): no branch to a1entry_123 is visible in this file.
a1entry_123:	
	add   %r0,%r7,%r5
	nor   %r9,%r8,%r5
	and.  %r0,%r0,%r9	# cr0.ne: r5 contains a zero byte
	rlwimi %r6,%r10,24,8,31	# low 24 bits of r6 = r10 >> 8; the top byte is kept
	cmplw %cr1,%r5,%r6
	beq+  loopalign1	# no zero byte in r5: keep going
	b     endstringeq

 # s2's current char is NUL, so the result is simply the s1 char (>= 0).
endstring1:
	srwi  %r3,%r5,24	# first byte of the s1 word
	blr

 # r6 was overwritten by the slwi. at loopalign1; rebuild the value that
 # was compared against r5 before handing over to 'difference'.
a1difference:
	lbz   %r6,-1(%r4)	# last byte of the s2 word preceding the current one
	slwi  %r6,%r6,24
	rlwimi %r6,%r10,24,8,31	# low 24 bits = first three bytes of the current s2 word

 # fall through to...
		
difference:	
 # The idea here is that we could just return '%r5 - %r6', except
 # that the result might overflow. Overflow can only happen when %r5
 # and %r6 have different signs (thus the xor), in which case we want to
 # return negative iff %r6 has its high bit set so %r5 < %r6.
 # A branch-free implementation of this is
 #	xor  %r0,%r5,%r6
 #	rlwinm %r0,%r0,1,31,31
 #	rlwnm %r5,%r5,%r0,1,31
 #	rlwnm %r6,%r6,%r0,1,31
 #	sub  %r3,%r5,%r6
 #	blr
 # but this is usually more expensive.
 #
 # On entry r5 != r6 and r5 holds no zero byte.  When the signs differ the
 # result is %r6 with its low bit forced on: the sign is that of %r6, and
 # the value can be neither 0 (which would claim the strings are equal when
 # %r6 is an all-zero word) nor 0x80000000 (which the 'neg' on the align3
 # path could not turn positive).
	xor. %r0,%r5,%r6	# cr0.lt iff the top (sign) bits differ
	sub  %r3,%r5,%r6	# cannot overflow when the top bits agree
	bgelr+
	ori  %r3,%r6,1
	blr

difference2:
 # As for 'difference', but use registers r10 and r11 instead of r5 and r6.
	xor. %r0,%r10,%r11	# cr0.lt iff the top (sign) bits differ
	sub  %r3,%r10,%r11
	bgelr+
	ori  %r3,%r11,1		# sign of r11, never 0 and never 0x80000000
	blr
	
 # For the case when s2 is aligned 3 chars behind s1, we switch
 # s1 and s2...
 # r10:	used by 'align2' (see below)
 # r11:	used by 'align2' (see below)
 # r12:	saved link register
 # cr0.eq: must be left as 1.
 #
 # With the arguments swapped the alignment difference becomes 1, so the
 # re-entry at strcmp3 is expected to reach align1; the result is then
 # negated to undo the swap.
 # NOTE(review): strcmp3 reaches align1 through 'bne- align1', which needs
 # cr0.eq to be 0 (as left by 'andi. %r10,%r10,3' when the result is 3), so
 # the "cr0.eq ... 1" line above looks inverted -- confirm.
 # NOTE(review): the align2 code in this file does not read r10 or r11;
 # the two register lines above presumably mean 'align1' -- confirm.

align3:	mflr %r12		# bl below overwrites LR; keep the caller's return address
	mr   %r0,%r3		# swap r3 and r4 through r0
	mr   %r3,%r4
	mr   %r4,%r0
	bl   strcmp3		# compare (s2, s1); r7, r8 and cr0 are untouched so far
	mtlr %r12		# restore the caller's return address
	neg  %r3,%r3		# undo the swap: flip the sign of the result
	blr
	
 # The loop, case when s2 and s1's alignments differ by 2
 # This is the ugly case...
 # FIXME: on a 601, the loop takes 7 cycles instead of the 6 you'd expect,
 # because there are too many branches. This loop should probably be
 # coded like the align1 case.
 #
 # The strings are compared one halfword (two chars) at a time:
 # r5 = current halfword of s1, r6 = current halfword of s2.
 # r7 and r8 are not used on this path (it is entered before they are set).
	
 # Both pointers are even: start directly with the first halfwords.
a2even:	lhz   %r5,0(%r3)
	lhz   %r6,0(%r4)
	b     a2entry
	
align2:
	andi. %r0,%r3,1		# does s1 start on an odd address?
	beq+  a2even
 # Odd start: compare the first char of each string separately so that
 # the halfword loop runs on even addresses.
	subi  %r3,%r3,1
	subi  %r4,%r4,1
	lbz   %r5,1(%r3)	# first char of s1
	lbz   %r6,1(%r4)	# first char of s2
	cmpwi %cr0,%r5,0	# cr0.eq: s1 is empty
	cmpw  %cr1,%r5,%r6	# cr1.eq: first chars match
	beq-  align2end2	# s1 empty: return 0 - first char of s2
	lhzu  %r5,2(%r3)	# look ahead: next halfword of s1
	beq+  %cr1,a2entry1	# first chars match: carry on with halfwords
	lbz   %r5,-1(%r3)	# mismatch: reload the first char of s1 (r5 was overwritten)
	sub   %r3,%r5,%r6
	blr

 # Reached from a2entry with cr0.eq set iff the second char of r5 is NUL.
loopalign2:
	cmpw  %cr1,%r5,%r6
	beq-  align2end2	# s1 ends in this halfword: return r5 - r6
	lhzu  %r5,2(%r3)	# look ahead: next halfword of s1
	bne-  %cr1,align2different	# the halfwords just compared differ
a2entry1:
	lhzu  %r6,2(%r4)	# next halfword of s2
a2entry:	
	cmpwi %cr5,%r5,0x00ff	# cr5.gt iff the first char (high byte) of r5 is nonzero
	andi. %r0,%r5,0x00ff	# cr0.eq iff the second char (low byte) of r5 is NUL
	bgt+  %cr5,loopalign2

 # The first char of the s1 halfword is NUL: the result is 0 if s2 ends
 # at the same place, otherwise negative.
align2end:
	andi. %r3,%r6,0xff00	# first char of the s2 halfword, still shifted left by 8
	neg   %r3,%r3
	blr

align2different:
	lhzu  %r5,-2(%r3)	# reload the s1 halfword that was compared (undo the look-ahead)
align2end2:
	sub   %r3,%r5,%r6	# both values are below 0x10000, so this cannot overflow
	blr
		
 # Local label 0 marks the end of the routine for the .size computation.
0:
	.size	 strcmp,0b-strcmp