1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
|
# Optimized strcmp implementation for PowerPC.
# Copyright (C) 1997 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
#
# The GNU C Library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Library General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# The GNU C Library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Library General Public License for more details.
#
# You should have received a copy of the GNU Library General Public
# License along with the GNU C Library; see the file COPYING.LIB. If not,
# write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
# See strlen.s for comments on how the end-of-string testing works.
.section ".text"
.align 3
.globl strcmp
.type strcmp,@function
strcmp:
# int [r3] strcmp (const char *p1 [r3], const char *p2 [r4])
# General register assignments:
# r0: temporary
# r3: pointer to previous word in s1
# r4: pointer to previous word in s2
# r5: current first word in s1
# r6: current first word in s2 (after re-alignment)
# r7: 0xfefefeff
# r8: 0x7f7f7f7f
# r9: ~(word in s1 | 0x7f7f7f7f)
# Register assignments in the prologue:
# r10: low 2 bits of p2-p1
# r11: mask to orc with r5/r6
subf. %r10,%r4,%r3
beq- equal
andi. %r10,%r10,3
cmpi %cr1,%r10,2
beq- %cr1,align2
lis %r7,0xfeff
lis %r8,0x7f7f
addi %r8,%r8,0x7f7f
addi %r7,%r7,0xfffffeff
bgt- %cr1,align3
strcmp3:
rlwinm %r0,%r3,3,27,28
li %r11,-1
srw %r11,%r11,%r0
clrrwi %r3,%r3,2
clrrwi %r4,%r4,2
lwz %r5,0(%r3)
lwz %r6,0(%r4)
bne- align1
# The loop, case when both strings are aligned the same.
# on entry, cr1.eq must be 1.
# r10: second word in s1
# r11: second word in s2 OR mask to orc with first two words.
align0:
andi. %r0,%r3,4
orc %r5,%r5,%r11
orc %r6,%r6,%r11
beq+ a0start
add %r0,%r7,%r5
nor %r9,%r8,%r5
and. %r0,%r0,%r9
cmplw %cr1,%r5,%r6
subi %r3,%r3,4
bne- endstringeq
subi %r4,%r4,4
bne- %cr1,difference
loopalign0:
lwzu %r5,8(%r3)
bne- %cr1,difference2
lwzu %r6,8(%r4)
a0start:
add %r0,%r7,%r5
nor %r9,%r8,%r5
and. %r0,%r0,%r9
cmplw %cr1,%r5,%r6
lwz %r10,4(%r3)
bne- endstringeq
add %r0,%r7,%r10
bne- %cr1,difference
nor %r9,%r8,%r10
lwz %r11,4(%r4)
and. %r0,%r0,%r9
cmplw %cr1,%r10,%r11
beq+ loopalign0
mr %r5,%r10
mr %r6,%r11
# fall through to...
endstringeq:
# (like 'endstring', but an equality code is in cr1)
beq %cr1,equal
endstring:
# OK. We've hit the end of the string. We need to be careful that
# we don't compare two strings as different because of gunk beyond
# the end of the strings. We do it like this...
and %r0,%r8,%r5
add %r0,%r0,%r8
xor. %r10,%r5,%r6
andc %r9,%r9,%r0
cntlzw %r10,%r10
cntlzw %r9,%r9
addi %r9,%r9,7
cmpw %cr1,%r9,%r10
blt %cr1,equal
sub %r3,%r5,%r6
bgelr+
mr %r3,%r6
blr
equal: li %r3,0
blr
# The loop, case when s2 is aligned 1 char behind s1.
# r10: current word in s2 (before re-alignment)
align1:
cmpwi %cr1,%r0,0
orc %r5,%r5,%r11
bne %cr1,align1_123
# When s1 is aligned to a word boundary, the startup processing is special.
slwi. %r6,%r6,24
bne+ a1entry_0
nor %r9,%r8,%r5
b endstring
align1_123:
# Otherwise (s1 not aligned to a word boundary):
mr %r10,%r6
add %r0,%r7,%r5
nor %r9,%r8,%r5
and. %r0,%r0,%r9
srwi %r6,%r6,8
orc %r6,%r6,%r11
cmplw %cr1,%r5,%r6
bne- endstringeq
bne- %cr1,difference
loopalign1:
slwi. %r6,%r10,24
bne- %cr1,a1difference
lwzu %r5,4(%r3)
beq- endstring1
a1entry_0:
lwzu %r10,4(%r4)
a1entry_123:
add %r0,%r7,%r5
nor %r9,%r8,%r5
and. %r0,%r0,%r9
rlwimi %r6,%r10,24,8,31
cmplw %cr1,%r5,%r6
beq+ loopalign1
b endstringeq
endstring1:
srwi %r3,%r5,24
blr
a1difference:
lbz %r6,-1(%r4)
slwi %r6,%r6,24
rlwimi %r6,%r10,24,8,31
# fall through to...
difference:
# The idea here is that we could just return '%r5 - %r6', except
# that the result might overflow. Overflow can only happen when %r5
# and %r6 have different signs (thus the xor), in which case we want to
# return negative iff %r6 has its high bit set so %r5 < %r6.
# A branch-free implementation of this is
# xor %r0,%r5,%r6
# rlwinm %r0,%r0,1,31,31
# rlwnm %r5,%r5,%r0,1,31
# rlwnm %r6,%r6,%r0,1,31
# sub %r3,%r5,%r6
# blr
# but this is usually more expensive.
xor. %r0,%r5,%r6
sub %r3,%r5,%r6
bgelr+
mr %r3,%r6
blr
difference2:
# As for 'difference', but use registers r10 and r11 instead of r5 and r6.
xor. %r0,%r10,%r11
sub %r3,%r10,%r11
bgelr+
mr %r3,%r11
blr
# For the case when s2 is aligned 3 chars behind s1, we switch
# s1 and s2...
# r10: used by 'align2' (see below)
# r11: used by 'align2' (see below)
# r12: saved link register
# cr0.eq: must be left as 1.
align3: mflr %r12
mr %r0,%r3
mr %r3,%r4
mr %r4,%r0
bl strcmp3
mtlr %r12
neg %r3,%r3
blr
# The loop, case when s2 and s1's alignments differ by 2
# This is the ugly case...
# FIXME: on a 601, the loop takes 7 cycles instead of the 6 you'd expect,
# because there are too many branches. This loop should probably be
# coded like the align1 case.
a2even: lhz %r5,0(%r3)
lhz %r6,0(%r4)
b a2entry
align2:
andi. %r0,%r3,1
beq+ a2even
subi %r3,%r3,1
subi %r4,%r4,1
lbz %r5,1(%r3)
lbz %r6,1(%r4)
cmpwi %cr0,%r5,0
cmpw %cr1,%r5,%r6
beq- align2end2
lhzu %r5,2(%r3)
beq+ %cr1,a2entry1
lbz %r5,-1(%r3)
sub %r3,%r5,%r6
blr
loopalign2:
cmpw %cr1,%r5,%r6
beq- align2end2
lhzu %r5,2(%r3)
bne- %cr1,align2different
a2entry1:
lhzu %r6,2(%r4)
a2entry:
cmpwi %cr5,%r5,0x00ff
andi. %r0,%r5,0x00ff
bgt+ %cr5,loopalign2
align2end:
andi. %r3,%r6,0xff00
neg %r3,%r3
blr
align2different:
lhzu %r5,-2(%r3)
align2end2:
sub %r3,%r5,%r6
blr
0:
.size strcmp,0b-strcmp
|