about summary refs log tree commit diff
path: root/sysdeps/sparc/sparc64/strcmp.S
blob: f32b0fc779cc4722bd380efc7e045b253e51266c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
/* Compare two strings for differences.
   For SPARC v9.
   Copyright (C) 2011-2014 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by David S. Miller <davem@davemloft.net>

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include <asm/asi.h>

#ifndef XCC
	.register	%g2, #scratch
	.register	%g3, #scratch
	.register	%g6, #scratch
#endif

#define rSTR1		%o0
#define rSTR2		%o1
#define r0101		%o2	/* 0x0101010101010101 */
#define r8080		%o3	/* 0x8080808080808080 */
#define rSTRXOR		%o4
#define rWORD1		%o5
#define rTMP1		%g1
#define rTMP2		%g2
#define rWORD2		%g3
#define rSLL		%g4
#define rSRL		%g5
#define rBARREL		%g6

	/* There are two cases, either the two pointers are aligned
	 * identically or they are not.  If they have the same
	 * alignment we can use the normal full speed loop.  Otherwise
	 * we have to use the barrel-shifter version.
	 */

	.text
	.align	32
ENTRY(strcmp)
	or	rSTR2, rSTR1, rTMP1
	sethi	%hi(0x80808080), r8080

	andcc	rTMP1, 0x7, %g0
	bne,pn	%icc, .Lmaybe_barrel_shift
	 or	r8080, %lo(0x80808080), r8080
	ldx	[rSTR1], rWORD1

	sub	rSTR2, rSTR1, rSTR2
	sllx	r8080, 32, rTMP1

	ldx	[rSTR1 + rSTR2], rWORD2
	or	r8080, rTMP1, r8080

	ba,pt	%xcc, .Laligned_loop_entry
	 srlx	r8080, 7, r0101

	.align	32
.Laligned_loop_entry:
.Laligned_loop:
	add	rSTR1, 8, rSTR1

	sub	rWORD1, r0101, rTMP2
	xorcc	rWORD1, rWORD2, rSTRXOR
	bne,pn	%xcc, .Lcommon_endstring

	 andn	r8080, rWORD1, rTMP1

	ldxa	[rSTR1] ASI_PNF, rWORD1
	andcc	rTMP1, rTMP2, %g0
	be,a,pt	%xcc, .Laligned_loop

	 ldxa	[rSTR1 + rSTR2] ASI_PNF, rWORD2

.Lcommon_equal:
	retl
	 mov	0, %o0

	/* All loops terminate here once they find an unequal word.
	 * If a zero byte appears in the word before the first unequal
	 * byte, we must report zero.  Otherwise we report '1' or '-1'
	 * depending upon whether the first mis-matching byte is larger
	 * in the first string or the second, respectively.
	 *
	 * First we compute a 64-bit mask value that has "0x01" in
	 * each byte where a zero exists in rWORD1.  rSTRXOR holds the
	 * value (rWORD1 ^ rWORD2).  Therefore, if considered as an
	 * unsigned quantity, our "0x01" mask value is "greater than"
	 * rSTRXOR then a zero terminating byte comes first and
	 * therefore we report '0'.
	 *
	 * The formula for this mask is:
	 *
	 *    mask_tmp1 = ~rWORD1 & 0x8080808080808080;
	 *    mask_tmp2 = ((rWORD1 & 0x7f7f7f7f7f7f7f7f) +
	 *                 0x7f7f7f7f7f7f7f7f);
	 *
	 *    mask = ((mask_tmp1 & ~mask_tmp2) >> 7);
	 */
.Lcommon_endstring:
	andn	rWORD1, r8080, rTMP2
	or	r8080, 1, %o1

	mov	1, %o0
	sub	rTMP2, %o1, rTMP2

	cmp	rWORD1, rWORD2
	andn	rTMP1, rTMP2, rTMP1

	movleu	%xcc, -1, %o0
	srlx	rTMP1, 7, rTMP1

	/* In order not to be influenced by bytes after the zero byte, we
	 * have to retain only the highest bit in the mask for the comparison
	 * with rSTRXOR to work properly.
	 */
	mov	0, rTMP2
	andcc	rTMP1, 0x0100, %g0

	movne	%xcc, 8, rTMP2
	sllx	rTMP1, 63 - 16, %o1

	movrlz	%o1, 16, rTMP2
	sllx	rTMP1, 63 - 24, %o1

	movrlz	%o1, 24, rTMP2
	sllx	rTMP1, 63 - 32, %o1

	movrlz	%o1, 32, rTMP2
	sllx	rTMP1, 63 - 40, %o1

	movrlz	%o1, 40, rTMP2
	sllx	rTMP1, 63 - 48, %o1

	movrlz	%o1, 48, rTMP2
	sllx	rTMP1, 63 - 56, %o1

	movrlz	%o1, 56, rTMP2

	srlx	rTMP1, rTMP2, rTMP1

	sllx	rTMP1, rTMP2, rTMP1

	cmp	rTMP1, rSTRXOR
	retl
	 movgu	%xcc, 0, %o0

.Lmaybe_barrel_shift:
	sub	rSTR2, rSTR1, rSTR2
	sllx	r8080, 32, rTMP1

	or	r8080, rTMP1, r8080
	and	rSTR1, 0x7, rTMP2

	srlx	r8080, 7, r0101
	andn	rSTR1, 0x7, rSTR1

	ldxa	[rSTR1] ASI_PNF, rWORD1
	andcc	rSTR2, 0x7, rSLL
	sll	rTMP2, 3, rSTRXOR

	bne,pn	%icc, .Lneed_barrel_shift
	 mov	-1, rTMP1
	ldxa	[rSTR1 + rSTR2] ASI_PNF, rBARREL

	srlx	rTMP1, rSTRXOR, rTMP2

	orn	rWORD1, rTMP2, rWORD1
	ba,pt	%xcc, .Laligned_loop_entry
	 orn	rBARREL, rTMP2, rWORD2

.Lneed_barrel_shift:
	sllx	rSLL, 3, rSLL
	andn	rSTR2, 0x7, rSTR2

	ldxa	[rSTR1 + rSTR2] ASI_PNF, rBARREL
	mov	64, rTMP2
	sub	rTMP2, rSLL, rSRL

	srlx	rTMP1, rSTRXOR, rTMP1
	add	rSTR2, 8, rSTR2

	orn	rWORD1, rTMP1, rWORD1
	sllx	rBARREL, rSLL, rWORD2
	ldxa	[rSTR1 + rSTR2] ASI_PNF, rBARREL

	add	rSTR1, 8, rSTR1
	sub	rWORD1, r0101, rTMP2

	srlx	rBARREL, rSRL, rSTRXOR

	or	rWORD2, rSTRXOR, rWORD2

	orn	rWORD2, rTMP1, rWORD2
	ba,pt	%xcc, .Lbarrel_shift_loop_entry
	 andn	r8080, rWORD1, rTMP1

.Lbarrel_shift_loop:
	sllx	rBARREL, rSLL, rWORD2
	ldxa	[rSTR1 + rSTR2] ASI_PNF, rBARREL

	add	rSTR1, 8, rSTR1
	sub	rWORD1, r0101, rTMP2

	srlx	rBARREL, rSRL, rSTRXOR
	andn	r8080, rWORD1, rTMP1

	or	rWORD2, rSTRXOR, rWORD2

.Lbarrel_shift_loop_entry:
	xorcc	rWORD1, rWORD2, rSTRXOR
	bne,pn	%xcc, .Lcommon_endstring

	 andcc	rTMP1, rTMP2, %g0
	be,a,pt	%xcc, .Lbarrel_shift_loop
	 ldxa	[rSTR1] ASI_PNF, rWORD1

	retl
	 mov	0, %o0
END(strcmp)
libc_hidden_builtin_def (strcmp)