summary refs log tree commit diff
path: root/sysdeps/arm/armv6/strcpy.S
blob: 10c5c575add7528484d9613cfa26b287fd2b1136 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
/* strcpy -- copy a nul-terminated string.
   Copyright (C) 2013-2018 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* Endian independent macros for shifting bytes within registers.  */
#ifdef __ARMEB__
#define lsh_gt		lsr
#define lsh_ls		lsl
#else
#define lsh_gt		lsl
#define lsh_ls		lsr
#endif

	.syntax unified
	.text

ENTRY (__stpcpy)
	@ Signal stpcpy with NULL in IP.
	mov	ip, #0
	b	0f
END (__stpcpy)

weak_alias (__stpcpy, stpcpy)
libc_hidden_def (__stpcpy)
libc_hidden_builtin_def (stpcpy)

ENTRY (strcpy)
	@ Signal strcpy with DEST in IP.
	mov	ip, r0
0:
	pld	[r0, #0]
	pld	[r1, #0]

	@ To cater to long strings, we want 8 byte alignment in the source.
	@ To cater to small strings, we don't want to start that right away.
	@ Loop up to 16 times, less whatever it takes to reach alignment.
	and	r3, r1, #7
	rsb	r3, r3, #16

	@ Loop until we find ...
1:	ldrb	r2, [r1], #1
	subs	r3, r3, #1		@ ... the alignment point
	strb	r2, [r0], #1
	it	ne
	cmpne	r2, #0			@ ... or EOS
	bne	1b

	@ Disambiguate the exit possibilites above
	cmp	r2, #0			@ Found EOS
	beq	.Lreturn

	@ Load the next two words asap
	ldrd	r2, r3, [r1], #8
	pld	[r0, #64]
	pld	[r1, #64]

	@ For longer strings, we actaully need a stack frame.
	push	{ r4, r5, r6, r7 }
	cfi_adjust_cfa_offset (16)
	cfi_rel_offset (r4, 0)
	cfi_rel_offset (r5, 4)
	cfi_rel_offset (r6, 8)
	cfi_rel_offset (r7, 12)

	@ Subtracting (unsigned saturating) from 1 for any byte means result
	@ of 1 for any byte that was originally zero and 0 otherwise.
	@ Therefore we consider the lsb of each byte the "found" bit.
#ifdef ARCH_HAS_T2
	movw	r7, #0x0101
	tst	r0, #3			@ Test alignment of DEST
	movt	r7, #0x0101
#else
	ldr	r7, =0x01010101
	tst	r0, #3
#endif
	bne	.Lunaligned

	@ So now source (r1) is aligned to 8, and dest (r0) is aligned to 4.
	@ Loop, reading 8 bytes at a time, searching for EOS.
	.balign	16
2:	uqsub8	r4, r7, r2		@ Find EOS
	uqsub8	r5, r7, r3
	pld	[r1, #128]
	cmp	r4, #0			@ EOS in first word?
	pld	[r0, #128]
	bne	3f
	str	r2, [r0], #4
	cmp	r5, #0			@ EOS in second word?
	bne	4f
	str	r3, [r0], #4
	ldrd	r2, r3, [r1], #8
	b	2b

3:	sub	r1, r1, #4		@ backup to first word
4:	sub	r1, r1, #4		@ backup to second word

	@ ... then finish up any tail a byte at a time.
	@ Note that we generally back up and re-read source bytes,
	@ but we'll not re-write dest bytes.
.Lbyte_loop:
	ldrb	r2, [r1], #1
	cmp	r2, #0
	strb	r2, [r0], #1
	bne	.Lbyte_loop

	pop	{ r4, r5, r6, r7 }
	cfi_remember_state
	cfi_adjust_cfa_offset (-16)
	cfi_restore (r4)
	cfi_restore (r5)
	cfi_restore (r6)
	cfi_restore (r7)

.Lreturn:
	cmp	ip, #0			@ Was this strcpy or stpcpy?
	ite	eq
	subeq	r0, r0, #1		@ stpcpy: undo post-inc from store
	movne	r0, ip			@ strcpy: return original dest
	bx	lr

.Lunaligned:
	cfi_restore_state
	@ Here, source is aligned to 8, but the destination is not word
	@ aligned.  Therefore we have to shift the data in order to be
	@ able to perform aligned word stores.

	@ Find out which misalignment we're dealing with.
	tst	r0, #1
	beq	.Lunaligned2
	tst	r0, #2
	bne	.Lunaligned3
	@ Fallthru to .Lunaligned1.

.macro unaligned_copy	unalign
	@ Prologue to unaligned loop.  Seed shifted non-zero bytes.
	uqsub8	r4, r7, r2		@ Find EOS
	uqsub8	r5, r7, r3
	cmp	r4, #0			@ EOS in first word?
	it	ne
	subne	r1, r1, #8
	bne	.Lbyte_loop
#ifdef __ARMEB__
	rev	r2, r2			@ Byte stores below need LE data
#endif
	@ Store a few bytes from the first word.
	@ At the same time we align r0 and shift out bytes from r2.
.rept	4-\unalign
	strb	r2, [r0], #1
	lsr	r2, r2, #8
.endr
#ifdef __ARMEB__
	rev	r2, r2			@ Undo previous rev
#endif
	@ Rotated unaligned copy loop.  The tail of the prologue is
	@ shared with the loop itself.
	.balign 8
1:	cmp	r5, #0			@ EOS in second word?
	bne	4f
	@ Combine first and second words
	orr	r2, r2, r3, lsh_gt #(\unalign*8)
	@ Save leftover bytes from the two words
	lsh_ls	r6, r3, #((4-\unalign)*8)
	str	r2, [r0], #4
	@ The "real" start of the unaligned copy loop.
	ldrd	r2, r3, [r1], #8	@ Load 8 more bytes
	uqsub8	r4, r7, r2		@ Find EOS
	pld	[r1, #128]
	uqsub8	r5, r7, r3
	pld	[r0, #128]
	cmp	r4, #0			@ EOS in first word?
	bne	3f
	@ Combine the leftover and the first word
	orr	r6, r6, r2, lsh_gt #(\unalign*8)
	@ Discard used bytes from the first word.
	lsh_ls	r2, r2, #((4-\unalign)*8)
	str	r6, [r0], #4
	b	1b
	@ Found EOS in one of the words; adjust backward
3:	sub	r1, r1, #4
	mov	r2, r6
4:	sub	r1, r1, #4
	@ And store the remaining bytes from the leftover
#ifdef __ARMEB__
	rev	r2, r2
#endif
.rept	\unalign
	strb	r2, [r0], #1
	lsr	r2, r2, #8
.endr
	b	.Lbyte_loop
.endm

.Lunaligned1:
	unaligned_copy	1
.Lunaligned2:
	unaligned_copy	2
.Lunaligned3:
	unaligned_copy	3

END (strcpy)

libc_hidden_builtin_def (strcpy)