about summary refs log tree commit diff
path: root/sysdeps/aarch64/dl-tlsdesc.S
blob: 9e557dd1344eac72b8d6966f3763e0270f87f44e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
/* Thread-local storage handling in the ELF dynamic linker.
   AArch64 version.
   Copyright (C) 2011-2016 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include <tls.h>
#include "tlsdesc.h"

/* Number of Q-register pairs handled by SAVE_Q_REGISTERS and
   RESTORE_Q_REGISTERS: 16 pairs, i.e. q0-q31.  Each pair occupies
   32 bytes, so the save area is 32*NSAVEDQREGPAIRS bytes.  */
#define NSAVEDQREGPAIRS	16
/* Spill q0-q31 to the stack.
   The first stp pre-decrements sp by the size of the whole save area
   and stores q0/q1 at the new sp; the CFA offset is adjusted by the
   same amount.  The remaining pairs are stored at consecutive 32-byte
   slots above sp (pair N at [sp, #32*N]).
   Every use must be matched by RESTORE_Q_REGISTERS, which reads the
   same slots and releases the area.  */
#define SAVE_Q_REGISTERS				\
	stp	q0, q1,	[sp, #-32*NSAVEDQREGPAIRS]!;	\
	cfi_adjust_cfa_offset (32*NSAVEDQREGPAIRS);	\
	stp	 q2,  q3, [sp, #32*1];			\
	stp	 q4,  q5, [sp, #32*2];			\
	stp	 q6,  q7, [sp, #32*3];			\
	stp	 q8,  q9, [sp, #32*4];			\
	stp	q10, q11, [sp, #32*5];			\
	stp	q12, q13, [sp, #32*6];			\
	stp	q14, q15, [sp, #32*7];			\
	stp	q16, q17, [sp, #32*8];			\
	stp	q18, q19, [sp, #32*9];			\
	stp	q20, q21, [sp, #32*10];			\
	stp	q22, q23, [sp, #32*11];			\
	stp	q24, q25, [sp, #32*12];			\
	stp	q26, q27, [sp, #32*13];			\
	stp	q28, q29, [sp, #32*14];			\
	stp	q30, q31, [sp, #32*15];

/* Reload q0-q31 from the area written by SAVE_Q_REGISTERS.
   sp must point at the base of that area (the value it had right
   after SAVE_Q_REGISTERS).  Pairs q2..q31 are loaded from their fixed
   32-byte slots first; q0/q1 are loaded last with a post-increment
   that releases the whole 32*NSAVEDQREGPAIRS-byte area, and the CFA
   offset is adjusted back by the same amount.  */
#define RESTORE_Q_REGISTERS				\
	ldp	 q2,  q3, [sp, #32*1];			\
	ldp	 q4,  q5, [sp, #32*2];			\
	ldp	 q6,  q7, [sp, #32*3];			\
	ldp	 q8,  q9, [sp, #32*4];			\
	ldp	q10, q11, [sp, #32*5];			\
	ldp	q12, q13, [sp, #32*6];			\
	ldp	q14, q15, [sp, #32*7];			\
	ldp	q16, q17, [sp, #32*8];			\
	ldp	q18, q19, [sp, #32*9];			\
	ldp	q20, q21, [sp, #32*10];			\
	ldp	q22, q23, [sp, #32*11];			\
	ldp	q24, q25, [sp, #32*12];			\
	ldp	q26, q27, [sp, #32*13];			\
	ldp	q28, q29, [sp, #32*14];			\
	ldp	q30, q31, [sp, #32*15];			\
	ldp	 q0,  q1, [sp], #32*NSAVEDQREGPAIRS;	\
	cfi_adjust_cfa_offset (-32*NSAVEDQREGPAIRS);

	.text

	/* Compute the thread pointer offset for symbols in the static
	   TLS block.  The offset is the same for all threads.
	   Prototype:
	   _dl_tlsdesc_return (tlsdesc *) ;

	   In:   x0 = address of the TLS descriptor.
	   Out:  x0 = the pointer-sized word stored at offset PTR_SIZE in
		 the descriptor (the second word).
	   The function uses no stack and the instructions below write
	   no register other than x0.  */
	.hidden _dl_tlsdesc_return
	.global	_dl_tlsdesc_return
	.type	_dl_tlsdesc_return,%function
	cfi_startproc
	.align 2
_dl_tlsdesc_return:
	/* DELOUSE is defined outside this file; presumably it sanitizes
	   the pointer held in x0 on ABIs with 32-bit pointers -- confirm
	   against sysdep.h.  */
	DELOUSE (0)
	/* Replace the descriptor pointer with the descriptor's second
	   word, which is the return value.  */
	ldr	PTR_REG (0), [x0, #PTR_SIZE]
	RET
	cfi_endproc
	.size	_dl_tlsdesc_return, .-_dl_tlsdesc_return

	/* Same as _dl_tlsdesc_return but with synchronization for
	   lazy relocation.
	   Prototype:
	   _dl_tlsdesc_return_lazy (tlsdesc *) ;

	   In:   x0 = address of the TLS descriptor.
	   Out:  x0 = the pointer-sized word stored at offset PTR_SIZE in
		 the descriptor.
	   The function uses no stack; the only general register
	   written is x0 (the ldar targets the zero register).  */
	.hidden _dl_tlsdesc_return_lazy
	.global	_dl_tlsdesc_return_lazy
	.type	_dl_tlsdesc_return_lazy,%function
	cfi_startproc
	.align 2
_dl_tlsdesc_return_lazy:
	/* The ldar here happens after the load from [x0] at the call site
	   (that is generated by the compiler as part of the TLS access ABI),
	   so it reads the same value (this function is the final value of
	   td->entry) and thus it synchronizes with the release store to
	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
	   from [x0,#PTR_SIZE] here happens after the initialization of td->arg. */
	DELOUSE (0)
	/* Load-acquire of the descriptor's first word; the loaded value is
	   discarded, only the ordering effect is wanted.  */
	ldar	PTR_REG (zr), [x0]
	/* Return the descriptor's second word.  */
	ldr	PTR_REG (0), [x0, #PTR_SIZE]
	RET
	cfi_endproc
	.size	_dl_tlsdesc_return_lazy, .-_dl_tlsdesc_return_lazy

	/* Handler for undefined weak TLS symbols.
	   Prototype:
	   _dl_tlsdesc_undefweak (tlsdesc *);

	   The second word of the descriptor contains the addend.
	   Return the addend minus the thread pointer. This ensures
	   that when the caller adds on the thread pointer it gets back
	   the addend.

	   In:   x0 = address of the TLS descriptor.
	   Out:  x0 = addend - thread pointer (tpidr_el0).
	   x1 is used as scratch and is saved to and restored from a
	   16-byte stack slot, so only x0 is changed on return.  */

	.hidden _dl_tlsdesc_undefweak
	.global	_dl_tlsdesc_undefweak
	.type	_dl_tlsdesc_undefweak,%function
	cfi_startproc
	.align  2
_dl_tlsdesc_undefweak:
	/* Spill x1; the slot is 16 bytes to keep sp 16-byte aligned.  */
	str	x1, [sp, #-16]!
	cfi_adjust_cfa_offset (16)
	/* The ldar here happens after the load from [x0] at the call site
	   (that is generated by the compiler as part of the TLS access ABI),
	   so it reads the same value (this function is the final value of
	   td->entry) and thus it synchronizes with the release store to
	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
	   from [x0,#PTR_SIZE] here happens after the initialization of
	   td->arg.  */
	DELOUSE (0)
	ldar	PTR_REG (zr), [x0]
	/* x0 = addend (second word of the descriptor).  */
	ldr	PTR_REG (0), [x0, #PTR_SIZE]
	/* x1 = thread pointer.  */
	mrs	x1, tpidr_el0
	/* x0 = addend - thread pointer.  */
	sub	PTR_REG (0), PTR_REG (0), PTR_REG (1)
	/* Restore x1 and release the stack slot.  */
	ldr	x1, [sp], #16
	cfi_adjust_cfa_offset (-16)
	RET
	cfi_endproc
	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak

#ifdef SHARED
	/* Handler for dynamic TLS symbols.
	   Prototype:
	   _dl_tlsdesc_dynamic (tlsdesc *) ;

	   The second word of the descriptor points to a
	   tlsdesc_dynamic_arg structure.

	   Returns the offset between the thread pointer and the
	   object referenced by the argument.

	   ptrdiff_t
	   __attribute__ ((__regparm__ (1)))
	   _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
	   {
	     struct tlsdesc_dynamic_arg *td = tdp->arg;
	     dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV);
	     if (__builtin_expect (td->gen_count <= dtv[0].counter
		&& (dtv[td->tlsinfo.ti_module].pointer.val
		    != TLS_DTV_UNALLOCATED),
		1))
	       return dtv[td->tlsinfo.ti_module].pointer.val
		+ td->tlsinfo.ti_offset
		- __thread_pointer;

	     return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
	   }

	   In:   x0 = address of the TLS descriptor.
	   Out:  x0 = address of the object minus the thread pointer.
	   Every other register this code writes (x1-x18, x29, x30 and
	   q0-q31) is saved on entry to the relevant path and restored
	   before returning.

	   Fast-path frame (32+16*2 = 64 bytes):
	     [sp, #0]   x29, x30
	     [sp, #16]  unused
	     [sp, #32]  x1, x2
	     [sp, #48]  x3, x4
	   The slow path pushes a further 16*7 bytes for x5-x18 and then
	   the Q-register save area below that.  */

	.hidden _dl_tlsdesc_dynamic
	.global	_dl_tlsdesc_dynamic
	.type	_dl_tlsdesc_dynamic,%function
	cfi_startproc
	.align 2
_dl_tlsdesc_dynamic:
# define NSAVEXREGPAIRS 2
	stp	x29, x30, [sp,#-(32+16*NSAVEXREGPAIRS)]!
	cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
	mov	x29, sp
	DELOUSE (0)

	/* Save just enough registers to support fast path, if we fall
	   into slow path we will save additional registers.  */

	stp	x1,  x2, [sp, #32+16*0]
	stp	x3,  x4, [sp, #32+16*1]

	/* x4 = thread pointer; kept live until the final subtraction on
	   the fast path.  */
	mrs	x4, tpidr_el0
	/* The ldar here happens after the load from [x0] at the call site
	   (that is generated by the compiler as part of the TLS access ABI),
	   so it reads the same value (this function is the final value of
	   td->entry) and thus it synchronizes with the release store to
	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
	   from [x0,#PTR_SIZE] here happens after the initialization of td->arg.  */
	ldar	PTR_REG (zr), [x0]
	/* x1 = td (tdp->arg), x0 = dtv.  */
	ldr	PTR_REG (1), [x0,#TLSDESC_ARG]
	ldr	PTR_REG (0), [x4,#TCBHEAD_DTV]
	/* Take the slow path if td->gen_count > dtv[0].counter
	   (unsigned compare).  */
	ldr	PTR_REG (3), [x1,#TLSDESC_GEN_COUNT]
	ldr	PTR_REG (2), [x0,#DTV_COUNTER]
	cmp	PTR_REG (3), PTR_REG (2)
	b.hi	2f
	/* x0 = &dtv[td->tlsinfo.ti_module]; each DTV entry is two
	   pointer-sized words, hence the shift by PTR_LOG_SIZE + 1.  */
	ldr	PTR_REG (2), [x1,#TLSDESC_MODID]
	add	PTR_REG (0), PTR_REG (0), PTR_REG (2), lsl #(PTR_LOG_SIZE + 1)
	ldr	PTR_REG (0), [x0] /* Load val member of DTV entry.  */
	/* Compare at pointer width, matching the load above.  When
	   PTR_REG selects the 32-bit register view the load zero-extends
	   into x0, so a 64-bit compare against TLS_DTV_UNALLOCATED would
	   never match and an unallocated block would be used as if it
	   were valid.  */
	cmp	PTR_REG (0), #TLS_DTV_UNALLOCATED
	b.eq	2f
	/* x0 = val + td->tlsinfo.ti_offset - thread pointer.  */
	ldr	PTR_REG (1), [x1,#TLSDESC_MODOFF]
	add	PTR_REG (0), PTR_REG (0), PTR_REG (1)
	sub	PTR_REG (0), PTR_REG (0), PTR_REG (4)
1:
	/* Common exit, also reached from the slow path with sp back at
	   the base of the fast-path frame and the result in x0.  */
	ldp	 x1,  x2, [sp, #32+16*0]
	ldp	 x3,  x4, [sp, #32+16*1]

	/* Remember the unwind state that describes the live fast-path
	   frame so that it can be reinstated for the slow-path code,
	   which follows the RET textually but runs with this frame still
	   allocated.  */
	cfi_remember_state
	ldp	x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
	cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
# undef NSAVEXREGPAIRS
	RET
2:
	/* Reinstate the CFA offset of the fast-path frame; without this
	   the adjustment made just before the RET above would carry over
	   and the unwind information would be wrong for the whole slow
	   path, including across the call to __tls_get_addr.  */
	cfi_restore_state

	/* This is the slow path. We need to call __tls_get_addr() which
	   means we need to save and restore all the registers that the
	   callee will trash.  */

	/* Save the remaining registers that we must treat as caller save.  */
# define NSAVEXREGPAIRS 7
	stp	 x5,  x6, [sp, #-16*NSAVEXREGPAIRS]!
	cfi_adjust_cfa_offset (16*NSAVEXREGPAIRS)
	stp	 x7,  x8, [sp, #16*1]
	stp	 x9, x10, [sp, #16*2]
	stp	x11, x12, [sp, #16*3]
	stp	x13, x14, [sp, #16*4]
	stp	x15, x16, [sp, #16*5]
	stp	x17, x18, [sp, #16*6]

	SAVE_Q_REGISTERS

	/* x1 still holds td on both branches into this path; pass it as
	   the argument (the C sketch above passes &td->tlsinfo).  */
	mov	x0, x1
	bl	__tls_get_addr

	/* x0 = returned address - thread pointer.  x1 is free to use as
	   scratch because the common exit reloads it from the frame.  */
	mrs	x1, tpidr_el0
	sub	PTR_REG (0), PTR_REG (0), PTR_REG (1)

	RESTORE_Q_REGISTERS

	ldp	 x7,  x8, [sp, #16*1]
	ldp	 x9, x10, [sp, #16*2]
	ldp	x11, x12, [sp, #16*3]
	ldp	x13, x14, [sp, #16*4]
	ldp	x15, x16, [sp, #16*5]
	ldp	x17, x18, [sp, #16*6]
	ldp	 x5,  x6, [sp], #16*NSAVEXREGPAIRS
	cfi_adjust_cfa_offset (-16*NSAVEXREGPAIRS)
	b	1b
	cfi_endproc
	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
# undef NSAVEXREGPAIRS
#endif

	/* This function is a wrapper for a lazy resolver for TLS_DESC
	   RELA relocations.
	   When the actual resolver returns, it will have adjusted the
	   TLS descriptor such that we can call its (updated) entry
	   point for it to return the TP offset of the symbol.

	   In:   x0 = address of the TLS descriptor;
		 x3 = address of a table whose word at offset PTR_SIZE is
		 passed as the second argument to the fixup routine.
	   Out:  x0 = value returned by the descriptor's updated entry.

	   x1 and x4-x18, x29, x30 and q0-q31 are saved and restored here.
	   x2 and x3 are NOT saved by this function; they are reloaded on
	   exit from the 16 bytes immediately above this function's frame,
	   i.e. from a slot that was already on the stack at entry
	   (presumably pushed by the lazy TLSDESC stub that jumps here --
	   confirm against the linker-generated stub).

	   Frame (32+16*9 bytes): x29/x30 at [sp, #0], 16 bytes unused,
	   then x1/x4, x5..x18 in pairs from [sp, #32], and x0 in the
	   last slot at [sp, #32+16*8].  */

	.hidden _dl_tlsdesc_resolve_rela
	.global	_dl_tlsdesc_resolve_rela
	.type	_dl_tlsdesc_resolve_rela,%function
	cfi_startproc
	.align 2
_dl_tlsdesc_resolve_rela:
#define	NSAVEXREGPAIRS 9
	stp	x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]!
	cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
	mov	x29, sp
	stp	 x1,  x4, [sp, #32+16*0]
	stp	 x5,  x6, [sp, #32+16*1]
	stp	 x7,  x8, [sp, #32+16*2]
	stp	 x9, x10, [sp, #32+16*3]
	stp	x11, x12, [sp, #32+16*4]
	stp	x13, x14, [sp, #32+16*5]
	stp	x15, x16, [sp, #32+16*6]
	stp	x17, x18, [sp, #32+16*7]
	/* Keep the descriptor pointer so it can be reloaded after the
	   fixup call.  */
	str	x0,       [sp, #32+16*8]

	SAVE_Q_REGISTERS

	/* Call the fixup with x0 = descriptor (unchanged) and
	   x1 = word at [x3, #PTR_SIZE].  */
	DELOUSE (3)
	ldr	PTR_REG (1), [x3, #PTR_SIZE]
	bl	_dl_tlsdesc_resolve_rela_fixup

	RESTORE_Q_REGISTERS

	/* Reload the descriptor pointer, fetch its first word (the entry
	   point, which the fixup is expected to have replaced) and call
	   it.  A normal call (blr) is used rather than a tail call so
	   that the saved registers can be restored afterwards; the
	   result stays in x0.  */
	ldr	x0, [sp, #32+16*8]
	DELOUSE (0)
	ldr	PTR_REG (1), [x0]
	blr	x1

	ldp	 x1,  x4, [sp, #32+16*0]
	ldp	 x5,  x6, [sp, #32+16*1]
	ldp	 x7,  x8, [sp, #32+16*2]
	ldp	 x9, x10, [sp, #32+16*3]
	ldp	x11, x12, [sp, #32+16*4]
	ldp	x13, x14, [sp, #32+16*5]
	ldp	x15, x16, [sp, #32+16*6]
	ldp	x17, x18, [sp, #32+16*7]
	ldp	x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
	cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
	/* Pop x2/x3 from the 16-byte slot that was on the stack at entry
	   (see the header comment).  */
	ldp	x2, x3, [sp], #16
	cfi_adjust_cfa_offset (-16)
	RET
#undef NSAVEXREGPAIRS
	cfi_endproc
	.size	_dl_tlsdesc_resolve_rela, .-_dl_tlsdesc_resolve_rela

	/* This function is a placeholder for lazy resolving of TLS
	relocations.  Once some thread starts resolving a TLS
	relocation, it sets up the TLS descriptor to use this
	resolver, such that other threads that would attempt to
	resolve it concurrently may skip the call to the original lazy
	resolver and go straight to a condition wait.

	When the actual resolver returns, it will have adjusted the
	TLS descriptor such that we can call its (updated) entry point
	for it to return the TP offset of the symbol.

	In:   x0 = address of the TLS descriptor.
	Out:  x0 = value returned by the descriptor's updated entry.

	x1-x18, x29, x30 and q0-q31 are saved and restored here.

	Frame (32+16*10 bytes): x29/x30 at [sp, #0], 16 bytes unused,
	then x1..x18 in pairs from [sp, #32], and x0 in the last slot
	at [sp, #32+16*9].  */

	.hidden _dl_tlsdesc_resolve_hold
	.global	_dl_tlsdesc_resolve_hold
	.type	_dl_tlsdesc_resolve_hold,%function
	cfi_startproc
	.align 2
_dl_tlsdesc_resolve_hold:
#define	NSAVEXREGPAIRS 10
	/* Label 1 marks the function's own entry address; it is passed
	   to the fixup routine below.  */
1:
	stp	x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]!
	cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
	mov	x29, sp
	stp	 x1,  x2, [sp, #32+16*0]
	stp	 x3,  x4, [sp, #32+16*1]
	stp	 x5,  x6, [sp, #32+16*2]
	stp	 x7,  x8, [sp, #32+16*3]
	stp	 x9, x10, [sp, #32+16*4]
	stp	x11, x12, [sp, #32+16*5]
	stp	x13, x14, [sp, #32+16*6]
	stp	x15, x16, [sp, #32+16*7]
	stp	x17, x18, [sp, #32+16*8]
	/* Keep the descriptor pointer so it can be reloaded after the
	   fixup call.  */
	str	x0,       [sp, #32+16*9]

	SAVE_Q_REGISTERS

	/* Call the fixup with x0 = descriptor (unchanged) and
	   x1 = address of this function.  */
	adr	x1, 1b
	bl	_dl_tlsdesc_resolve_hold_fixup

	RESTORE_Q_REGISTERS

	/* Reload the descriptor pointer, fetch its first word (the entry
	   point) and call it.  A normal call (blr) is used rather than a
	   tail call so that the saved registers can be restored
	   afterwards; the result stays in x0.  */
	ldr	x0, [sp, #32+16*9]
	DELOUSE (0)
	ldr	PTR_REG (1), [x0]
	blr	x1

	ldp	 x1,  x2, [sp, #32+16*0]
	ldp	 x3,  x4, [sp, #32+16*1]
	ldp	 x5,  x6, [sp, #32+16*2]
	ldp	 x7,  x8, [sp, #32+16*3]
	ldp	 x9, x10, [sp, #32+16*4]
	ldp	x11, x12, [sp, #32+16*5]
	ldp	x13, x14, [sp, #32+16*6]
	ldp	x15, x16, [sp, #32+16*7]
	ldp	x17, x18, [sp, #32+16*8]
	ldp	x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
	cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
	RET
	cfi_endproc
	.size	_dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold
#undef NSAVEXREGPAIRS