summary refs log tree commit diff
path: root/sysdeps/x86_64/dl-tlsdesc.S
blob: 3a4adec6c54a444221f7119d5c847cbf7426484e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
/* Thread-local storage handling in the ELF dynamic linker.  x86_64 version.
   Copyright (C) 2004, 2005, 2008 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include <tls.h>
#include "tlsdesc.h"

	.text

     /* This function is used to compute the TP offset for symbols in
	Static TLS, i.e., whose TP offset is the same for all
	threads.

	The incoming %rax points to the TLS descriptor, such that
	0(%rax) points to _dl_tlsdesc_return itself, and 8(%rax) holds
	the TP offset of the symbol corresponding to the object
	denoted by the argument.  */

	.hidden _dl_tlsdesc_return
	.global	_dl_tlsdesc_return
	.type	_dl_tlsdesc_return,@function
	cfi_startproc
	.align 16
_dl_tlsdesc_return:
	movq	8(%rax), %rax
	ret
	cfi_endproc
	.size	_dl_tlsdesc_return, .-_dl_tlsdesc_return

     /* This function is used for undefined weak TLS symbols, for
	which the base address (i.e., disregarding any addend) should
	resolve to NULL.

	%rax points to the TLS descriptor, such that 0(%rax) points to
	_dl_tlsdesc_undefweak itself, and 8(%rax) holds the addend.
	We return the addend minus the TP, such that, when the caller
	adds TP, it gets the addend back.  If that's zero, as usual,
	that's most likely a NULL pointer.  */

	.hidden _dl_tlsdesc_undefweak
	.global	_dl_tlsdesc_undefweak
	.type	_dl_tlsdesc_undefweak,@function
	cfi_startproc
	.align 16
_dl_tlsdesc_undefweak:
	movq	8(%rax), %rax
	subq	%fs:0, %rax
	ret
	cfi_endproc
	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak

#ifdef SHARED
	.hidden _dl_tlsdesc_dynamic
	.global	_dl_tlsdesc_dynamic
	.type	_dl_tlsdesc_dynamic,@function

     /* %rax points to the TLS descriptor, such that 0(%rax) points to
	_dl_tlsdesc_dynamic itself, and 8(%rax) points to a struct
	tlsdesc_dynamic_arg object.  It must return in %rax the offset
	between the thread pointer and the object denoted by the
	argument, without clobbering any registers.

	The assembly code that follows is a rendition of the following
	C code, hand-optimized a little bit.

ptrdiff_t
_dl_tlsdesc_dynamic (register struct tlsdesc *tdp asm ("%rax"))
{
  struct tlsdesc_dynamic_arg *td = tdp->arg;
  dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET);
  if (__builtin_expect (td->gen_count <= dtv[0].counter
			&& (dtv[td->tlsinfo.ti_module].pointer.val
			    != TLS_DTV_UNALLOCATED),
			1))
    return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
      - __thread_pointer;

  return __tls_get_addr_internal (&td->tlsinfo) - __thread_pointer;
}
*/
	cfi_startproc
	.align 16
_dl_tlsdesc_dynamic:
	/* Preserve call-clobbered registers that we modify.
	   We need two scratch regs anyway.  */
	movq	%rsi, -16(%rsp)
	movq	%fs:DTV_OFFSET, %rsi
	movq	%rdi, -8(%rsp)
	movq	TLSDESC_ARG(%rax), %rdi
	movq	(%rsi), %rax
	cmpq	%rax, TLSDESC_GEN_COUNT(%rdi)
	ja	.Lslow
	movq	TLSDESC_MODID(%rdi), %rax
	salq	$4, %rax
	movq	(%rax,%rsi), %rax
	cmpq	$-1, %rax
	je	.Lslow
	addq	TLSDESC_MODOFF(%rdi), %rax
.Lret:
	movq	-16(%rsp), %rsi
	subq	%fs:0, %rax
	movq	-8(%rsp), %rdi
	ret
.Lslow:
	/* Besides rdi and rsi, saved above, save rdx, rcx, r8, r9,
	   r10 and r11.  Also, align the stack, that's off by 8 bytes.	*/
	subq	$72, %rsp
	cfi_adjust_cfa_offset (72)
	movq	%rdx, 8(%rsp)
	movq	%rcx, 16(%rsp)
	movq	%r8, 24(%rsp)
	movq	%r9, 32(%rsp)
	movq	%r10, 40(%rsp)
	movq	%r11, 48(%rsp)
	/* %rdi already points to the tlsinfo data structure.  */
	call	__tls_get_addr@PLT
	movq	8(%rsp), %rdx
	movq	16(%rsp), %rcx
	movq	24(%rsp), %r8
	movq	32(%rsp), %r9
	movq	40(%rsp), %r10
	movq	48(%rsp), %r11
	addq	$72, %rsp
	cfi_adjust_cfa_offset (-72)
	jmp	.Lret
	cfi_endproc
	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
#endif /* SHARED */

     /* This function is a wrapper for a lazy resolver for TLS_DESC
	RELA relocations.  The incoming 0(%rsp) points to the caller's
	link map, pushed by the dynamic object's internal lazy TLS
	resolver front-end before tail-calling us.  We need to pop it
	ourselves.  %rax points to a TLS descriptor, such that 0(%rax)
	holds the address of the internal resolver front-end (unless
	some other thread beat us to resolving it) and 8(%rax) holds a
	pointer to the relocation.

	When the actual resolver returns, it will have adjusted the
	TLS descriptor such that we can tail-call it for it to return
	the TP offset of the symbol.  */

	.hidden _dl_tlsdesc_resolve_rela
	.global	_dl_tlsdesc_resolve_rela
	.type	_dl_tlsdesc_resolve_rela,@function
	cfi_startproc
	.align 16
	/* The PLT entry will have pushed the link_map pointer.  */
_dl_tlsdesc_resolve_rela:
	cfi_adjust_cfa_offset (8)
	/* Save all call-clobbered registers.  */
	subq	$72, %rsp
	cfi_adjust_cfa_offset (72)
	movq	%rax, (%rsp)
	movq	%rdi, 8(%rsp)
	movq	%rax, %rdi	/* Pass tlsdesc* in %rdi.  */
	movq	%rsi, 16(%rsp)
	movq	72(%rsp), %rsi	/* Pass link_map* in %rsi.  */
	movq	%r8, 24(%rsp)
	movq	%r9, 32(%rsp)
	movq	%r10, 40(%rsp)
	movq	%r11, 48(%rsp)
	movq	%rdx, 56(%rsp)
	movq	%rcx, 64(%rsp)
	call	_dl_tlsdesc_resolve_rela_fixup
	movq	(%rsp), %rax
	movq	8(%rsp), %rdi
	movq	16(%rsp), %rsi
	movq	24(%rsp), %r8
	movq	32(%rsp), %r9
	movq	40(%rsp), %r10
	movq	48(%rsp), %r11
	movq	56(%rsp), %rdx
	movq	64(%rsp), %rcx
	addq	$80, %rsp
	cfi_adjust_cfa_offset (-80)
	jmp	*(%rax)
	cfi_endproc
	.size	_dl_tlsdesc_resolve_rela, .-_dl_tlsdesc_resolve_rela

     /* This function is a placeholder for lazy resolving of TLS
	relocations.  Once some thread starts resolving a TLS
	relocation, it sets up the TLS descriptor to use this
	resolver, such that other threads that would attempt to
	resolve it concurrently may skip the call to the original lazy
	resolver and go straight to a condition wait.

	When the actual resolver returns, it will have adjusted the
	TLS descriptor such that we can tail-call it for it to return
	the TP offset of the symbol.  */

	.hidden _dl_tlsdesc_resolve_hold
	.global	_dl_tlsdesc_resolve_hold
	.type	_dl_tlsdesc_resolve_hold,@function
	cfi_startproc
	.align 16
_dl_tlsdesc_resolve_hold:
0:
	/* Save all call-clobbered registers.  */
	subq	$72, %rsp
	cfi_adjust_cfa_offset (72)
	movq	%rax, (%rsp)
	movq	%rdi, 8(%rsp)
	movq	%rax, %rdi	/* Pass tlsdesc* in %rdi.  */
	movq	%rsi, 16(%rsp)
	/* Pass _dl_tlsdesc_resolve_hold's address in %rsi.  */
	leaq	. - _dl_tlsdesc_resolve_hold(%rip), %rsi
	movq	%r8, 24(%rsp)
	movq	%r9, 32(%rsp)
	movq	%r10, 40(%rsp)
	movq	%r11, 48(%rsp)
	movq	%rdx, 56(%rsp)
	movq	%rcx, 64(%rsp)
	call	_dl_tlsdesc_resolve_hold_fixup
1:
	movq	(%rsp), %rax
	movq	8(%rsp), %rdi
	movq	16(%rsp), %rsi
	movq	24(%rsp), %r8
	movq	32(%rsp), %r9
	movq	40(%rsp), %r10
	movq	48(%rsp), %r11
	movq	56(%rsp), %rdx
	movq	64(%rsp), %rcx
	addq	$72, %rsp
	cfi_adjust_cfa_offset (-72)
	jmp	*(%eax)
	cfi_endproc
	.size	_dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold