about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ldso/dynlink.c 121
-rw-r--r-- src/internal/pthread_impl.h 1
-rw-r--r-- src/ldso/aarch64/tlsdesc.s 59
-rw-r--r-- src/ldso/arm/tlsdesc.S 19
-rw-r--r-- src/ldso/i386/tlsdesc.s 8
-rw-r--r-- src/ldso/x86_64/tlsdesc.s 21
-rw-r--r-- src/thread/__tls_get_addr.c 7
-rw-r--r-- src/thread/i386/tls.s 8
-rw-r--r-- src/thread/pthread_create.c 2
9 files changed, 86 insertions, 160 deletions
diff --git a/ldso/dynlink.c b/ldso/dynlink.c
index ec921dfd..9e2adb21 100644
--- a/ldso/dynlink.c
+++ b/ldso/dynlink.c
@@ -17,6 +17,7 @@
 #include <pthread.h>
 #include <ctype.h>
 #include <dlfcn.h>
+#include <semaphore.h>
 #include "pthread_impl.h"
 #include "libc.h"
 #include "dynlink.h"
@@ -1338,48 +1339,6 @@ void __init_tls(size_t *auxv)
 {
 }
 
-hidden void *__tls_get_new(tls_mod_off_t *v)
-{
-	pthread_t self = __pthread_self();
-
-	/* Block signals to make accessing new TLS async-signal-safe */
-	sigset_t set;
-	__block_all_sigs(&set);
-	if (v[0] <= self->dtv[0]) {
-		__restore_sigs(&set);
-		return (void *)(self->dtv[v[0]] + v[1]);
-	}
-
-	/* This is safe without any locks held because, if the caller
-	 * is able to request the Nth entry of the DTV, the DSO list
-	 * must be valid at least that far out and it was synchronized
-	 * at program startup or by an already-completed call to dlopen. */
-	struct dso *p;
-	for (p=head; p->tls_id != v[0]; p=p->next);
-
-	/* Get new DTV space from new DSO */
-	uintptr_t *newdtv = p->new_dtv +
-		(v[0]+1)*a_fetch_add(&p->new_dtv_idx,1);
-	memcpy(newdtv, self->dtv, (self->dtv[0]+1) * sizeof(uintptr_t));
-	newdtv[0] = v[0];
-	self->dtv = self->dtv_copy = newdtv;
-
-	/* Get new TLS memory from all new DSOs up to the requested one */
-	unsigned char *mem;
-	for (p=head; ; p=p->next) {
-		if (!p->tls_id || self->dtv[p->tls_id]) continue;
-		mem = p->new_tls + (p->tls.size + p->tls.align)
-			* a_fetch_add(&p->new_tls_idx,1);
-		mem += ((uintptr_t)p->tls.image - (uintptr_t)mem)
-			& (p->tls.align-1);
-		self->dtv[p->tls_id] = (uintptr_t)mem + DTP_OFFSET;
-		memcpy(mem, p->tls.image, p->tls.len);
-		if (p->tls_id == v[0]) break;
-	}
-	__restore_sigs(&set);
-	return mem + v[1] + DTP_OFFSET;
-}
-
 static void update_tls_size()
 {
 	libc.tls_cnt = tls_cnt;
@@ -1392,6 +1351,82 @@ static void update_tls_size()
 	tls_align);
 }
 
+void __dl_prepare_for_threads(void)
+{
+	/* MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED; result deliberately ignored, install_new_tls falls back to signals if membarrier fails */
+	__syscall(SYS_membarrier, 1<<4, 0);
+}
+
+static sem_t barrier_sem; /* posted once by each thread that runs bcast_barrier */
+static void bcast_barrier(int s) /* SIGSYNCCALL handler used by install_new_tls; s is unused */
+{
+	sem_post(&barrier_sem);
+}
+
+/* Build an enlarged dtv (tls_cnt slots) plus new TLS blocks for every
+ * thread, then install them; dlopen calls this when tls_cnt has grown. */
+static void install_new_tls(void)
+{
+	sigset_t set;
+	pthread_t self = __pthread_self(), td;
+	uintptr_t (*newdtv)[tls_cnt+1] = (void *)tail->new_dtv;
+	struct dso *p;
+	size_t i, j;
+	size_t old_cnt = self->dtv[0];
+
+	__block_app_sigs(&set);
+	__tl_lock();
+	/* Copy existing dtv contents from all existing threads. */
+	for (i=0, td=self; !i || td!=self; i++, td=td->next) {
+		memcpy(newdtv+i, td->dtv, (old_cnt+1)*sizeof(uintptr_t));
+		newdtv[i][0] = tls_cnt;
+	}
+	/* Install new dtls into the enlarged, uninstalled dtv copies.
+	 * Ids above old_cnt are the new ones; the old dtv ends at old_cnt. */
+	for (p=head; ; p=p->next) {
+		if (p->tls_id <= old_cnt) continue;
+		unsigned char *mem = p->new_tls;
+		for (j=0; j<i; j++) {
+			unsigned char *new = mem;
+			new += ((uintptr_t)p->tls.image - (uintptr_t)mem)
+				& (p->tls.align-1);
+			memcpy(new, p->tls.image, p->tls.len);
+			newdtv[j][p->tls_id] = (uintptr_t)new + DTP_OFFSET;
+			mem += p->tls.size + p->tls.align;
+		}
+		if (p->tls_id == tls_cnt) break;
+	}
+
+	/* Broadcast barrier to ensure contents of new dtv is visible
+	 * if the new dtv pointer is. Use SYS_membarrier if it works,
+	 * otherwise emulate with a signal. */
+
+	/* MEMBARRIER_CMD_PRIVATE_EXPEDITED */
+	if (__syscall(SYS_membarrier, 1<<3, 0)) {
+		sem_init(&barrier_sem, 0, 0);
+		struct sigaction sa = {
+			.sa_flags = SA_RESTART,
+			.sa_handler = bcast_barrier
+		};
+		memset(&sa.sa_mask, -1, sizeof sa.sa_mask);
+		__libc_sigaction(SIGSYNCCALL, &sa, 0);
+		for (td=self->next; td!=self; td=td->next)
+			__syscall(SYS_tkill, td->tid, SIGSYNCCALL);
+		for (td=self->next; td!=self; td=td->next)
+			sem_wait(&barrier_sem);
+		sa.sa_handler = SIG_IGN;
+		__libc_sigaction(SIGSYNCCALL, &sa, 0);
+		sem_destroy(&barrier_sem);
+	}
+
+	/* Install new dtv for each thread. */
+	for (j=0, td=self; !j || td!=self; j++, td=td->next)
+		td->dtv = td->dtv_copy = newdtv[j];
+
+	__tl_unlock();
+	__restore_sigs(&set);
+}
+
 /* Stage 1 of the dynamic linker is defined in dlstart.c. It calls the
  * following stage 2 and stage 3 functions via primitive symbolic lookup
  * since it does not have access to their addresses to begin with. */
@@ -1864,6 +1899,8 @@ void *dlopen(const char *file, int mode)
 	redo_lazy_relocs();
 
 	update_tls_size();
+	if (tls_cnt != orig_tls_cnt)
+		install_new_tls();
 	_dl_debug_state();
 	orig_tail = tail;
 end:
diff --git a/src/internal/pthread_impl.h b/src/internal/pthread_impl.h
index d5d969ec..de089967 100644
--- a/src/internal/pthread_impl.h
+++ b/src/internal/pthread_impl.h
@@ -130,6 +130,7 @@ hidden int __init_tp(void *);
 hidden void *__copy_tls(unsigned char *);
 hidden void __reset_tls();
 
+hidden void __dl_prepare_for_threads(void);
 hidden void __dl_thread_cleanup(void);
 hidden void __testcancel();
 hidden void __do_cleanup_push(struct __ptcb *);
diff --git a/src/ldso/aarch64/tlsdesc.s b/src/ldso/aarch64/tlsdesc.s
index 8e4004d7..c91baa45 100644
--- a/src/ldso/aarch64/tlsdesc.s
+++ b/src/ldso/aarch64/tlsdesc.s
@@ -29,67 +29,10 @@ __tlsdesc_dynamic:
 	ldr x0,[x0,#8]        // p
 	ldr x2,[x0]           // p->modidx
 	ldr x3,[x1,#-8]       // dtv
-	ldr x4,[x3]           // dtv[0]
-	cmp x2,x4
-	b.hi 1f
 	ldr x2,[x3,x2,lsl #3] // dtv[p->modidx]
 	ldr x0,[x0,#8]        // p->off
 	add x0,x0,x2
-2:	sub x0,x0,x1
+	sub x0,x0,x1
 	ldp x3,x4,[sp,#16]
 	ldp x1,x2,[sp],#32
 	ret
-
-	// save all registers __tls_get_new may clobber
-	// update sp in two steps because offset must be in [-512,509]
-1:	stp x29,x30,[sp,#-160]!
-	stp x5,x6,[sp,#16]
-	stp x7,x8,[sp,#32]
-	stp x9,x10,[sp,#48]
-	stp x11,x12,[sp,#64]
-	stp x13,x14,[sp,#80]
-	stp x15,x16,[sp,#96]
-	stp x17,x18,[sp,#112]
-	stp q0,q1,[sp,#128]
-	stp q2,q3,[sp,#-480]!
-	stp q4,q5,[sp,#32]
-	stp q6,q7,[sp,#64]
-	stp q8,q9,[sp,#96]
-	stp q10,q11,[sp,#128]
-	stp q12,q13,[sp,#160]
-	stp q14,q15,[sp,#192]
-	stp q16,q17,[sp,#224]
-	stp q18,q19,[sp,#256]
-	stp q20,q21,[sp,#288]
-	stp q22,q23,[sp,#320]
-	stp q24,q25,[sp,#352]
-	stp q26,q27,[sp,#384]
-	stp q28,q29,[sp,#416]
-	stp q30,q31,[sp,#448]
-	bl __tls_get_new
-	mrs x1,tpidr_el0
-	ldp q4,q5,[sp,#32]
-	ldp q6,q7,[sp,#64]
-	ldp q8,q9,[sp,#96]
-	ldp q10,q11,[sp,#128]
-	ldp q12,q13,[sp,#160]
-	ldp q14,q15,[sp,#192]
-	ldp q16,q17,[sp,#224]
-	ldp q18,q19,[sp,#256]
-	ldp q20,q21,[sp,#288]
-	ldp q22,q23,[sp,#320]
-	ldp q24,q25,[sp,#352]
-	ldp q26,q27,[sp,#384]
-	ldp q28,q29,[sp,#416]
-	ldp q30,q31,[sp,#448]
-	ldp q2,q3,[sp],#480
-	ldp x5,x6,[sp,#16]
-	ldp x7,x8,[sp,#32]
-	ldp x9,x10,[sp,#48]
-	ldp x11,x12,[sp,#64]
-	ldp x13,x14,[sp,#80]
-	ldp x15,x16,[sp,#96]
-	ldp x17,x18,[sp,#112]
-	ldp q0,q1,[sp,#128]
-	ldp x29,x30,[sp],#160
-	b 2b
diff --git a/src/ldso/arm/tlsdesc.S b/src/ldso/arm/tlsdesc.S
index 4e67c3e2..455eac1d 100644
--- a/src/ldso/arm/tlsdesc.S
+++ b/src/ldso/arm/tlsdesc.S
@@ -35,13 +35,9 @@ __tlsdesc_dynamic:
 #endif
 #endif
 	ldr r3,[r0,#-4] // r3 = dtv
-	ldr ip,[r3]     // ip = dtv slot count
-	cmp r1,ip
-	bhi 3f
 	ldr ip,[r3,r1,LSL #2]
 	sub r0,ip,r0
 	add r0,r0,r2    // r0 = r3[r1]-r0+r2
-4:
 #if __ARM_ARCH >= 5
 	pop {r2,r3,ip,pc}
 #else
@@ -49,21 +45,6 @@ __tlsdesc_dynamic:
 	bx lr
 #endif
 
-3:
-#if __ARM_PCS_VFP || !__SOFTFP__
-	.fpu vfp
-	vpush {d0-d7}
-#endif
-	push {r0-r3}
-	add r0,sp,#4
-	bl __tls_get_new
-	pop {r1-r3,ip}
-#if __ARM_PCS_VFP || !__SOFTFP__
-	vpop {d0-d7}
-#endif
-	sub r0,r0,r1    // r0 = retval-tp
-	b 4b
-
 #if ((__ARM_ARCH_6K__ || __ARM_ARCH_6KZ__ || __ARM_ARCH_6ZK__) && !__thumb__) \
  || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
 #else
diff --git a/src/ldso/i386/tlsdesc.s b/src/ldso/i386/tlsdesc.s
index 4a553bce..a5c0100c 100644
--- a/src/ldso/i386/tlsdesc.s
+++ b/src/ldso/i386/tlsdesc.s
@@ -17,15 +17,9 @@ __tlsdesc_dynamic:
 	mov %gs:4,%edx
 	push %ecx
 	mov (%eax),%ecx
-	cmp %ecx,(%edx)
-	jc 1f
 	mov 4(%eax),%eax
 	add (%edx,%ecx,4),%eax
-2:	pop %ecx
+	pop %ecx
 	sub %gs:0,%eax
 	pop %edx
 	ret
-1:	push %eax
-	call __tls_get_new
-	pop %ecx
-	jmp 2b
diff --git a/src/ldso/x86_64/tlsdesc.s b/src/ldso/x86_64/tlsdesc.s
index 8238c3eb..0151d15c 100644
--- a/src/ldso/x86_64/tlsdesc.s
+++ b/src/ldso/x86_64/tlsdesc.s
@@ -17,28 +17,9 @@ __tlsdesc_dynamic:
 	mov %fs:8,%rdx
 	push %rcx
 	mov (%rax),%rcx
-	cmp %rcx,(%rdx)
-	jc 1f
 	mov 8(%rax),%rax
 	add (%rdx,%rcx,8),%rax
-2:	pop %rcx
+	pop %rcx
 	sub %fs:0,%rax
 	pop %rdx
 	ret
-1:	push %rdi
-	push %rdi
-	push %rsi
-	push %r8
-	push %r9
-	push %r10
-	push %r11
-	mov %rax,%rdi
-	call __tls_get_new
-	pop %r11
-	pop %r10
-	pop %r9
-	pop %r8
-	pop %rsi
-	pop %rdi
-	pop %rdi
-	jmp 2b
diff --git a/src/thread/__tls_get_addr.c b/src/thread/__tls_get_addr.c
index d7afdabd..19524fe0 100644
--- a/src/thread/__tls_get_addr.c
+++ b/src/thread/__tls_get_addr.c
@@ -1,12 +1,7 @@
-#include <stddef.h>
 #include "pthread_impl.h"
 
 void *__tls_get_addr(tls_mod_off_t *v)
 {
 	pthread_t self = __pthread_self();
-	if (v[0] <= self->dtv[0])
-		return (void *)(self->dtv[v[0]] + v[1]);
-	return __tls_get_new(v);
+	return (void *)(self->dtv[v[0]] + v[1]);
 }
-
-weak_alias(__tls_get_addr, __tls_get_new);
diff --git a/src/thread/i386/tls.s b/src/thread/i386/tls.s
index 76d5d462..6e4c4cb9 100644
--- a/src/thread/i386/tls.s
+++ b/src/thread/i386/tls.s
@@ -4,14 +4,6 @@
 ___tls_get_addr:
 	mov %gs:4,%edx
 	mov (%eax),%ecx
-	cmp %ecx,(%edx)
-	jc 1f
 	mov 4(%eax),%eax
 	add (%edx,%ecx,4),%eax
 	ret
-1:	push %eax
-.weak __tls_get_new
-.hidden __tls_get_new
-	call __tls_get_new
-	pop %edx
-	ret
diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c
index cec82157..0142b347 100644
--- a/src/thread/pthread_create.c
+++ b/src/thread/pthread_create.c
@@ -15,6 +15,7 @@ weak_alias(dummy_0, __release_ptc);
 weak_alias(dummy_0, __pthread_tsd_run_dtors);
 weak_alias(dummy_0, __do_orphaned_stdio_locks);
 weak_alias(dummy_0, __dl_thread_cleanup);
+weak_alias(dummy_0, __dl_prepare_for_threads);
 
 void __tl_lock(void)
 {
@@ -235,6 +236,7 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
 		init_file_lock(__stderr_used);
 		__syscall(SYS_rt_sigprocmask, SIG_UNBLOCK, SIGPT_SET, 0, _NSIG/8);
 		self->tsd = (void **)__pthread_tsd_main;
+		__dl_prepare_for_threads();
 		libc.threaded = 1;
 	}
 	if (attrp && !c11) attr = *attrp;