about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/env/__init_tls.c | 4
-rw-r--r-- src/internal/libc.h | 2
-rw-r--r-- src/internal/pthread_impl.h | 5
-rw-r--r-- src/ldso/dynlink.c | 100
-rw-r--r-- src/thread/lock_ptc.c | 18
-rw-r--r-- src/thread/pthread_create.c | 18
-rw-r--r-- src/thread/synccall.c | 15
7 files changed, 115 insertions, 47 deletions
diff --git a/src/env/__init_tls.c b/src/env/__init_tls.c
index aff388bd..b19bdb64 100644
--- a/src/env/__init_tls.c
+++ b/src/env/__init_tls.c
@@ -9,7 +9,7 @@
 static void *image;
 static size_t len, size, align;
 
-void *__copy_tls(unsigned char *mem, size_t cnt)
+void *__copy_tls(unsigned char *mem)
 {
 	mem += -size & (4*sizeof(size_t)-1);
 	mem += ((uintptr_t)image - (uintptr_t)mem) & (align-1);
@@ -64,7 +64,7 @@ void __init_tls(size_t *auxv)
 		MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
 	if (mem == MAP_FAILED) a_crash();
 
-	if (!__install_initial_tls(__copy_tls(mem, 0))) a_crash();
+	if (!__install_initial_tls(__copy_tls(mem))) a_crash();
 }
 #else
 void __init_tls(size_t *auxv) { }
diff --git a/src/internal/libc.h b/src/internal/libc.h
index ca06f319..f88d846d 100644
--- a/src/internal/libc.h
+++ b/src/internal/libc.h
@@ -16,7 +16,7 @@ struct __libc {
 	int canceldisable;
 	FILE *ofl_head;
 	int ofl_lock[2];
-	volatile size_t tls_size, tls_cnt;
+	size_t tls_size;
 };
 
 extern size_t __hwcap;
diff --git a/src/internal/pthread_impl.h b/src/internal/pthread_impl.h
index f7facba3..0f10cc48 100644
--- a/src/internal/pthread_impl.h
+++ b/src/internal/pthread_impl.h
@@ -111,8 +111,9 @@ int __timedwait(volatile int *, int, clockid_t, const struct timespec *, void (*
 void __wait(volatile int *, volatile int *, int, int);
 void __wake(volatile int *, int, int);
 
-void __synccall_lock();
-void __synccall_unlock();
+void __acquire_ptc();
+void __release_ptc();
+void __inhibit_ptc();
 
 #define DEFAULT_STACK_SIZE 81920
 #define DEFAULT_GUARD_SIZE PAGE_SIZE
diff --git a/src/ldso/dynlink.c b/src/ldso/dynlink.c
index 2bbd492a..f2bcd0f3 100644
--- a/src/ldso/dynlink.c
+++ b/src/ldso/dynlink.c
@@ -69,6 +69,9 @@ struct dso {
 	struct dso **deps;
 	void *tls_image;
 	size_t tls_len, tls_size, tls_align, tls_id, tls_offset;
+	void **new_dtv;
+	unsigned char *new_tls;
+	int new_dtv_idx, new_tls_idx;
 	char *shortname;
 	char buf[];
 };
@@ -420,6 +423,8 @@ static struct dso *load_library(const char *name)
 	struct dso *p, temp_dso = {0};
 	int fd;
 	struct stat st;
+	size_t alloc_size;
+	int n_th = 0;
 
 	/* Catch and block attempts to reload the implementation itself */
 	if (name[0]=='l' && name[1]=='i' && name[2]=='b') {
@@ -487,18 +492,27 @@ static struct dso *load_library(const char *name)
 	map = map_library(fd, &temp_dso);
 	close(fd);
 	if (!map) return 0;
-	p = malloc(sizeof *p + strlen(pathname) + 1);
+
+	/* Allocate storage for the new DSO. When there is TLS, this
+	 * storage must include a reservation for all pre-existing
+	 * threads to obtain copies of both the new TLS, and an
+	 * extended DTV capable of storing an additional slot for
+	 * the newly-loaded DSO. */
+	alloc_size = sizeof *p + strlen(pathname) + 1;
+	if (runtime && temp_dso.tls_image) {
+		size_t per_th = temp_dso.tls_size + temp_dso.tls_align
+			+ sizeof(void *) * (tls_cnt+3);
+		n_th = __libc.threads_minus_1 + 1;
+		if (n_th > SSIZE_MAX / per_th) alloc_size = SIZE_MAX;
+		else alloc_size += n_th * per_th;
+	}
+	p = calloc(1, alloc_size);
 	if (!p) {
 		munmap(map, map_len);
 		return 0;
 	}
 	memcpy(p, &temp_dso, sizeof temp_dso);
 	decode_dyn(p);
-	if (p->tls_image) {
-		p->tls_id = ++tls_cnt;
-		tls_size += p->tls_size + p->tls_align + 8*sizeof(size_t) - 1
-			& -4*sizeof(size_t);
-	}
 	p->dev = st.st_dev;
 	p->ino = st.st_ino;
 	p->refcnt = 1;
@@ -506,6 +520,14 @@ static struct dso *load_library(const char *name)
 	strcpy(p->name, pathname);
 	/* Add a shortname only if name arg was not an explicit pathname. */
 	if (pathname != name) p->shortname = strrchr(p->name, '/')+1;
+	if (p->tls_image) {
+		p->tls_id = ++tls_cnt;
+		tls_size += p->tls_size + p->tls_align + 8*sizeof(size_t) - 1
+			& -4*sizeof(size_t);
+		p->new_dtv = (void *)(-sizeof(size_t) &
+			(uintptr_t)(p->name+strlen(p->name)+sizeof(size_t)));
+		p->new_tls = (void *)(p->new_dtv + n_th*(tls_cnt+1));
+	}
 
 	tail->next = p;
 	p->prev = tail;
@@ -637,14 +659,14 @@ void _dl_debug_state(void)
 {
 }
 
-void *__copy_tls(unsigned char *mem, size_t cnt)
+void *__copy_tls(unsigned char *mem)
 {
 	struct dso *p;
 	void **dtv = (void *)mem;
-	dtv[0] = (void *)cnt;
-	mem = (void *)(dtv + cnt + 1);
+	dtv[0] = (void *)tls_cnt;
+	mem = (void *)(dtv + tls_cnt + 1);
 	for (p=tail; p; p=p->prev) {
-		if (p->tls_id-1 >= cnt) continue;
+		if (!p->tls_id) continue;
 		mem += -p->tls_len & (4*sizeof(size_t)-1);
 		mem += ((uintptr_t)p->tls_image - (uintptr_t)mem)
 			& (p->tls_align-1);
@@ -656,14 +678,46 @@ void *__copy_tls(unsigned char *mem, size_t cnt)
 	return mem;
 }
 
-void *__tls_get_addr(size_t *p)
+/* Return this thread's address for TLS module v[0] at offset v[1], lazily claiming DTV/TLS space reserved in the DSO. */
+void *__tls_get_addr(size_t *v)
 {
 	pthread_t self = __pthread_self();
-	if ((size_t)self->dtv[0] < p[0]) {
-		// FIXME: obtain new DTV and TLS from the DSO
-		a_crash();
+	if (self->dtv && v[0]<=(size_t)self->dtv[0] && self->dtv[v[0]])
+		return (char *)self->dtv[v[0]]+v[1];
+	/* Block signals to make accessing new TLS async-signal-safe */
+	sigset_t set;
+	sigfillset(&set);
+	pthread_sigmask(SIG_BLOCK, &set, &set);
+	if (self->dtv && v[0]<=(size_t)self->dtv[0] && self->dtv[v[0]]) {
+		pthread_sigmask(SIG_SETMASK, &set, 0);
+		return (char *)self->dtv[v[0]]+v[1];
+	}
+
+	/* This is safe without any locks held because, if the caller
+	 * is able to request the Nth entry of the DTV, the DSO list
+	 * must be valid at least that far out and it was synchronized
+	 * at program startup or by an already-completed call to dlopen. */
+	struct dso *p;
+	for (p=head; p->tls_id != v[0]; p=p->next);
+
+	/* Get new DTV space from new DSO if needed */
+	if (!self->dtv || v[0] > (size_t)self->dtv[0]) {
+		void **newdtv = p->new_dtv + /* void ** math: offset counts slots, not bytes */
+			(v[0]+1)*a_fetch_add(&p->new_dtv_idx,1);
+		if (self->dtv) memcpy(newdtv, self->dtv,
+			((size_t)self->dtv[0]+1) * sizeof(void *));
+		newdtv[0] = (void *)v[0];
+		self->dtv = newdtv;
 	}
-	return (char *)self->dtv[p[0]] + p[1];
+
+	/* Get new TLS memory from new DSO */
+	unsigned char *mem = p->new_tls +
+		(p->tls_size + p->tls_align) * a_fetch_add(&p->new_tls_idx,1);
+	mem += ((uintptr_t)p->tls_image - (uintptr_t)mem) & (p->tls_align-1);
+	self->dtv[v[0]] = mem;
+	memcpy(mem, p->tls_image, p->tls_len); /* copy image only; tail stays zero from calloc */
+	pthread_sigmask(SIG_SETMASK, &set, 0);
+	return mem + v[1];
 }
 
 void *__dynlink(int argc, char **argv)
@@ -830,13 +884,12 @@ void *__dynlink(int argc, char **argv)
 	 * to copy the TLS images again in case they had relocs. */
 	tls_size += sizeof(struct pthread) + 4*sizeof(size_t);
 	__libc.tls_size = tls_size;
-	__libc.tls_cnt = tls_cnt;
 	if (tls_cnt) {
 		struct dso *p;
 		void *mem = mmap(0, __libc.tls_size, PROT_READ|PROT_WRITE,
 			MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
 		if (mem==MAP_FAILED ||
-		    !__install_initial_tls(__copy_tls(mem, tls_cnt))) {
+		    !__install_initial_tls(__copy_tls(mem))) {
 			dprintf(2, "%s: Error getting %zu bytes thread-local storage: %m\n",
 				argv[0], tls_size);
 			_exit(127);
@@ -853,7 +906,7 @@ void *__dynlink(int argc, char **argv)
 
 	/* The initial DTV is located at the base of the memory
 	 * allocated for TLS. Repeat copying TLS to pick up relocs. */
-	if (tls_cnt) __copy_tls((void *)__pthread_self()->dtv, tls_cnt);
+	if (tls_cnt) __copy_tls((void *)__pthread_self()->dtv);
 
 	if (ldso_fail) _exit(127);
 	if (ldd_mode) _exit(0);
@@ -887,6 +940,7 @@ void *__dynlink(int argc, char **argv)
 void *dlopen(const char *file, int mode)
 {
 	struct dso *volatile p, *orig_tail, *next;
+	size_t orig_tls_cnt;
 	size_t i;
 	int cs;
 
@@ -894,12 +948,15 @@ void *dlopen(const char *file, int mode)
 
 	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
 	pthread_rwlock_wrlock(&lock);
+	__inhibit_ptc();
 
+	p = 0;
+	orig_tls_cnt = tls_cnt;
 	orig_tail = tail;
 
 	if (setjmp(rtld_fail)) {
 		/* Clean up anything new that was (partially) loaded */
-		if (p->deps) for (i=0; p->deps[i]; i++)
+		if (p && p->deps) for (i=0; p->deps[i]; i++)
 			if (p->deps[i]->global < 0)
 				p->deps[i]->global = 0;
 		for (p=orig_tail->next; p; p=next) {
@@ -908,6 +965,8 @@ void *dlopen(const char *file, int mode)
 			free(p->deps);
 			free(p);
 		}
+		tls_cnt = orig_tls_cnt;
+		tls_size = __libc.tls_size;
 		tail = orig_tail;
 		tail->next = 0;
 		p = 0;
@@ -942,12 +1001,15 @@ void *dlopen(const char *file, int mode)
 		p->global = 1;
 	}
 
+	__libc.tls_size = tls_size;
+
 	if (ssp_used) __init_ssp(auxv);
 
 	_dl_debug_state();
 
 	do_init_fini(tail);
 end:
+	__release_ptc();
 	pthread_rwlock_unlock(&lock);
 	pthread_setcancelstate(cs, 0);
 	return p;
diff --git a/src/thread/lock_ptc.c b/src/thread/lock_ptc.c
new file mode 100644
index 00000000..7adedab7
--- /dev/null
+++ b/src/thread/lock_ptc.c
@@ -0,0 +1,18 @@
+#include <pthread.h>
+
+static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;
+
+void __inhibit_ptc()
+{
+	pthread_rwlock_wrlock(&lock);
+}
+
+void __acquire_ptc()
+{
+	pthread_rwlock_rdlock(&lock);
+}
+
+void __release_ptc()
+{
+	pthread_rwlock_unlock(&lock);
+}
diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c
index f53fc1ba..92ce9ffb 100644
--- a/src/thread/pthread_create.c
+++ b/src/thread/pthread_create.c
@@ -4,8 +4,8 @@
 static void dummy_0()
 {
 }
-weak_alias(dummy_0, __synccall_lock);
-weak_alias(dummy_0, __synccall_unlock);
+weak_alias(dummy_0, __acquire_ptc);
+weak_alias(dummy_0, __release_ptc);
 weak_alias(dummy_0, __pthread_tsd_run_dtors);
 
 _Noreturn void pthread_exit(void *result)
@@ -84,7 +84,7 @@ static void init_file_lock(FILE *f)
 	if (f && f->lock<0) f->lock = 0;
 }
 
-void *__copy_tls(unsigned char *, size_t);
+void *__copy_tls(unsigned char *);
 
 int pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict attr, void *(*entry)(void *), void *restrict arg)
 {
@@ -94,8 +94,6 @@ int pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict attr,
 	struct pthread *self = pthread_self(), *new;
 	unsigned char *map, *stack, *tsd;
 	unsigned flags = 0x7d8f00;
-	size_t tls_cnt = libc.tls_cnt;
-	size_t tls_size = libc.tls_size;
 
 	if (!self) return ENOSYS;
 	if (!libc.threaded) {
@@ -107,6 +105,8 @@ int pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict attr,
 		libc.threaded = 1;
 	}
 
+	__acquire_ptc();
+
 	if (attr && attr->_a_stackaddr) {
 		map = 0;
 		tsd = (void *)(attr->_a_stackaddr-__pthread_tsd_size & -16);
@@ -114,7 +114,7 @@ int pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict attr,
 		if (attr) {
 			guard = ROUND(attr->_a_guardsize + DEFAULT_GUARD_SIZE);
 			size = guard + ROUND(attr->_a_stacksize
-				+ DEFAULT_STACK_SIZE + tls_size);
+				+ DEFAULT_STACK_SIZE + libc.tls_size);
 		}
 		size += __pthread_tsd_size;
 		if (guard) {
@@ -130,7 +130,7 @@ int pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict attr,
 		}
 		tsd = map + size - __pthread_tsd_size;
 	}
-	new = __copy_tls(tsd - tls_size, tls_cnt);
+	new = __copy_tls(tsd - libc.tls_size);
 	new->map_base = map;
 	new->map_size = size;
 	new->pid = self->pid;
@@ -147,12 +147,10 @@ int pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict attr,
 	new->canary = self->canary ^ (uintptr_t)&new;
 	stack = (void *)new;
 
-	__synccall_lock();
-
 	a_inc(&libc.threads_minus_1);
 	ret = __clone(start, stack, flags, new, &new->tid, new, &new->tid);
 
-	__synccall_unlock();
+	__release_ptc();
 
 	if (ret < 0) {
 		a_dec(&libc.threads_minus_1);
diff --git a/src/thread/synccall.c b/src/thread/synccall.c
index fd377cb3..2b7eac25 100644
--- a/src/thread/synccall.c
+++ b/src/thread/synccall.c
@@ -9,7 +9,6 @@ static struct chain {
 static void (*callback)(void *), *context;
 static int chainlen;
 static sem_t chainlock, chaindone;
-static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;
 
 static void handler(int sig, siginfo_t *si, void *ctx)
 {
@@ -59,7 +58,7 @@ void __synccall(void (*func)(void *), void *ctx)
 		return;
 	}
 
-	pthread_rwlock_wrlock(&lock);
+	__inhibit_ptc();
 
 	__syscall(SYS_rt_sigprocmask, SIG_BLOCK, SIGALL_SET,
 		&oldmask, __SYSCALL_SSLEN);
@@ -97,15 +96,5 @@ void __synccall(void (*func)(void *), void *ctx)
 	__syscall(SYS_rt_sigprocmask, SIG_SETMASK,
 		&oldmask, 0, __SYSCALL_SSLEN);
 
-	pthread_rwlock_unlock(&lock);
-}
-
-void __synccall_lock()
-{
-	pthread_rwlock_rdlock(&lock);
-}
-
-void __synccall_unlock()
-{
-	pthread_rwlock_unlock(&lock);
+	__release_ptc();
 }