author     Rich Felker <dalias@aerifal.cx>   2012-10-05 11:51:50 -0400
committer  Rich Felker <dalias@aerifal.cx>   2012-10-05 11:51:50 -0400
commit     dcd60371500a74d489372cac7240674c992c2484 (patch)
tree       ef219cc9f3e27e877aa0de62c6f74f1a4b89595a
parent     642b7593c3b3488d229488a436bab294dcc27ee9 (diff)
support for TLS in dynamic-loaded (dlopen) modules
unlike other implementations, this one reserves memory for new TLS in
all pre-existing threads at dlopen-time, and dlopen will fail with no
resources consumed and no new libraries loaded if memory is not
available. memory is not immediately distributed to running threads;
that would be too complex and too costly. instead, assurances are made
that threads needing the new TLS can obtain it in an async-signal-safe
way from a buffer belonging to the dynamic linker/new module (via an
atomic fetch-and-add based allocator).
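
The allocation scheme the message describes can be pictured as a simple bump
allocator over memory reserved up front at dlopen time. The sketch below
illustrates the idea with C11 atomics; the names (tls_reservation,
reserve_tls, claim_tls) are hypothetical and not musl's own interfaces: the
real implementation uses the internal a_fetch_add primitive and the per-DSO
new_tls/new_dtv fields added in the dynlink.c hunks below.

/* Sketch only: reserve one TLS block per pre-existing thread at dlopen
 * time, then let each thread claim its block lazily with a single atomic
 * fetch-and-add, which is lock-free and async-signal-safe. */
#include <stdatomic.h>
#include <stdlib.h>
#include <string.h>

struct tls_reservation {
	unsigned char *base;   /* n_threads * block_size bytes, reserved at dlopen */
	size_t block_size;     /* per-thread size of the new module's TLS block */
	const void *image;     /* the module's TLS initialization image */
	size_t image_len;
	atomic_int next;       /* index of the next unclaimed block */
};

/* Reserve space for all currently existing threads. On failure nothing
 * has been published yet, so dlopen can back out with no resources
 * consumed and no new libraries loaded. */
int reserve_tls(struct tls_reservation *r, size_t n_threads,
                size_t block_size, const void *image, size_t image_len)
{
	r->base = calloc(n_threads, block_size);
	if (!r->base) return -1;
	r->block_size = block_size;
	r->image = image;
	r->image_len = image_len;
	atomic_init(&r->next, 0);
	return 0;
}

/* Called the first time a pre-existing thread touches the new TLS:
 * one atomic fetch-and-add plus a memcpy into already-reserved memory,
 * so it is safe even from a signal handler. */
void *claim_tls(struct tls_reservation *r)
{
	int idx = atomic_fetch_add(&r->next, 1);
	unsigned char *mem = r->base + (size_t)idx * r->block_size;
	memcpy(mem, r->image, r->image_len);
	return mem;
}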

I've re-appropriated the lock that was previously used for __synccall
(synchronizing set*id() syscalls between threads) as a general
pthread_create lock. it's a "backwards" rwlock where the "read"
operation is safe atomic modification of the live thread count, which
multiple threads can perform at the same time, and the "write"
operation is making sure the count does not increase during an
operation that depends on it remaining bounded (__synccall or dlopen).
in static-linked programs that don't use __synccall, this lock is a
no-op and has no cost.
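
The usage pattern of that lock, sketched below with a plain POSIX rwlock and
illustrative function names, is: thread creation takes the "read" side so the
live thread count may grow concurrently, while dlopen and __synccall take the
"write" side so the count cannot increase while they size per-thread
reservations or signal every thread. The real wrappers are __acquire_ptc,
__release_ptc and __inhibit_ptc in the lock_ptc.c file added below;
thread_count here stands in for libc.threads_minus_1.

#include <pthread.h>
#include <stdatomic.h>

static pthread_rwlock_t ptc_lock = PTHREAD_RWLOCK_INITIALIZER;
static atomic_int thread_count;

/* "read" side: many threads may create new threads at once, and the
 * count is free to change while the lock is held shared. */
void create_thread_sketch(void)
{
	pthread_rwlock_rdlock(&ptc_lock);    /* cf. __acquire_ptc() */
	atomic_fetch_add(&thread_count, 1);
	/* ... clone/start the new thread here ... */
	pthread_rwlock_unlock(&ptc_lock);    /* cf. __release_ptc() */
}

/* "write" side: dlopen and __synccall need the count not to increase
 * while they depend on it remaining bounded. */
void bounded_count_operation_sketch(void)
{
	pthread_rwlock_wrlock(&ptc_lock);    /* cf. __inhibit_ptc() */
	/* size per-thread TLS reservations, or signal every thread,
	 * against a count that cannot grow underneath us */
	pthread_rwlock_unlock(&ptc_lock);
}

In the patch, pthread_create.c defines __acquire_ptc and __release_ptc as
weak no-op aliases, so the real lock in lock_ptc.c is only linked into a
static program when something references __inhibit_ptc, such as __synccall;
that is what makes the lock cost-free otherwise.
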
-rw-r--r--  src/env/__init_tls.c           4
-rw-r--r--  src/internal/libc.h            2
-rw-r--r--  src/internal/pthread_impl.h    5
-rw-r--r--  src/ldso/dynlink.c           100
-rw-r--r--  src/thread/lock_ptc.c         18
-rw-r--r--  src/thread/pthread_create.c   18
-rw-r--r--  src/thread/synccall.c         15
7 files changed, 115 insertions, 47 deletions
diff --git a/src/env/__init_tls.c b/src/env/__init_tls.c
index aff388bd..b19bdb64 100644
--- a/src/env/__init_tls.c
+++ b/src/env/__init_tls.c
@@ -9,7 +9,7 @@
 static void *image;
 static size_t len, size, align;
 
-void *__copy_tls(unsigned char *mem, size_t cnt)
+void *__copy_tls(unsigned char *mem)
 {
 	mem += -size & (4*sizeof(size_t)-1);
 	mem += ((uintptr_t)image - (uintptr_t)mem) & (align-1);
@@ -64,7 +64,7 @@ void __init_tls(size_t *auxv)
 		MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
 	if (mem == MAP_FAILED) a_crash();
 
-	if (!__install_initial_tls(__copy_tls(mem, 0))) a_crash();
+	if (!__install_initial_tls(__copy_tls(mem))) a_crash();
 }
 #else
 void __init_tls(size_t *auxv) { }
diff --git a/src/internal/libc.h b/src/internal/libc.h
index ca06f319..f88d846d 100644
--- a/src/internal/libc.h
+++ b/src/internal/libc.h
@@ -16,7 +16,7 @@ struct __libc {
 	int canceldisable;
 	FILE *ofl_head;
 	int ofl_lock[2];
-	volatile size_t tls_size, tls_cnt;
+	size_t tls_size;
 };
 
 extern size_t __hwcap;
diff --git a/src/internal/pthread_impl.h b/src/internal/pthread_impl.h
index f7facba3..0f10cc48 100644
--- a/src/internal/pthread_impl.h
+++ b/src/internal/pthread_impl.h
@@ -111,8 +111,9 @@ int __timedwait(volatile int *, int, clockid_t, const struct timespec *, void (*
 void __wait(volatile int *, volatile int *, int, int);
 void __wake(volatile int *, int, int);
 
-void __synccall_lock();
-void __synccall_unlock();
+void __acquire_ptc();
+void __release_ptc();
+void __inhibit_ptc();
 
 #define DEFAULT_STACK_SIZE 81920
 #define DEFAULT_GUARD_SIZE PAGE_SIZE
diff --git a/src/ldso/dynlink.c b/src/ldso/dynlink.c
index 2bbd492a..f2bcd0f3 100644
--- a/src/ldso/dynlink.c
+++ b/src/ldso/dynlink.c
@@ -69,6 +69,9 @@ struct dso {
 	struct dso **deps;
 	void *tls_image;
 	size_t tls_len, tls_size, tls_align, tls_id, tls_offset;
+	void **new_dtv;
+	unsigned char *new_tls;
+	int new_dtv_idx, new_tls_idx;
 	char *shortname;
 	char buf[];
 };
@@ -420,6 +423,8 @@ static struct dso *load_library(const char *name)
 	struct dso *p, temp_dso = {0};
 	int fd;
 	struct stat st;
+	size_t alloc_size;
+	int n_th = 0;
 
 	/* Catch and block attempts to reload the implementation itself */
 	if (name[0]=='l' && name[1]=='i' && name[2]=='b') {
@@ -487,18 +492,27 @@ static struct dso *load_library(const char *name)
 	map = map_library(fd, &temp_dso);
 	close(fd);
 	if (!map) return 0;
-	p = malloc(sizeof *p + strlen(pathname) + 1);
+
+	/* Allocate storage for the new DSO. When there is TLS, this
+	 * storage must include a reservation for all pre-existing
+	 * threads to obtain copies of both the new TLS, and an
+	 * extended DTV capable of storing an additional slot for
+	 * the newly-loaded DSO. */
+	alloc_size = sizeof *p + strlen(pathname) + 1;
+	if (runtime && temp_dso.tls_image) {
+		size_t per_th = temp_dso.tls_size + temp_dso.tls_align
+			+ sizeof(void *) * (tls_cnt+3);
+		n_th = __libc.threads_minus_1 + 1;
+		if (n_th > SSIZE_MAX / per_th) alloc_size = SIZE_MAX;
+		else alloc_size += n_th * per_th;
+	}
+	p = calloc(1, alloc_size);
 	if (!p) {
 		munmap(map, map_len);
 		return 0;
 	}
 	memcpy(p, &temp_dso, sizeof temp_dso);
 	decode_dyn(p);
-	if (p->tls_image) {
-		p->tls_id = ++tls_cnt;
-		tls_size += p->tls_size + p->tls_align + 8*sizeof(size_t) - 1
-			& -4*sizeof(size_t);
-	}
 	p->dev = st.st_dev;
 	p->ino = st.st_ino;
 	p->refcnt = 1;
@@ -506,6 +520,14 @@ static struct dso *load_library(const char *name)
 	strcpy(p->name, pathname);
 	/* Add a shortname only if name arg was not an explicit pathname. */
 	if (pathname != name) p->shortname = strrchr(p->name, '/')+1;
+	if (p->tls_image) {
+		p->tls_id = ++tls_cnt;
+		tls_size += p->tls_size + p->tls_align + 8*sizeof(size_t) - 1
+			& -4*sizeof(size_t);
+		p->new_dtv = (void *)(-sizeof(size_t) &
+			(uintptr_t)(p->name+strlen(p->name)+sizeof(size_t)));
+		p->new_tls = (void *)(p->new_dtv + n_th*(tls_cnt+1));
+	}
 
 	tail->next = p;
 	p->prev = tail;
@@ -637,14 +659,14 @@ void _dl_debug_state(void)
 {
 }
 
-void *__copy_tls(unsigned char *mem, size_t cnt)
+void *__copy_tls(unsigned char *mem)
 {
 	struct dso *p;
 	void **dtv = (void *)mem;
-	dtv[0] = (void *)cnt;
-	mem = (void *)(dtv + cnt + 1);
+	dtv[0] = (void *)tls_cnt;
+	mem = (void *)(dtv + tls_cnt + 1);
 	for (p=tail; p; p=p->prev) {
-		if (p->tls_id-1 >= cnt) continue;
+		if (!p->tls_id) continue;
 		mem += -p->tls_len & (4*sizeof(size_t)-1);
 		mem += ((uintptr_t)p->tls_image - (uintptr_t)mem)
 			& (p->tls_align-1);
@@ -656,14 +678,46 @@ void *__copy_tls(unsigned char *mem, size_t cnt)
 	return mem;
 }
 
-void *__tls_get_addr(size_t *p)
+void *__tls_get_addr(size_t *v)
 {
 	pthread_t self = __pthread_self();
-	if ((size_t)self->dtv[0] < p[0]) {
-		// FIXME: obtain new DTV and TLS from the DSO
-		a_crash();
+	if (self->dtv && v[0]<=(size_t)self->dtv[0] && self->dtv[v[0]])
+		return (char *)self->dtv[v[0]]+v[1];
+
+	/* Block signals to make accessing new TLS async-signal-safe */
+	sigset_t set;
+	sigfillset(&set);
+	pthread_sigmask(SIG_BLOCK, &set, &set);
+	if (self->dtv && v[0]<=(size_t)self->dtv[0] && self->dtv[v[0]]) {
+		pthread_sigmask(SIG_SETMASK, &set, 0);
+		return (char *)self->dtv[v[0]]+v[1];
+	}
+
+	/* This is safe without any locks held because, if the caller
+	 * is able to request the Nth entry of the DTV, the DSO list
+	 * must be valid at least that far out and it was synchronized
+	 * at program startup or by an already-completed call to dlopen. */
+	struct dso *p;
+	for (p=head; p->tls_id != v[0]; p=p->next);
+
+	/* Get new DTV space from new DSO if needed */
+	if (!self->dtv || v[0] > (size_t)self->dtv[0]) {
+		void **newdtv = p->new_dtv +
+			(v[0]+1)*sizeof(void *)*a_fetch_add(&p->new_dtv_idx,1);
+		if (self->dtv) memcpy(newdtv, self->dtv,
+			((size_t)self->dtv[0]+1) * sizeof(void *));
+		newdtv[0] = (void *)v[0];
+		self->dtv = newdtv;
 	}
-	return (char *)self->dtv[p[0]] + p[1];
+
+	/* Get new TLS memory from new DSO */
+	unsigned char *mem = p->new_tls +
+		(p->tls_size + p->tls_align) * a_fetch_add(&p->new_tls_idx,1);
+	mem += ((uintptr_t)p->tls_image - (uintptr_t)mem) & (p->tls_align-1);
+	self->dtv[v[0]] = mem;
+	memcpy(mem, p->tls_image, p->tls_size);
+	pthread_sigmask(SIG_SETMASK, &set, 0);
+	return mem + v[1];
 }
 
 void *__dynlink(int argc, char **argv)
@@ -830,13 +884,12 @@ void *__dynlink(int argc, char **argv)
 	 * to copy the TLS images again in case they had relocs. */
 	tls_size += sizeof(struct pthread) + 4*sizeof(size_t);
 	__libc.tls_size = tls_size;
-	__libc.tls_cnt = tls_cnt;
 	if (tls_cnt) {
 		struct dso *p;
 		void *mem = mmap(0, __libc.tls_size, PROT_READ|PROT_WRITE,
 			MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
 		if (mem==MAP_FAILED ||
-		    !__install_initial_tls(__copy_tls(mem, tls_cnt))) {
+		    !__install_initial_tls(__copy_tls(mem))) {
 			dprintf(2, "%s: Error getting %zu bytes thread-local storage: %m\n",
 				argv[0], tls_size);
 			_exit(127);
@@ -853,7 +906,7 @@ void *__dynlink(int argc, char **argv)
 
 	/* The initial DTV is located at the base of the memory
 	 * allocated for TLS. Repeat copying TLS to pick up relocs. */
-	if (tls_cnt) __copy_tls((void *)__pthread_self()->dtv, tls_cnt);
+	if (tls_cnt) __copy_tls((void *)__pthread_self()->dtv);
 
 	if (ldso_fail) _exit(127);
 	if (ldd_mode) _exit(0);
@@ -887,6 +940,7 @@ void *__dynlink(int argc, char **argv)
 void *dlopen(const char *file, int mode)
 {
 	struct dso *volatile p, *orig_tail, *next;
+	size_t orig_tls_cnt;
 	size_t i;
 	int cs;
 
@@ -894,12 +948,15 @@ void *dlopen(const char *file, int mode)
 
 	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
 	pthread_rwlock_wrlock(&lock);
+	__inhibit_ptc();
 
+	p = 0;
+	orig_tls_cnt = tls_cnt;
 	orig_tail = tail;
 
 	if (setjmp(rtld_fail)) {
 		/* Clean up anything new that was (partially) loaded */
-		if (p->deps) for (i=0; p->deps[i]; i++)
+		if (p && p->deps) for (i=0; p->deps[i]; i++)
 			if (p->deps[i]->global < 0)
 				p->deps[i]->global = 0;
 		for (p=orig_tail->next; p; p=next) {
@@ -908,6 +965,8 @@ void *dlopen(const char *file, int mode)
 			free(p->deps);
 			free(p);
 		}
+		tls_cnt = orig_tls_cnt;
+		tls_size = __libc.tls_size;
 		tail = orig_tail;
 		tail->next = 0;
 		p = 0;
@@ -942,12 +1001,15 @@ void *dlopen(const char *file, int mode)
 		p->global = 1;
 	}
 
+	__libc.tls_size = tls_size;
+
 	if (ssp_used) __init_ssp(auxv);
 
 	_dl_debug_state();
 
 	do_init_fini(tail);
 end:
+	__release_ptc();
 	pthread_rwlock_unlock(&lock);
 	pthread_setcancelstate(cs, 0);
 	return p;
diff --git a/src/thread/lock_ptc.c b/src/thread/lock_ptc.c
new file mode 100644
index 00000000..7adedab7
--- /dev/null
+++ b/src/thread/lock_ptc.c
@@ -0,0 +1,18 @@
+#include <pthread.h>
+
+static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;
+
+void __inhibit_ptc()
+{
+	pthread_rwlock_wrlock(&lock);
+}
+
+void __acquire_ptc()
+{
+	pthread_rwlock_rdlock(&lock);
+}
+
+void __release_ptc()
+{
+	pthread_rwlock_unlock(&lock);
+}
diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c
index f53fc1ba..92ce9ffb 100644
--- a/src/thread/pthread_create.c
+++ b/src/thread/pthread_create.c
@@ -4,8 +4,8 @@
 static void dummy_0()
 {
 }
-weak_alias(dummy_0, __synccall_lock);
-weak_alias(dummy_0, __synccall_unlock);
+weak_alias(dummy_0, __acquire_ptc);
+weak_alias(dummy_0, __release_ptc);
 weak_alias(dummy_0, __pthread_tsd_run_dtors);
 
 _Noreturn void pthread_exit(void *result)
@@ -84,7 +84,7 @@ static void init_file_lock(FILE *f)
 	if (f && f->lock<0) f->lock = 0;
 }
 
-void *__copy_tls(unsigned char *, size_t);
+void *__copy_tls(unsigned char *);
 
 int pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict attr, void *(*entry)(void *), void *restrict arg)
 {
@@ -94,8 +94,6 @@ int pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict attr,
 	struct pthread *self = pthread_self(), *new;
 	unsigned char *map, *stack, *tsd;
 	unsigned flags = 0x7d8f00;
-	size_t tls_cnt = libc.tls_cnt;
-	size_t tls_size = libc.tls_size;
 
 	if (!self) return ENOSYS;
 	if (!libc.threaded) {
@@ -107,6 +105,8 @@ int pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict attr,
 		libc.threaded = 1;
 	}
 
+	__acquire_ptc();
+
 	if (attr && attr->_a_stackaddr) {
 		map = 0;
 		tsd = (void *)(attr->_a_stackaddr-__pthread_tsd_size & -16);
@@ -114,7 +114,7 @@ int pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict attr,
 		if (attr) {
 			guard = ROUND(attr->_a_guardsize + DEFAULT_GUARD_SIZE);
 			size = guard + ROUND(attr->_a_stacksize
-				+ DEFAULT_STACK_SIZE + tls_size);
+				+ DEFAULT_STACK_SIZE + libc.tls_size);
 		}
 		size += __pthread_tsd_size;
 		if (guard) {
@@ -130,7 +130,7 @@ int pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict attr,
 		}
 		tsd = map + size - __pthread_tsd_size;
 	}
-	new = __copy_tls(tsd - tls_size, tls_cnt);
+	new = __copy_tls(tsd - libc.tls_size);
 	new->map_base = map;
 	new->map_size = size;
 	new->pid = self->pid;
@@ -147,12 +147,10 @@ int pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict attr,
 	new->canary = self->canary ^ (uintptr_t)&new;
 	stack = (void *)new;
 
-	__synccall_lock();
-
 	a_inc(&libc.threads_minus_1);
 	ret = __clone(start, stack, flags, new, &new->tid, new, &new->tid);
 
-	__synccall_unlock();
+	__release_ptc();
 
 	if (ret < 0) {
 		a_dec(&libc.threads_minus_1);
diff --git a/src/thread/synccall.c b/src/thread/synccall.c
index fd377cb3..2b7eac25 100644
--- a/src/thread/synccall.c
+++ b/src/thread/synccall.c
@@ -9,7 +9,6 @@ static struct chain {
 static void (*callback)(void *), *context;
 static int chainlen;
 static sem_t chainlock, chaindone;
-static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;
 
 static void handler(int sig, siginfo_t *si, void *ctx)
 {
@@ -59,7 +58,7 @@ void __synccall(void (*func)(void *), void *ctx)
 		return;
 	}
 
-	pthread_rwlock_wrlock(&lock);
+	__inhibit_ptc();
 
 	__syscall(SYS_rt_sigprocmask, SIG_BLOCK, SIGALL_SET,
 		&oldmask, __SYSCALL_SSLEN);
@@ -97,15 +96,5 @@ void __synccall(void (*func)(void *), void *ctx)
 	__syscall(SYS_rt_sigprocmask, SIG_SETMASK,
 		&oldmask, 0, __SYSCALL_SSLEN);
 
-	pthread_rwlock_unlock(&lock);
-}
-
-void __synccall_lock()
-{
-	pthread_rwlock_rdlock(&lock);
-}
-
-void __synccall_unlock()
-{
-	pthread_rwlock_unlock(&lock);
+	__release_ptc();
 }