about summary refs log tree commit diff
path: root/src/ldso
diff options
context:
space:
mode:
author Rich Felker <dalias@aerifal.cx> 2015-04-13 02:56:26 -0400
committer Rich Felker <dalias@aerifal.cx> 2015-04-13 03:04:42 -0400
commit f3ddd173806fd5c60b3f034528ca24542aecc5b9 (patch)
tree 63cc7432a3c40f011c4818de32ef6257acbf0e73 /src/ldso
parent 385c01112c083eb383d972da45836d497cc0556d (diff)
download musl-f3ddd173806fd5c60b3f034528ca24542aecc5b9.tar.gz
musl-f3ddd173806fd5c60b3f034528ca24542aecc5b9.tar.xz
musl-f3ddd173806fd5c60b3f034528ca24542aecc5b9.zip
dynamic linker bootstrap overhaul
this overhaul further reduces the amount of arch-specific code needed
by the dynamic linker and removes a number of assumptions, including:

- that symbolic function references inside libc are bound at link time
  via the linker option -Bsymbolic-functions.

- that libc functions used by the dynamic linker do not require
  access to data symbols.

- that static/internal function calls and data accesses can be made
  without performing any relocations, or that arch-specific startup
  code handled any such relocations needed.

removing these assumptions paves the way for allowing libc.so itself
to be built with stack protector (among other things), and is achieved
by a three-stage bootstrap process:

1. relative relocations are processed with a flat function.
2. symbolic relocations are processed with no external calls/data.
3. main program and dependency libs are processed with a
   fully-functional libc/ldso.

reduction in arch-specific code is achieved through the following:

- crt_arch.h, used for generating crt1.o, now provides the entry point
  for the dynamic linker too.

- asm is no longer responsible for skipping the beginning of argv[]
  when ldso is invoked as a command.

- the functionality previously provided by __reloc_self for heavily
  GOT-dependent RISC archs is now the arch-agnostic stage-1.

- arch-specific relocation type codes are mapped directly as macros
  rather than via an inline translation function/switch statement.
Diffstat (limited to 'src/ldso')
-rw-r--r-- src/ldso/aarch64/start.s 18
-rw-r--r-- src/ldso/arm/start.s 18
-rw-r--r-- src/ldso/dlstart.c 107
-rw-r--r-- src/ldso/dynlink.c 347
-rw-r--r-- src/ldso/i386/start.s 22
-rw-r--r-- src/ldso/microblaze/start.s 28
-rw-r--r-- src/ldso/mips/start.s 46
-rw-r--r-- src/ldso/or1k/start.s 34
-rw-r--r-- src/ldso/powerpc/start.s 29
-rw-r--r-- src/ldso/sh/start.s 26
-rw-r--r-- src/ldso/start.c 8
-rw-r--r-- src/ldso/x32/start.s 24
-rw-r--r-- src/ldso/x86_64/start.s 16
13 files changed, 281 insertions, 442 deletions
diff --git a/src/ldso/aarch64/start.s b/src/ldso/aarch64/start.s
deleted file mode 100644
index 41d1d1e2..00000000
--- a/src/ldso/aarch64/start.s
+++ /dev/null
@@ -1,18 +0,0 @@
-.global _dlstart
-_dlstart:
-	ldr x0,[sp]
-	add x1,sp,#8
-	bl __dynlink
-	mov x1,sp
-	ldr x2,[x1],#8
-1:	sub x2,x2,1
-	ldr x3,[x1],#8
-	cmn x3,#1
-	b.eq 1b
-	add x2,x2,1
-	str x3,[x1,#-8]!
-	str x2,[x1,#-8]!
-	mov sp,x1
-	mov x1,x0
-	mov x0,#0
-	blr x1
diff --git a/src/ldso/arm/start.s b/src/ldso/arm/start.s
deleted file mode 100644
index 5dd93b55..00000000
--- a/src/ldso/arm/start.s
+++ /dev/null
@@ -1,18 +0,0 @@
-.text
-.global _dlstart
-_dlstart:
-	ldr r0,[sp]
-	add r1,sp,#4
-	bl __dynlink
-	pop {r1}
-1:	sub r1,r1,#1
-	pop {r2}
-	cmp r2,#-1
-	beq 1b
-	add r1,r1,#1
-	push {r1,r2}
-	mov r1,r0
-	mov r0,#0
-	tst r1,#1
-	moveq pc,r1
-	bx r1
diff --git a/src/ldso/dlstart.c b/src/ldso/dlstart.c
new file mode 100644
index 00000000..5bd2a080
--- /dev/null
+++ b/src/ldso/dlstart.c
@@ -0,0 +1,107 @@
+#include <stddef.h>
+#include "dynlink.h"
+
+#ifdef SHARED
+
+#ifndef START
+#define START "_dlstart"
+#endif
+
+#include "crt_arch.h"
+
+void _dlstart_c(size_t *sp, size_t *dynv)
+{
+	size_t i, aux[AUX_CNT], dyn[DYN_CNT];
+
+	int argc = *sp;
+	char **argv = (void *)(sp+1);
+
+	for (i=argc+1; argv[i]; i++);
+	size_t *auxv = (void *)(argv+i+1);
+
+	for (i=0; i<AUX_CNT; i++) aux[i] = 0;
+	for (i=0; auxv[i]; i+=2) if (auxv[i]<AUX_CNT)
+		aux[auxv[i]] = auxv[i+1];
+
+	for (i=0; i<DYN_CNT; i++) dyn[i] = 0;
+	for (i=0; dynv[i]; i+=2) if (dynv[i]<DYN_CNT)
+		dyn[dynv[i]] = dynv[i+1];
+
+	/* If the dynamic linker is invoked as a command, its load
+	 * address is not available in the aux vector. Instead, compute
+	 * the load address as the difference between &_DYNAMIC and the
+	 * virtual address in the PT_DYNAMIC program header. */
+	unsigned char *base = (void *)aux[AT_BASE];
+	if (!base) {
+		size_t phnum = aux[AT_PHNUM];
+		size_t phentsize = aux[AT_PHENT];
+		Phdr *ph = (void *)aux[AT_PHDR];
+		for (i=phnum; i--; ph = (void *)((char *)ph + phentsize)) {
+			if (ph->p_type == PT_DYNAMIC) {
+				base = (void *)((size_t)dynv - ph->p_vaddr);
+				break;
+			}
+		}
+	}
+
+	/* MIPS uses an ugly packed form for GOT relocations. Since we
+	 * can't make function calls yet and the code is tiny anyway,
+	 * it's simply inlined here. */
+	if (NEED_MIPS_GOT_RELOCS) {
+		size_t local_cnt = 0;
+		size_t *got = (void *)(base + dyn[DT_PLTGOT]);
+		for (i=0; dynv[i]; i+=2) if (dynv[i]==DT_MIPS_LOCAL_GOTNO)
+			local_cnt = dynv[i+1];
+		for (i=0; i<local_cnt; i++) got[i] += (size_t)base;
+	}
+
+	/* The use of the reloc_info structure and nested loops is a trick
+	 * to work around the fact that we can't necessarily make function
+	 * calls yet. Each struct in the array serves like the arguments
+	 * to a function call. */
+	struct {
+		void *rel;
+		size_t size;
+		size_t stride;
+	} reloc_info[] = {
+		{ base+dyn[DT_JMPREL], dyn[DT_PLTRELSZ], 2+(dyn[DT_PLTREL]==DT_RELA) },
+		{ base+dyn[DT_REL], dyn[DT_RELSZ], 2 },
+		{ base+dyn[DT_RELA], dyn[DT_RELASZ], 3 },
+		{ 0, 0, 0 }
+	};
+
+	for (i=0; reloc_info[i].stride; i++) {
+		size_t *rel = reloc_info[i].rel;
+		size_t rel_size = reloc_info[i].size;
+		size_t stride = reloc_info[i].stride;
+		for (; rel_size; rel+=stride, rel_size-=stride*sizeof(size_t)) {
+			if (!IS_RELATIVE(rel[1])) continue;
+			size_t *rel_addr = (void *)(base + rel[0]);
+			size_t addend = stride==3 ? rel[2] : *rel_addr;
+			*rel_addr = (size_t)base + addend;
+		}
+	}
+
+	const char *strings = (void *)(base + dyn[DT_STRTAB]);
+	const Sym *syms = (void *)(base + dyn[DT_SYMTAB]);
+
+	/* Call dynamic linker stage-2, __dls2 */
+	for (i=0; ;i++) {
+		const char *s = strings + syms[i].st_name;
+		if (s[0]=='_' && s[1]=='_' && s[2]=='d'
+		 && s[3]=='l' && s[4]=='s' && s[5]=='2' && !s[6])
+			break;
+	}
+	((stage2_func)(base + syms[i].st_value))(base);
+
+	/* Call dynamic linker stage-3, __dls3 */
+	for (i=0; ;i++) {
+		const char *s = strings + syms[i].st_name;
+		if (s[0]=='_' && s[1]=='_' && s[2]=='d'
+		 && s[3]=='l' && s[4]=='s' && s[5]=='3' && !s[6])
+			break;
+	}
+	((stage3_func)(base + syms[i].st_value))(sp);
+}
+
+#endif
diff --git a/src/ldso/dynlink.c b/src/ldso/dynlink.c
index f6ed8011..1008e3ea 100644
--- a/src/ldso/dynlink.c
+++ b/src/ldso/dynlink.c
@@ -19,26 +19,13 @@
 #include <dlfcn.h>
 #include "pthread_impl.h"
 #include "libc.h"
+#include "dynlink.h"
 
 static int errflag;
 static char errbuf[128];
 
 #ifdef SHARED
 
-#if ULONG_MAX == 0xffffffff
-typedef Elf32_Ehdr Ehdr;
-typedef Elf32_Phdr Phdr;
-typedef Elf32_Sym Sym;
-#define R_TYPE(x) ((x)&255)
-#define R_SYM(x) ((x)>>8)
-#else
-typedef Elf64_Ehdr Ehdr;
-typedef Elf64_Phdr Phdr;
-typedef Elf64_Sym Sym;
-#define R_TYPE(x) ((x)&0xffffffff)
-#define R_SYM(x) ((x)>>32)
-#endif
-
 #define MAXP2(a,b) (-(-(a)&-(b)))
 #define ALIGN(x,y) ((x)+(y)-1 & -(y))
 
@@ -88,6 +75,7 @@ struct dso {
 	volatile int new_dtv_idx, new_tls_idx;
 	struct td_index *td_index;
 	struct dso *fini_next;
+	int rel_early_relative, rel_update_got;
 	char *shortname;
 	char buf[];
 };
@@ -97,26 +85,6 @@ struct symdef {
 	struct dso *dso;
 };
 
-enum {
-	REL_ERR,
-	REL_SYMBOLIC,
-	REL_GOT,
-	REL_PLT,
-	REL_RELATIVE,
-	REL_OFFSET,
-	REL_OFFSET32,
-	REL_COPY,
-	REL_SYM_OR_REL,
-	REL_TLS, /* everything past here is TLS */
-	REL_DTPMOD,
-	REL_DTPOFF,
-	REL_TPOFF,
-	REL_TPOFF_NEG,
-	REL_TLSDESC,
-};
-
-#include "reloc.h"
-
 int __init_tp(void *);
 void __init_libc(char **, char *);
 
@@ -129,7 +97,8 @@ static struct builtin_tls {
 } builtin_tls[1];
 #define MIN_TLS_ALIGN offsetof(struct builtin_tls, pt)
 
-static struct dso *head, *tail, *ldso, *fini_head;
+static struct dso ldso;
+static struct dso *head, *tail, *fini_head;
 static char *env_path, *sys_path;
 static unsigned long long gencnt;
 static int runtime;
@@ -145,14 +114,19 @@ static pthread_mutex_t init_fini_lock = { ._m_type = PTHREAD_MUTEX_RECURSIVE };
 
 struct debug *_dl_debug_addr = &debug;
 
-#define AUX_CNT 38
-#define DYN_CNT 34
+static int dl_strcmp(const char *l, const char *r)
+{
+	for (; *l==*r && *l; l++, r++);
+	return *(unsigned char *)l - *(unsigned char *)r;
+}
+#define strcmp(l,r) dl_strcmp(l,r)
 
 static void decode_vec(size_t *v, size_t *a, size_t cnt)
 {
-	memset(a, 0, cnt*sizeof(size_t));
-	for (; v[0]; v+=2) if (v[0]<cnt) {
-		a[0] |= 1ULL<<v[0];
+	size_t i;
+	for (i=0; i<cnt; i++) a[i] = 0;
+	for (; v[0]; v+=2) if (v[0]-1<cnt-1) {
+		a[0] |= 1UL<<v[0];
 		a[v[0]] = v[1];
 	}
 }
@@ -276,8 +250,6 @@ static struct symdef find_sym(struct dso *dso, const char *s, int need_def)
 	return def;
 }
 
-#define NO_INLINE_ADDEND (1<<REL_COPY | 1<<REL_GOT | 1<<REL_PLT)
-
 ptrdiff_t __tlsdesc_static(), __tlsdesc_dynamic();
 
 static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stride)
@@ -288,7 +260,7 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
 	Sym *sym;
 	const char *name;
 	void *ctx;
-	int astype, type;
+	int type;
 	int sym_index;
 	struct symdef def;
 	size_t *reloc_addr;
@@ -297,14 +269,8 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
 	size_t addend;
 
 	for (; rel_size; rel+=stride, rel_size-=stride*sizeof(size_t)) {
-		astype = R_TYPE(rel[1]);
-		if (!astype) continue;
-		type = remap_rel(astype);
-		if (!type) {
-			error("Error relocating %s: unsupported relocation type %d",
-				dso->name, astype);
-			continue;
-		}
+		if (dso->rel_early_relative && IS_RELATIVE(rel[1])) continue;
+		type = R_TYPE(rel[1]);
 		sym_index = R_SYM(rel[1]);
 		reloc_addr = (void *)(base + rel[0]);
 		if (sym_index) {
@@ -324,14 +290,19 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
 			def.dso = dso;
 		}
 
+		int gotplt = (type == REL_GOT || type == REL_PLT);
+		if (dso->rel_update_got && !gotplt) continue;
+
 		addend = stride>2 ? rel[2]
-			: (1<<type & NO_INLINE_ADDEND) ? 0
+			: gotplt || type==REL_COPY ? 0
 			: *reloc_addr;
 
 		sym_val = def.sym ? (size_t)def.dso->base+def.sym->st_value : 0;
 		tls_val = def.sym ? def.sym->st_value : 0;
 
 		switch(type) {
+		case REL_NONE:
+			break;
 		case REL_OFFSET:
 			addend -= (size_t)reloc_addr;
 		case REL_SYMBOLIC:
@@ -395,6 +366,10 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
 #endif
 			}
 			break;
+		default:
+			error("Error relocating %s: unsupported relocation type %d",
+				dso->name, type);
+			continue;
 		}
 	}
 }
@@ -711,22 +686,22 @@ static struct dso *load_library(const char *name, struct dso *needed_by)
 					if (!(reported & mask)) {
 						reported |= mask;
 						dprintf(1, "\t%s => %s (%p)\n",
-							name, ldso->name,
-							ldso->base);
+							name, ldso.name,
+							ldso.base);
 					}
 				}
 				is_self = 1;
 			}
 		}
 	}
-	if (!strcmp(name, ldso->name)) is_self = 1;
+	if (!strcmp(name, ldso.name)) is_self = 1;
 	if (is_self) {
-		if (!ldso->prev) {
-			tail->next = ldso;
-			ldso->prev = tail;
-			tail = ldso->next ? ldso->next : ldso;
+		if (!ldso.prev) {
+			tail->next = &ldso;
+			ldso.prev = tail;
+			tail = ldso.next ? ldso.next : &ldso;
 		}
-		return ldso;
+		return &ldso;
 	}
 	if (strchr(name, '/')) {
 		pathname = name;
@@ -752,13 +727,13 @@ static struct dso *load_library(const char *name, struct dso *needed_by)
 			if (!sys_path) {
 				char *prefix = 0;
 				size_t prefix_len;
-				if (ldso->name[0]=='/') {
+				if (ldso.name[0]=='/') {
 					char *s, *t, *z;
-					for (s=t=z=ldso->name; *s; s++)
+					for (s=t=z=ldso.name; *s; s++)
 						if (*s=='/') z=t, t=s;
-					prefix_len = z-ldso->name;
+					prefix_len = z-ldso.name;
 					if (prefix_len < PATH_MAX)
-						prefix = ldso->name;
+						prefix = ldso.name;
 				}
 				if (!prefix) {
 					prefix = "";
@@ -910,21 +885,40 @@ static void make_global(struct dso *p)
 	for (; p; p=p->next) p->global = 1;
 }
 
+static void do_mips_relocs(struct dso *p, size_t *got)
+{
+	size_t i, j, rel[2];
+	unsigned char *base = p->base;
+	i=0; search_vec(p->dynv, &i, DT_MIPS_LOCAL_GOTNO);
+	if (p->rel_early_relative) {
+		got += i;
+	} else {
+		while (i--) *got++ += (size_t)base;
+	}
+	j=0; search_vec(p->dynv, &j, DT_MIPS_GOTSYM);
+	i=0; search_vec(p->dynv, &i, DT_MIPS_SYMTABNO);
+	Sym *sym = p->syms + j;
+	rel[0] = (unsigned char *)got - base;
+	for (i-=j; i; i--, sym++, rel[0]+=sizeof(size_t)) {
+		rel[1] = sym-p->syms << 8 | R_MIPS_JUMP_SLOT;
+		do_relocs(p, rel, sizeof rel, 2);
+	}
+}
+
 static void reloc_all(struct dso *p)
 {
 	size_t dyn[DYN_CNT] = {0};
 	for (; p; p=p->next) {
 		if (p->relocated) continue;
 		decode_vec(p->dynv, dyn, DYN_CNT);
-#ifdef NEED_ARCH_RELOCS
-		do_arch_relocs(p, head);
-#endif
+		if (NEED_MIPS_GOT_RELOCS)
+			do_mips_relocs(p, (void *)(p->base+dyn[DT_PLTGOT]));
 		do_relocs(p, (void *)(p->base+dyn[DT_JMPREL]), dyn[DT_PLTRELSZ],
 			2+(dyn[DT_PLTREL]==DT_RELA));
 		do_relocs(p, (void *)(p->base+dyn[DT_REL]), dyn[DT_RELSZ], 2);
 		do_relocs(p, (void *)(p->base+dyn[DT_RELA]), dyn[DT_RELASZ], 3);
 
-		if (p->relro_start != p->relro_end &&
+		if (head != &ldso && p->relro_start != p->relro_end &&
 		    mprotect(p->base+p->relro_start, p->relro_end-p->relro_start, PROT_READ) < 0) {
 			error("Error relocating %s: RELRO protection failed: %m",
 				p->name);
@@ -1121,19 +1115,52 @@ static void update_tls_size()
 	tls_align);
 }
 
-void *__dynlink(int argc, char **argv)
+/* Stage 1 of the dynamic linker is defined in dlstart.c. It calls the
+ * following stage 2 and stage 3 functions via primitive symbolic lookup
+ * since it does not have access to their addresses to begin with. */
+
+/* Stage 2 of the dynamic linker is called after relative relocations 
+ * have been processed. It can make function calls to static functions
+ * and access string literals and static data, but cannot use extern
+ * symbols. Its job is to perform symbolic relocations on the dynamic
+ * linker itself, but some of the relocations performed may need to be
+ * replaced later due to copy relocations in the main program. */
+
+void __dls2(unsigned char *base)
 {
-	size_t aux[AUX_CNT] = {0};
+	Ehdr *ehdr = (void *)base;
+	ldso.base = base;
+	ldso.name = ldso.shortname = "libc.so";
+	ldso.global = 1;
+	ldso.phnum = ehdr->e_phnum;
+	ldso.phdr = (void *)(base + ehdr->e_phoff);
+	ldso.phentsize = ehdr->e_phentsize;
+	ldso.rel_early_relative = 1;
+	kernel_mapped_dso(&ldso);
+	decode_dyn(&ldso);
+
+	head = &ldso;
+	reloc_all(&ldso);
+
+	ldso.relocated = 0;
+	ldso.rel_update_got = 1;
+}
+
+/* Stage 3 of the dynamic linker is called with the dynamic linker/libc
+ * fully functional. Its job is to load (if not already loaded) and
+ * process dependencies and relocations for the main application and
+ * transfer control to its entry point. */
+
+_Noreturn void __dls3(size_t *sp)
+{
+	static struct dso app, vdso;
+	size_t aux[AUX_CNT] = {0}, *auxv;
 	size_t i;
-	Phdr *phdr;
-	Ehdr *ehdr;
-	static struct dso builtin_dsos[3];
-	struct dso *const app = builtin_dsos+0;
-	struct dso *const lib = builtin_dsos+1;
-	struct dso *const vdso = builtin_dsos+2;
 	char *env_preload=0;
 	size_t vdso_base;
-	size_t *auxv;
+	int argc = *sp;
+	char **argv = (void *)(sp+1);
+	char **argv_orig = argv;
 	char **envp = argv+argc+1;
 	void *initial_tls;
 
@@ -1157,60 +1184,42 @@ void *__dynlink(int argc, char **argv)
 	libc.page_size = aux[AT_PAGESZ];
 	libc.auxv = auxv;
 
-	/* If the dynamic linker was invoked as a program itself, AT_BASE
-	 * will not be set. In that case, we assume the base address is
-	 * the start of the page containing the PHDRs; I don't know any
-	 * better approach... */
-	if (!aux[AT_BASE]) {
-		aux[AT_BASE] = aux[AT_PHDR] & -PAGE_SIZE;
-		aux[AT_PHDR] = aux[AT_PHENT] = aux[AT_PHNUM] = 0;
-	}
-
-	/* The dynamic linker load address is passed by the kernel
-	 * in the AUX vector, so this is easy. */
-	lib->base = (void *)aux[AT_BASE];
-	lib->name = lib->shortname = "libc.so";
-	lib->global = 1;
-	ehdr = (void *)lib->base;
-	lib->phnum = ehdr->e_phnum;
-	lib->phdr = (void *)(aux[AT_BASE]+ehdr->e_phoff);
-	lib->phentsize = ehdr->e_phentsize;
-	kernel_mapped_dso(lib);
-	decode_dyn(lib);
-
-	if (aux[AT_PHDR]) {
+	/* If the main program was already loaded by the kernel,
+	 * AT_PHDR will point to some location other than the dynamic
+	 * linker's program headers. */
+	if (aux[AT_PHDR] != (size_t)ldso.phdr) {
 		size_t interp_off = 0;
 		size_t tls_image = 0;
 		/* Find load address of the main program, via AT_PHDR vs PT_PHDR. */
-		app->phdr = phdr = (void *)aux[AT_PHDR];
-		app->phnum = aux[AT_PHNUM];
-		app->phentsize = aux[AT_PHENT];
+		Phdr *phdr = app.phdr = (void *)aux[AT_PHDR];
+		app.phnum = aux[AT_PHNUM];
+		app.phentsize = aux[AT_PHENT];
 		for (i=aux[AT_PHNUM]; i; i--, phdr=(void *)((char *)phdr + aux[AT_PHENT])) {
 			if (phdr->p_type == PT_PHDR)
-				app->base = (void *)(aux[AT_PHDR] - phdr->p_vaddr);
+				app.base = (void *)(aux[AT_PHDR] - phdr->p_vaddr);
 			else if (phdr->p_type == PT_INTERP)
 				interp_off = (size_t)phdr->p_vaddr;
 			else if (phdr->p_type == PT_TLS) {
 				tls_image = phdr->p_vaddr;
-				app->tls_len = phdr->p_filesz;
-				app->tls_size = phdr->p_memsz;
-				app->tls_align = phdr->p_align;
+				app.tls_len = phdr->p_filesz;
+				app.tls_size = phdr->p_memsz;
+				app.tls_align = phdr->p_align;
 			}
 		}
-		if (app->tls_size) app->tls_image = (char *)app->base + tls_image;
-		if (interp_off) lib->name = (char *)app->base + interp_off;
+		if (app.tls_size) app.tls_image = (char *)app.base + tls_image;
+		if (interp_off) ldso.name = (char *)app.base + interp_off;
 		if ((aux[0] & (1UL<<AT_EXECFN))
 		    && strncmp((char *)aux[AT_EXECFN], "/proc/", 6))
-			app->name = (char *)aux[AT_EXECFN];
+			app.name = (char *)aux[AT_EXECFN];
 		else
-			app->name = argv[0];
-		kernel_mapped_dso(app);
+			app.name = argv[0];
+		kernel_mapped_dso(&app);
 	} else {
 		int fd;
 		char *ldname = argv[0];
 		size_t l = strlen(ldname);
 		if (l >= 3 && !strcmp(ldname+l-3, "ldd")) ldd_mode = 1;
-		*argv++ = (void *)-1;
+		argv++;
 		while (argv[0] && argv[0][0]=='-' && argv[0][1]=='-') {
 			char *opt = argv[0]+2;
 			*argv++ = (void *)-1;
@@ -1229,8 +1238,8 @@ void *__dynlink(int argc, char **argv)
 			} else {
 				argv[0] = 0;
 			}
-			argv[-1] = (void *)-1;
 		}
+		argv[-1] = (void *)(argc - (argv-argv_orig));
 		if (!argv[0]) {
 			dprintf(2, "musl libc\n"
 				"Version %s\n"
@@ -1246,96 +1255,88 @@ void *__dynlink(int argc, char **argv)
 			_exit(1);
 		}
 		runtime = 1;
-		ehdr = (void *)map_library(fd, app);
+		Ehdr *ehdr = (void *)map_library(fd, &app);
 		if (!ehdr) {
 			dprintf(2, "%s: %s: Not a valid dynamic program\n", ldname, argv[0]);
 			_exit(1);
 		}
 		runtime = 0;
 		close(fd);
-		lib->name = ldname;
-		app->name = argv[0];
-		aux[AT_ENTRY] = (size_t)app->base + ehdr->e_entry;
+		ldso.name = ldname;
+		app.name = argv[0];
+		aux[AT_ENTRY] = (size_t)app.base + ehdr->e_entry;
 		/* Find the name that would have been used for the dynamic
 		 * linker had ldd not taken its place. */
 		if (ldd_mode) {
-			for (i=0; i<app->phnum; i++) {
-				if (app->phdr[i].p_type == PT_INTERP)
-					lib->name = (void *)(app->base
-						+ app->phdr[i].p_vaddr);
+			for (i=0; i<app.phnum; i++) {
+				if (app.phdr[i].p_type == PT_INTERP)
+					ldso.name = (void *)(app.base
+						+ app.phdr[i].p_vaddr);
 			}
-			dprintf(1, "\t%s (%p)\n", lib->name, lib->base);
+			dprintf(1, "\t%s (%p)\n", ldso.name, ldso.base);
 		}
 	}
-	if (app->tls_size) {
-		app->tls_id = tls_cnt = 1;
+	if (app.tls_size) {
+		app.tls_id = tls_cnt = 1;
 #ifdef TLS_ABOVE_TP
-		app->tls_offset = 0;
-		tls_offset = app->tls_size
-			+ ( -((uintptr_t)app->tls_image + app->tls_size)
-			& (app->tls_align-1) );
+		app.tls_offset = 0;
+		tls_offset = app.tls_size
+			+ ( -((uintptr_t)app.tls_image + app.tls_size)
+			& (app.tls_align-1) );
 #else
-		tls_offset = app->tls_offset = app->tls_size
-			+ ( -((uintptr_t)app->tls_image + app->tls_size)
-			& (app->tls_align-1) );
+		tls_offset = app.tls_offset = app.tls_size
+			+ ( -((uintptr_t)app.tls_image + app.tls_size)
+			& (app.tls_align-1) );
 #endif
-		tls_align = MAXP2(tls_align, app->tls_align);
+		tls_align = MAXP2(tls_align, app.tls_align);
 	}
-	app->global = 1;
-	decode_dyn(app);
+	app.global = 1;
+	decode_dyn(&app);
 
 	/* Attach to vdso, if provided by the kernel */
 	if (search_vec(auxv, &vdso_base, AT_SYSINFO_EHDR)) {
-		ehdr = (void *)vdso_base;
-		vdso->phdr = phdr = (void *)(vdso_base + ehdr->e_phoff);
-		vdso->phnum = ehdr->e_phnum;
-		vdso->phentsize = ehdr->e_phentsize;
+		Ehdr *ehdr = (void *)vdso_base;
+		Phdr *phdr = vdso.phdr = (void *)(vdso_base + ehdr->e_phoff);
+		vdso.phnum = ehdr->e_phnum;
+		vdso.phentsize = ehdr->e_phentsize;
 		for (i=ehdr->e_phnum; i; i--, phdr=(void *)((char *)phdr + ehdr->e_phentsize)) {
 			if (phdr->p_type == PT_DYNAMIC)
-				vdso->dynv = (void *)(vdso_base + phdr->p_offset);
+				vdso.dynv = (void *)(vdso_base + phdr->p_offset);
 			if (phdr->p_type == PT_LOAD)
-				vdso->base = (void *)(vdso_base - phdr->p_vaddr + phdr->p_offset);
+				vdso.base = (void *)(vdso_base - phdr->p_vaddr + phdr->p_offset);
 		}
-		vdso->name = "";
-		vdso->shortname = "linux-gate.so.1";
-		vdso->global = 1;
-		decode_dyn(vdso);
-		vdso->prev = lib;
-		lib->next = vdso;
+		vdso.name = "";
+		vdso.shortname = "linux-gate.so.1";
+		vdso.global = 1;
+		vdso.relocated = 1;
+		decode_dyn(&vdso);
+		vdso.prev = &ldso;
+		ldso.next = &vdso;
 	}
 
-	/* Initial dso chain consists only of the app. We temporarily
-	 * append the dynamic linker/libc so we can relocate it, then
-	 * restore the initial chain in preparation for loading third
-	 * party libraries (preload/needed). */
-	head = tail = app;
-	ldso = lib;
-	app->next = lib;
-	reloc_all(lib);
-	app->next = 0;
-
-	/* PAST THIS POINT, ALL LIBC INTERFACES ARE FULLY USABLE. */
+	/* Initial dso chain consists only of the app. */
+	head = tail = &app;
 
 	/* Donate unused parts of app and library mapping to malloc */
-	reclaim_gaps(app);
-	reclaim_gaps(lib);
+	reclaim_gaps(&app);
+	reclaim_gaps(&ldso);
 
 	/* Load preload/needed libraries, add their symbols to the global
-	 * namespace, and perform all remaining relocations. The main
-	 * program must be relocated LAST since it may contain copy
-	 * relocations which depend on libraries' relocations. */
+	 * namespace, and perform all remaining relocations. */
 	if (env_preload) load_preload(env_preload);
-	load_deps(app);
-	make_global(app);
+	load_deps(&app);
+	make_global(&app);
 
 #ifndef DYNAMIC_IS_RO
-	for (i=0; app->dynv[i]; i+=2)
-		if (app->dynv[i]==DT_DEBUG)
-			app->dynv[i+1] = (size_t)&debug;
+	for (i=0; app.dynv[i]; i+=2)
+		if (app.dynv[i]==DT_DEBUG)
+			app.dynv[i+1] = (size_t)&debug;
 #endif
 
-	reloc_all(app->next);
-	reloc_all(app);
+	/* The main program must be relocated LAST since it may contain
+	 * copy relocations which depend on libraries' relocations. */
+	reloc_all(app.next);
+	reloc_all(&app);
 
 	update_tls_size();
 	if (libc.tls_size > sizeof builtin_tls) {
@@ -1359,14 +1360,13 @@ void *__dynlink(int argc, char **argv)
 
 	/* Switch to runtime mode: any further failures in the dynamic
 	 * linker are a reportable failure rather than a fatal startup
-	 * error. If the dynamic loader (dlopen) will not be used, free
-	 * all memory used by the dynamic linker. */
+	 * error. */
 	runtime = 1;
 
 	debug.ver = 1;
 	debug.bp = _dl_debug_state;
 	debug.head = head;
-	debug.base = lib->base;
+	debug.base = ldso.base;
 	debug.state = 0;
 	_dl_debug_state();
 
@@ -1375,7 +1375,8 @@ void *__dynlink(int argc, char **argv)
 	errno = 0;
 	do_init_fini(tail);
 
-	return (void *)aux[AT_ENTRY];
+	CRTJMP((void *)aux[AT_ENTRY], argv-1);
+	for(;;);
 }
 
 void *dlopen(const char *file, int mode)
diff --git a/src/ldso/i386/start.s b/src/ldso/i386/start.s
deleted file mode 100644
index c37a1faa..00000000
--- a/src/ldso/i386/start.s
+++ /dev/null
@@ -1,22 +0,0 @@
-.text
-.global _dlstart
-_dlstart:
-	xor %ebp,%ebp
-	pop %edi
-	mov %esp,%esi
-	and $-16,%esp
-	push %ebp
-	push %ebp
-	push %esi
-	push %edi
-	call __dynlink
-	mov %esi,%esp
-1:	dec %edi
-	pop %esi
-	cmp $-1,%esi
-	jz 1b
-	inc %edi
-	push %esi
-	push %edi
-	xor %edx,%edx
-	jmp *%eax
diff --git a/src/ldso/microblaze/start.s b/src/ldso/microblaze/start.s
deleted file mode 100644
index 067e8613..00000000
--- a/src/ldso/microblaze/start.s
+++ /dev/null
@@ -1,28 +0,0 @@
-# FIXME: clearing argv entries
-.global _dlstart
-_dlstart:
-	add     r19, r0, r0
-
-	lw      r5, r0, r1
-	addi    r6, r1, 4
-	mfs     r7, rpc
-	addi    r7, r7, _GLOBAL_OFFSET_TABLE_+8
-	addi    r7, r7, _DYNAMIC@GOTOFF
-	brlid   r15, __reloc_self@PLT
-	addik   r1, r1, -16
-
-	lwi     r5, r1, 16
-	brlid   r15, __dynlink@PLT
-	addi    r6, r1, 20
-	addik   r1, r1, 16
-
-	lwi     r4, r1, 0
-1:	lwi     r5, r1, 4
-	addi    r5, r5, 1
-	bnei    r5, 1f
-	addi    r4, r4, -1
-	addi    r1, r1, 4
-	bri     1b
-1:	swi     r4, r1, 0
-	add     r5, r0, r0
-	bra     r3
diff --git a/src/ldso/mips/start.s b/src/ldso/mips/start.s
deleted file mode 100644
index 0cadbf8a..00000000
--- a/src/ldso/mips/start.s
+++ /dev/null
@@ -1,46 +0,0 @@
-.hidden _DYNAMIC
-.hidden __reloc_self
-.set noreorder
-.set nomacro
-.global _dlstart
-.type _dlstart,@function
-_dlstart:
-	move $fp, $0
-
-	bgezal $0, 1f
-	nop
-2:	.gpword 2b
-	.gpword _DYNAMIC
-	.gpword __reloc_self
-1:	lw $gp, 0($ra)
-	subu $gp, $ra, $gp
-
-	lw $4, 0($sp)
-	addiu $5, $sp, 4
-	lw $6, 4($ra)
-	addu $6, $6, $gp
-	addiu $7, $gp, -0x7ff0
-	subu $sp, $sp, 16
-	lw $25, 8($ra)
-	add $25, $25, $gp
-	jalr $25
-	nop
-
-	lw $25, %call16(__dynlink)($gp)
-	lw $4, 16($sp)
-	addiu $5, $sp, 20
-	jalr $25
-	nop
-
-	add $sp, $sp, 16
-	li $6, -1
-	lw $4, ($sp)
-1:	lw $5, 4($sp)
-	bne $5, $6, 2f
-	nop
-	addu $sp, $sp, 4
-	addu $4, $4, -1
-	b 1b
-	nop
-2:	sw $4, ($sp)
-	jr $2
diff --git a/src/ldso/or1k/start.s b/src/ldso/or1k/start.s
deleted file mode 100644
index 83b7c2c6..00000000
--- a/src/ldso/or1k/start.s
+++ /dev/null
@@ -1,34 +0,0 @@
-.global _dlstart
-_dlstart:
-	l.jal	1f
-	 l.nop
-1:	l.movhi	r5, gotpchi(_GLOBAL_OFFSET_TABLE_+0)
-	l.ori	r5, r5, gotpclo(_GLOBAL_OFFSET_TABLE_+4)
-	l.add	r5, r5, r9
-	l.movhi	r3, gotoffhi(_DYNAMIC)
-	l.ori	r3, r3, gotofflo(_DYNAMIC)
-	l.add	r5, r5, r3
-
-	l.lwz	r3, 0(r1)
-	l.addi	r4, r1, 4
-	l.jal	plt(__reloc_self)
-	 l.addi	r1, r1, -16
-
-	l.lwz	r3, 16(r1)
-	l.jal	plt(__dynlink)
-	 l.addi	r4, r1, 20
-	l.addi	r1, r1, 16
-
-	l.lwz	r4, 0(r1)
-1:	l.addi	r4, r4, -1
-	l.lwz	r5, 4(r1)
-	l.sfeqi	r5, -1
-	l.bf	1b
-	 l.addi	r1, r1, 4
-
-	l.addi	r4, r4, 1
-	l.addi	r1, r1, -4
-	l.sw	0(r1), r4
-
-	l.jr	r11
-	 l.ori	r3, r0, 0
diff --git a/src/ldso/powerpc/start.s b/src/ldso/powerpc/start.s
deleted file mode 100644
index 6548d58f..00000000
--- a/src/ldso/powerpc/start.s
+++ /dev/null
@@ -1,29 +0,0 @@
-	.global _dlstart
-	.type   _dlstart,@function
-_dlstart:
-	bl      1f
-2:	.long   _DYNAMIC-2b
-1:	mflr    5
-	lwz     0, 0(5)
-	add     5, 0, 5
-	lwz     3, 0(1)
-	addi    4, 1, 4
-	addi    1, 1, -16
-	bl      __reloc_self
-
-	lwz     3, 16(1)
-	addi    4, 1, 20
-	bl      __dynlink
-	addi    1, 1, 16
-
-	lwz     4, 0(1)
-1:	addi    4, 4, -1
-	lwzu    5, 4(1)
-	cmpwi   5, -1
-	beq-    1b
-	addi    4, 4, 1
-	stwu    4, -4(1)
-
-	mtlr    3
-	li      3, 0
-	blr
diff --git a/src/ldso/sh/start.s b/src/ldso/sh/start.s
deleted file mode 100644
index 0d2d9136..00000000
--- a/src/ldso/sh/start.s
+++ /dev/null
@@ -1,26 +0,0 @@
-.text
-.global _dlstart
-.type   _dlstart, @function
-_dlstart:
-	mov.l  @r15, r4
-	mov    r15, r5
-	mov.l  L1, r0
-	bsrf   r0
-	 add   #4, r5
-
-2:	mov    r0, r2
-	mov.l  @r15+, r1
-1:	mov.l  @r15+, r0
-	cmp/eq #-1, r0
-	bt/s   1b
-	 add   #-1, r1
-
-	add    #1, r1
-	mov.l  r0, @-r15
-	mov.l  r1, @-r15
-	mov    #0, r4
-	jmp    @r2
-	 nop
-
-.align 2
-L1:	.long __dynlink@PLT-(2b-.)
diff --git a/src/ldso/start.c b/src/ldso/start.c
deleted file mode 100644
index 3471f6ce..00000000
--- a/src/ldso/start.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#include <stdlib.h>
-
-/* stub for archs that lack dynamic linker support */
-
-void _dlstart()
-{
-	_Exit(1);
-}
diff --git a/src/ldso/x32/start.s b/src/ldso/x32/start.s
deleted file mode 100644
index 3c3800aa..00000000
--- a/src/ldso/x32/start.s
+++ /dev/null
@@ -1,24 +0,0 @@
-.text
-.global _dlstart
-_dlstart:
-	mov (%rsp),%rdi  /* move argc into 1st argument slot */
-	lea 4(%rsp),%rsi /* move argv into 2nd argument slot */
-	call __dynlink
-	/* in case the dynlinker was called directly, it sets the "consumed"
-	   argv values to -1. so we must loop over the array as long as -1
-	   is in the top argv slot, decrement argc, and then set the stackpointer
-	   to the new argc as well as argc's new value.
-	   as the x32 abi has longs in the argv array, we cannot use push/pop.*/
-	movl (%rsp),%edi /* copy argc into edi */
-	xor %rdx,%rdx /* we use rdx as an offset to the current argv member */
-1:	dec %edi
-	addl $4, %edx
-	movl (%rsp, %rdx), %esi
-	cmp $-1,%esi
-	jz 1b
-	inc %edi
-	subl $4, %edx
-	lea (%rsp, %rdx), %rsp /* set rsp to new argv[-1] */
-	movl %edi, (%rsp)      /* write new argc there */
-	xor %edx,%edx
-	jmp *%rax
diff --git a/src/ldso/x86_64/start.s b/src/ldso/x86_64/start.s
deleted file mode 100644
index 1c5598aa..00000000
--- a/src/ldso/x86_64/start.s
+++ /dev/null
@@ -1,16 +0,0 @@
-.text
-.global _dlstart
-_dlstart:
-	mov (%rsp),%rdi
-	lea 8(%rsp),%rsi
-	call __dynlink
-	pop %rdi
-1:	dec %edi
-	pop %rsi
-	cmp $-1,%rsi
-	jz 1b
-	inc %edi
-	push %rsi
-	push %rdi
-	xor %edx,%edx
-	jmp *%rax