about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/internal/dynlink.h 57
-rw-r--r-- src/ldso/aarch64/start.s 18
-rw-r--r-- src/ldso/arm/start.s 18
-rw-r--r-- src/ldso/dlstart.c 107
-rw-r--r-- src/ldso/dynlink.c 347
-rw-r--r-- src/ldso/i386/start.s 22
-rw-r--r-- src/ldso/microblaze/start.s 28
-rw-r--r-- src/ldso/mips/start.s 46
-rw-r--r-- src/ldso/or1k/start.s 34
-rw-r--r-- src/ldso/powerpc/start.s 29
-rw-r--r-- src/ldso/sh/start.s 26
-rw-r--r-- src/ldso/start.c 8
-rw-r--r-- src/ldso/x32/start.s 24
-rw-r--r-- src/ldso/x86_64/start.s 16
14 files changed, 338 insertions, 442 deletions
diff --git a/src/internal/dynlink.h b/src/internal/dynlink.h
new file mode 100644
index 00000000..53661d62
--- /dev/null
+++ b/src/internal/dynlink.h
@@ -0,0 +1,57 @@
+#ifndef _INTERNAL_RELOC_H
+#define _INTERNAL_RELOC_H
+
+#include <features.h>
+#include <elf.h>
+#include <stdint.h>
+
+#if UINTPTR_MAX == 0xffffffff
+typedef Elf32_Ehdr Ehdr;
+typedef Elf32_Phdr Phdr;
+typedef Elf32_Sym Sym;
+#define R_TYPE(x) ((x)&255)
+#define R_SYM(x) ((x)>>8)
+#else
+typedef Elf64_Ehdr Ehdr;
+typedef Elf64_Phdr Phdr;
+typedef Elf64_Sym Sym;
+#define R_TYPE(x) ((x)&0x7fffffff)
+#define R_SYM(x) ((x)>>32)
+#endif
+
+/* These enum constants provide unmatchable default values for
+ * any relocation type the arch does not use. */
+enum {
+	REL_NONE = 0,
+	REL_SYMBOLIC = -100,
+	REL_GOT,
+	REL_PLT,
+	REL_RELATIVE,
+	REL_OFFSET,
+	REL_OFFSET32,
+	REL_COPY,
+	REL_SYM_OR_REL,
+	REL_DTPMOD,
+	REL_DTPOFF,
+	REL_TPOFF,
+	REL_TPOFF_NEG,
+	REL_TLSDESC,
+};
+
+#include "reloc.h"
+
+#define IS_RELATIVE(x) ( \
+	(R_TYPE(x) == REL_RELATIVE) || \
+	(R_TYPE(x) == REL_SYM_OR_REL && !R_SYM(x)) )
+
+#ifndef NEED_MIPS_GOT_RELOCS
+#define NEED_MIPS_GOT_RELOCS 0
+#endif
+
+#define AUX_CNT 32
+#define DYN_CNT 32
+
+typedef void (*stage2_func)(unsigned char *);
+typedef _Noreturn void (*stage3_func)(size_t *);
+
+#endif
diff --git a/src/ldso/aarch64/start.s b/src/ldso/aarch64/start.s
deleted file mode 100644
index 41d1d1e2..00000000
--- a/src/ldso/aarch64/start.s
+++ /dev/null
@@ -1,18 +0,0 @@
-.global _dlstart
-_dlstart:
-	ldr x0,[sp]
-	add x1,sp,#8
-	bl __dynlink
-	mov x1,sp
-	ldr x2,[x1],#8
-1:	sub x2,x2,1
-	ldr x3,[x1],#8
-	cmn x3,#1
-	b.eq 1b
-	add x2,x2,1
-	str x3,[x1,#-8]!
-	str x2,[x1,#-8]!
-	mov sp,x1
-	mov x1,x0
-	mov x0,#0
-	blr x1
diff --git a/src/ldso/arm/start.s b/src/ldso/arm/start.s
deleted file mode 100644
index 5dd93b55..00000000
--- a/src/ldso/arm/start.s
+++ /dev/null
@@ -1,18 +0,0 @@
-.text
-.global _dlstart
-_dlstart:
-	ldr r0,[sp]
-	add r1,sp,#4
-	bl __dynlink
-	pop {r1}
-1:	sub r1,r1,#1
-	pop {r2}
-	cmp r2,#-1
-	beq 1b
-	add r1,r1,#1
-	push {r1,r2}
-	mov r1,r0
-	mov r0,#0
-	tst r1,#1
-	moveq pc,r1
-	bx r1
diff --git a/src/ldso/dlstart.c b/src/ldso/dlstart.c
new file mode 100644
index 00000000..5bd2a080
--- /dev/null
+++ b/src/ldso/dlstart.c
@@ -0,0 +1,107 @@
+#include <stddef.h>
+#include "dynlink.h"
+
+#ifdef SHARED
+
+#ifndef START
+#define START "_dlstart"
+#endif
+
+#include "crt_arch.h"
+
+void _dlstart_c(size_t *sp, size_t *dynv)
+{
+	size_t i, aux[AUX_CNT], dyn[DYN_CNT];
+
+	int argc = *sp;
+	char **argv = (void *)(sp+1);
+
+	for (i=argc+1; argv[i]; i++);
+	size_t *auxv = (void *)(argv+i+1);
+
+	for (i=0; i<AUX_CNT; i++) aux[i] = 0;
+	for (i=0; auxv[i]; i+=2) if (auxv[i]<AUX_CNT)
+		aux[auxv[i]] = auxv[i+1];
+
+	for (i=0; i<DYN_CNT; i++) dyn[i] = 0;
+	for (i=0; dynv[i]; i+=2) if (dynv[i]<DYN_CNT)
+		dyn[dynv[i]] = dynv[i+1];
+
+	/* If the dynamic linker is invoked as a command, its load
+	 * address is not available in the aux vector. Instead, compute
+	 * the load address as the difference between &_DYNAMIC and the
+	 * virtual address in the PT_DYNAMIC program header. */
+	unsigned char *base = (void *)aux[AT_BASE];
+	if (!base) {
+		size_t phnum = aux[AT_PHNUM];
+		size_t phentsize = aux[AT_PHENT];
+		Phdr *ph = (void *)aux[AT_PHDR];
+		for (i=phnum; i--; ph = (void *)((char *)ph + phentsize)) {
+			if (ph->p_type == PT_DYNAMIC) {
+				base = (void *)((size_t)dynv - ph->p_vaddr);
+				break;
+			}
+		}
+	}
+
+	/* MIPS uses an ugly packed form for GOT relocations. Since we
+	 * can't make function calls yet and the code is tiny anyway,
+	 * it's simply inlined here. */
+	if (NEED_MIPS_GOT_RELOCS) {
+		size_t local_cnt = 0;
+		size_t *got = (void *)(base + dyn[DT_PLTGOT]);
+		for (i=0; dynv[i]; i+=2) if (dynv[i]==DT_MIPS_LOCAL_GOTNO)
+			local_cnt = dynv[i+1];
+		for (i=0; i<local_cnt; i++) got[i] += (size_t)base;
+	}
+
+	/* The use of the reloc_info structure and nested loops is a trick
+	 * to work around the fact that we can't necessarily make function
+	 * calls yet. Each struct in the array serves like the arguments
+	 * to a function call. */
+	struct {
+		void *rel;
+		size_t size;
+		size_t stride;
+	} reloc_info[] = {
+		{ base+dyn[DT_JMPREL], dyn[DT_PLTRELSZ], 2+(dyn[DT_PLTREL]==DT_RELA) },
+		{ base+dyn[DT_REL], dyn[DT_RELSZ], 2 },
+		{ base+dyn[DT_RELA], dyn[DT_RELASZ], 3 },
+		{ 0, 0, 0 }
+	};
+
+	for (i=0; reloc_info[i].stride; i++) {
+		size_t *rel = reloc_info[i].rel;
+		size_t rel_size = reloc_info[i].size;
+		size_t stride = reloc_info[i].stride;
+		for (; rel_size; rel+=stride, rel_size-=stride*sizeof(size_t)) {
+			if (!IS_RELATIVE(rel[1])) continue;
+			size_t *rel_addr = (void *)(base + rel[0]);
+			size_t addend = stride==3 ? rel[2] : *rel_addr;
+			*rel_addr = (size_t)base + addend;
+		}
+	}
+
+	const char *strings = (void *)(base + dyn[DT_STRTAB]);
+	const Sym *syms = (void *)(base + dyn[DT_SYMTAB]);
+
+	/* Call dynamic linker stage-2, __dls2 */
+	for (i=0; ;i++) {
+		const char *s = strings + syms[i].st_name;
+		if (s[0]=='_' && s[1]=='_' && s[2]=='d'
+		 && s[3]=='l' && s[4]=='s' && s[5]=='2' && !s[6])
+			break;
+	}
+	((stage2_func)(base + syms[i].st_value))(base);
+
+	/* Call dynamic linker stage-3, __dls3 */
+	for (i=0; ;i++) {
+		const char *s = strings + syms[i].st_name;
+		if (s[0]=='_' && s[1]=='_' && s[2]=='d'
+		 && s[3]=='l' && s[4]=='s' && s[5]=='3' && !s[6])
+			break;
+	}
+	((stage3_func)(base + syms[i].st_value))(sp);
+}
+
+#endif
diff --git a/src/ldso/dynlink.c b/src/ldso/dynlink.c
index f6ed8011..1008e3ea 100644
--- a/src/ldso/dynlink.c
+++ b/src/ldso/dynlink.c
@@ -19,26 +19,13 @@
 #include <dlfcn.h>
 #include "pthread_impl.h"
 #include "libc.h"
+#include "dynlink.h"
 
 static int errflag;
 static char errbuf[128];
 
 #ifdef SHARED
 
-#if ULONG_MAX == 0xffffffff
-typedef Elf32_Ehdr Ehdr;
-typedef Elf32_Phdr Phdr;
-typedef Elf32_Sym Sym;
-#define R_TYPE(x) ((x)&255)
-#define R_SYM(x) ((x)>>8)
-#else
-typedef Elf64_Ehdr Ehdr;
-typedef Elf64_Phdr Phdr;
-typedef Elf64_Sym Sym;
-#define R_TYPE(x) ((x)&0xffffffff)
-#define R_SYM(x) ((x)>>32)
-#endif
-
 #define MAXP2(a,b) (-(-(a)&-(b)))
 #define ALIGN(x,y) ((x)+(y)-1 & -(y))
 
@@ -88,6 +75,7 @@ struct dso {
 	volatile int new_dtv_idx, new_tls_idx;
 	struct td_index *td_index;
 	struct dso *fini_next;
+	int rel_early_relative, rel_update_got;
 	char *shortname;
 	char buf[];
 };
@@ -97,26 +85,6 @@ struct symdef {
 	struct dso *dso;
 };
 
-enum {
-	REL_ERR,
-	REL_SYMBOLIC,
-	REL_GOT,
-	REL_PLT,
-	REL_RELATIVE,
-	REL_OFFSET,
-	REL_OFFSET32,
-	REL_COPY,
-	REL_SYM_OR_REL,
-	REL_TLS, /* everything past here is TLS */
-	REL_DTPMOD,
-	REL_DTPOFF,
-	REL_TPOFF,
-	REL_TPOFF_NEG,
-	REL_TLSDESC,
-};
-
-#include "reloc.h"
-
 int __init_tp(void *);
 void __init_libc(char **, char *);
 
@@ -129,7 +97,8 @@ static struct builtin_tls {
 } builtin_tls[1];
 #define MIN_TLS_ALIGN offsetof(struct builtin_tls, pt)
 
-static struct dso *head, *tail, *ldso, *fini_head;
+static struct dso ldso;
+static struct dso *head, *tail, *fini_head;
 static char *env_path, *sys_path;
 static unsigned long long gencnt;
 static int runtime;
@@ -145,14 +114,19 @@ static pthread_mutex_t init_fini_lock = { ._m_type = PTHREAD_MUTEX_RECURSIVE };
 
 struct debug *_dl_debug_addr = &debug;
 
-#define AUX_CNT 38
-#define DYN_CNT 34
+static int dl_strcmp(const char *l, const char *r)
+{
+	for (; *l==*r && *l; l++, r++);
+	return *(unsigned char *)l - *(unsigned char *)r;
+}
+#define strcmp(l,r) dl_strcmp(l,r)
 
 static void decode_vec(size_t *v, size_t *a, size_t cnt)
 {
-	memset(a, 0, cnt*sizeof(size_t));
-	for (; v[0]; v+=2) if (v[0]<cnt) {
-		a[0] |= 1ULL<<v[0];
+	size_t i;
+	for (i=0; i<cnt; i++) a[i] = 0;
+	for (; v[0]; v+=2) if (v[0]-1<cnt-1) {
+		a[0] |= 1UL<<v[0];
 		a[v[0]] = v[1];
 	}
 }
@@ -276,8 +250,6 @@ static struct symdef find_sym(struct dso *dso, const char *s, int need_def)
 	return def;
 }
 
-#define NO_INLINE_ADDEND (1<<REL_COPY | 1<<REL_GOT | 1<<REL_PLT)
-
 ptrdiff_t __tlsdesc_static(), __tlsdesc_dynamic();
 
 static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stride)
@@ -288,7 +260,7 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
 	Sym *sym;
 	const char *name;
 	void *ctx;
-	int astype, type;
+	int type;
 	int sym_index;
 	struct symdef def;
 	size_t *reloc_addr;
@@ -297,14 +269,8 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
 	size_t addend;
 
 	for (; rel_size; rel+=stride, rel_size-=stride*sizeof(size_t)) {
-		astype = R_TYPE(rel[1]);
-		if (!astype) continue;
-		type = remap_rel(astype);
-		if (!type) {
-			error("Error relocating %s: unsupported relocation type %d",
-				dso->name, astype);
-			continue;
-		}
+		if (dso->rel_early_relative && IS_RELATIVE(rel[1])) continue;
+		type = R_TYPE(rel[1]);
 		sym_index = R_SYM(rel[1]);
 		reloc_addr = (void *)(base + rel[0]);
 		if (sym_index) {
@@ -324,14 +290,19 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
 			def.dso = dso;
 		}
 
+		int gotplt = (type == REL_GOT || type == REL_PLT);
+		if (dso->rel_update_got && !gotplt) continue;
+
 		addend = stride>2 ? rel[2]
-			: (1<<type & NO_INLINE_ADDEND) ? 0
+			: gotplt || type==REL_COPY ? 0
 			: *reloc_addr;
 
 		sym_val = def.sym ? (size_t)def.dso->base+def.sym->st_value : 0;
 		tls_val = def.sym ? def.sym->st_value : 0;
 
 		switch(type) {
+		case REL_NONE:
+			break;
 		case REL_OFFSET:
 			addend -= (size_t)reloc_addr;
 		case REL_SYMBOLIC:
@@ -395,6 +366,10 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
 #endif
 			}
 			break;
+		default:
+			error("Error relocating %s: unsupported relocation type %d",
+				dso->name, type);
+			continue;
 		}
 	}
 }
@@ -711,22 +686,22 @@ static struct dso *load_library(const char *name, struct dso *needed_by)
 					if (!(reported & mask)) {
 						reported |= mask;
 						dprintf(1, "\t%s => %s (%p)\n",
-							name, ldso->name,
-							ldso->base);
+							name, ldso.name,
+							ldso.base);
 					}
 				}
 				is_self = 1;
 			}
 		}
 	}
-	if (!strcmp(name, ldso->name)) is_self = 1;
+	if (!strcmp(name, ldso.name)) is_self = 1;
 	if (is_self) {
-		if (!ldso->prev) {
-			tail->next = ldso;
-			ldso->prev = tail;
-			tail = ldso->next ? ldso->next : ldso;
+		if (!ldso.prev) {
+			tail->next = &ldso;
+			ldso.prev = tail;
+			tail = ldso.next ? ldso.next : &ldso;
 		}
-		return ldso;
+		return &ldso;
 	}
 	if (strchr(name, '/')) {
 		pathname = name;
@@ -752,13 +727,13 @@ static struct dso *load_library(const char *name, struct dso *needed_by)
 			if (!sys_path) {
 				char *prefix = 0;
 				size_t prefix_len;
-				if (ldso->name[0]=='/') {
+				if (ldso.name[0]=='/') {
 					char *s, *t, *z;
-					for (s=t=z=ldso->name; *s; s++)
+					for (s=t=z=ldso.name; *s; s++)
 						if (*s=='/') z=t, t=s;
-					prefix_len = z-ldso->name;
+					prefix_len = z-ldso.name;
 					if (prefix_len < PATH_MAX)
-						prefix = ldso->name;
+						prefix = ldso.name;
 				}
 				if (!prefix) {
 					prefix = "";
@@ -910,21 +885,40 @@ static void make_global(struct dso *p)
 	for (; p; p=p->next) p->global = 1;
 }
 
+static void do_mips_relocs(struct dso *p, size_t *got)
+{
+	size_t i, j, rel[2];
+	unsigned char *base = p->base;
+	i=0; search_vec(p->dynv, &i, DT_MIPS_LOCAL_GOTNO);
+	if (p->rel_early_relative) {
+		got += i;
+	} else {
+		while (i--) *got++ += (size_t)base;
+	}
+	j=0; search_vec(p->dynv, &j, DT_MIPS_GOTSYM);
+	i=0; search_vec(p->dynv, &i, DT_MIPS_SYMTABNO);
+	Sym *sym = p->syms + j;
+	rel[0] = (unsigned char *)got - base;
+	for (i-=j; i; i--, sym++, rel[0]+=sizeof(size_t)) {
+		rel[1] = sym-p->syms << 8 | R_MIPS_JUMP_SLOT;
+		do_relocs(p, rel, sizeof rel, 2);
+	}
+}
+
 static void reloc_all(struct dso *p)
 {
 	size_t dyn[DYN_CNT] = {0};
 	for (; p; p=p->next) {
 		if (p->relocated) continue;
 		decode_vec(p->dynv, dyn, DYN_CNT);
-#ifdef NEED_ARCH_RELOCS
-		do_arch_relocs(p, head);
-#endif
+		if (NEED_MIPS_GOT_RELOCS)
+			do_mips_relocs(p, (void *)(p->base+dyn[DT_PLTGOT]));
 		do_relocs(p, (void *)(p->base+dyn[DT_JMPREL]), dyn[DT_PLTRELSZ],
 			2+(dyn[DT_PLTREL]==DT_RELA));
 		do_relocs(p, (void *)(p->base+dyn[DT_REL]), dyn[DT_RELSZ], 2);
 		do_relocs(p, (void *)(p->base+dyn[DT_RELA]), dyn[DT_RELASZ], 3);
 
-		if (p->relro_start != p->relro_end &&
+		if (head != &ldso && p->relro_start != p->relro_end &&
 		    mprotect(p->base+p->relro_start, p->relro_end-p->relro_start, PROT_READ) < 0) {
 			error("Error relocating %s: RELRO protection failed: %m",
 				p->name);
@@ -1121,19 +1115,52 @@ static void update_tls_size()
 	tls_align);
 }
 
-void *__dynlink(int argc, char **argv)
+/* Stage 1 of the dynamic linker is defined in dlstart.c. It calls the
+ * following stage 2 and stage 3 functions via primitive symbolic lookup
+ * since it does not have access to their addresses to begin with. */
+
+/* Stage 2 of the dynamic linker is called after relative relocations 
+ * have been processed. It can make function calls to static functions
+ * and access string literals and static data, but cannot use extern
+ * symbols. Its job is to perform symbolic relocations on the dynamic
+ * linker itself, but some of the relocations performed may need to be
+ * replaced later due to copy relocations in the main program. */
+
+void __dls2(unsigned char *base)
 {
-	size_t aux[AUX_CNT] = {0};
+	Ehdr *ehdr = (void *)base;
+	ldso.base = base;
+	ldso.name = ldso.shortname = "libc.so";
+	ldso.global = 1;
+	ldso.phnum = ehdr->e_phnum;
+	ldso.phdr = (void *)(base + ehdr->e_phoff);
+	ldso.phentsize = ehdr->e_phentsize;
+	ldso.rel_early_relative = 1;
+	kernel_mapped_dso(&ldso);
+	decode_dyn(&ldso);
+
+	head = &ldso;
+	reloc_all(&ldso);
+
+	ldso.relocated = 0;
+	ldso.rel_update_got = 1;
+}
+
+/* Stage 3 of the dynamic linker is called with the dynamic linker/libc
+ * fully functional. Its job is to load (if not already loaded) and
+ * process dependencies and relocations for the main application and
+ * transfer control to its entry point. */
+
+_Noreturn void __dls3(size_t *sp)
+{
+	static struct dso app, vdso;
+	size_t aux[AUX_CNT] = {0}, *auxv;
 	size_t i;
-	Phdr *phdr;
-	Ehdr *ehdr;
-	static struct dso builtin_dsos[3];
-	struct dso *const app = builtin_dsos+0;
-	struct dso *const lib = builtin_dsos+1;
-	struct dso *const vdso = builtin_dsos+2;
 	char *env_preload=0;
 	size_t vdso_base;
-	size_t *auxv;
+	int argc = *sp;
+	char **argv = (void *)(sp+1);
+	char **argv_orig = argv;
 	char **envp = argv+argc+1;
 	void *initial_tls;
 
@@ -1157,60 +1184,42 @@ void *__dynlink(int argc, char **argv)
 	libc.page_size = aux[AT_PAGESZ];
 	libc.auxv = auxv;
 
-	/* If the dynamic linker was invoked as a program itself, AT_BASE
-	 * will not be set. In that case, we assume the base address is
-	 * the start of the page containing the PHDRs; I don't know any
-	 * better approach... */
-	if (!aux[AT_BASE]) {
-		aux[AT_BASE] = aux[AT_PHDR] & -PAGE_SIZE;
-		aux[AT_PHDR] = aux[AT_PHENT] = aux[AT_PHNUM] = 0;
-	}
-
-	/* The dynamic linker load address is passed by the kernel
-	 * in the AUX vector, so this is easy. */
-	lib->base = (void *)aux[AT_BASE];
-	lib->name = lib->shortname = "libc.so";
-	lib->global = 1;
-	ehdr = (void *)lib->base;
-	lib->phnum = ehdr->e_phnum;
-	lib->phdr = (void *)(aux[AT_BASE]+ehdr->e_phoff);
-	lib->phentsize = ehdr->e_phentsize;
-	kernel_mapped_dso(lib);
-	decode_dyn(lib);
-
-	if (aux[AT_PHDR]) {
+	/* If the main program was already loaded by the kernel,
+	 * AT_PHDR will point to some location other than the dynamic
+	 * linker's program headers. */
+	if (aux[AT_PHDR] != (size_t)ldso.phdr) {
 		size_t interp_off = 0;
 		size_t tls_image = 0;
 		/* Find load address of the main program, via AT_PHDR vs PT_PHDR. */
-		app->phdr = phdr = (void *)aux[AT_PHDR];
-		app->phnum = aux[AT_PHNUM];
-		app->phentsize = aux[AT_PHENT];
+		Phdr *phdr = app.phdr = (void *)aux[AT_PHDR];
+		app.phnum = aux[AT_PHNUM];
+		app.phentsize = aux[AT_PHENT];
 		for (i=aux[AT_PHNUM]; i; i--, phdr=(void *)((char *)phdr + aux[AT_PHENT])) {
 			if (phdr->p_type == PT_PHDR)
-				app->base = (void *)(aux[AT_PHDR] - phdr->p_vaddr);
+				app.base = (void *)(aux[AT_PHDR] - phdr->p_vaddr);
 			else if (phdr->p_type == PT_INTERP)
 				interp_off = (size_t)phdr->p_vaddr;
 			else if (phdr->p_type == PT_TLS) {
 				tls_image = phdr->p_vaddr;
-				app->tls_len = phdr->p_filesz;
-				app->tls_size = phdr->p_memsz;
-				app->tls_align = phdr->p_align;
+				app.tls_len = phdr->p_filesz;
+				app.tls_size = phdr->p_memsz;
+				app.tls_align = phdr->p_align;
 			}
 		}
-		if (app->tls_size) app->tls_image = (char *)app->base + tls_image;
-		if (interp_off) lib->name = (char *)app->base + interp_off;
+		if (app.tls_size) app.tls_image = (char *)app.base + tls_image;
+		if (interp_off) ldso.name = (char *)app.base + interp_off;
 		if ((aux[0] & (1UL<<AT_EXECFN))
 		    && strncmp((char *)aux[AT_EXECFN], "/proc/", 6))
-			app->name = (char *)aux[AT_EXECFN];
+			app.name = (char *)aux[AT_EXECFN];
 		else
-			app->name = argv[0];
-		kernel_mapped_dso(app);
+			app.name = argv[0];
+		kernel_mapped_dso(&app);
 	} else {
 		int fd;
 		char *ldname = argv[0];
 		size_t l = strlen(ldname);
 		if (l >= 3 && !strcmp(ldname+l-3, "ldd")) ldd_mode = 1;
-		*argv++ = (void *)-1;
+		argv++;
 		while (argv[0] && argv[0][0]=='-' && argv[0][1]=='-') {
 			char *opt = argv[0]+2;
 			*argv++ = (void *)-1;
@@ -1229,8 +1238,8 @@ void *__dynlink(int argc, char **argv)
 			} else {
 				argv[0] = 0;
 			}
-			argv[-1] = (void *)-1;
 		}
+		argv[-1] = (void *)(argc - (argv-argv_orig));
 		if (!argv[0]) {
 			dprintf(2, "musl libc\n"
 				"Version %s\n"
@@ -1246,96 +1255,88 @@ void *__dynlink(int argc, char **argv)
 			_exit(1);
 		}
 		runtime = 1;
-		ehdr = (void *)map_library(fd, app);
+		Ehdr *ehdr = (void *)map_library(fd, &app);
 		if (!ehdr) {
 			dprintf(2, "%s: %s: Not a valid dynamic program\n", ldname, argv[0]);
 			_exit(1);
 		}
 		runtime = 0;
 		close(fd);
-		lib->name = ldname;
-		app->name = argv[0];
-		aux[AT_ENTRY] = (size_t)app->base + ehdr->e_entry;
+		ldso.name = ldname;
+		app.name = argv[0];
+		aux[AT_ENTRY] = (size_t)app.base + ehdr->e_entry;
 		/* Find the name that would have been used for the dynamic
 		 * linker had ldd not taken its place. */
 		if (ldd_mode) {
-			for (i=0; i<app->phnum; i++) {
-				if (app->phdr[i].p_type == PT_INTERP)
-					lib->name = (void *)(app->base
-						+ app->phdr[i].p_vaddr);
+			for (i=0; i<app.phnum; i++) {
+				if (app.phdr[i].p_type == PT_INTERP)
+					ldso.name = (void *)(app.base
+						+ app.phdr[i].p_vaddr);
 			}
-			dprintf(1, "\t%s (%p)\n", lib->name, lib->base);
+			dprintf(1, "\t%s (%p)\n", ldso.name, ldso.base);
 		}
 	}
-	if (app->tls_size) {
-		app->tls_id = tls_cnt = 1;
+	if (app.tls_size) {
+		app.tls_id = tls_cnt = 1;
 #ifdef TLS_ABOVE_TP
-		app->tls_offset = 0;
-		tls_offset = app->tls_size
-			+ ( -((uintptr_t)app->tls_image + app->tls_size)
-			& (app->tls_align-1) );
+		app.tls_offset = 0;
+		tls_offset = app.tls_size
+			+ ( -((uintptr_t)app.tls_image + app.tls_size)
+			& (app.tls_align-1) );
 #else
-		tls_offset = app->tls_offset = app->tls_size
-			+ ( -((uintptr_t)app->tls_image + app->tls_size)
-			& (app->tls_align-1) );
+		tls_offset = app.tls_offset = app.tls_size
+			+ ( -((uintptr_t)app.tls_image + app.tls_size)
+			& (app.tls_align-1) );
 #endif
-		tls_align = MAXP2(tls_align, app->tls_align);
+		tls_align = MAXP2(tls_align, app.tls_align);
 	}
-	app->global = 1;
-	decode_dyn(app);
+	app.global = 1;
+	decode_dyn(&app);
 
 	/* Attach to vdso, if provided by the kernel */
 	if (search_vec(auxv, &vdso_base, AT_SYSINFO_EHDR)) {
-		ehdr = (void *)vdso_base;
-		vdso->phdr = phdr = (void *)(vdso_base + ehdr->e_phoff);
-		vdso->phnum = ehdr->e_phnum;
-		vdso->phentsize = ehdr->e_phentsize;
+		Ehdr *ehdr = (void *)vdso_base;
+		Phdr *phdr = vdso.phdr = (void *)(vdso_base + ehdr->e_phoff);
+		vdso.phnum = ehdr->e_phnum;
+		vdso.phentsize = ehdr->e_phentsize;
 		for (i=ehdr->e_phnum; i; i--, phdr=(void *)((char *)phdr + ehdr->e_phentsize)) {
 			if (phdr->p_type == PT_DYNAMIC)
-				vdso->dynv = (void *)(vdso_base + phdr->p_offset);
+				vdso.dynv = (void *)(vdso_base + phdr->p_offset);
 			if (phdr->p_type == PT_LOAD)
-				vdso->base = (void *)(vdso_base - phdr->p_vaddr + phdr->p_offset);
+				vdso.base = (void *)(vdso_base - phdr->p_vaddr + phdr->p_offset);
 		}
-		vdso->name = "";
-		vdso->shortname = "linux-gate.so.1";
-		vdso->global = 1;
-		decode_dyn(vdso);
-		vdso->prev = lib;
-		lib->next = vdso;
+		vdso.name = "";
+		vdso.shortname = "linux-gate.so.1";
+		vdso.global = 1;
+		vdso.relocated = 1;
+		decode_dyn(&vdso);
+		vdso.prev = &ldso;
+		ldso.next = &vdso;
 	}
 
-	/* Initial dso chain consists only of the app. We temporarily
-	 * append the dynamic linker/libc so we can relocate it, then
-	 * restore the initial chain in preparation for loading third
-	 * party libraries (preload/needed). */
-	head = tail = app;
-	ldso = lib;
-	app->next = lib;
-	reloc_all(lib);
-	app->next = 0;
-
-	/* PAST THIS POINT, ALL LIBC INTERFACES ARE FULLY USABLE. */
+	/* Initial dso chain consists only of the app. */
+	head = tail = &app;
 
 	/* Donate unused parts of app and library mapping to malloc */
-	reclaim_gaps(app);
-	reclaim_gaps(lib);
+	reclaim_gaps(&app);
+	reclaim_gaps(&ldso);
 
 	/* Load preload/needed libraries, add their symbols to the global
-	 * namespace, and perform all remaining relocations. The main
-	 * program must be relocated LAST since it may contain copy
-	 * relocations which depend on libraries' relocations. */
+	 * namespace, and perform all remaining relocations. */
 	if (env_preload) load_preload(env_preload);
-	load_deps(app);
-	make_global(app);
+	load_deps(&app);
+	make_global(&app);
 
 #ifndef DYNAMIC_IS_RO
-	for (i=0; app->dynv[i]; i+=2)
-		if (app->dynv[i]==DT_DEBUG)
-			app->dynv[i+1] = (size_t)&debug;
+	for (i=0; app.dynv[i]; i+=2)
+		if (app.dynv[i]==DT_DEBUG)
+			app.dynv[i+1] = (size_t)&debug;
 #endif
 
-	reloc_all(app->next);
-	reloc_all(app);
+	/* The main program must be relocated LAST since it may contain
+	 * copy relocations which depend on libraries' relocations. */
+	reloc_all(app.next);
+	reloc_all(&app);
 
 	update_tls_size();
 	if (libc.tls_size > sizeof builtin_tls) {
@@ -1359,14 +1360,13 @@ void *__dynlink(int argc, char **argv)
 
 	/* Switch to runtime mode: any further failures in the dynamic
 	 * linker are a reportable failure rather than a fatal startup
-	 * error. If the dynamic loader (dlopen) will not be used, free
-	 * all memory used by the dynamic linker. */
+	 * error. */
 	runtime = 1;
 
 	debug.ver = 1;
 	debug.bp = _dl_debug_state;
 	debug.head = head;
-	debug.base = lib->base;
+	debug.base = ldso.base;
 	debug.state = 0;
 	_dl_debug_state();
 
@@ -1375,7 +1375,8 @@ void *__dynlink(int argc, char **argv)
 	errno = 0;
 	do_init_fini(tail);
 
-	return (void *)aux[AT_ENTRY];
+	CRTJMP((void *)aux[AT_ENTRY], argv-1);
+	for(;;);
 }
 
 void *dlopen(const char *file, int mode)
diff --git a/src/ldso/i386/start.s b/src/ldso/i386/start.s
deleted file mode 100644
index c37a1faa..00000000
--- a/src/ldso/i386/start.s
+++ /dev/null
@@ -1,22 +0,0 @@
-.text
-.global _dlstart
-_dlstart:
-	xor %ebp,%ebp
-	pop %edi
-	mov %esp,%esi
-	and $-16,%esp
-	push %ebp
-	push %ebp
-	push %esi
-	push %edi
-	call __dynlink
-	mov %esi,%esp
-1:	dec %edi
-	pop %esi
-	cmp $-1,%esi
-	jz 1b
-	inc %edi
-	push %esi
-	push %edi
-	xor %edx,%edx
-	jmp *%eax
diff --git a/src/ldso/microblaze/start.s b/src/ldso/microblaze/start.s
deleted file mode 100644
index 067e8613..00000000
--- a/src/ldso/microblaze/start.s
+++ /dev/null
@@ -1,28 +0,0 @@
-# FIXME: clearing argv entries
-.global _dlstart
-_dlstart:
-	add     r19, r0, r0
-
-	lw      r5, r0, r1
-	addi    r6, r1, 4
-	mfs     r7, rpc
-	addi    r7, r7, _GLOBAL_OFFSET_TABLE_+8
-	addi    r7, r7, _DYNAMIC@GOTOFF
-	brlid   r15, __reloc_self@PLT
-	addik   r1, r1, -16
-
-	lwi     r5, r1, 16
-	brlid   r15, __dynlink@PLT
-	addi    r6, r1, 20
-	addik   r1, r1, 16
-
-	lwi     r4, r1, 0
-1:	lwi     r5, r1, 4
-	addi    r5, r5, 1
-	bnei    r5, 1f
-	addi    r4, r4, -1
-	addi    r1, r1, 4
-	bri     1b
-1:	swi     r4, r1, 0
-	add     r5, r0, r0
-	bra     r3
diff --git a/src/ldso/mips/start.s b/src/ldso/mips/start.s
deleted file mode 100644
index 0cadbf8a..00000000
--- a/src/ldso/mips/start.s
+++ /dev/null
@@ -1,46 +0,0 @@
-.hidden _DYNAMIC
-.hidden __reloc_self
-.set noreorder
-.set nomacro
-.global _dlstart
-.type _dlstart,@function
-_dlstart:
-	move $fp, $0
-
-	bgezal $0, 1f
-	nop
-2:	.gpword 2b
-	.gpword _DYNAMIC
-	.gpword __reloc_self
-1:	lw $gp, 0($ra)
-	subu $gp, $ra, $gp
-
-	lw $4, 0($sp)
-	addiu $5, $sp, 4
-	lw $6, 4($ra)
-	addu $6, $6, $gp
-	addiu $7, $gp, -0x7ff0
-	subu $sp, $sp, 16
-	lw $25, 8($ra)
-	add $25, $25, $gp
-	jalr $25
-	nop
-
-	lw $25, %call16(__dynlink)($gp)
-	lw $4, 16($sp)
-	addiu $5, $sp, 20
-	jalr $25
-	nop
-
-	add $sp, $sp, 16
-	li $6, -1
-	lw $4, ($sp)
-1:	lw $5, 4($sp)
-	bne $5, $6, 2f
-	nop
-	addu $sp, $sp, 4
-	addu $4, $4, -1
-	b 1b
-	nop
-2:	sw $4, ($sp)
-	jr $2
diff --git a/src/ldso/or1k/start.s b/src/ldso/or1k/start.s
deleted file mode 100644
index 83b7c2c6..00000000
--- a/src/ldso/or1k/start.s
+++ /dev/null
@@ -1,34 +0,0 @@
-.global _dlstart
-_dlstart:
-	l.jal	1f
-	 l.nop
-1:	l.movhi	r5, gotpchi(_GLOBAL_OFFSET_TABLE_+0)
-	l.ori	r5, r5, gotpclo(_GLOBAL_OFFSET_TABLE_+4)
-	l.add	r5, r5, r9
-	l.movhi	r3, gotoffhi(_DYNAMIC)
-	l.ori	r3, r3, gotofflo(_DYNAMIC)
-	l.add	r5, r5, r3
-
-	l.lwz	r3, 0(r1)
-	l.addi	r4, r1, 4
-	l.jal	plt(__reloc_self)
-	 l.addi	r1, r1, -16
-
-	l.lwz	r3, 16(r1)
-	l.jal	plt(__dynlink)
-	 l.addi	r4, r1, 20
-	l.addi	r1, r1, 16
-
-	l.lwz	r4, 0(r1)
-1:	l.addi	r4, r4, -1
-	l.lwz	r5, 4(r1)
-	l.sfeqi	r5, -1
-	l.bf	1b
-	 l.addi	r1, r1, 4
-
-	l.addi	r4, r4, 1
-	l.addi	r1, r1, -4
-	l.sw	0(r1), r4
-
-	l.jr	r11
-	 l.ori	r3, r0, 0
diff --git a/src/ldso/powerpc/start.s b/src/ldso/powerpc/start.s
deleted file mode 100644
index 6548d58f..00000000
--- a/src/ldso/powerpc/start.s
+++ /dev/null
@@ -1,29 +0,0 @@
-	.global _dlstart
-	.type   _dlstart,@function
-_dlstart:
-	bl      1f
-2:	.long   _DYNAMIC-2b
-1:	mflr    5
-	lwz     0, 0(5)
-	add     5, 0, 5
-	lwz     3, 0(1)
-	addi    4, 1, 4
-	addi    1, 1, -16
-	bl      __reloc_self
-
-	lwz     3, 16(1)
-	addi    4, 1, 20
-	bl      __dynlink
-	addi    1, 1, 16
-
-	lwz     4, 0(1)
-1:	addi    4, 4, -1
-	lwzu    5, 4(1)
-	cmpwi   5, -1
-	beq-    1b
-	addi    4, 4, 1
-	stwu    4, -4(1)
-
-	mtlr    3
-	li      3, 0
-	blr
diff --git a/src/ldso/sh/start.s b/src/ldso/sh/start.s
deleted file mode 100644
index 0d2d9136..00000000
--- a/src/ldso/sh/start.s
+++ /dev/null
@@ -1,26 +0,0 @@
-.text
-.global _dlstart
-.type   _dlstart, @function
-_dlstart:
-	mov.l  @r15, r4
-	mov    r15, r5
-	mov.l  L1, r0
-	bsrf   r0
-	 add   #4, r5
-
-2:	mov    r0, r2
-	mov.l  @r15+, r1
-1:	mov.l  @r15+, r0
-	cmp/eq #-1, r0
-	bt/s   1b
-	 add   #-1, r1
-
-	add    #1, r1
-	mov.l  r0, @-r15
-	mov.l  r1, @-r15
-	mov    #0, r4
-	jmp    @r2
-	 nop
-
-.align 2
-L1:	.long __dynlink@PLT-(2b-.)
diff --git a/src/ldso/start.c b/src/ldso/start.c
deleted file mode 100644
index 3471f6ce..00000000
--- a/src/ldso/start.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#include <stdlib.h>
-
-/* stub for archs that lack dynamic linker support */
-
-void _dlstart()
-{
-	_Exit(1);
-}
diff --git a/src/ldso/x32/start.s b/src/ldso/x32/start.s
deleted file mode 100644
index 3c3800aa..00000000
--- a/src/ldso/x32/start.s
+++ /dev/null
@@ -1,24 +0,0 @@
-.text
-.global _dlstart
-_dlstart:
-	mov (%rsp),%rdi  /* move argc into 1st argument slot */
-	lea 4(%rsp),%rsi /* move argv into 2nd argument slot */
-	call __dynlink
-	/* in case the dynlinker was called directly, it sets the "consumed"
-	   argv values to -1. so we must loop over the array as long as -1
-	   is in the top argv slot, decrement argc, and then set the stackpointer
-	   to the new argc as well as argc's new value.
-	   as the x32 abi has longs in the argv array, we cannot use push/pop.*/
-	movl (%rsp),%edi /* copy argc into edi */
-	xor %rdx,%rdx /* we use rdx as an offset to the current argv member */
-1:	dec %edi
-	addl $4, %edx
-	movl (%rsp, %rdx), %esi
-	cmp $-1,%esi
-	jz 1b
-	inc %edi
-	subl $4, %edx
-	lea (%rsp, %rdx), %rsp /* set rsp to new argv[-1] */
-	movl %edi, (%rsp)      /* write new argc there */
-	xor %edx,%edx
-	jmp *%rax
diff --git a/src/ldso/x86_64/start.s b/src/ldso/x86_64/start.s
deleted file mode 100644
index 1c5598aa..00000000
--- a/src/ldso/x86_64/start.s
+++ /dev/null
@@ -1,16 +0,0 @@
-.text
-.global _dlstart
-_dlstart:
-	mov (%rsp),%rdi
-	lea 8(%rsp),%rsi
-	call __dynlink
-	pop %rdi
-1:	dec %edi
-	pop %rsi
-	cmp $-1,%rsi
-	jz 1b
-	inc %edi
-	push %rsi
-	push %rdi
-	xor %edx,%edx
-	jmp *%rax