about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--Makefile2
-rw-r--r--arch/aarch64/crt_arch.h13
-rw-r--r--arch/aarch64/reloc.h38
-rw-r--r--arch/arm/crt_arch.h25
-rw-r--r--arch/arm/reloc.h36
-rw-r--r--arch/i386/crt_arch.h29
-rw-r--r--arch/i386/reloc.h45
-rw-r--r--arch/microblaze/crt_arch.h27
-rw-r--r--arch/microblaze/reloc.h47
-rw-r--r--arch/mips/crt_arch.h49
-rw-r--r--arch/mips/reloc.h78
-rw-r--r--arch/or1k/crt_arch.h28
-rw-r--r--arch/or1k/reloc.h54
-rw-r--r--arch/powerpc/crt_arch.h31
-rw-r--r--arch/powerpc/reloc.h51
-rw-r--r--arch/sh/crt_arch.h28
-rw-r--r--arch/sh/reloc.h36
-rw-r--r--arch/sh/src/__fpscr_values.c2
-rw-r--r--arch/x32/crt_arch.h21
-rw-r--r--arch/x32/reloc.h48
-rw-r--r--arch/x86_64/crt_arch.h21
-rw-r--r--arch/x86_64/reloc.h43
-rw-r--r--crt/crt1.c4
-rw-r--r--src/internal/dynlink.h57
-rw-r--r--src/ldso/aarch64/start.s18
-rw-r--r--src/ldso/arm/start.s18
-rw-r--r--src/ldso/dlstart.c107
-rw-r--r--src/ldso/dynlink.c347
-rw-r--r--src/ldso/i386/start.s22
-rw-r--r--src/ldso/microblaze/start.s28
-rw-r--r--src/ldso/mips/start.s46
-rw-r--r--src/ldso/or1k/start.s34
-rw-r--r--src/ldso/powerpc/start.s29
-rw-r--r--src/ldso/sh/start.s26
-rw-r--r--src/ldso/start.c8
-rw-r--r--src/ldso/x32/start.s24
-rw-r--r--src/ldso/x86_64/start.s16
37 files changed, 627 insertions, 909 deletions
diff --git a/Makefile b/Makefile
index 8cc3de81..02b44f8e 100644
--- a/Makefile
+++ b/Makefile
@@ -85,7 +85,7 @@ src/internal/version.h: $(wildcard VERSION .git)
 
 src/internal/version.lo: src/internal/version.h
 
-src/ldso/dynlink.lo: arch/$(ARCH)/reloc.h
+src/ldso/dlstart.lo src/ldso/dynlink.lo: src/internal/dynlink.h arch/$(ARCH)/reloc.h
 
 crt/crt1.o crt/Scrt1.o: $(wildcard arch/$(ARCH)/crt_arch.h)
 
diff --git a/arch/aarch64/crt_arch.h b/arch/aarch64/crt_arch.h
index 32066881..3a4b321e 100644
--- a/arch/aarch64/crt_arch.h
+++ b/arch/aarch64/crt_arch.h
@@ -1,9 +1,14 @@
 __asm__(
-".global _start\n"
-".type _start,%function\n"
-"_start:\n"
+".global " START "\n"
+".type " START ",%function\n"
+START ":\n"
 "	mov x29, #0\n"
 "	mov x30, #0\n"
 "	mov x0, sp\n"
+".weak _DYNAMIC\n"
+".hidden _DYNAMIC\n"
+"	adrp x1, _DYNAMIC\n"
+"	add x1, x1, #:lo12:_DYNAMIC\n"
 "	and sp, x0, #-16\n"
-"	b __cstart\n");
+"	b " START "_c\n"
+);
diff --git a/arch/aarch64/reloc.h b/arch/aarch64/reloc.h
index e95ae9a8..1b0402bc 100644
--- a/arch/aarch64/reloc.h
+++ b/arch/aarch64/reloc.h
@@ -1,5 +1,3 @@
-#include <string.h>
-#include <elf.h>
 #include <endian.h>
 
 #if __BYTE_ORDER == __BIG_ENDIAN
@@ -14,27 +12,15 @@
 
 #define TPOFF_K 16
 
-static int remap_rel(int type)
-{
-	switch(type) {
-	case R_AARCH64_ABS64:
-		return REL_SYMBOLIC;
-	case R_AARCH64_GLOB_DAT:
-		return REL_GOT;
-	case R_AARCH64_JUMP_SLOT:
-		return REL_PLT;
-	case R_AARCH64_RELATIVE:
-		return REL_RELATIVE;
-	case R_AARCH64_COPY:
-		return REL_COPY;
-	case R_AARCH64_TLS_DTPMOD64:
-		return REL_DTPMOD;
-	case R_AARCH64_TLS_DTPREL64:
-		return REL_DTPOFF;
-	case R_AARCH64_TLS_TPREL64:
-		return REL_TPOFF;
-	case R_AARCH64_TLSDESC:
-		return REL_TLSDESC;
-	}
-	return 0;
-}
+#define REL_SYMBOLIC    R_AARCH64_ABS64
+#define REL_GOT         R_AARCH64_GLOB_DAT
+#define REL_PLT         R_AARCH64_JUMP_SLOT
+#define REL_RELATIVE    R_AARCH64_RELATIVE
+#define REL_COPY        R_AARCH64_COPY
+#define REL_DTPMOD      R_AARCH64_TLS_DTPMOD64
+#define REL_DTPOFF      R_AARCH64_TLS_DTPREL64
+#define REL_TPOFF       R_AARCH64_TLS_TPREL64
+#define REL_TLSDESC     R_AARCH64_TLSDESC
+
+#define CRTJMP(pc,sp) __asm__ __volatile__( \
+	"mov sp,%1 ; br %0" : : "r"(pc), "r"(sp) : "memory" )
diff --git a/arch/arm/crt_arch.h b/arch/arm/crt_arch.h
index bed99c27..d1f9a662 100644
--- a/arch/arm/crt_arch.h
+++ b/arch/arm/crt_arch.h
@@ -1,10 +1,15 @@
-__asm__("\
-.global _start \n\
-.type _start,%function \n\
-_start: \n\
-	mov fp, #0 \n\
-	mov lr, #0 \n\
-	mov a1, sp \n\
-	and sp, sp, #-16 \n\
-	bl __cstart \n\
-");
+__asm__(
+".global " START " \n"
+".type " START ",%function \n"
+START ": \n"
+"	mov fp, #0 \n"
+"	mov lr, #0 \n"
+"	mov a1, sp \n"
+"	ldr a2, 1f \n"
+"2:	add a2, pc, a2 \n"
+"	and sp, sp, #-16 \n"
+"	bl " START "_c \n"
+".weak _DYNAMIC \n"
+".hidden _DYNAMIC \n"
+"1:	.word _DYNAMIC-2b-8 \n"
+);
diff --git a/arch/arm/reloc.h b/arch/arm/reloc.h
index ee39b7fd..dec0031e 100644
--- a/arch/arm/reloc.h
+++ b/arch/arm/reloc.h
@@ -1,5 +1,3 @@
-#include <string.h>
-#include <elf.h>
 #include <endian.h>
 
 #if __BYTE_ORDER == __BIG_ENDIAN
@@ -20,25 +18,15 @@
 
 #define TPOFF_K 8
 
-static int remap_rel(int type)
-{
-	switch(type) {
-	case R_ARM_ABS32:
-		return REL_SYMBOLIC;
-	case R_ARM_GLOB_DAT:
-		return REL_GOT;
-	case R_ARM_JUMP_SLOT:
-		return REL_PLT;
-	case R_ARM_RELATIVE:
-		return REL_RELATIVE;
-	case R_ARM_COPY:
-		return REL_COPY;
-	case R_ARM_TLS_DTPMOD32:
-		return REL_DTPMOD;
-	case R_ARM_TLS_DTPOFF32:
-		return REL_DTPOFF;
-	case R_ARM_TLS_TPOFF32:
-		return REL_TPOFF;
-	}
-	return 0;
-}
+#define REL_SYMBOLIC    R_ARM_ABS32
+#define REL_GOT         R_ARM_GLOB_DAT
+#define REL_PLT         R_ARM_JUMP_SLOT
+#define REL_RELATIVE    R_ARM_RELATIVE
+#define REL_COPY        R_ARM_COPY
+#define REL_DTPMOD      R_ARM_TLS_DTPMOD32
+#define REL_DTPOFF      R_ARM_TLS_DTPOFF32
+#define REL_TPOFF       R_ARM_TLS_TPOFF32
+//#define REL_TLSDESC     R_ARM_TLS_DESC
+
+#define CRTJMP(pc,sp) __asm__ __volatile__( \
+	"mov sp,%1 ; tst %0,#1 ; moveq pc,%0 ; bx %0" : : "r"(pc), "r"(sp) : "memory" )
diff --git a/arch/i386/crt_arch.h b/arch/i386/crt_arch.h
index ae694f99..43c8477a 100644
--- a/arch/i386/crt_arch.h
+++ b/arch/i386/crt_arch.h
@@ -1,13 +1,16 @@
-__asm__("\
-.text \n\
-.global _start \n\
-_start: \n\
-	xor %ebp,%ebp \n\
-	mov %esp,%eax \n\
-	and $-16,%esp \n\
-	push %eax \n\
-	push %eax \n\
-	push %eax \n\
-	push %eax \n\
-	call __cstart \n\
-");
+__asm__(
+".text\n"
+".weak _DYNAMIC \n"
+".hidden _DYNAMIC \n"
+".global " START "\n"
+START ":\n"
+"	xor %ebp,%ebp \n"
+"	mov %esp,%eax \n"
+"	and $-16,%esp \n"
+"	push %eax \n"
+"	push %eax \n"
+"	call 1f \n"
+"1:	addl $_DYNAMIC-1b,(%esp) \n"
+"	push %eax \n"
+"	call " START "_c \n"
+);
diff --git a/arch/i386/reloc.h b/arch/i386/reloc.h
index eaf5aae0..b52ef402 100644
--- a/arch/i386/reloc.h
+++ b/arch/i386/reloc.h
@@ -1,33 +1,16 @@
-#include <string.h>
-#include <elf.h>
-
 #define LDSO_ARCH "i386"
 
-static int remap_rel(int type)
-{
-	switch(type) {
-	case R_386_32:
-		return REL_SYMBOLIC;
-	case R_386_PC32:
-		return REL_OFFSET;
-	case R_386_GLOB_DAT:
-		return REL_GOT;
-	case R_386_JMP_SLOT:
-		return REL_PLT;
-	case R_386_RELATIVE:
-		return REL_RELATIVE;
-	case R_386_COPY:
-		return REL_COPY;
-	case R_386_TLS_DTPMOD32:
-		return REL_DTPMOD;
-	case R_386_TLS_DTPOFF32:
-		return REL_DTPOFF;
-	case R_386_TLS_TPOFF:
-		return REL_TPOFF;
-	case R_386_TLS_TPOFF32:
-		return REL_TPOFF_NEG;
-	case R_386_TLS_DESC:
-		return REL_TLSDESC;
-	}
-	return 0;
-}
+#define REL_SYMBOLIC    R_386_32
+#define REL_OFFSET      R_386_PC32
+#define REL_GOT         R_386_GLOB_DAT
+#define REL_PLT         R_386_JMP_SLOT
+#define REL_RELATIVE    R_386_RELATIVE
+#define REL_COPY        R_386_COPY
+#define REL_DTPMOD      R_386_TLS_DTPMOD32
+#define REL_DTPOFF      R_386_TLS_DTPOFF32
+#define REL_TPOFF       R_386_TLS_TPOFF
+#define REL_TPOFF_NEG   R_386_TLS_TPOFF32
+#define REL_TLSDESC     R_386_TLS_DESC
+
+#define CRTJMP(pc,sp) __asm__ __volatile__( \
+	"mov %1,%%esp ; jmp *%0" : : "r"(pc), "r"(sp) : "memory" )
diff --git a/arch/microblaze/crt_arch.h b/arch/microblaze/crt_arch.h
index 8917c695..ada98c86 100644
--- a/arch/microblaze/crt_arch.h
+++ b/arch/microblaze/crt_arch.h
@@ -1,11 +1,16 @@
-__asm__("\
-.global _start \n\
-.align  2 \n\
-_start: \n\
-	add r19, r0, r0 \n\
-	ori r5, r1, 0 \n\
-	andi r1, r1, -8 \n\
-	addik r1, r1, -8 \n\
-	bri __cstart \n\
-	nop \n\
-");
+__asm__(
+".global " START " \n"
+".align  2 \n"
+START ": \n"
+"	add r19, r0, r0 \n"
+"	ori r5, r1, 0 \n"
+"1:	mfs r6, rpc \n"
+".weak _DYNAMIC \n"
+".hidden _DYNAMIC \n"
+"	addik r6, r6, _GLOBAL_OFFSET_TABLE_+8 \n"
+"	addik r6, r6, _DYNAMIC@GOTOFF \n"
+"	andi r1, r1, -8 \n"
+"	addik r1, r1, -8 \n"
+"	bri " START "_c \n"
+"	nop \n"
+);
diff --git a/arch/microblaze/reloc.h b/arch/microblaze/reloc.h
index 71a6219c..611db465 100644
--- a/arch/microblaze/reloc.h
+++ b/arch/microblaze/reloc.h
@@ -1,5 +1,3 @@
-#include <string.h>
-#include <elf.h>
 #include <endian.h>
 
 #if __BYTE_ORDER == __LITTLE_ENDIAN
@@ -12,40 +10,13 @@
 
 #define TPOFF_K 0
 
-static int remap_rel(int type)
-{
-	switch(type) {
-	case R_MICROBLAZE_32:
-		return REL_SYMBOLIC;
-	case R_MICROBLAZE_GLOB_DAT:
-		return REL_GOT;
-	case R_MICROBLAZE_JUMP_SLOT:
-		return REL_PLT;
-	case R_MICROBLAZE_REL:
-		return REL_RELATIVE;
-	case R_MICROBLAZE_COPY:
-		return REL_COPY;
-	case R_MICROBLAZE_TLSDTPMOD32:
-		return REL_DTPMOD;
-	case R_MICROBLAZE_TLSDTPREL32:
-		return REL_DTPOFF;
-	}
-	return 0;
-}
+#define REL_SYMBOLIC    R_MICROBLAZE_32
+#define REL_GOT         R_MICROBLAZE_GLOB_DAT
+#define REL_PLT         R_MICROBLAZE_JUMP_SLOT
+#define REL_RELATIVE    R_MICROBLAZE_REL
+#define REL_COPY        R_MICROBLAZE_COPY
+#define REL_DTPMOD      R_MICROBLAZE_TLSDTPMOD32
+#define REL_DTPOFF      R_MICROBLAZE_TLSDTPREL32
 
-#include "syscall.h"
-void __reloc_self(int c, size_t *a, size_t *dynv)
-{
-	char dot = '.', ex = 'x';
-	char *base;
-	size_t t[20], n;
-	for (a+=c+1; *a; a++);
-	for (a++; *a; a+=2) if (*a<20) t[*a] = a[1];
-	base = (char *)t[AT_BASE];
-	if (!base) base = (char *)(t[AT_PHDR] & -t[AT_PAGESZ]);
-	for (a=dynv; *a; a+=2) if (*a<20) t[*a] = a[1];
-	n = t[DT_RELASZ];
-	for (a=(void *)(base+t[DT_RELA]); n; a+=3, n-=12)
-		if (a[1]%256 == R_MICROBLAZE_REL)
-			*(size_t *)(base+a[0]) = (size_t)base + a[2];
-}
+#define CRTJMP(pc,sp) __asm__ __volatile__( \
+	"addik r1,%1,0 ; bra %0" : : "r"(pc), "r"(sp) : "memory" )
diff --git a/arch/mips/crt_arch.h b/arch/mips/crt_arch.h
index d4ae52d1..33bde4e6 100644
--- a/arch/mips/crt_arch.h
+++ b/arch/mips/crt_arch.h
@@ -1,21 +1,28 @@
-__asm__("\n\
-.set push\n\
-.set noreorder\n\
-.global __start\n\
-.global _start\n\
-.type   __start, @function\n\
-.type   _start, @function\n\
-__start:\n\
-_start:\n\
-	bal 1f \n\
-	move $fp, $0 \n\
-2:	.gpword 2b \n\
-1:	lw $gp, 0($ra) \n\
-	subu $gp, $ra, $gp \n\
-	move $4, $sp \n\
-	subu $sp, $sp, 16 \n\
-	and $sp, $sp, -8 \n\
-	lw $25, %call16(__cstart)($gp) \n\
-	jalr $25 \n\
-	nop \n\
-.set pop");
+__asm__(
+".set push\n"
+".set noreorder\n"
+".global _" START "\n"
+".global " START "\n"
+".type   _" START ", @function\n"
+".type   " START ", @function\n"
+"_" START ":\n"
+"" START ":\n"
+"	bal 1f \n"
+"	 move $fp, $0 \n"
+"2:	.gpword 2b \n"
+"	.gpword " START "_c \n"
+".weak _DYNAMIC \n"
+".hidden _DYNAMIC \n"
+"	.gpword _DYNAMIC \n"
+"1:	lw $gp, 0($ra) \n"
+"	subu $gp, $ra, $gp \n"
+"	move $4, $sp \n"
+"	lw $5, 8($ra) \n"
+"	addu $5, $5, $gp \n"
+"	lw $25, 4($ra) \n"
+"	addu $25, $25, $gp \n"
+"	subu $sp, $sp, 16 \n"
+"	jalr $25 \n"
+"	 and $sp, $sp, -8 \n"
+".set pop \n"
+);
diff --git a/arch/mips/reloc.h b/arch/mips/reloc.h
index 4b81d328..8aa02852 100644
--- a/arch/mips/reloc.h
+++ b/arch/mips/reloc.h
@@ -1,5 +1,3 @@
-#include <string.h>
-#include <elf.h>
 #include <endian.h>
 
 #if __BYTE_ORDER == __LITTLE_ENDIAN
@@ -18,72 +16,16 @@
 
 #define TPOFF_K (-0x7000)
 
-static int remap_rel(int type)
-{
-	switch(type) {
-	case R_MIPS_REL32:
-		return REL_SYM_OR_REL;
-	case R_MIPS_JUMP_SLOT:
-		return REL_PLT;
-	case R_MIPS_COPY:
-		return REL_COPY;
-	case R_MIPS_TLS_DTPMOD32:
-		return REL_DTPMOD;
-	case R_MIPS_TLS_DTPREL32:
-		return REL_DTPOFF;
-	case R_MIPS_TLS_TPREL32:
-		return REL_TPOFF;
-	}
-	return 0;
-}
+#define REL_SYM_OR_REL  R_MIPS_REL32
+#define REL_PLT         R_MIPS_JUMP_SLOT
+#define REL_COPY        R_MIPS_COPY
+#define REL_DTPMOD      R_MIPS_TLS_DTPMOD32
+#define REL_DTPOFF      R_MIPS_TLS_DTPREL32
+#define REL_TPOFF       R_MIPS_TLS_TPREL32
 
-void __reloc_self(int c, size_t *a, size_t *dynv, size_t *got)
-{
-	char *base;
-	size_t t[20], n;
-	for (a+=c+1; *a; a++);
-	for (a++; *a; a+=2) if (*a<20) t[*a] = a[1];
-	base = (char *)t[AT_BASE];
-	if (!base) base = (char *)(t[AT_PHDR] & -t[AT_PAGESZ]);
-	for (a=dynv; *a; a+=2) if (*a-0x70000000UL<20) t[*a&31] = a[1];
-	n = t[DT_MIPS_LOCAL_GOTNO - 0x70000000];
-	for (a=got; n; a++, n--) *a += (size_t)base;
-}
-
-static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stride);
-
-static void do_arch_relocs(struct dso *this, struct dso *head)
-{
-	unsigned char *base = this->base;
-	size_t *dynv = this->dynv;
-	size_t dyn[20] = {0};
-	size_t i;
-	size_t rel[2], got=0;
-	Sym *sym;
-
-	for (i=0; dynv[i]; i+=2) {
-		if (dynv[i]-0x70000000UL<20)
-			dyn[dynv[i]&31] = dynv[i+1];
-		else if (dynv[i] == DT_PLTGOT)
-			got = dynv[i+1];
-	}
-	i = dyn[DT_MIPS_LOCAL_GOTNO-0x70000000];
-	if (this->shortname && !strcmp(this->shortname, "libc.so")) {
-		got += sizeof(size_t) * i;
-	} else {
-		for (; i; i--, got+=sizeof(size_t))
-			*(size_t *)(base+got) += (size_t)base;
-	}
-	sym = this->syms + dyn[DT_MIPS_GOTSYM-0x70000000];
-	i = dyn[DT_MIPS_SYMTABNO-0x70000000] - dyn[DT_MIPS_GOTSYM-0x70000000];
-	for (; i; i--, got+=sizeof(size_t), sym++) {
-		rel[0] = got;
-		rel[1] = sym-this->syms << 8 | R_MIPS_JUMP_SLOT;
-		*(size_t *)(base+got) = 0;
-		do_relocs(this, rel, sizeof rel, 2);
-	}
-}
-
-#define NEED_ARCH_RELOCS 1
+#define NEED_MIPS_GOT_RELOCS 1
 #define DYNAMIC_IS_RO 1
 #define ARCH_SYM_REJECT_UND(s) (!((s)->st_other & STO_MIPS_PLT))
+
+#define CRTJMP(pc,sp) __asm__ __volatile__( \
+	"move $sp,%1 ; jr %0" : : "r"(pc), "r"(sp) : "memory" )
diff --git a/arch/or1k/crt_arch.h b/arch/or1k/crt_arch.h
index 0f381d2d..84415561 100644
--- a/arch/or1k/crt_arch.h
+++ b/arch/or1k/crt_arch.h
@@ -1,11 +1,17 @@
-__asm__("\
-.global _start \n\
-.align  4 \n\
-_start: \n\
-	l.ori r3, r1, 0 \n\
-	l.addi r2, r0, -8 \n\
-	l.and r1, r1, r2 \n\
-	l.addi r1, r1, -8 \n\
-	l.jal __cstart \n\
-	 l.ori r2, r0, 0 \n\
-");
+__asm__(
+".global " START " \n"
+".align  4 \n"
+START ": \n"
+"	l.jal 1f \n"
+"	 l.ori r3, r1, 0 \n"
+".weak _DYNAMIC \n"
+".hidden _DYNAMIC \n"
+"	.word _DYNAMIC-. \n"
+"1:	l.lwz r4, 0(r9) \n"
+"	l.add r4, r4, r9 \n"
+"	l.addi r2, r0, -8 \n"
+"	l.and r1, r1, r2 \n"
+"	l.addi r1, r1, -16 \n"
+"	l.jal " START "_c \n"
+"	 l.ori r2, r0, 0 \n"
+);
diff --git a/arch/or1k/reloc.h b/arch/or1k/reloc.h
index 830a800a..ddee45c5 100644
--- a/arch/or1k/reloc.h
+++ b/arch/or1k/reloc.h
@@ -1,47 +1,15 @@
-#include <string.h>
-#include <elf.h>
-#include <endian.h>
-
 #define LDSO_ARCH "or1k"
 
 #define TPOFF_K 0
 
-static int remap_rel(int type)
-{
-	switch(type) {
-	case R_OR1K_32:
-		return REL_SYMBOLIC;
-	case R_OR1K_GLOB_DAT:
-		return REL_GOT;
-	case R_OR1K_JMP_SLOT:
-		return REL_PLT;
-	case R_OR1K_RELATIVE:
-		return REL_RELATIVE;
-	case R_OR1K_COPY:
-		return REL_COPY;
-	case R_OR1K_TLS_DTPMOD:
-		return REL_DTPMOD;
-	case R_OR1K_TLS_DTPOFF:
-		return REL_DTPOFF;
-	case R_OR1K_TLS_TPOFF:
-		return REL_TPOFF;
-	}
-	return 0;
-}
-
-#include "syscall.h"
-void __reloc_self(int c, size_t *a, size_t *dynv)
-{
-	char dot = '.', ex = 'x';
-	char *base;
-	size_t t[20], n;
-	for (a+=c+1; *a; a++);
-	for (a++; *a; a+=2) if (*a<20) t[*a] = a[1];
-	base = (char *)t[AT_BASE];
-	if (!base) base = (char *)(t[AT_PHDR] & -t[AT_PAGESZ]);
-	for (a=dynv; *a; a+=2) if (*a<20) t[*a] = a[1];
-	n = t[DT_RELASZ];
-	for (a=(void *)(base+t[DT_RELA]); n; a+=3, n-=12)
-		if (a[1]%256 == R_OR1K_RELATIVE)
-			*(size_t *)(base+a[0]) = (size_t)base + a[2];
-}
+#define REL_SYMBOLIC    R_OR1K_32
+#define REL_GOT         R_OR1K_GLOB_DAT
+#define REL_PLT         R_OR1K_JMP_SLOT
+#define REL_RELATIVE    R_OR1K_RELATIVE
+#define REL_COPY        R_OR1K_COPY
+#define REL_DTPMOD      R_OR1K_TLS_DTPMOD
+#define REL_DTPOFF      R_OR1K_TLS_DTPOFF
+#define REL_TPOFF       R_OR1K_TLS_TPOFF
+
+#define CRTJMP(pc,sp) __asm__ __volatile__( \
+	"l.jr %0 ; l.ori r1,%1,0" : : "r"(pc), "r"(sp) : "memory" )
diff --git a/arch/powerpc/crt_arch.h b/arch/powerpc/crt_arch.h
index 8cc53d98..ec3cd29e 100644
--- a/arch/powerpc/crt_arch.h
+++ b/arch/powerpc/crt_arch.h
@@ -1,12 +1,19 @@
-__asm__("\
-.global _start \n\
-.type   _start, %function \n\
-_start: \n\
-	mr 3, 1 \n\
-	clrrwi 1, 1, 4 \n\
-	li 0, 0 \n\
-	stwu 1, -16(1) \n\
-	mtlr 0 \n\
-	stw 0, 0(1) \n\
-	bl __cstart \n\
-");        
+__asm__(
+".global " START " \n"
+".type   " START ", %function \n"
+START ": \n"
+"	bl 1f \n"
+".weak _DYNAMIC \n"
+".hidden _DYNAMIC \n"
+"	.long _DYNAMIC-. \n"
+"1:	mflr 4 \n"
+"	lwz 3, 0(4) \n"
+"	add 4, 3, 4 \n"
+"	mr 3, 1 \n"
+"	clrrwi 1, 1, 4 \n"
+"	li 0, 0 \n"
+"	stwu 1, -16(1) \n"
+"	mtlr 0 \n"
+"	stw 0, 0(1) \n"
+"	bl " START "_c \n"
+);
diff --git a/arch/powerpc/reloc.h b/arch/powerpc/reloc.h
index 73c583b7..aa5f8c93 100644
--- a/arch/powerpc/reloc.h
+++ b/arch/powerpc/reloc.h
@@ -1,44 +1,15 @@
-#include <string.h>
-#include <elf.h>
-
 #define LDSO_ARCH "powerpc"
 
 #define TPOFF_K (-0x7000)
 
-static int remap_rel(int type)
-{
-	switch(type) {
-	case R_PPC_ADDR32:
-		return REL_SYMBOLIC;
-	case R_PPC_GLOB_DAT:
-		return REL_GOT;
-	case R_PPC_JMP_SLOT:
-		return REL_PLT;
-	case R_PPC_RELATIVE:
-		return REL_RELATIVE;
-	case R_PPC_COPY:
-		return REL_COPY;
-	case R_PPC_DTPMOD32:
-		return REL_DTPMOD;
-	case R_PPC_DTPREL32:
-		return REL_DTPOFF;
-	case R_PPC_TPREL32:
-		return REL_TPOFF;
-	}
-	return 0;
-}
-
-void __reloc_self(int c, size_t *a, size_t *dynv)
-{
-	char *base;
-	size_t t[20], n;
-	for (a+=c+1; *a; a++);
-	for (a++; *a; a+=2) if (*a<20) t[*a] = a[1];
-	base = (char *)t[AT_BASE];
-	if (!base) base = (char *)(t[AT_PHDR] & -t[AT_PAGESZ]);
-	for (a=dynv; *a; a+=2) if (*a<20) t[*a] = a[1];
-	n = t[DT_RELASZ];
-	for (a=(void *)(base+t[DT_RELA]); n; a+=3, n-=12)
-		if (a[1]%256 == R_PPC_RELATIVE)
-			*(size_t *)(base+a[0]) = (size_t)base + a[2];
-}
+#define REL_SYMBOLIC    R_PPC_ADDR32
+#define REL_GOT         R_PPC_GLOB_DAT
+#define REL_PLT         R_PPC_JMP_SLOT
+#define REL_RELATIVE    R_PPC_RELATIVE
+#define REL_COPY        R_PPC_COPY
+#define REL_DTPMOD      R_PPC_DTPMOD32
+#define REL_DTPOFF      R_PPC_DTPREL32
+#define REL_TPOFF       R_PPC_TPREL32
+
+#define CRTJMP(pc,sp) __asm__ __volatile__( \
+	"mr 1,%1 ; mtlr %0 ; blr" : : "r"(pc), "r"(sp) : "memory" )
diff --git a/arch/sh/crt_arch.h b/arch/sh/crt_arch.h
index 5fd39fc6..a873ffdb 100644
--- a/arch/sh/crt_arch.h
+++ b/arch/sh/crt_arch.h
@@ -1,12 +1,22 @@
-__asm__("\
-.global _start \n\
-_start: \n\
-	mov r15, r4 \n\
-	mov #-16, r0 \n\
-	and r0, r15 \n\
-	bsr __cstart \n\
-	nop \n\
-");
+__asm__(
+".global " START " \n"
+START ": \n"
+"	mova 1f, r0 \n"
+"	mov.l 1f, r5 \n"
+"	add r0, r5 \n"
+"	mov r15, r4 \n"
+"	mov #-16, r0 \n"
+"	and r0, r15 \n"
+"	bsr " START "_c \n"
+"	nop \n"
+".align 2 \n"
+".weak _DYNAMIC \n"
+".hidden _DYNAMIC \n"
+"1:	.long _DYNAMIC-. \n"
+);
 
 /* used by gcc for switching the FPU between single and double precision */
+#ifdef SHARED
+__attribute__((__visibility__("hidden")))
+#endif
 const unsigned long __fpscr_values[2] = { 0, 0x80000 };
diff --git a/arch/sh/reloc.h b/arch/sh/reloc.h
index aeb02d05..e7e4b38f 100644
--- a/arch/sh/reloc.h
+++ b/arch/sh/reloc.h
@@ -8,27 +8,15 @@
 
 #define TPOFF_K 8
 
-static int remap_rel(int type)
-{
-	switch(type) {
-	case R_SH_DIR32:
-		return REL_SYMBOLIC;
-	case R_SH_REL32:
-		return REL_OFFSET;
-	case R_SH_GLOB_DAT:
-		return REL_GOT;
-	case R_SH_JMP_SLOT:
-		return REL_PLT;
-	case R_SH_RELATIVE:
-		return REL_RELATIVE;
-	case R_SH_COPY:
-		return REL_COPY;
-	case R_SH_TLS_DTPMOD32:
-		return REL_DTPMOD;
-	case R_SH_TLS_DTPOFF32:
-		return REL_DTPOFF;
-	case R_SH_TLS_TPOFF32:
-		return REL_TPOFF;
-	}
-	return 0;
-}
+#define REL_SYMBOLIC    R_SH_DIR32
+#define REL_OFFSET      R_SH_REL32
+#define REL_GOT         R_SH_GLOB_DAT
+#define REL_PLT         R_SH_JMP_SLOT
+#define REL_RELATIVE    R_SH_RELATIVE
+#define REL_COPY        R_SH_COPY
+#define REL_DTPMOD      R_SH_TLS_DTPMOD32
+#define REL_DTPOFF      R_SH_TLS_DTPOFF32
+#define REL_TPOFF       R_SH_TLS_TPOFF32
+
+#define CRTJMP(pc,sp) __asm__ __volatile__( \
+	"jmp @%0 ; mov %1,r15" : : "r"(pc), "r"(sp) : "memory" )
diff --git a/arch/sh/src/__fpscr_values.c b/arch/sh/src/__fpscr_values.c
index 64b458f9..374df30e 100644
--- a/arch/sh/src/__fpscr_values.c
+++ b/arch/sh/src/__fpscr_values.c
@@ -1,5 +1,5 @@
 #include "libc.h"
 
 /* used by gcc for switching the FPU between single and double precision */
-const unsigned long __fpscr_values[2] ATTR_LIBC_VISIBILITY = { 0, 0x80000 };
+//const unsigned long __fpscr_values[2] ATTR_LIBC_VISIBILITY = { 0, 0x80000 };
 
diff --git a/arch/x32/crt_arch.h b/arch/x32/crt_arch.h
index db692950..3eec61bd 100644
--- a/arch/x32/crt_arch.h
+++ b/arch/x32/crt_arch.h
@@ -1,9 +1,12 @@
-__asm__("\
-.text \n\
-.global _start \n\
-_start: \n\
-	xor %rbp,%rbp \n\
-	mov %rsp,%rdi \n\
-	andq $-16,%rsp \n\
-	call __cstart \n\
-");
+__asm__(
+".text \n"
+".global " START " \n"
+START ": \n"
+"	xor %rbp,%rbp \n"
+"	mov %rsp,%rdi \n"
+".weak _DYNAMIC \n"
+".hidden _DYNAMIC \n"
+"	lea _DYNAMIC(%rip),%rsi \n"
+"	andq $-16,%rsp \n"
+"	call " START "_c \n"
+);
diff --git a/arch/x32/reloc.h b/arch/x32/reloc.h
index fcfbf99e..7c72d266 100644
--- a/arch/x32/reloc.h
+++ b/arch/x32/reloc.h
@@ -1,7 +1,3 @@
-#include <stdint.h>
-#include <string.h>
-#include <elf.h>
-
 #define LDSO_ARCH "x32"
 
 /* FIXME: x32 is very strange in its use of 64-bit relocation types in
@@ -11,30 +7,20 @@
  * checked. In particular, R_X86_64_64, R_X86_64_DTPOFF64, and
  * R_X86_64_TPOFF64 may need checking. */
 
-static int remap_rel(int type)
-{
-	switch(type) {
-	case R_X86_64_64:
-	case R_X86_64_32:
-		return REL_SYMBOLIC;
-	case R_X86_64_PC32:
-		return REL_OFFSET;
-	case R_X86_64_GLOB_DAT:
-		return REL_GOT;
-	case R_X86_64_JUMP_SLOT:
-		return REL_PLT;
-	case R_X86_64_RELATIVE:
-		return REL_RELATIVE;
-	case R_X86_64_COPY:
-		return REL_COPY;
-	case R_X86_64_DTPMOD64:
-		return REL_DTPMOD;
-	case R_X86_64_DTPOFF64:
-	case R_X86_64_DTPOFF32:
-		return REL_DTPOFF;
-	case R_X86_64_TPOFF64:
-	case R_X86_64_TPOFF32:
-		return REL_TPOFF;
-	}
-	return 0;
-}
+/* The R_X86_64_64, R_X86_64_DTPOFF32, and R_X86_64_TPOFF32 reloc types
+ * were previously mapped in the switch table form of this file; however,
+ * they do not seem to be used/usable for anything. If needed, new
+ * mappings will have to be added. */
+
+#define REL_SYMBOLIC    R_X86_64_32
+#define REL_OFFSET      R_X86_64_PC32
+#define REL_GOT         R_X86_64_GLOB_DAT
+#define REL_PLT         R_X86_64_JUMP_SLOT
+#define REL_RELATIVE    R_X86_64_RELATIVE
+#define REL_COPY        R_X86_64_COPY
+#define REL_DTPMOD      R_X86_64_DTPMOD64
+#define REL_DTPOFF      R_X86_64_DTPOFF64
+#define REL_TPOFF       R_X86_64_TPOFF64
+
+#define CRTJMP(pc,sp) __asm__ __volatile__( \
+	"mov %1,%%esp ; jmp *%0" : : "r"(pc), "r"(sp) : "memory" )
diff --git a/arch/x86_64/crt_arch.h b/arch/x86_64/crt_arch.h
index db692950..3eec61bd 100644
--- a/arch/x86_64/crt_arch.h
+++ b/arch/x86_64/crt_arch.h
@@ -1,9 +1,12 @@
-__asm__("\
-.text \n\
-.global _start \n\
-_start: \n\
-	xor %rbp,%rbp \n\
-	mov %rsp,%rdi \n\
-	andq $-16,%rsp \n\
-	call __cstart \n\
-");
+__asm__(
+".text \n"
+".global " START " \n"
+START ": \n"
+"	xor %rbp,%rbp \n"
+"	mov %rsp,%rdi \n"
+".weak _DYNAMIC \n"
+".hidden _DYNAMIC \n"
+"	lea _DYNAMIC(%rip),%rsi \n"
+"	andq $-16,%rsp \n"
+"	call " START "_c \n"
+);
diff --git a/arch/x86_64/reloc.h b/arch/x86_64/reloc.h
index 9bc58496..84c075c3 100644
--- a/arch/x86_64/reloc.h
+++ b/arch/x86_64/reloc.h
@@ -1,32 +1,15 @@
-#include <stdint.h>
-#include <string.h>
-#include <elf.h>
-
 #define LDSO_ARCH "x86_64"
 
-static int remap_rel(int type)
-{
-	switch(type) {
-	case R_X86_64_64:
-		return REL_SYMBOLIC;
-	case R_X86_64_PC32:
-		return REL_OFFSET32;
-	case R_X86_64_GLOB_DAT:
-		return REL_GOT;
-	case R_X86_64_JUMP_SLOT:
-		return REL_PLT;
-	case R_X86_64_RELATIVE:
-		return REL_RELATIVE;
-	case R_X86_64_COPY:
-		return REL_COPY;
-	case R_X86_64_DTPMOD64:
-		return REL_DTPMOD;
-	case R_X86_64_DTPOFF64:
-		return REL_DTPOFF;
-	case R_X86_64_TPOFF64:
-		return REL_TPOFF;
-	case R_X86_64_TLSDESC:
-		return REL_TLSDESC;
-	}
-	return 0;
-}
+#define REL_SYMBOLIC    R_X86_64_64
+#define REL_OFFSET32    R_X86_64_PC32
+#define REL_GOT         R_X86_64_GLOB_DAT
+#define REL_PLT         R_X86_64_JUMP_SLOT
+#define REL_RELATIVE    R_X86_64_RELATIVE
+#define REL_COPY        R_X86_64_COPY
+#define REL_DTPMOD      R_X86_64_DTPMOD64
+#define REL_DTPOFF      R_X86_64_DTPOFF64
+#define REL_TPOFF       R_X86_64_TPOFF64
+#define REL_TLSDESC     R_X86_64_TLSDESC
+
+#define CRTJMP(pc,sp) __asm__ __volatile__( \
+	"mov %1,%%rsp ; jmp *%0" : : "r"(pc), "r"(sp) : "memory" )
diff --git a/crt/crt1.c b/crt/crt1.c
index 14c4a52a..af02af94 100644
--- a/crt/crt1.c
+++ b/crt/crt1.c
@@ -1,5 +1,7 @@
 #include <features.h>
 
+#define START "_start"
+
 #include "crt_arch.h"
 
 int main();
@@ -8,7 +10,7 @@ void _fini() __attribute__((weak));
 _Noreturn int __libc_start_main(int (*)(), int, char **,
 	void (*)(), void(*)(), void(*)());
 
-void __cstart(long *p)
+void _start_c(long *p)
 {
 	int argc = p[0];
 	char **argv = (void *)(p+1);
diff --git a/src/internal/dynlink.h b/src/internal/dynlink.h
new file mode 100644
index 00000000..53661d62
--- /dev/null
+++ b/src/internal/dynlink.h
@@ -0,0 +1,57 @@
+#ifndef _INTERNAL_RELOC_H
+#define _INTERNAL_RELOC_H
+
+#include <features.h>
+#include <elf.h>
+#include <stdint.h>
+
+#if UINTPTR_MAX == 0xffffffff
+typedef Elf32_Ehdr Ehdr;
+typedef Elf32_Phdr Phdr;
+typedef Elf32_Sym Sym;
+#define R_TYPE(x) ((x)&255)
+#define R_SYM(x) ((x)>>8)
+#else
+typedef Elf64_Ehdr Ehdr;
+typedef Elf64_Phdr Phdr;
+typedef Elf64_Sym Sym;
+#define R_TYPE(x) ((x)&0x7fffffff)
+#define R_SYM(x) ((x)>>32)
+#endif
+
+/* These enum constants provide unmatchable default values for
+ * any relocation type the arch does not use. */
+enum {
+	REL_NONE = 0,
+	REL_SYMBOLIC = -100,
+	REL_GOT,
+	REL_PLT,
+	REL_RELATIVE,
+	REL_OFFSET,
+	REL_OFFSET32,
+	REL_COPY,
+	REL_SYM_OR_REL,
+	REL_DTPMOD,
+	REL_DTPOFF,
+	REL_TPOFF,
+	REL_TPOFF_NEG,
+	REL_TLSDESC,
+};
+
+#include "reloc.h"
+
+#define IS_RELATIVE(x) ( \
+	(R_TYPE(x) == REL_RELATIVE) || \
+	(R_TYPE(x) == REL_SYM_OR_REL && !R_SYM(x)) )
+
+#ifndef NEED_MIPS_GOT_RELOCS
+#define NEED_MIPS_GOT_RELOCS 0
+#endif
+
+#define AUX_CNT 32
+#define DYN_CNT 32
+
+typedef void (*stage2_func)(unsigned char *);
+typedef _Noreturn void (*stage3_func)(size_t *);
+
+#endif
diff --git a/src/ldso/aarch64/start.s b/src/ldso/aarch64/start.s
deleted file mode 100644
index 41d1d1e2..00000000
--- a/src/ldso/aarch64/start.s
+++ /dev/null
@@ -1,18 +0,0 @@
-.global _dlstart
-_dlstart:
-	ldr x0,[sp]
-	add x1,sp,#8
-	bl __dynlink
-	mov x1,sp
-	ldr x2,[x1],#8
-1:	sub x2,x2,1
-	ldr x3,[x1],#8
-	cmn x3,#1
-	b.eq 1b
-	add x2,x2,1
-	str x3,[x1,#-8]!
-	str x2,[x1,#-8]!
-	mov sp,x1
-	mov x1,x0
-	mov x0,#0
-	blr x1
diff --git a/src/ldso/arm/start.s b/src/ldso/arm/start.s
deleted file mode 100644
index 5dd93b55..00000000
--- a/src/ldso/arm/start.s
+++ /dev/null
@@ -1,18 +0,0 @@
-.text
-.global _dlstart
-_dlstart:
-	ldr r0,[sp]
-	add r1,sp,#4
-	bl __dynlink
-	pop {r1}
-1:	sub r1,r1,#1
-	pop {r2}
-	cmp r2,#-1
-	beq 1b
-	add r1,r1,#1
-	push {r1,r2}
-	mov r1,r0
-	mov r0,#0
-	tst r1,#1
-	moveq pc,r1
-	bx r1
diff --git a/src/ldso/dlstart.c b/src/ldso/dlstart.c
new file mode 100644
index 00000000..5bd2a080
--- /dev/null
+++ b/src/ldso/dlstart.c
@@ -0,0 +1,107 @@
+#include <stddef.h>
+#include "dynlink.h"
+
+#ifdef SHARED
+
+#ifndef START
+#define START "_dlstart"
+#endif
+
+#include "crt_arch.h"
+
+void _dlstart_c(size_t *sp, size_t *dynv)
+{
+	size_t i, aux[AUX_CNT], dyn[DYN_CNT];
+
+	int argc = *sp;
+	char **argv = (void *)(sp+1);
+
+	for (i=argc+1; argv[i]; i++);
+	size_t *auxv = (void *)(argv+i+1);
+
+	for (i=0; i<AUX_CNT; i++) aux[i] = 0;
+	for (i=0; auxv[i]; i+=2) if (auxv[i]<AUX_CNT)
+		aux[auxv[i]] = auxv[i+1];
+
+	for (i=0; i<DYN_CNT; i++) dyn[i] = 0;
+	for (i=0; dynv[i]; i+=2) if (dynv[i]<DYN_CNT)
+		dyn[dynv[i]] = dynv[i+1];
+
+	/* If the dynamic linker is invoked as a command, its load
+	 * address is not available in the aux vector. Instead, compute
+	 * the load address as the difference between &_DYNAMIC and the
+	 * virtual address in the PT_DYNAMIC program header. */
+	unsigned char *base = (void *)aux[AT_BASE];
+	if (!base) {
+		size_t phnum = aux[AT_PHNUM];
+		size_t phentsize = aux[AT_PHENT];
+		Phdr *ph = (void *)aux[AT_PHDR];
+		for (i=phnum; i--; ph = (void *)((char *)ph + phentsize)) {
+			if (ph->p_type == PT_DYNAMIC) {
+				base = (void *)((size_t)dynv - ph->p_vaddr);
+				break;
+			}
+		}
+	}
+
+	/* MIPS uses an ugly packed form for GOT relocations. Since we
+	 * can't make function calls yet and the code is tiny anyway,
+	 * it's simply inlined here. */
+	if (NEED_MIPS_GOT_RELOCS) {
+		size_t local_cnt = 0;
+		size_t *got = (void *)(base + dyn[DT_PLTGOT]);
+		for (i=0; dynv[i]; i+=2) if (dynv[i]==DT_MIPS_LOCAL_GOTNO)
+			local_cnt = dynv[i+1];
+		for (i=0; i<local_cnt; i++) got[i] += (size_t)base;
+	}
+
+	/* The use of the reloc_info structure and nested loops is a trick
+	 * to work around the fact that we can't necessarily make function
+	 * calls yet. Each struct in the array serves like the arguments
+	 * to a function call. */
+	struct {
+		void *rel;
+		size_t size;
+		size_t stride;
+	} reloc_info[] = {
+		{ base+dyn[DT_JMPREL], dyn[DT_PLTRELSZ], 2+(dyn[DT_PLTREL]==DT_RELA) },
+		{ base+dyn[DT_REL], dyn[DT_RELSZ], 2 },
+		{ base+dyn[DT_RELA], dyn[DT_RELASZ], 3 },
+		{ 0, 0, 0 }
+	};
+
+	for (i=0; reloc_info[i].stride; i++) {
+		size_t *rel = reloc_info[i].rel;
+		size_t rel_size = reloc_info[i].size;
+		size_t stride = reloc_info[i].stride;
+		for (; rel_size; rel+=stride, rel_size-=stride*sizeof(size_t)) {
+			if (!IS_RELATIVE(rel[1])) continue;
+			size_t *rel_addr = (void *)(base + rel[0]);
+			size_t addend = stride==3 ? rel[2] : *rel_addr;
+			*rel_addr = (size_t)base + addend;
+		}
+	}
+
+	const char *strings = (void *)(base + dyn[DT_STRTAB]);
+	const Sym *syms = (void *)(base + dyn[DT_SYMTAB]);
+
+	/* Call dynamic linker stage-2, __dls2 */
+	for (i=0; ;i++) {
+		const char *s = strings + syms[i].st_name;
+		if (s[0]=='_' && s[1]=='_' && s[2]=='d'
+		 && s[3]=='l' && s[4]=='s' && s[5]=='2' && !s[6])
+			break;
+	}
+	((stage2_func)(base + syms[i].st_value))(base);
+
+	/* Call dynamic linker stage-3, __dls3 */
+	for (i=0; ;i++) {
+		const char *s = strings + syms[i].st_name;
+		if (s[0]=='_' && s[1]=='_' && s[2]=='d'
+		 && s[3]=='l' && s[4]=='s' && s[5]=='3' && !s[6])
+			break;
+	}
+	((stage3_func)(base + syms[i].st_value))(sp);
+}
+
+#endif
diff --git a/src/ldso/dynlink.c b/src/ldso/dynlink.c
index f6ed8011..1008e3ea 100644
--- a/src/ldso/dynlink.c
+++ b/src/ldso/dynlink.c
@@ -19,26 +19,13 @@
 #include <dlfcn.h>
 #include "pthread_impl.h"
 #include "libc.h"
+#include "dynlink.h"
 
 static int errflag;
 static char errbuf[128];
 
 #ifdef SHARED
 
-#if ULONG_MAX == 0xffffffff
-typedef Elf32_Ehdr Ehdr;
-typedef Elf32_Phdr Phdr;
-typedef Elf32_Sym Sym;
-#define R_TYPE(x) ((x)&255)
-#define R_SYM(x) ((x)>>8)
-#else
-typedef Elf64_Ehdr Ehdr;
-typedef Elf64_Phdr Phdr;
-typedef Elf64_Sym Sym;
-#define R_TYPE(x) ((x)&0xffffffff)
-#define R_SYM(x) ((x)>>32)
-#endif
-
 #define MAXP2(a,b) (-(-(a)&-(b)))
 #define ALIGN(x,y) ((x)+(y)-1 & -(y))
 
@@ -88,6 +75,7 @@ struct dso {
 	volatile int new_dtv_idx, new_tls_idx;
 	struct td_index *td_index;
 	struct dso *fini_next;
+	int rel_early_relative, rel_update_got;
 	char *shortname;
 	char buf[];
 };
@@ -97,26 +85,6 @@ struct symdef {
 	struct dso *dso;
 };
 
-enum {
-	REL_ERR,
-	REL_SYMBOLIC,
-	REL_GOT,
-	REL_PLT,
-	REL_RELATIVE,
-	REL_OFFSET,
-	REL_OFFSET32,
-	REL_COPY,
-	REL_SYM_OR_REL,
-	REL_TLS, /* everything past here is TLS */
-	REL_DTPMOD,
-	REL_DTPOFF,
-	REL_TPOFF,
-	REL_TPOFF_NEG,
-	REL_TLSDESC,
-};
-
-#include "reloc.h"
-
 int __init_tp(void *);
 void __init_libc(char **, char *);
 
@@ -129,7 +97,8 @@ static struct builtin_tls {
 } builtin_tls[1];
 #define MIN_TLS_ALIGN offsetof(struct builtin_tls, pt)
 
-static struct dso *head, *tail, *ldso, *fini_head;
+static struct dso ldso;
+static struct dso *head, *tail, *fini_head;
 static char *env_path, *sys_path;
 static unsigned long long gencnt;
 static int runtime;
@@ -145,14 +114,19 @@ static pthread_mutex_t init_fini_lock = { ._m_type = PTHREAD_MUTEX_RECURSIVE };
 
 struct debug *_dl_debug_addr = &debug;
 
-#define AUX_CNT 38
-#define DYN_CNT 34
+static int dl_strcmp(const char *l, const char *r)
+{
+	for (; *l==*r && *l; l++, r++);
+	return *(unsigned char *)l - *(unsigned char *)r;
+}
+#define strcmp(l,r) dl_strcmp(l,r)
 
 static void decode_vec(size_t *v, size_t *a, size_t cnt)
 {
-	memset(a, 0, cnt*sizeof(size_t));
-	for (; v[0]; v+=2) if (v[0]<cnt) {
-		a[0] |= 1ULL<<v[0];
+	size_t i;
+	for (i=0; i<cnt; i++) a[i] = 0;
+	for (; v[0]; v+=2) if (v[0]-1<cnt-1) {
+		a[0] |= 1UL<<v[0];
 		a[v[0]] = v[1];
 	}
 }
@@ -276,8 +250,6 @@ static struct symdef find_sym(struct dso *dso, const char *s, int need_def)
 	return def;
 }
 
-#define NO_INLINE_ADDEND (1<<REL_COPY | 1<<REL_GOT | 1<<REL_PLT)
-
 ptrdiff_t __tlsdesc_static(), __tlsdesc_dynamic();
 
 static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stride)
@@ -288,7 +260,7 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
 	Sym *sym;
 	const char *name;
 	void *ctx;
-	int astype, type;
+	int type;
 	int sym_index;
 	struct symdef def;
 	size_t *reloc_addr;
@@ -297,14 +269,8 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
 	size_t addend;
 
 	for (; rel_size; rel+=stride, rel_size-=stride*sizeof(size_t)) {
-		astype = R_TYPE(rel[1]);
-		if (!astype) continue;
-		type = remap_rel(astype);
-		if (!type) {
-			error("Error relocating %s: unsupported relocation type %d",
-				dso->name, astype);
-			continue;
-		}
+		if (dso->rel_early_relative && IS_RELATIVE(rel[1])) continue;
+		type = R_TYPE(rel[1]);
 		sym_index = R_SYM(rel[1]);
 		reloc_addr = (void *)(base + rel[0]);
 		if (sym_index) {
@@ -324,14 +290,19 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
 			def.dso = dso;
 		}
 
+		int gotplt = (type == REL_GOT || type == REL_PLT);
+		if (dso->rel_update_got && !gotplt) continue;
+
 		addend = stride>2 ? rel[2]
-			: (1<<type & NO_INLINE_ADDEND) ? 0
+			: gotplt || type==REL_COPY ? 0
 			: *reloc_addr;
 
 		sym_val = def.sym ? (size_t)def.dso->base+def.sym->st_value : 0;
 		tls_val = def.sym ? def.sym->st_value : 0;
 
 		switch(type) {
+		case REL_NONE:
+			break;
 		case REL_OFFSET:
 			addend -= (size_t)reloc_addr;
 		case REL_SYMBOLIC:
@@ -395,6 +366,10 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
 #endif
 			}
 			break;
+		default:
+			error("Error relocating %s: unsupported relocation type %d",
+				dso->name, type);
+			continue;
 		}
 	}
 }
@@ -711,22 +686,22 @@ static struct dso *load_library(const char *name, struct dso *needed_by)
 					if (!(reported & mask)) {
 						reported |= mask;
 						dprintf(1, "\t%s => %s (%p)\n",
-							name, ldso->name,
-							ldso->base);
+							name, ldso.name,
+							ldso.base);
 					}
 				}
 				is_self = 1;
 			}
 		}
 	}
-	if (!strcmp(name, ldso->name)) is_self = 1;
+	if (!strcmp(name, ldso.name)) is_self = 1;
 	if (is_self) {
-		if (!ldso->prev) {
-			tail->next = ldso;
-			ldso->prev = tail;
-			tail = ldso->next ? ldso->next : ldso;
+		if (!ldso.prev) {
+			tail->next = &ldso;
+			ldso.prev = tail;
+			tail = ldso.next ? ldso.next : &ldso;
 		}
-		return ldso;
+		return &ldso;
 	}
 	if (strchr(name, '/')) {
 		pathname = name;
@@ -752,13 +727,13 @@ static struct dso *load_library(const char *name, struct dso *needed_by)
 			if (!sys_path) {
 				char *prefix = 0;
 				size_t prefix_len;
-				if (ldso->name[0]=='/') {
+				if (ldso.name[0]=='/') {
 					char *s, *t, *z;
-					for (s=t=z=ldso->name; *s; s++)
+					for (s=t=z=ldso.name; *s; s++)
 						if (*s=='/') z=t, t=s;
-					prefix_len = z-ldso->name;
+					prefix_len = z-ldso.name;
 					if (prefix_len < PATH_MAX)
-						prefix = ldso->name;
+						prefix = ldso.name;
 				}
 				if (!prefix) {
 					prefix = "";
@@ -910,21 +885,40 @@ static void make_global(struct dso *p)
 	for (; p; p=p->next) p->global = 1;
 }
 
+static void do_mips_relocs(struct dso *p, size_t *got)
+{
+	size_t i, j, rel[2];
+	unsigned char *base = p->base;
+	i=0; search_vec(p->dynv, &i, DT_MIPS_LOCAL_GOTNO);
+	if (p->rel_early_relative) {
+		got += i;
+	} else {
+		while (i--) *got++ += (size_t)base;
+	}
+	j=0; search_vec(p->dynv, &j, DT_MIPS_GOTSYM);
+	i=0; search_vec(p->dynv, &i, DT_MIPS_SYMTABNO);
+	Sym *sym = p->syms + j;
+	rel[0] = (unsigned char *)got - base;
+	for (i-=j; i; i--, sym++, rel[0]+=sizeof(size_t)) {
+		rel[1] = sym-p->syms << 8 | R_MIPS_JUMP_SLOT;
+		do_relocs(p, rel, sizeof rel, 2);
+	}
+}
+
 static void reloc_all(struct dso *p)
 {
 	size_t dyn[DYN_CNT] = {0};
 	for (; p; p=p->next) {
 		if (p->relocated) continue;
 		decode_vec(p->dynv, dyn, DYN_CNT);
-#ifdef NEED_ARCH_RELOCS
-		do_arch_relocs(p, head);
-#endif
+		if (NEED_MIPS_GOT_RELOCS)
+			do_mips_relocs(p, (void *)(p->base+dyn[DT_PLTGOT]));
 		do_relocs(p, (void *)(p->base+dyn[DT_JMPREL]), dyn[DT_PLTRELSZ],
 			2+(dyn[DT_PLTREL]==DT_RELA));
 		do_relocs(p, (void *)(p->base+dyn[DT_REL]), dyn[DT_RELSZ], 2);
 		do_relocs(p, (void *)(p->base+dyn[DT_RELA]), dyn[DT_RELASZ], 3);
 
-		if (p->relro_start != p->relro_end &&
+		if (head != &ldso && p->relro_start != p->relro_end &&
 		    mprotect(p->base+p->relro_start, p->relro_end-p->relro_start, PROT_READ) < 0) {
 			error("Error relocating %s: RELRO protection failed: %m",
 				p->name);
@@ -1121,19 +1115,52 @@ static void update_tls_size()
 	tls_align);
 }
 
-void *__dynlink(int argc, char **argv)
+/* Stage 1 of the dynamic linker is defined in dlstart.c. It calls the
+ * following stage 2 and stage 3 functions via primitive symbolic lookup
+ * since it does not have access to their addresses to begin with. */
+
+/* Stage 2 of the dynamic linker is called after relative relocations 
+ * have been processed. It can make function calls to static functions
+ * and access string literals and static data, but cannot use extern
+ * symbols. Its job is to perform symbolic relocations on the dynamic
+ * linker itself, but some of the relocations performed may need to be
+ * replaced later due to copy relocations in the main program. */
+
+void __dls2(unsigned char *base)
 {
-	size_t aux[AUX_CNT] = {0};
+	Ehdr *ehdr = (void *)base;
+	ldso.base = base;
+	ldso.name = ldso.shortname = "libc.so";
+	ldso.global = 1;
+	ldso.phnum = ehdr->e_phnum;
+	ldso.phdr = (void *)(base + ehdr->e_phoff);
+	ldso.phentsize = ehdr->e_phentsize;
+	ldso.rel_early_relative = 1;
+	kernel_mapped_dso(&ldso);
+	decode_dyn(&ldso);
+
+	head = &ldso;
+	reloc_all(&ldso);
+
+	ldso.relocated = 0;
+	ldso.rel_update_got = 1;
+}
+
+/* Stage 3 of the dynamic linker is called with the dynamic linker/libc
+ * fully functional. Its job is to load (if not already loaded) and
+ * process dependencies and relocations for the main application and
+ * transfer control to its entry point. */
+
+_Noreturn void __dls3(size_t *sp)
+{
+	static struct dso app, vdso;
+	size_t aux[AUX_CNT] = {0}, *auxv;
 	size_t i;
-	Phdr *phdr;
-	Ehdr *ehdr;
-	static struct dso builtin_dsos[3];
-	struct dso *const app = builtin_dsos+0;
-	struct dso *const lib = builtin_dsos+1;
-	struct dso *const vdso = builtin_dsos+2;
 	char *env_preload=0;
 	size_t vdso_base;
-	size_t *auxv;
+	int argc = *sp;
+	char **argv = (void *)(sp+1);
+	char **argv_orig = argv;
 	char **envp = argv+argc+1;
 	void *initial_tls;
 
@@ -1157,60 +1184,42 @@ void *__dynlink(int argc, char **argv)
 	libc.page_size = aux[AT_PAGESZ];
 	libc.auxv = auxv;
 
-	/* If the dynamic linker was invoked as a program itself, AT_BASE
-	 * will not be set. In that case, we assume the base address is
-	 * the start of the page containing the PHDRs; I don't know any
-	 * better approach... */
-	if (!aux[AT_BASE]) {
-		aux[AT_BASE] = aux[AT_PHDR] & -PAGE_SIZE;
-		aux[AT_PHDR] = aux[AT_PHENT] = aux[AT_PHNUM] = 0;
-	}
-
-	/* The dynamic linker load address is passed by the kernel
-	 * in the AUX vector, so this is easy. */
-	lib->base = (void *)aux[AT_BASE];
-	lib->name = lib->shortname = "libc.so";
-	lib->global = 1;
-	ehdr = (void *)lib->base;
-	lib->phnum = ehdr->e_phnum;
-	lib->phdr = (void *)(aux[AT_BASE]+ehdr->e_phoff);
-	lib->phentsize = ehdr->e_phentsize;
-	kernel_mapped_dso(lib);
-	decode_dyn(lib);
-
-	if (aux[AT_PHDR]) {
+	/* If the main program was already loaded by the kernel,
+	 * AT_PHDR will point to some location other than the dynamic
+	 * linker's program headers. */
+	if (aux[AT_PHDR] != (size_t)ldso.phdr) {
 		size_t interp_off = 0;
 		size_t tls_image = 0;
 		/* Find load address of the main program, via AT_PHDR vs PT_PHDR. */
-		app->phdr = phdr = (void *)aux[AT_PHDR];
-		app->phnum = aux[AT_PHNUM];
-		app->phentsize = aux[AT_PHENT];
+		Phdr *phdr = app.phdr = (void *)aux[AT_PHDR];
+		app.phnum = aux[AT_PHNUM];
+		app.phentsize = aux[AT_PHENT];
 		for (i=aux[AT_PHNUM]; i; i--, phdr=(void *)((char *)phdr + aux[AT_PHENT])) {
 			if (phdr->p_type == PT_PHDR)
-				app->base = (void *)(aux[AT_PHDR] - phdr->p_vaddr);
+				app.base = (void *)(aux[AT_PHDR] - phdr->p_vaddr);
 			else if (phdr->p_type == PT_INTERP)
 				interp_off = (size_t)phdr->p_vaddr;
 			else if (phdr->p_type == PT_TLS) {
 				tls_image = phdr->p_vaddr;
-				app->tls_len = phdr->p_filesz;
-				app->tls_size = phdr->p_memsz;
-				app->tls_align = phdr->p_align;
+				app.tls_len = phdr->p_filesz;
+				app.tls_size = phdr->p_memsz;
+				app.tls_align = phdr->p_align;
 			}
 		}
-		if (app->tls_size) app->tls_image = (char *)app->base + tls_image;
-		if (interp_off) lib->name = (char *)app->base + interp_off;
+		if (app.tls_size) app.tls_image = (char *)app.base + tls_image;
+		if (interp_off) ldso.name = (char *)app.base + interp_off;
 		if ((aux[0] & (1UL<<AT_EXECFN))
 		    && strncmp((char *)aux[AT_EXECFN], "/proc/", 6))
-			app->name = (char *)aux[AT_EXECFN];
+			app.name = (char *)aux[AT_EXECFN];
 		else
-			app->name = argv[0];
-		kernel_mapped_dso(app);
+			app.name = argv[0];
+		kernel_mapped_dso(&app);
 	} else {
 		int fd;
 		char *ldname = argv[0];
 		size_t l = strlen(ldname);
 		if (l >= 3 && !strcmp(ldname+l-3, "ldd")) ldd_mode = 1;
-		*argv++ = (void *)-1;
+		argv++;
 		while (argv[0] && argv[0][0]=='-' && argv[0][1]=='-') {
 			char *opt = argv[0]+2;
 			*argv++ = (void *)-1;
@@ -1229,8 +1238,8 @@ void *__dynlink(int argc, char **argv)
 			} else {
 				argv[0] = 0;
 			}
-			argv[-1] = (void *)-1;
 		}
+		argv[-1] = (void *)(argc - (argv-argv_orig));
 		if (!argv[0]) {
 			dprintf(2, "musl libc\n"
 				"Version %s\n"
@@ -1246,96 +1255,88 @@ void *__dynlink(int argc, char **argv)
 			_exit(1);
 		}
 		runtime = 1;
-		ehdr = (void *)map_library(fd, app);
+		Ehdr *ehdr = (void *)map_library(fd, &app);
 		if (!ehdr) {
 			dprintf(2, "%s: %s: Not a valid dynamic program\n", ldname, argv[0]);
 			_exit(1);
 		}
 		runtime = 0;
 		close(fd);
-		lib->name = ldname;
-		app->name = argv[0];
-		aux[AT_ENTRY] = (size_t)app->base + ehdr->e_entry;
+		ldso.name = ldname;
+		app.name = argv[0];
+		aux[AT_ENTRY] = (size_t)app.base + ehdr->e_entry;
 		/* Find the name that would have been used for the dynamic
 		 * linker had ldd not taken its place. */
 		if (ldd_mode) {
-			for (i=0; i<app->phnum; i++) {
-				if (app->phdr[i].p_type == PT_INTERP)
-					lib->name = (void *)(app->base
-						+ app->phdr[i].p_vaddr);
+			for (i=0; i<app.phnum; i++) {
+				if (app.phdr[i].p_type == PT_INTERP)
+					ldso.name = (void *)(app.base
+						+ app.phdr[i].p_vaddr);
 			}
-			dprintf(1, "\t%s (%p)\n", lib->name, lib->base);
+			dprintf(1, "\t%s (%p)\n", ldso.name, ldso.base);
 		}
 	}
-	if (app->tls_size) {
-		app->tls_id = tls_cnt = 1;
+	if (app.tls_size) {
+		app.tls_id = tls_cnt = 1;
 #ifdef TLS_ABOVE_TP
-		app->tls_offset = 0;
-		tls_offset = app->tls_size
-			+ ( -((uintptr_t)app->tls_image + app->tls_size)
-			& (app->tls_align-1) );
+		app.tls_offset = 0;
+		tls_offset = app.tls_size
+			+ ( -((uintptr_t)app.tls_image + app.tls_size)
+			& (app.tls_align-1) );
 #else
-		tls_offset = app->tls_offset = app->tls_size
-			+ ( -((uintptr_t)app->tls_image + app->tls_size)
-			& (app->tls_align-1) );
+		tls_offset = app.tls_offset = app.tls_size
+			+ ( -((uintptr_t)app.tls_image + app.tls_size)
+			& (app.tls_align-1) );
 #endif
-		tls_align = MAXP2(tls_align, app->tls_align);
+		tls_align = MAXP2(tls_align, app.tls_align);
 	}
-	app->global = 1;
-	decode_dyn(app);
+	app.global = 1;
+	decode_dyn(&app);
 
 	/* Attach to vdso, if provided by the kernel */
 	if (search_vec(auxv, &vdso_base, AT_SYSINFO_EHDR)) {
-		ehdr = (void *)vdso_base;
-		vdso->phdr = phdr = (void *)(vdso_base + ehdr->e_phoff);
-		vdso->phnum = ehdr->e_phnum;
-		vdso->phentsize = ehdr->e_phentsize;
+		Ehdr *ehdr = (void *)vdso_base;
+		Phdr *phdr = vdso.phdr = (void *)(vdso_base + ehdr->e_phoff);
+		vdso.phnum = ehdr->e_phnum;
+		vdso.phentsize = ehdr->e_phentsize;
 		for (i=ehdr->e_phnum; i; i--, phdr=(void *)((char *)phdr + ehdr->e_phentsize)) {
 			if (phdr->p_type == PT_DYNAMIC)
-				vdso->dynv = (void *)(vdso_base + phdr->p_offset);
+				vdso.dynv = (void *)(vdso_base + phdr->p_offset);
 			if (phdr->p_type == PT_LOAD)
-				vdso->base = (void *)(vdso_base - phdr->p_vaddr + phdr->p_offset);
+				vdso.base = (void *)(vdso_base - phdr->p_vaddr + phdr->p_offset);
 		}
-		vdso->name = "";
-		vdso->shortname = "linux-gate.so.1";
-		vdso->global = 1;
-		decode_dyn(vdso);
-		vdso->prev = lib;
-		lib->next = vdso;
+		vdso.name = "";
+		vdso.shortname = "linux-gate.so.1";
+		vdso.global = 1;
+		vdso.relocated = 1;
+		decode_dyn(&vdso);
+		vdso.prev = &ldso;
+		ldso.next = &vdso;
 	}
 
-	/* Initial dso chain consists only of the app. We temporarily
-	 * append the dynamic linker/libc so we can relocate it, then
-	 * restore the initial chain in preparation for loading third
-	 * party libraries (preload/needed). */
-	head = tail = app;
-	ldso = lib;
-	app->next = lib;
-	reloc_all(lib);
-	app->next = 0;
-
-	/* PAST THIS POINT, ALL LIBC INTERFACES ARE FULLY USABLE. */
+	/* Initial dso chain consists only of the app. */
+	head = tail = &app;
 
 	/* Donate unused parts of app and library mapping to malloc */
-	reclaim_gaps(app);
-	reclaim_gaps(lib);
+	reclaim_gaps(&app);
+	reclaim_gaps(&ldso);
 
 	/* Load preload/needed libraries, add their symbols to the global
-	 * namespace, and perform all remaining relocations. The main
-	 * program must be relocated LAST since it may contain copy
-	 * relocations which depend on libraries' relocations. */
+	 * namespace, and perform all remaining relocations. */
 	if (env_preload) load_preload(env_preload);
-	load_deps(app);
-	make_global(app);
+	load_deps(&app);
+	make_global(&app);
 
 #ifndef DYNAMIC_IS_RO
-	for (i=0; app->dynv[i]; i+=2)
-		if (app->dynv[i]==DT_DEBUG)
-			app->dynv[i+1] = (size_t)&debug;
+	for (i=0; app.dynv[i]; i+=2)
+		if (app.dynv[i]==DT_DEBUG)
+			app.dynv[i+1] = (size_t)&debug;
 #endif
 
-	reloc_all(app->next);
-	reloc_all(app);
+	/* The main program must be relocated LAST since it may contain
+	 * copy relocations which depend on libraries' relocations. */
+	reloc_all(app.next);
+	reloc_all(&app);
 
 	update_tls_size();
 	if (libc.tls_size > sizeof builtin_tls) {
@@ -1359,14 +1360,13 @@ void *__dynlink(int argc, char **argv)
 
 	/* Switch to runtime mode: any further failures in the dynamic
 	 * linker are a reportable failure rather than a fatal startup
-	 * error. If the dynamic loader (dlopen) will not be used, free
-	 * all memory used by the dynamic linker. */
+	 * error. */
 	runtime = 1;
 
 	debug.ver = 1;
 	debug.bp = _dl_debug_state;
 	debug.head = head;
-	debug.base = lib->base;
+	debug.base = ldso.base;
 	debug.state = 0;
 	_dl_debug_state();
 
@@ -1375,7 +1375,8 @@ void *__dynlink(int argc, char **argv)
 	errno = 0;
 	do_init_fini(tail);
 
-	return (void *)aux[AT_ENTRY];
+	CRTJMP((void *)aux[AT_ENTRY], argv-1);
+	for(;;);
 }
 
 void *dlopen(const char *file, int mode)
diff --git a/src/ldso/i386/start.s b/src/ldso/i386/start.s
deleted file mode 100644
index c37a1faa..00000000
--- a/src/ldso/i386/start.s
+++ /dev/null
@@ -1,22 +0,0 @@
-.text
-.global _dlstart
-_dlstart:
-	xor %ebp,%ebp
-	pop %edi
-	mov %esp,%esi
-	and $-16,%esp
-	push %ebp
-	push %ebp
-	push %esi
-	push %edi
-	call __dynlink
-	mov %esi,%esp
-1:	dec %edi
-	pop %esi
-	cmp $-1,%esi
-	jz 1b
-	inc %edi
-	push %esi
-	push %edi
-	xor %edx,%edx
-	jmp *%eax
diff --git a/src/ldso/microblaze/start.s b/src/ldso/microblaze/start.s
deleted file mode 100644
index 067e8613..00000000
--- a/src/ldso/microblaze/start.s
+++ /dev/null
@@ -1,28 +0,0 @@
-# FIXME: clearing argv entries
-.global _dlstart
-_dlstart:
-	add     r19, r0, r0
-
-	lw      r5, r0, r1
-	addi    r6, r1, 4
-	mfs     r7, rpc
-	addi    r7, r7, _GLOBAL_OFFSET_TABLE_+8
-	addi    r7, r7, _DYNAMIC@GOTOFF
-	brlid   r15, __reloc_self@PLT
-	addik   r1, r1, -16
-
-	lwi     r5, r1, 16
-	brlid   r15, __dynlink@PLT
-	addi    r6, r1, 20
-	addik   r1, r1, 16
-
-	lwi     r4, r1, 0
-1:	lwi     r5, r1, 4
-	addi    r5, r5, 1
-	bnei    r5, 1f
-	addi    r4, r4, -1
-	addi    r1, r1, 4
-	bri     1b
-1:	swi     r4, r1, 0
-	add     r5, r0, r0
-	bra     r3
diff --git a/src/ldso/mips/start.s b/src/ldso/mips/start.s
deleted file mode 100644
index 0cadbf8a..00000000
--- a/src/ldso/mips/start.s
+++ /dev/null
@@ -1,46 +0,0 @@
-.hidden _DYNAMIC
-.hidden __reloc_self
-.set noreorder
-.set nomacro
-.global _dlstart
-.type _dlstart,@function
-_dlstart:
-	move $fp, $0
-
-	bgezal $0, 1f
-	nop
-2:	.gpword 2b
-	.gpword _DYNAMIC
-	.gpword __reloc_self
-1:	lw $gp, 0($ra)
-	subu $gp, $ra, $gp
-
-	lw $4, 0($sp)
-	addiu $5, $sp, 4
-	lw $6, 4($ra)
-	addu $6, $6, $gp
-	addiu $7, $gp, -0x7ff0
-	subu $sp, $sp, 16
-	lw $25, 8($ra)
-	add $25, $25, $gp
-	jalr $25
-	nop
-
-	lw $25, %call16(__dynlink)($gp)
-	lw $4, 16($sp)
-	addiu $5, $sp, 20
-	jalr $25
-	nop
-
-	add $sp, $sp, 16
-	li $6, -1
-	lw $4, ($sp)
-1:	lw $5, 4($sp)
-	bne $5, $6, 2f
-	nop
-	addu $sp, $sp, 4
-	addu $4, $4, -1
-	b 1b
-	nop
-2:	sw $4, ($sp)
-	jr $2
diff --git a/src/ldso/or1k/start.s b/src/ldso/or1k/start.s
deleted file mode 100644
index 83b7c2c6..00000000
--- a/src/ldso/or1k/start.s
+++ /dev/null
@@ -1,34 +0,0 @@
-.global _dlstart
-_dlstart:
-	l.jal	1f
-	 l.nop
-1:	l.movhi	r5, gotpchi(_GLOBAL_OFFSET_TABLE_+0)
-	l.ori	r5, r5, gotpclo(_GLOBAL_OFFSET_TABLE_+4)
-	l.add	r5, r5, r9
-	l.movhi	r3, gotoffhi(_DYNAMIC)
-	l.ori	r3, r3, gotofflo(_DYNAMIC)
-	l.add	r5, r5, r3
-
-	l.lwz	r3, 0(r1)
-	l.addi	r4, r1, 4
-	l.jal	plt(__reloc_self)
-	 l.addi	r1, r1, -16
-
-	l.lwz	r3, 16(r1)
-	l.jal	plt(__dynlink)
-	 l.addi	r4, r1, 20
-	l.addi	r1, r1, 16
-
-	l.lwz	r4, 0(r1)
-1:	l.addi	r4, r4, -1
-	l.lwz	r5, 4(r1)
-	l.sfeqi	r5, -1
-	l.bf	1b
-	 l.addi	r1, r1, 4
-
-	l.addi	r4, r4, 1
-	l.addi	r1, r1, -4
-	l.sw	0(r1), r4
-
-	l.jr	r11
-	 l.ori	r3, r0, 0
diff --git a/src/ldso/powerpc/start.s b/src/ldso/powerpc/start.s
deleted file mode 100644
index 6548d58f..00000000
--- a/src/ldso/powerpc/start.s
+++ /dev/null
@@ -1,29 +0,0 @@
-	.global _dlstart
-	.type   _dlstart,@function
-_dlstart:
-	bl      1f
-2:	.long   _DYNAMIC-2b
-1:	mflr    5
-	lwz     0, 0(5)
-	add     5, 0, 5
-	lwz     3, 0(1)
-	addi    4, 1, 4
-	addi    1, 1, -16
-	bl      __reloc_self
-
-	lwz     3, 16(1)
-	addi    4, 1, 20
-	bl      __dynlink
-	addi    1, 1, 16
-
-	lwz     4, 0(1)
-1:	addi    4, 4, -1
-	lwzu    5, 4(1)
-	cmpwi   5, -1
-	beq-    1b
-	addi    4, 4, 1
-	stwu    4, -4(1)
-
-	mtlr    3
-	li      3, 0
-	blr
diff --git a/src/ldso/sh/start.s b/src/ldso/sh/start.s
deleted file mode 100644
index 0d2d9136..00000000
--- a/src/ldso/sh/start.s
+++ /dev/null
@@ -1,26 +0,0 @@
-.text
-.global _dlstart
-.type   _dlstart, @function
-_dlstart:
-	mov.l  @r15, r4
-	mov    r15, r5
-	mov.l  L1, r0
-	bsrf   r0
-	 add   #4, r5
-
-2:	mov    r0, r2
-	mov.l  @r15+, r1
-1:	mov.l  @r15+, r0
-	cmp/eq #-1, r0
-	bt/s   1b
-	 add   #-1, r1
-
-	add    #1, r1
-	mov.l  r0, @-r15
-	mov.l  r1, @-r15
-	mov    #0, r4
-	jmp    @r2
-	 nop
-
-.align 2
-L1:	.long __dynlink@PLT-(2b-.)
diff --git a/src/ldso/start.c b/src/ldso/start.c
deleted file mode 100644
index 3471f6ce..00000000
--- a/src/ldso/start.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#include <stdlib.h>
-
-/* stub for archs that lack dynamic linker support */
-
-void _dlstart()
-{
-	_Exit(1);
-}
diff --git a/src/ldso/x32/start.s b/src/ldso/x32/start.s
deleted file mode 100644
index 3c3800aa..00000000
--- a/src/ldso/x32/start.s
+++ /dev/null
@@ -1,24 +0,0 @@
-.text
-.global _dlstart
-_dlstart:
-	mov (%rsp),%rdi  /* move argc into 1st argument slot */
-	lea 4(%rsp),%rsi /* move argv into 2nd argument slot */
-	call __dynlink
-	/* in case the dynlinker was called directly, it sets the "consumed"
-	   argv values to -1. so we must loop over the array as long as -1
-	   is in the top argv slot, decrement argc, and then set the stackpointer
-	   to the new argc as well as argc's new value.
-	   as the x32 abi has longs in the argv array, we cannot use push/pop.*/
-	movl (%rsp),%edi /* copy argc into edi */
-	xor %rdx,%rdx /* we use rdx as an offset to the current argv member */
-1:	dec %edi
-	addl $4, %edx
-	movl (%rsp, %rdx), %esi
-	cmp $-1,%esi
-	jz 1b
-	inc %edi
-	subl $4, %edx
-	lea (%rsp, %rdx), %rsp /* set rsp to new argv[-1] */
-	movl %edi, (%rsp)      /* write new argc there */
-	xor %edx,%edx
-	jmp *%rax
diff --git a/src/ldso/x86_64/start.s b/src/ldso/x86_64/start.s
deleted file mode 100644
index 1c5598aa..00000000
--- a/src/ldso/x86_64/start.s
+++ /dev/null
@@ -1,16 +0,0 @@
-.text
-.global _dlstart
-_dlstart:
-	mov (%rsp),%rdi
-	lea 8(%rsp),%rsi
-	call __dynlink
-	pop %rdi
-1:	dec %edi
-	pop %rsi
-	cmp $-1,%rsi
-	jz 1b
-	inc %edi
-	push %rsi
-	push %rdi
-	xor %edx,%edx
-	jmp *%rax