about summary refs log tree commit diff
path: root/src/ldso
diff options
context:
space:
mode:
authorRich Felker <dalias@aerifal.cx>2018-10-01 18:37:02 -0400
committerRich Felker <dalias@aerifal.cx>2018-10-01 18:37:02 -0400
commit0beb9dfbecad38af9759b1e83eeb007e28b70abb (patch)
tree75c42c8b544a480ba0a7664ebe83d88de654cd60 /src/ldso
parent4d0a82170a25464c39522d7190b9fe302045ddb2 (diff)
downloadmusl-0beb9dfbecad38af9759b1e83eeb007e28b70abb.tar.gz
musl-0beb9dfbecad38af9759b1e83eeb007e28b70abb.tar.xz
musl-0beb9dfbecad38af9759b1e83eeb007e28b70abb.zip
add TLSDESC support for 32-bit arm
unlike other asm where the baseline ISA is used, these functions are
hot paths and use ISA-level specializations.

call-clobbered vfp registers are saved before calling __tls_get_new,
since there is no guarantee it won't use them. while setjmp/longjmp
have to use hwcap to decide whether the fpu is in use, since
application code could be using vfp registers even if libc was
compiled as pure softfloat, __tls_get_new is part of libc and can be
assumed not to have access to vfp registers if tlsdesc.S does not.
thus it suffices just to check the predefined preprocessor macros. the
check for __ARM_PCS_VFP is redundant; !__SOFTFP__ must always be true
if the target ISA level includes fpu instructions/registers.
Diffstat (limited to 'src/ldso')
-rw-r--r-- src/ldso/arm/tlsdesc.S | 62
1 files changed, 62 insertions, 0 deletions
diff --git a/src/ldso/arm/tlsdesc.S b/src/ldso/arm/tlsdesc.S
new file mode 100644
index 00000000..f3d67fce
--- /dev/null
+++ b/src/ldso/arm/tlsdesc.S
@@ -0,0 +1,62 @@
+.syntax unified
+
+.text
+.global __tlsdesc_static
+.hidden __tlsdesc_static
+.type __tlsdesc_static,%function
+__tlsdesc_static:  // in: r0 = address of a word; out: r0 = that word. No other register and no stack is touched.
+	ldr r0,[r0]     // r0 = *r0 (presumably the precomputed tp-relative offset stored in the descriptor -- confirm against the code that fills descriptors in)
+	bx lr           // return to the caller
+
+.hidden __tls_get_new
+
+.global __tlsdesc_dynamic
+.hidden __tlsdesc_dynamic
+.type __tlsdesc_dynamic,%function
+__tlsdesc_dynamic:  // in: r0 = descriptor; out: r0 = dtv[modid]+offset-tp (fast path) or __tls_get_new result minus tp (slow path). Clobbers r1 and flags only.
+	push {r2,r3,ip,lr}  // save every core register modified below except r0/r1
+	ldr r1,[r0]     // r1 = pointer to the {modid, offset} pair referenced by the descriptor
+	ldr r2,[r1,#4]  // r2 = offset
+	ldr r1,[r1]     // r1 = modid
+	// fetch the thread pointer: literal 1: holds __a_gettp_ptr relative to label 2, so this relies on pc in the add reading as the address of label 2
+	ldr r0,1f
+	add r0,r0,pc    // r0 = &__a_gettp_ptr
+	ldr r0,[r0]     // r0 = __a_gettp_ptr (function pointer)
+2:
+#if __ARM_ARCH >= 5
+	blx r0          // r0 = tp
+#else
+	mov lr,pc       // blx is not used below ARMv5: set the return address by hand
+	bx r0           // r0 = tp on return
+#endif
+	ldr r3,[r0,#-4] // r3 = dtv
+	ldr ip,[r3]     // ip = dtv slot count
+	cmp r1,ip
+	bhi 3f          // modid > slot count (unsigned): take the slow path
+	ldr ip,[r3,r1,LSL #2]  // ip = dtv[modid]
+	sub r0,ip,r0
+	add r0,r0,r2    // r0 = r3[r1]-r0+r2
+4:
+#if __ARM_ARCH >= 5
+	pop {r2,r3,ip,pc}  // restore saved registers and return in one instruction
+#else
+	pop {r2,r3,ip,lr}
+	bx lr
+#endif
+	// slow path (modid > dtv slot count): call __tls_get_new, keeping the caller's registers intact
+3:
+#if __ARM_PCS_VFP || !__SOFTFP__
+.fpu vfp        // accept the VFP mnemonics below even when the assembler is run without an -mfpu option
+	vpush {d0-d7}   // __tls_get_new may use the call-clobbered VFP registers
+#endif
+	push {r0-r3}    // stack now holds: tp, modid, offset, dtv
+	add r0,sp,#4    // r0 = address of the {modid, offset} pair just pushed
+	bl __tls_get_new  // result comes back in r0 (the "retval" below)
+	pop {r1-r3,ip}  // r1 = tp; r2, r3, ip get stale values and are reloaded by the pop at 4:
+#if __ARM_PCS_VFP || !__SOFTFP__
+	vpop {d0-d7}
+#endif
+	sub r0,r0,r1    // r0 = retval-tp
+	b 4b
+	.align 2
+1:	.word __a_gettp_ptr - 2b