diff options
author | Rich Felker <dalias@aerifal.cx> | 2016-01-21 19:28:15 +0000 |
---|---|---|
committer | Rich Felker <dalias@aerifal.cx> | 2016-01-21 19:43:04 +0000 |
commit | 61b1e75f7d8004461f2e18f171c26c2f545eed32 (patch) | |
tree | 7a88f8d51ee3f049745b147b7f36fc8ee22e0e62 /src/thread/sh/atomics.s | |
parent | 1315596b510189b5159e742110b504177bdd4932 (diff) | |
download | musl-61b1e75f7d8004461f2e18f171c26c2f545eed32.tar.gz musl-61b1e75f7d8004461f2e18f171c26c2f545eed32.tar.xz musl-61b1e75f7d8004461f2e18f171c26c2f545eed32.zip |
overhaul sh atomics for new atomics framework, add j-core cas.l backend
sh needs runtime-selected atomic backends since there are a number of supported models that use non-forwards-compatible (non-smp-compatible) atomic mechanisms. previously, the code paths for this were highly inefficient since they involved C function calls with multiple branches in the callee and heavy spills in the caller. the new code calls the runtime-selected asm fragment from inline asm with extremely minimal clobbers, rather than using a function call. for the sh4a case where the atomic mechanism is known and there is no forward-compatibility issue, the movli.l and movco.l instructions are provided as a_ll and a_sc, allowing the new shared atomic.h to generate efficient inline versions of all the basic atomic operations without needing a cas loop.
Diffstat (limited to 'src/thread/sh/atomics.s')
-rw-r--r-- | src/thread/sh/atomics.s | 65 |
1 files changed, 65 insertions, 0 deletions
diff --git a/src/thread/sh/atomics.s b/src/thread/sh/atomics.s
new file mode 100644
index 00000000..3b58cccc
--- /dev/null
+++ b/src/thread/sh/atomics.s
@@ -0,0 +1,65 @@
+/* Contract for all versions is same as cas.l r2,r3,@r0
+ * pr and r1 are also clobbered (by jsr & r1 as temp).
+ * r0,r2,r4-r15 must be preserved.
+ * r3 contains result (==r2 iff cas succeeded). */
+
+	.align 2
+.global __sh_cas_gusa	/* CAS built from a plain load/compare/store sequence bracketed by r0/r1/r15 setup */
+.hidden __sh_cas_gusa
+__sh_cas_gusa:		/* NOTE(review): presumably relies on the kernel's gUSA restart convention -- confirm */
+	mov.l r5,@-r15	/* push r5 (used as temp for the loaded value) */
+	mov.l r4,@-r15	/* push r4 (used to hold the target address) */
+	mov r0,r4	/* r4 = target address, since r0 is overwritten next */
+	mova 1f,r0	/* r0 = address of label 1, the end of the sequence */
+	mov r15,r1	/* r1 = real stack pointer, kept while r15 is repurposed */
+	mov #(0f-1f),r15	/* r15 = negative byte length of the 0:..1: sequence */
+0:	mov.l @r4,r5	/* r5 = current value at the target */
+	cmp/eq r5,r2	/* T = (current value == expected value in r2) */
+	bf 1f		/* mismatch: skip the store */
+	mov.l r3,@r4	/* match: store the new value from r3 */
+1:	mov r1,r15	/* put the real stack pointer back */
+	mov r5,r3	/* r3 = value that was read (the result) */
+	mov r4,r0	/* restore r0 = target address */
+	mov.l @r15+,r4	/* pop r4 */
+	rts
+	 mov.l @r15+,r5	/* delay slot: pop r5 */
+
+.global __sh_cas_llsc	/* CAS built on movli.l/movco.l with a retry loop */
+.hidden __sh_cas_llsc
+__sh_cas_llsc:
+	mov r0,r1	/* r1 = target address; r0 is the data register for movli.l/movco.l */
+	synco		/* synchronize before the linked load */
+0:	movli.l @r1,r0	/* linked load: r0 = current value */
+	cmp/eq r0,r2	/* T = (current value == expected value in r2) */
+	bf 1f		/* mismatch: leave with r0 = current value */
+	mov r3,r0	/* r0 = new value to store */
+	movco.l r0,@r1	/* conditional store; T reports whether it succeeded */
+	bf 0b		/* store did not take effect: retry from the load */
+	mov r2,r0	/* success: old value equals the expected value */
+1:	synco		/* synchronize after the sequence */
+	mov r0,r3	/* r3 = result (old value) */
+	rts
+	 mov r1,r0	/* delay slot: restore r0 = target address */
+
+.global __sh_cas_imask	/* CAS performed with the sr interrupt mask bits raised */
+.hidden __sh_cas_imask
+__sh_cas_imask:
+	mov r0,r1	/* r1 = target address; r0 is needed for the sr manipulation */
+	stc sr,r0	/* r0 = current status register */
+	mov.l r0,@-r15	/* push the original sr */
+	or #0xf0,r0	/* set bits 4-7 (interrupt mask field) in the copy */
+	ldc r0,sr	/* install the modified sr */
+	mov.l @r1,r0	/* r0 = current value at the target */
+	cmp/eq r0,r2	/* T = (current value == expected value in r2) */
+	bf 1f		/* mismatch: skip the store */
+	mov.l r3,@r1	/* match: store the new value from r3 */
+1:	ldc.l @r15+,sr	/* pop the original sr, undoing the mask change */
+	mov r0,r3	/* r3 = result (old value) */
+	rts
+	 mov r1,r0	/* delay slot: restore r0 = target address */
+
+.global __sh_cas_cas_l	/* CAS using the native cas.l instruction */
+.hidden __sh_cas_cas_l
+__sh_cas_cas_l:
+	rts
+	 .word 0x2323 /* delay slot: cas.l r2,r3,@r0, emitted as a raw opcode word */ |