aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRich Felker <dalias@aerifal.cx>2013-09-22 03:06:17 -0400
committerRich Felker <dalias@aerifal.cx>2013-09-22 03:06:17 -0400
commit35a6801c6cd31b8ace4a7c7fc138170434b6754f (patch)
treee1dc90e121b1850fd7a2cc9ffd8670368e3db47a
parente803829e6b087c0ed91adc11f87185109bc59b31 (diff)
downloadmusl-35a6801c6cd31b8ace4a7c7fc138170434b6754f.tar.gz
musl-35a6801c6cd31b8ace4a7c7fc138170434b6754f.tar.xz
musl-35a6801c6cd31b8ace4a7c7fc138170434b6754f.zip
fix arm atomic store and generate simpler/less-bloated/faster code
Atomic store was lacking a barrier, which was fine for legacy ARM with no real SMP and kernel-emulated CAS, but unsuitable for more modern systems. The kernel provides another "kuser" function, at 0xffff0fa0, which could be used for the barrier, but using that would drop support for kernels 2.6.12 through 2.6.14 unless an extra conditional were added to check for barrier availability. Just using the barrier in the kernel CAS is easier, and, based on my reading of the assembly code in the kernel, does not appear to be significantly slower. At the same time, other atomic operations are adapted to call the kernel CAS function directly rather than using a_cas; due to small differences in their interface contracts, this makes the generated code much simpler.
-rw-r--r--arch/arm/atomic.h14
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/arch/arm/atomic.h b/arch/arm/atomic.h
index 94efe5a0..734d2871 100644
--- a/arch/arm/atomic.h
+++ b/arch/arm/atomic.h
@@ -22,11 +22,13 @@ static inline int a_ctz_64(uint64_t x)
return a_ctz_l(y);
}
+#define __k_cas ((int (*)(int, int, volatile int *))0xffff0fc0)
+
static inline int a_cas(volatile int *p, int t, int s)
{
int old;
for (;;) {
- if (!((int (*)(int, int, volatile int *))0xffff0fc0)(t, s, p))
+ if (!__k_cas(t, s, p))
return t;
if ((old=*p) != t)
return old;
@@ -47,7 +49,7 @@ static inline int a_swap(volatile int *x, int v)
{
int old;
do old = *x;
- while (a_cas(x, old, v) != old);
+ while (__k_cas(old, v, x));
return old;
}
@@ -55,7 +57,7 @@ static inline int a_fetch_add(volatile int *x, int v)
{
int old;
do old = *x;
- while (a_cas(x, old, old+v) != old);
+ while (__k_cas(old, old+v, x));
return old;
}
@@ -71,7 +73,7 @@ static inline void a_dec(volatile int *x)
static inline void a_store(volatile int *p, int x)
{
- *p=x;
+ while (__k_cas(*p, x, p));
}
static inline void a_spin()
@@ -87,14 +89,14 @@ static inline void a_and(volatile int *p, int v)
{
int old;
do old = *p;
- while (a_cas(p, old, old&v) != old);
+ while (__k_cas(old, old&v, p));
}
static inline void a_or(volatile int *p, int v)
{
int old;
do old = *p;
- while (a_cas(p, old, old|v) != old);
+ while (__k_cas(old, old|v, p));
}
static inline void a_or_l(volatile void *p, long v)