Diffstat (limited to 'nptl/pthread_spin_lock.c')
-rw-r--r--	nptl/pthread_spin_lock.c	71
1 file changed, 41 insertions(+), 30 deletions(-)
diff --git a/nptl/pthread_spin_lock.c b/nptl/pthread_spin_lock.c
index 4d03b7893a..682af80240 100644
--- a/nptl/pthread_spin_lock.c
+++ b/nptl/pthread_spin_lock.c
@@ -19,27 +19,35 @@
 #include <atomic.h>
 #include "pthreadP.h"
 
-/* A machine-specific version can define SPIN_LOCK_READS_BETWEEN_CMPXCHG
-   to the number of plain reads that it's optimal to spin on between uses
-   of atomic_compare_and_exchange_val_acq.  If spinning forever is optimal
-   then use -1.  If no plain reads here would ever be optimal, use 0.  */
-#ifndef SPIN_LOCK_READS_BETWEEN_CMPXCHG
-# warning machine-dependent file should define SPIN_LOCK_READS_BETWEEN_CMPXCHG
-# define SPIN_LOCK_READS_BETWEEN_CMPXCHG 1000
-#endif
-
 int
 pthread_spin_lock (pthread_spinlock_t *lock)
 {
-  /* atomic_exchange usually takes less instructions than
-     atomic_compare_and_exchange.  On the other hand,
-     atomic_compare_and_exchange potentially generates less bus traffic
-     when the lock is locked.
-     We assume that the first try mostly will be successful, and we use
-     atomic_exchange.  For the subsequent tries we use
-     atomic_compare_and_exchange.  */
-  if (atomic_exchange_acq (lock, 1) == 0)
+  int val = 0;
+
+  /* We assume that the first try mostly will be successful, thus we use
+     atomic_exchange if it is not implemented by a CAS loop (we also assume
+     that atomic_exchange can be faster if it succeeds, see
+     ATOMIC_EXCHANGE_USES_CAS).  Otherwise, we use a weak CAS and not an
+     exchange so we bail out after the first failed attempt to change the
+     state.  For the subsequent attempts we use atomic_compare_and_exchange
+     after we observe that the lock is not acquired.
+     See also comment in pthread_spin_trylock.
+     We use acquire MO to synchronize-with the release MO store in
+     pthread_spin_unlock, and thus ensure that prior critical sections
+     happen-before this critical section.  */
+#if ! ATOMIC_EXCHANGE_USES_CAS
+  /* Try to acquire the lock with an exchange instruction as this architecture
+     has such an instruction and we assume it is faster than a CAS.
+     The acquisition succeeds if the lock is not in an acquired state.  */
+  if (__glibc_likely (atomic_exchange_acquire (lock, 1) == 0))
     return 0;
+#else
+  /* Try to acquire the lock with a CAS instruction as this architecture
+     has no exchange instruction.  The acquisition succeeds if the lock is not
+     acquired.  */
+  if (__glibc_likely (atomic_compare_exchange_weak_acquire (lock, &val, 1)))
+    return 0;
+#endif
 
   do
     {
@@ -47,23 +55,26 @@ pthread_spin_lock (pthread_spinlock_t *lock)
 	 to cmpxchg is not a good idea on many targets as that will force
 	 expensive memory synchronizations among processors and penalize
 	 other running threads.
-	 On the other hand, we do want to update memory state on the local core
-	 once in a while to avoid spinning indefinitely until some event that
-	 will happen to update local memory as a side-effect.  */
-      if (SPIN_LOCK_READS_BETWEEN_CMPXCHG >= 0)
+	 There is no technical reason for throwing in a CAS every now and then,
+	 and so far we have no evidence that it can improve performance.
+	 If that would be the case, we have to adjust other spin-waiting loops
+	 elsewhere, too!
+	 Thus we use relaxed MO reads until we observe the lock to not be
+	 acquired anymore.  */
+      do
 	{
-	  int wait = SPIN_LOCK_READS_BETWEEN_CMPXCHG;
+	  /* TODO Back-off.  */
 
-	  while (*lock != 0 && wait > 0)
-	    --wait;
-	}
-      else
-	{
-	  while (*lock != 0)
-	    ;
+	  atomic_spin_nop ();
+
+	  val = atomic_load_relaxed (lock);
 	}
+      while (val != 0);
+
+      /* We need acquire memory order here for the same reason as mentioned
+	 for the first try to lock the spinlock.  */
     }
-  while (atomic_compare_and_exchange_val_acq (lock, 1, 0) != 0);
+  while (!atomic_compare_exchange_weak_acquire (lock, &val, 1));
 
   return 0;
 }
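
Note (not part of the commit): the new comments reason in C11 memory-order terms while the code uses glibc's internal <atomic.h> macros. The sketch below restates the patched algorithm with standard C11 <stdatomic.h>, so the acquire/release pairing and the relaxed-MO spin are visible in portable code. It is a minimal illustration under stated assumptions, not glibc's implementation: spinlock_t, spin_lock_c11, spin_unlock_c11, and EXCHANGE_USES_CAS (a stand-in for ATOMIC_EXCHANGE_USES_CAS) are made-up names, and portable C11 has no equivalent of atomic_spin_nop (the x86 PAUSE hint), so it is omitted here.

/* spinlock-sketch.c -- illustrative only; compile with: cc -std=c11 -pthread  */
#include <stdatomic.h>

/* Stand-in for glibc's ATOMIC_EXCHANGE_USES_CAS: define to 1 on targets
   whose atomic exchange is itself compiled to a CAS loop.  */
#ifndef EXCHANGE_USES_CAS
# define EXCHANGE_USES_CAS 0
#endif

typedef atomic_int spinlock_t;	/* 0 = unlocked, 1 = locked.  */

void
spin_lock_c11 (spinlock_t *lock)
{
  int val = 0;

#if ! EXCHANGE_USES_CAS
  /* First try: a single exchange.  Acquire MO synchronizes-with the
     release store in spin_unlock_c11, so the previous owner's critical
     section happens-before ours.  */
  if (atomic_exchange_explicit (lock, 1, memory_order_acquire) == 0)
    return;
#else
  /* First try on CAS-only targets: one weak CAS, bailing out after a
     single failed attempt instead of looping inside the exchange.  */
  if (atomic_compare_exchange_weak_explicit (lock, &val, 1,
					     memory_order_acquire,
					     memory_order_relaxed))
    return;
#endif

  do
    {
      /* Contended: spin on relaxed loads only; with no CAS in this inner
	 loop we do not bounce the cache line while the lock is held.  */
      do
	val = atomic_load_explicit (lock, memory_order_relaxed);
      while (val != 0);
    }
  /* val == 0 here; the weak CAS may still fail, spuriously or because
     another thread grabbed the lock first, in which case we spin again.  */
  while (!atomic_compare_exchange_weak_explicit (lock, &val, 1,
						 memory_order_acquire,
						 memory_order_relaxed));
}

void
spin_unlock_c11 (spinlock_t *lock)
{
  /* Release MO: pairs with the acquire operations in spin_lock_c11.  */
  atomic_store_explicit (lock, 0, memory_order_release);
}

The weak CAS matters for the same reason the patch comment gives: a strong CAS would retry internally on spurious failure, whereas here a failure simply returns control to the relaxed-load loop, which is exactly the path taken when another thread wins the race.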
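
A hypothetical usage check, appended to the sketch above: two threads each increment a shared counter one million times. If the release/acquire pairing is correct, the plain counter++ is data-race-free and the program prints 2000000.

/* usage-sketch.c -- builds on spinlock-sketch.c above.  */
#include <pthread.h>
#include <stdio.h>

static spinlock_t lock;		/* Static zero-initialization == unlocked.  */
static long counter;		/* Protected by lock; plain accesses suffice.  */

static void *
worker (void *arg)
{
  (void) arg;
  for (int i = 0; i < 1000000; i++)
    {
      spin_lock_c11 (&lock);
      counter++;
      spin_unlock_c11 (&lock);
    }
  return NULL;
}

int
main (void)
{
  pthread_t t1, t2;
  pthread_create (&t1, NULL, worker, NULL);
  pthread_create (&t2, NULL, worker, NULL);
  pthread_join (t1, NULL);
  pthread_join (t2, NULL);
  printf ("%ld\n", counter);	/* Expect 2000000.  */
  return 0;
}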