summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog46
-rw-r--r--NEWS11
-rw-r--r--include/atomic.h16
-rw-r--r--nptl/pthread_spin_init.c3
-rw-r--r--nptl/pthread_spin_lock.c71
-rw-r--r--nptl/pthread_spin_trylock.c54
-rw-r--r--nptl/pthread_spin_unlock.c6
-rw-r--r--sysdeps/aarch64/atomic-machine.h1
-rw-r--r--sysdeps/aarch64/nptl/pthread_spin_lock.c24
-rw-r--r--sysdeps/alpha/atomic-machine.h3
-rw-r--r--sysdeps/arm/atomic-machine.h1
-rw-r--r--sysdeps/arm/nptl/pthread_spin_lock.c23
-rw-r--r--sysdeps/hppa/nptl/pthread_spin_lock.c23
-rw-r--r--sysdeps/i386/atomic-machine.h1
-rw-r--r--sysdeps/ia64/atomic-machine.h3
-rw-r--r--sysdeps/m68k/coldfire/atomic-machine.h3
-rw-r--r--sysdeps/m68k/m680x0/m68020/atomic-machine.h3
-rw-r--r--sysdeps/m68k/nptl/pthread_spin_lock.c24
-rw-r--r--sysdeps/microblaze/atomic-machine.h3
-rw-r--r--sysdeps/microblaze/nptl/pthread_spin_lock.c24
-rw-r--r--sysdeps/mips/atomic-machine.h13
-rw-r--r--sysdeps/mips/nptl/pthread_spin_lock.c23
-rw-r--r--sysdeps/nios2/nptl/pthread_spin_lock.c24
-rw-r--r--sysdeps/powerpc/powerpc32/atomic-machine.h1
-rw-r--r--sysdeps/powerpc/powerpc64/atomic-machine.h1
-rw-r--r--sysdeps/s390/atomic-machine.h2
-rw-r--r--sysdeps/sparc/sparc32/atomic-machine.h3
-rw-r--r--sysdeps/sparc/sparc32/sparcv9/atomic-machine.h3
-rw-r--r--sysdeps/sparc/sparc64/atomic-machine.h3
-rw-r--r--sysdeps/tile/tilegx/atomic-machine.h1
-rw-r--r--sysdeps/tile/tilepro/atomic-machine.h1
-rw-r--r--sysdeps/unix/sysv/linux/hppa/atomic-machine.h3
-rw-r--r--sysdeps/unix/sysv/linux/m68k/coldfire/atomic-machine.h3
-rw-r--r--sysdeps/unix/sysv/linux/nios2/atomic-machine.h3
-rw-r--r--sysdeps/unix/sysv/linux/sh/atomic-machine.h3
-rw-r--r--sysdeps/x86_64/atomic-machine.h1
36 files changed, 226 insertions, 205 deletions
diff --git a/ChangeLog b/ChangeLog
index 4b90a00dc6..820c928948 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,49 @@
+2017-06-06  Stefan Liebler  <stli@linux.vnet.ibm.com>
+
+	* NEWS: Mention new spinlock implementation.
+	* include/atomic.h:
+	(__atomic_val_bysize): Cast type to omit volatile qualifier.
+	(atomic_exchange_acq): Likewise.
+	(atomic_load_relaxed): Likewise.
+	(ATOMIC_EXCHANGE_USES_CAS): Check definition.
+	* nptl/pthread_spin_init.c (pthread_spin_init):
+	Use atomic_store_relaxed.
+	* nptl/pthread_spin_lock.c (pthread_spin_lock):
+	Use C11-like atomic macros.
+	* nptl/pthread_spin_trylock.c (pthread_spin_trylock):
+	Likewise.
+	* nptl/pthread_spin_unlock.c (pthread_spin_unlock):
+	Use atomic_store_release.
+	* sysdeps/aarch64/nptl/pthread_spin_lock.c: Delete File.
+	* sysdeps/arm/nptl/pthread_spin_lock.c: Likewise.
+	* sysdeps/hppa/nptl/pthread_spin_lock.c: Likewise.
+	* sysdeps/m68k/nptl/pthread_spin_lock.c: Likewise.
+	* sysdeps/microblaze/nptl/pthread_spin_lock.c: Likewise.
+	* sysdeps/mips/nptl/pthread_spin_lock.c: Likewise.
+	* sysdeps/nios2/nptl/pthread_spin_lock.c: Likewise.
+	* sysdeps/aarch64/atomic-machine.h (ATOMIC_EXCHANGE_USES_CAS): Define.
+	* sysdeps/alpha/atomic-machine.h: Likewise.
+	* sysdeps/arm/atomic-machine.h: Likewise.
+	* sysdeps/i386/atomic-machine.h: Likewise.
+	* sysdeps/ia64/atomic-machine.h: Likewise.
+	* sysdeps/m68k/coldfire/atomic-machine.h: Likewise.
+	* sysdeps/m68k/m680x0/m68020/atomic-machine.h: Likewise.
+	* sysdeps/microblaze/atomic-machine.h: Likewise.
+	* sysdeps/mips/atomic-machine.h: Likewise.
+	* sysdeps/powerpc/powerpc32/atomic-machine.h: Likewise.
+	* sysdeps/powerpc/powerpc64/atomic-machine.h: Likewise.
+	* sysdeps/s390/atomic-machine.h: Likewise.
+	* sysdeps/sparc/sparc32/atomic-machine.h: Likewise.
+	* sysdeps/sparc/sparc32/sparcv9/atomic-machine.h: Likewise.
+	* sysdeps/sparc/sparc64/atomic-machine.h: Likewise.
+	* sysdeps/tile/tilegx/atomic-machine.h: Likewise.
+	* sysdeps/tile/tilepro/atomic-machine.h: Likewise.
+	* sysdeps/unix/sysv/linux/hppa/atomic-machine.h: Likewise.
+	* sysdeps/unix/sysv/linux/m68k/coldfire/atomic-machine.h: Likewise.
+	* sysdeps/unix/sysv/linux/nios2/atomic-machine.h: Likewise.
+	* sysdeps/unix/sysv/linux/sh/atomic-machine.h: Likewise.
+	* sysdeps/x86_64/atomic-machine.h: Likewise.
+
 2017-06-05  H.J. Lu  <hongjiu.lu@intel.com>
 
 	* sysdeps/x86/cacheinfo.c (intel_check_word): Accept cpu_features
diff --git a/NEWS b/NEWS
index 608538f77f..991ee63f7c 100644
--- a/NEWS
+++ b/NEWS
@@ -80,6 +80,17 @@ Version 2.26
 * The stack_t type no longer has the name struct sigaltstack.  This changes
   the C++ name mangling for interfaces involving this type.
 
+* The synchronization that pthread_spin_unlock performs has been changed
+  to now be equivalent to a C11 atomic store with release memory order to
+  the spin lock's memory location.  This ensures correct synchronization
+  for the spin lock's operations and critical sections protected by a spin
+  lock.  Previously, several (but not all) architectures used stronger
+  synchronization (e.g., containing what is often called a full barrier).
+  This change can improve performance, but may affect odd fringe uses of
+  spin locks that depend on the previous behavior (e.g., using spin locks
+  as atomic variables to try to implement Dekker's mutual exclusion
+  algorithm).
+
 Security related changes:
 
 * The DNS stub resolver limits the advertised UDP buffer size to 1200 bytes,
diff --git a/include/atomic.h b/include/atomic.h
index 7f326400c3..d002b117bf 100644
--- a/include/atomic.h
+++ b/include/atomic.h
@@ -54,7 +54,7 @@
    and following args.  */
 #define __atomic_val_bysize(pre, post, mem, ...)			      \
   ({									      \
-    __typeof (*mem) __atg1_result;					      \
+    __typeof ((__typeof (*(mem))) *(mem)) __atg1_result;		      \
     if (sizeof (*mem) == 1)						      \
       __atg1_result = pre##_8_##post (mem, __VA_ARGS__);		      \
     else if (sizeof (*mem) == 2)					      \
@@ -162,9 +162,9 @@
 /* Store NEWVALUE in *MEM and return the old value.  */
 #ifndef atomic_exchange_acq
 # define atomic_exchange_acq(mem, newvalue) \
-  ({ __typeof (*(mem)) __atg5_oldval;					      \
+  ({ __typeof ((__typeof (*(mem))) *(mem)) __atg5_oldval;		      \
      __typeof (mem) __atg5_memp = (mem);				      \
-     __typeof (*(mem)) __atg5_value = (newvalue);			      \
+     __typeof ((__typeof (*(mem))) *(mem)) __atg5_value = (newvalue);	      \
 									      \
      do									      \
        __atg5_oldval = *__atg5_memp;					      \
@@ -668,7 +668,7 @@ void __atomic_link_error (void);
 
 # ifndef atomic_load_relaxed
 #  define atomic_load_relaxed(mem) \
-   ({ __typeof (*(mem)) __atg100_val;					      \
+   ({ __typeof ((__typeof (*(mem))) *(mem)) __atg100_val;		      \
    __asm ("" : "=r" (__atg100_val) : "0" (*(mem)));			      \
    __atg100_val; })
 # endif
@@ -818,4 +818,12 @@ void __atomic_link_error (void);
 # define atomic_spin_nop() do { /* nothing */ } while (0)
 #endif
 
+/* ATOMIC_EXCHANGE_USES_CAS is non-zero if atomic_exchange operations
+   are implemented based on a CAS loop; otherwise, this is zero and we assume
+   that the atomic_exchange operations could provide better performance
+   than a CAS loop.  */
+#ifndef ATOMIC_EXCHANGE_USES_CAS
+# error ATOMIC_EXCHANGE_USES_CAS has to be defined.
+#endif
+
 #endif	/* atomic.h */
diff --git a/nptl/pthread_spin_init.c b/nptl/pthread_spin_init.c
index 01dec5eea4..fe3091377e 100644
--- a/nptl/pthread_spin_init.c
+++ b/nptl/pthread_spin_init.c
@@ -22,6 +22,7 @@
 int
 pthread_spin_init (pthread_spinlock_t *lock, int pshared)
 {
-  *lock = 0;
+  /* Relaxed MO is fine because this is an initializing store.  */
+  atomic_store_relaxed (lock, 0);
   return 0;
 }
diff --git a/nptl/pthread_spin_lock.c b/nptl/pthread_spin_lock.c
index 4d03b7893a..682af80240 100644
--- a/nptl/pthread_spin_lock.c
+++ b/nptl/pthread_spin_lock.c
@@ -19,27 +19,35 @@
 #include <atomic.h>
 #include "pthreadP.h"
 
-/* A machine-specific version can define SPIN_LOCK_READS_BETWEEN_CMPXCHG
-  to the number of plain reads that it's optimal to spin on between uses
-  of atomic_compare_and_exchange_val_acq.  If spinning forever is optimal
-  then use -1.  If no plain reads here would ever be optimal, use 0.  */
-#ifndef SPIN_LOCK_READS_BETWEEN_CMPXCHG
-# warning machine-dependent file should define SPIN_LOCK_READS_BETWEEN_CMPXCHG
-# define SPIN_LOCK_READS_BETWEEN_CMPXCHG 1000
-#endif
-
 int
 pthread_spin_lock (pthread_spinlock_t *lock)
 {
-  /* atomic_exchange usually takes less instructions than
-     atomic_compare_and_exchange.  On the other hand,
-     atomic_compare_and_exchange potentially generates less bus traffic
-     when the lock is locked.
-     We assume that the first try mostly will be successful, and we use
-     atomic_exchange.  For the subsequent tries we use
-     atomic_compare_and_exchange.  */
-  if (atomic_exchange_acq (lock, 1) == 0)
+  int val = 0;
+
+  /* We assume that the first try mostly will be successful, thus we use
+     atomic_exchange if it is not implemented by a CAS loop (we also assume
+     that atomic_exchange can be faster if it succeeds, see
+     ATOMIC_EXCHANGE_USES_CAS).  Otherwise, we use a weak CAS and not an
+     exchange so we bail out after the first failed attempt to change the
+     state.  For the subsequent attempts we use atomic_compare_and_exchange
+     after we observe that the lock is not acquired.
+     See also comment in pthread_spin_trylock.
+     We use acquire MO to synchronize-with the release MO store in
+     pthread_spin_unlock, and thus ensure that prior critical sections
+     happen-before this critical section.  */
+#if ! ATOMIC_EXCHANGE_USES_CAS
+  /* Try to acquire the lock with an exchange instruction as this architecture
+     has such an instruction and we assume it is faster than a CAS.
+     The acquisition succeeds if the lock is not in an acquired state.  */
+  if (__glibc_likely (atomic_exchange_acquire (lock, 1) == 0))
     return 0;
+#else
+  /* Try to acquire the lock with a CAS instruction as this architecture
+     has no exchange instruction.  The acquisition succeeds if the lock is not
+     acquired.  */
+  if (__glibc_likely (atomic_compare_exchange_weak_acquire (lock, &val, 1)))
+    return 0;
+#endif
 
   do
     {
@@ -47,23 +55,26 @@ pthread_spin_lock (pthread_spinlock_t *lock)
 	 to cmpxchg is not a good idea on many targets as that will force
 	 expensive memory synchronizations among processors and penalize other
 	 running threads.
-	 On the other hand, we do want to update memory state on the local core
-	 once in a while to avoid spinning indefinitely until some event that
-	 will happen to update local memory as a side-effect.  */
-      if (SPIN_LOCK_READS_BETWEEN_CMPXCHG >= 0)
+	 There is no technical reason for throwing in a CAS every now and then,
+	 and so far we have no evidence that it can improve performance.
+	 If that would be the case, we have to adjust other spin-waiting loops
+	 elsewhere, too!
+	 Thus we use relaxed MO reads until we observe the lock to not be
+	 acquired anymore.  */
+      do
 	{
-	  int wait = SPIN_LOCK_READS_BETWEEN_CMPXCHG;
+	  /* TODO Back-off.  */
 
-	  while (*lock != 0 && wait > 0)
-	    --wait;
-	}
-      else
-	{
-	  while (*lock != 0)
-	    ;
+	  atomic_spin_nop ();
+
+	  val = atomic_load_relaxed (lock);
 	}
+      while (val != 0);
+
+      /* We need acquire memory order here for the same reason as mentioned
+	 for the first try to lock the spinlock.  */
     }
-  while (atomic_compare_and_exchange_val_acq (lock, 1, 0) != 0);
+  while (!atomic_compare_exchange_weak_acquire (lock, &val, 1));
 
   return 0;
 }
diff --git a/nptl/pthread_spin_trylock.c b/nptl/pthread_spin_trylock.c
index 593bba3ed8..83921b06b8 100644
--- a/nptl/pthread_spin_trylock.c
+++ b/nptl/pthread_spin_trylock.c
@@ -23,5 +23,57 @@
 int
 pthread_spin_trylock (pthread_spinlock_t *lock)
 {
-  return atomic_exchange_acq (lock, 1) ? EBUSY : 0;
+  /* For the spin try lock, we have the following possibilities:
+
+     1) If we assume that trylock will most likely succeed in practice:
+     * We just do an exchange.
+
+     2) If we want to bias towards cases where trylock succeeds, but don't
+     rule out contention:
+     * If exchange is not implemented by a CAS loop, and exchange is faster
+     than CAS, do an exchange.
+     * If exchange is implemented by a CAS loop, use a weak CAS and not an
+     exchange so we bail out after the first failed attempt to change the state.
+
+     3) If we expect contention to be likely:
+     * If CAS always brings the cache line into an exclusive state even if the
+     spinlock is already acquired, then load the value first with
+     atomic_load_relaxed and test if lock is not acquired. Then do 2).
+
+     We assume that 2) is the common case, and that this won't be slower than
+     1) in the common case.
+
+     We use acquire MO to synchronize-with the release MO store in
+     pthread_spin_unlock, and thus ensure that prior critical sections
+     happen-before this critical section.  */
+#if ! ATOMIC_EXCHANGE_USES_CAS
+  /* Try to acquire the lock with an exchange instruction as this architecture
+     has such an instruction and we assume it is faster than a CAS.
+     The acquisition succeeds if the lock is not in an acquired state.  */
+  if (atomic_exchange_acquire (lock, 1) == 0)
+    return 0;
+#else
+  /* Try to acquire the lock with a CAS instruction as this architecture
+     has no exchange instruction.  The acquisition succeeds if the lock is not
+     acquired.  */
+  do
+    {
+      int val = 0;
+      if (atomic_compare_exchange_weak_acquire (lock, &val, 1))
+	return 0;
+    }
+  /* atomic_compare_exchange_weak_acquire can fail spuriously.  Whereas
+     C++11 and C11 make it clear that trylock operations can fail spuriously,
+     POSIX does not explicitly specify this; it only specifies that failing
+     synchronization operations do not need to have synchronization effects
+     themselves, but a spurious failure is something that could contradict a
+     happens-before established earlier (e.g., that we need to observe that
+     the lock is acquired).  Therefore, we emulate a strong CAS by simply
+     checking with a relaxed MO load that the lock is really acquired before
+     returning EBUSY; the additional overhead this may cause is on the slow
+     path.  */
+  while (atomic_load_relaxed (lock) == 0);
+#endif
+
+  return EBUSY;
 }
diff --git a/nptl/pthread_spin_unlock.c b/nptl/pthread_spin_unlock.c
index 5fd73e578b..f83b69639a 100644
--- a/nptl/pthread_spin_unlock.c
+++ b/nptl/pthread_spin_unlock.c
@@ -23,7 +23,9 @@
 int
 pthread_spin_unlock (pthread_spinlock_t *lock)
 {
-  atomic_full_barrier ();
-  *lock = 0;
+  /* The atomic_store_release synchronizes-with the atomic_exchange_acquire
+     or atomic_compare_exchange_weak_acquire in pthread_spin_lock /
+     pthread_spin_trylock.  */
+  atomic_store_release (lock, 0);
   return 0;
 }
diff --git a/sysdeps/aarch64/atomic-machine.h b/sysdeps/aarch64/atomic-machine.h
index a5d2213d83..eb59a5b94c 100644
--- a/sysdeps/aarch64/atomic-machine.h
+++ b/sysdeps/aarch64/atomic-machine.h
@@ -38,6 +38,7 @@ typedef uintmax_t uatomic_max_t;
 
 #define __HAVE_64B_ATOMICS 1
 #define USE_ATOMIC_COMPILER_BUILTINS 1
+#define ATOMIC_EXCHANGE_USES_CAS 0
 
 /* Compare and exchange.
    For all "bool" routines, we return FALSE if exchange succesful.  */
diff --git a/sysdeps/aarch64/nptl/pthread_spin_lock.c b/sysdeps/aarch64/nptl/pthread_spin_lock.c
deleted file mode 100644
index fcfcb40215..0000000000
--- a/sysdeps/aarch64/nptl/pthread_spin_lock.c
+++ /dev/null
@@ -1,24 +0,0 @@
-/* Copyright (C) 2008-2017 Free Software Foundation, Inc.
-
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public License as
-   published by the Free Software Foundation; either version 2.1 of the
-   License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#define SPIN_LOCK_READS_BETWEEN_CMPXCHG 1000
-
-/* We can't use the normal "#include <nptl/pthread_spin_lock.c>" because
-   it will resolve to this very file.  Using "sysdeps/.." as reference to the
-   top level directory does the job.  */
-#include <sysdeps/../nptl/pthread_spin_lock.c>
diff --git a/sysdeps/alpha/atomic-machine.h b/sysdeps/alpha/atomic-machine.h
index 06e93f2ddb..2cb2290837 100644
--- a/sysdeps/alpha/atomic-machine.h
+++ b/sysdeps/alpha/atomic-machine.h
@@ -45,6 +45,9 @@ typedef uintmax_t uatomic_max_t;
 #define __HAVE_64B_ATOMICS 1
 #define USE_ATOMIC_COMPILER_BUILTINS 0
 
+/* XXX Is this actually correct?  */
+#define ATOMIC_EXCHANGE_USES_CAS 1
+
 
 #ifdef UP
 # define __MB		/* nothing */
diff --git a/sysdeps/arm/atomic-machine.h b/sysdeps/arm/atomic-machine.h
index eeac7f0dbd..c22d05b766 100644
--- a/sysdeps/arm/atomic-machine.h
+++ b/sysdeps/arm/atomic-machine.h
@@ -35,6 +35,7 @@ typedef uintmax_t uatomic_max_t;
 
 #define __HAVE_64B_ATOMICS 0
 #define USE_ATOMIC_COMPILER_BUILTINS 0
+#define ATOMIC_EXCHANGE_USES_CAS 1
 
 void __arm_link_error (void);
 
diff --git a/sysdeps/arm/nptl/pthread_spin_lock.c b/sysdeps/arm/nptl/pthread_spin_lock.c
deleted file mode 100644
index 037b3b82a0..0000000000
--- a/sysdeps/arm/nptl/pthread_spin_lock.c
+++ /dev/null
@@ -1,23 +0,0 @@
-/* Copyright (C) 2008-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#define SPIN_LOCK_READS_BETWEEN_CMPXCHG 1000
-
-/* We can't use the normal "#include <nptl/pthread_spin_lock.c>" because
-   it will resolve to this very file.  Using "sysdeps/.." as reference to the
-   top level directory does the job.  */
-#include <sysdeps/../nptl/pthread_spin_lock.c>
diff --git a/sysdeps/hppa/nptl/pthread_spin_lock.c b/sysdeps/hppa/nptl/pthread_spin_lock.c
deleted file mode 100644
index 14f36a69dc..0000000000
--- a/sysdeps/hppa/nptl/pthread_spin_lock.c
+++ /dev/null
@@ -1,23 +0,0 @@
-/* Copyright (C) 2005-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#define SPIN_LOCK_READS_BETWEEN_CMPXCHG 1000
-
-/* We can't use the normal "#include <nptl/pthread_spin_lock.c>" because
-   it will resolve to this very file.  Using "sysdeps/.." as reference to the
-   top level directory does the job.  */
-#include <sysdeps/../nptl/pthread_spin_lock.c>
diff --git a/sysdeps/i386/atomic-machine.h b/sysdeps/i386/atomic-machine.h
index 77759f7634..0e24200617 100644
--- a/sysdeps/i386/atomic-machine.h
+++ b/sysdeps/i386/atomic-machine.h
@@ -56,6 +56,7 @@ typedef uintmax_t uatomic_max_t;
 
 #define __HAVE_64B_ATOMICS 0
 #define USE_ATOMIC_COMPILER_BUILTINS 0
+#define ATOMIC_EXCHANGE_USES_CAS 0
 
 
 #define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \
diff --git a/sysdeps/ia64/atomic-machine.h b/sysdeps/ia64/atomic-machine.h
index ecf97500b4..f04ac05446 100644
--- a/sysdeps/ia64/atomic-machine.h
+++ b/sysdeps/ia64/atomic-machine.h
@@ -46,6 +46,9 @@ typedef uintmax_t uatomic_max_t;
 #define __HAVE_64B_ATOMICS 1
 #define USE_ATOMIC_COMPILER_BUILTINS 0
 
+/* XXX Is this actually correct?  */
+#define ATOMIC_EXCHANGE_USES_CAS 0
+
 
 #define __arch_compare_and_exchange_bool_8_acq(mem, newval, oldval) \
   (abort (), 0)
diff --git a/sysdeps/m68k/coldfire/atomic-machine.h b/sysdeps/m68k/coldfire/atomic-machine.h
index 9aeb993487..d2e4a1f15d 100644
--- a/sysdeps/m68k/coldfire/atomic-machine.h
+++ b/sysdeps/m68k/coldfire/atomic-machine.h
@@ -53,6 +53,9 @@ typedef uintmax_t uatomic_max_t;
 #define __HAVE_64B_ATOMICS 1
 #define USE_ATOMIC_COMPILER_BUILTINS 0
 
+/* XXX Is this actually correct?  */
+#define ATOMIC_EXCHANGE_USES_CAS 1
+
 /* The only basic operation needed is compare and exchange.  */
 #define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \
   ({ __typeof (mem) __gmemp = (mem);				      \
diff --git a/sysdeps/m68k/m680x0/m68020/atomic-machine.h b/sysdeps/m68k/m680x0/m68020/atomic-machine.h
index 00dc22d79d..83238f51de 100644
--- a/sysdeps/m68k/m680x0/m68020/atomic-machine.h
+++ b/sysdeps/m68k/m680x0/m68020/atomic-machine.h
@@ -47,6 +47,9 @@ typedef uintmax_t uatomic_max_t;
 #define __HAVE_64B_ATOMICS 1
 #define USE_ATOMIC_COMPILER_BUILTINS 0
 
+/* XXX Is this actually correct?  */
+#define ATOMIC_EXCHANGE_USES_CAS 1
+
 #define __arch_compare_and_exchange_val_8_acq(mem, newval, oldval) \
   ({ __typeof (*(mem)) __ret;						      \
      __asm __volatile ("cas%.b %0,%2,%1"				      \
diff --git a/sysdeps/m68k/nptl/pthread_spin_lock.c b/sysdeps/m68k/nptl/pthread_spin_lock.c
deleted file mode 100644
index 62795f4cd2..0000000000
--- a/sysdeps/m68k/nptl/pthread_spin_lock.c
+++ /dev/null
@@ -1,24 +0,0 @@
-/* Copyright (C) 2010-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Maxim Kuvyrkov <maxim@codesourcery.com>, 2010.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#define SPIN_LOCK_READS_BETWEEN_CMPXCHG 1000
-
-/* We can't use the normal "#include <nptl/pthread_spin_lock.c>" because
-   it will resolve to this very file.  Using "sysdeps/.." as reference to the
-   top level directory does the job.  */
-#include <sysdeps/../nptl/pthread_spin_lock.c>
diff --git a/sysdeps/microblaze/atomic-machine.h b/sysdeps/microblaze/atomic-machine.h
index dc5309c430..d73c69d46b 100644
--- a/sysdeps/microblaze/atomic-machine.h
+++ b/sysdeps/microblaze/atomic-machine.h
@@ -38,6 +38,9 @@ typedef uintmax_t uatomic_max_t;
 #define __HAVE_64B_ATOMICS 0
 #define USE_ATOMIC_COMPILER_BUILTINS 0
 
+/* XXX Is this actually correct?  */
+#define ATOMIC_EXCHANGE_USES_CAS 1
+
 
 /* Microblaze does not have byte and halfword forms of load and reserve and
    store conditional. So for microblaze we stub out the 8- and 16-bit forms.  */
diff --git a/sysdeps/microblaze/nptl/pthread_spin_lock.c b/sysdeps/microblaze/nptl/pthread_spin_lock.c
deleted file mode 100644
index fcfcb40215..0000000000
--- a/sysdeps/microblaze/nptl/pthread_spin_lock.c
+++ /dev/null
@@ -1,24 +0,0 @@
-/* Copyright (C) 2008-2017 Free Software Foundation, Inc.
-
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public License as
-   published by the Free Software Foundation; either version 2.1 of the
-   License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#define SPIN_LOCK_READS_BETWEEN_CMPXCHG 1000
-
-/* We can't use the normal "#include <nptl/pthread_spin_lock.c>" because
-   it will resolve to this very file.  Using "sysdeps/.." as reference to the
-   top level directory does the job.  */
-#include <sysdeps/../nptl/pthread_spin_lock.c>
diff --git a/sysdeps/mips/atomic-machine.h b/sysdeps/mips/atomic-machine.h
index 54c182b38b..16fef6b797 100644
--- a/sysdeps/mips/atomic-machine.h
+++ b/sysdeps/mips/atomic-machine.h
@@ -92,7 +92,15 @@ typedef uintmax_t uatomic_max_t;
    have no assembly alternative available and want to avoid the __sync_*
    builtins if at all possible.  */
 
-#define USE_ATOMIC_COMPILER_BUILTINS 1
+# define USE_ATOMIC_COMPILER_BUILTINS 1
+
+/* MIPS is an LL/SC machine.  However, XLP has a direct atomic exchange
+   instruction which will be used by __atomic_exchange_n.  */
+# ifdef _MIPS_ARCH_XLP
+#  define ATOMIC_EXCHANGE_USES_CAS 0
+# else
+#  define ATOMIC_EXCHANGE_USES_CAS 1
+# endif
 
 /* Compare and exchange.
    For all "bool" routines, we return FALSE if exchange succesful.  */
@@ -213,7 +221,8 @@ typedef uintmax_t uatomic_max_t;
 /* This implementation using inline assembly will be removed once glibc
    requires GCC 4.8 or later to build.  */
 
-#define USE_ATOMIC_COMPILER_BUILTINS 0
+# define USE_ATOMIC_COMPILER_BUILTINS 0
+# define ATOMIC_EXCHANGE_USES_CAS 1
 
 /* Compare and exchange.  For all of the "xxx" routines, we expect a
    "__prev" and a "__cmp" variable to be provided by the enclosing scope,
diff --git a/sysdeps/mips/nptl/pthread_spin_lock.c b/sysdeps/mips/nptl/pthread_spin_lock.c
deleted file mode 100644
index 19d87a5861..0000000000
--- a/sysdeps/mips/nptl/pthread_spin_lock.c
+++ /dev/null
@@ -1,23 +0,0 @@
-/* Copyright (C) 2012-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#define SPIN_LOCK_READS_BETWEEN_CMPXCHG 1000
-
-/* We can't use the normal "#include <nptl/pthread_spin_lock.c>" because
-   it will resolve to this very file.  Using "sysdeps/.." as reference to the
-   top level directory does the job.  */
-#include <sysdeps/../nptl/pthread_spin_lock.c>
diff --git a/sysdeps/nios2/nptl/pthread_spin_lock.c b/sysdeps/nios2/nptl/pthread_spin_lock.c
deleted file mode 100644
index b203469508..0000000000
--- a/sysdeps/nios2/nptl/pthread_spin_lock.c
+++ /dev/null
@@ -1,24 +0,0 @@
-/* pthread spin-lock implementation for Nios II.
-   Copyright (C) 2005-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#define SPIN_LOCK_READS_BETWEEN_CMPXCHG 1000
-
-/* We can't use the normal "#include <nptl/pthread_spin_lock.c>" because
-   it will resolve to this very file.  Using "sysdeps/.." as reference to the
-   top level directory does the job.  */
-#include <sysdeps/../nptl/pthread_spin_lock.c>
diff --git a/sysdeps/powerpc/powerpc32/atomic-machine.h b/sysdeps/powerpc/powerpc32/atomic-machine.h
index 20d5e85683..96c7d81359 100644
--- a/sysdeps/powerpc/powerpc32/atomic-machine.h
+++ b/sysdeps/powerpc/powerpc32/atomic-machine.h
@@ -35,6 +35,7 @@
 
 #define __HAVE_64B_ATOMICS 0
 #define USE_ATOMIC_COMPILER_BUILTINS 0
+#define ATOMIC_EXCHANGE_USES_CAS 1
 
 /*
  * The 32-bit exchange_bool is different on powerpc64 because the subf
diff --git a/sysdeps/powerpc/powerpc64/atomic-machine.h b/sysdeps/powerpc/powerpc64/atomic-machine.h
index 40c308e210..46df488b3c 100644
--- a/sysdeps/powerpc/powerpc64/atomic-machine.h
+++ b/sysdeps/powerpc/powerpc64/atomic-machine.h
@@ -35,6 +35,7 @@
 
 #define __HAVE_64B_ATOMICS 1
 #define USE_ATOMIC_COMPILER_BUILTINS 0
+#define ATOMIC_EXCHANGE_USES_CAS 1
 
 /* The 32-bit exchange_bool is different on powerpc64 because the subf
    does signed 64-bit arithmetic while the lwarx is 32-bit unsigned
diff --git a/sysdeps/s390/atomic-machine.h b/sysdeps/s390/atomic-machine.h
index 690d2e3065..adaca40798 100644
--- a/sysdeps/s390/atomic-machine.h
+++ b/sysdeps/s390/atomic-machine.h
@@ -67,6 +67,8 @@ typedef uintmax_t uatomic_max_t;
 # define __HAVE_64B_ATOMICS 0
 #endif
 
+#define ATOMIC_EXCHANGE_USES_CAS 1
+
 /* Implement some of the non-C11 atomic macros from include/atomic.h
    with help of the C11 atomic builtins.  The other non-C11 atomic macros
    are using the macros defined here.  */
diff --git a/sysdeps/sparc/sparc32/atomic-machine.h b/sysdeps/sparc/sparc32/atomic-machine.h
index acd029ea34..a2fe8485b4 100644
--- a/sysdeps/sparc/sparc32/atomic-machine.h
+++ b/sysdeps/sparc/sparc32/atomic-machine.h
@@ -50,6 +50,9 @@ typedef uintmax_t uatomic_max_t;
 #define __HAVE_64B_ATOMICS 0
 #define USE_ATOMIC_COMPILER_BUILTINS 0
 
+/* XXX Is this actually correct?  */
+#define ATOMIC_EXCHANGE_USES_CAS 1
+
 
 /* We have no compare and swap, just test and set.
    The following implementation contends on 64 global locks
diff --git a/sysdeps/sparc/sparc32/sparcv9/atomic-machine.h b/sysdeps/sparc/sparc32/sparcv9/atomic-machine.h
index 7f7895e57b..2b36dfc94d 100644
--- a/sysdeps/sparc/sparc32/sparcv9/atomic-machine.h
+++ b/sysdeps/sparc/sparc32/sparcv9/atomic-machine.h
@@ -47,6 +47,9 @@ typedef uintmax_t uatomic_max_t;
 #define __HAVE_64B_ATOMICS 0
 #define USE_ATOMIC_COMPILER_BUILTINS 0
 
+/* XXX Is this actually correct?  */
+#define ATOMIC_EXCHANGE_USES_CAS 0
+
 
 #define __arch_compare_and_exchange_val_8_acq(mem, newval, oldval) \
   (abort (), (__typeof (*mem)) 0)
diff --git a/sysdeps/sparc/sparc64/atomic-machine.h b/sysdeps/sparc/sparc64/atomic-machine.h
index 44a43ff0f5..72009329c3 100644
--- a/sysdeps/sparc/sparc64/atomic-machine.h
+++ b/sysdeps/sparc/sparc64/atomic-machine.h
@@ -47,6 +47,9 @@ typedef uintmax_t uatomic_max_t;
 #define __HAVE_64B_ATOMICS 1
 #define USE_ATOMIC_COMPILER_BUILTINS 0
 
+/* XXX Is this actually correct?  */
+#define ATOMIC_EXCHANGE_USES_CAS 1
+
 
 #define __arch_compare_and_exchange_val_8_acq(mem, newval, oldval) \
   (abort (), (__typeof (*mem)) 0)
diff --git a/sysdeps/tile/tilegx/atomic-machine.h b/sysdeps/tile/tilegx/atomic-machine.h
index 6345251afd..e77f6707b0 100644
--- a/sysdeps/tile/tilegx/atomic-machine.h
+++ b/sysdeps/tile/tilegx/atomic-machine.h
@@ -31,6 +31,7 @@
 #endif
 
 #define USE_ATOMIC_COMPILER_BUILTINS 0
+#define ATOMIC_EXCHANGE_USES_CAS 0
 
 /* Pick appropriate 8- or 4-byte instruction. */
 #define __atomic_update(mem, v, op)                                     \
diff --git a/sysdeps/tile/tilepro/atomic-machine.h b/sysdeps/tile/tilepro/atomic-machine.h
index 33a8b85bbd..45e36de1e4 100644
--- a/sysdeps/tile/tilepro/atomic-machine.h
+++ b/sysdeps/tile/tilepro/atomic-machine.h
@@ -23,6 +23,7 @@
 
 #define __HAVE_64B_ATOMICS 0
 #define USE_ATOMIC_COMPILER_BUILTINS 0
+#define ATOMIC_EXCHANGE_USES_CAS 0
 
 /* 32-bit integer compare-and-exchange. */
 static __inline __attribute__ ((always_inline))
diff --git a/sysdeps/unix/sysv/linux/hppa/atomic-machine.h b/sysdeps/unix/sysv/linux/hppa/atomic-machine.h
index 2cd22352b1..59581bd4bc 100644
--- a/sysdeps/unix/sysv/linux/hppa/atomic-machine.h
+++ b/sysdeps/unix/sysv/linux/hppa/atomic-machine.h
@@ -39,6 +39,9 @@ typedef uintmax_t uatomic_max_t;
 #define __HAVE_64B_ATOMICS 0
 #define USE_ATOMIC_COMPILER_BUILTINS 0
 
+/* XXX Is this actually correct?  */
+#define ATOMIC_EXCHANGE_USES_CAS 1
+
 /* prev = *addr;
    if (prev == old)
      *addr = new;
diff --git a/sysdeps/unix/sysv/linux/m68k/coldfire/atomic-machine.h b/sysdeps/unix/sysv/linux/m68k/coldfire/atomic-machine.h
index 6755a056f4..1daac514a9 100644
--- a/sysdeps/unix/sysv/linux/m68k/coldfire/atomic-machine.h
+++ b/sysdeps/unix/sysv/linux/m68k/coldfire/atomic-machine.h
@@ -39,6 +39,9 @@ typedef uintmax_t uatomic_max_t;
 #define __HAVE_64B_ATOMICS 0
 #define USE_ATOMIC_COMPILER_BUILTINS 0
 
+/* XXX Is this actually correct?  */
+#define ATOMIC_EXCHANGE_USES_CAS 1
+
 /* The only basic operation needed is compare and exchange.  */
 /* For ColdFire we'll have to trap into the kernel mode anyway,
    so trap from the library rather then from the kernel wrapper.  */
diff --git a/sysdeps/unix/sysv/linux/nios2/atomic-machine.h b/sysdeps/unix/sysv/linux/nios2/atomic-machine.h
index 6111ccf886..d4dd025603 100644
--- a/sysdeps/unix/sysv/linux/nios2/atomic-machine.h
+++ b/sysdeps/unix/sysv/linux/nios2/atomic-machine.h
@@ -34,6 +34,9 @@ typedef uintmax_t uatomic_max_t;
 #define __HAVE_64B_ATOMICS 0
 #define USE_ATOMIC_COMPILER_BUILTINS 0
 
+/* XXX Is this actually correct?  */
+#define ATOMIC_EXCHANGE_USES_CAS 1
+
 #define __arch_compare_and_exchange_val_8_acq(mem, newval, oldval)	\
   (abort (), (__typeof (*mem)) 0)
 #define __arch_compare_and_exchange_val_16_acq(mem, newval, oldval)	\
diff --git a/sysdeps/unix/sysv/linux/sh/atomic-machine.h b/sysdeps/unix/sysv/linux/sh/atomic-machine.h
index 3c58b70c02..5a7e953ac4 100644
--- a/sysdeps/unix/sysv/linux/sh/atomic-machine.h
+++ b/sysdeps/unix/sysv/linux/sh/atomic-machine.h
@@ -47,6 +47,9 @@ typedef uintmax_t uatomic_max_t;
 #define __HAVE_64B_ATOMICS 0
 #define USE_ATOMIC_COMPILER_BUILTINS 0
 
+/* XXX Is this actually correct?  */
+#define ATOMIC_EXCHANGE_USES_CAS 1
+
 /* SH kernel has implemented a gUSA ("g" User Space Atomicity) support
    for the user space atomicity. The atomicity macros use this scheme.
 
diff --git a/sysdeps/x86_64/atomic-machine.h b/sysdeps/x86_64/atomic-machine.h
index 2e8a9aad04..c454734001 100644
--- a/sysdeps/x86_64/atomic-machine.h
+++ b/sysdeps/x86_64/atomic-machine.h
@@ -59,6 +59,7 @@ typedef uintmax_t uatomic_max_t;
 
 #define __HAVE_64B_ATOMICS 1
 #define USE_ATOMIC_COMPILER_BUILTINS 1
+#define ATOMIC_EXCHANGE_USES_CAS 0
 
 #define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \
   __sync_val_compare_and_swap (mem, oldval, newval)