-rw-r--r--  ChangeLog | 74
-rw-r--r--  nptl/DESIGN-condvar.txt | 134
-rw-r--r--  nptl/Makefile | 6
-rw-r--r--  nptl/lowlevelcond.sym | 16
-rw-r--r--  nptl/nptl-printers.py | 70
-rw-r--r--  nptl/nptl_lock_constants.pysym | 27
-rw-r--r--  nptl/pthreadP.h | 7
-rw-r--r--  nptl/pthread_cond_broadcast.c | 99
-rw-r--r--  nptl/pthread_cond_common.c | 466
-rw-r--r--  nptl/pthread_cond_destroy.c | 82
-rw-r--r--  nptl/pthread_cond_init.c | 28
-rw-r--r--  nptl/pthread_cond_signal.c | 99
-rw-r--r--  nptl/pthread_cond_timedwait.c | 268
-rw-r--r--  nptl/pthread_cond_wait.c | 754
-rw-r--r--  nptl/pthread_condattr_getclock.c | 2
-rw-r--r--  nptl/pthread_condattr_getpshared.c | 3
-rw-r--r--  nptl/pthread_condattr_init.c | 4
-rw-r--r--  nptl/pthread_condattr_setclock.c | 11
-rw-r--r--  nptl/test-cond-printers.py | 2
-rw-r--r--  nptl/tst-cond1.c | 3
-rw-r--r--  nptl/tst-cond20.c | 5
-rw-r--r--  nptl/tst-cond22.c | 18
-rw-r--r--  sysdeps/aarch64/nptl/bits/pthreadtypes.h | 31
-rw-r--r--  sysdeps/arm/nptl/bits/pthreadtypes.h | 29
-rw-r--r--  sysdeps/ia64/nptl/bits/pthreadtypes.h | 31
-rw-r--r--  sysdeps/m68k/nptl/bits/pthreadtypes.h | 32
-rw-r--r--  sysdeps/microblaze/nptl/bits/pthreadtypes.h | 29
-rw-r--r--  sysdeps/mips/nptl/bits/pthreadtypes.h | 31
-rw-r--r--  sysdeps/nios2/nptl/bits/pthreadtypes.h | 31
-rw-r--r--  sysdeps/nptl/internaltypes.h | 17
-rw-r--r--  sysdeps/nptl/pthread.h | 2
-rw-r--r--  sysdeps/s390/nptl/bits/pthreadtypes.h | 29
-rw-r--r--  sysdeps/sh/nptl/bits/pthreadtypes.h | 29
-rw-r--r--  sysdeps/tile/nptl/bits/pthreadtypes.h | 29
-rw-r--r--  sysdeps/unix/sysv/linux/alpha/bits/pthreadtypes.h | 31
-rw-r--r--  sysdeps/unix/sysv/linux/hppa/internaltypes.h | 40
-rw-r--r--  sysdeps/unix/sysv/linux/hppa/pthread_cond_timedwait.c | 41
-rw-r--r--  sysdeps/unix/sysv/linux/hppa/pthread_cond_wait.c | 13
-rw-r--r--  sysdeps/unix/sysv/linux/i386/i686/pthread_cond_timedwait.S | 20
-rw-r--r--  sysdeps/unix/sysv/linux/i386/pthread_cond_broadcast.S | 241
-rw-r--r--  sysdeps/unix/sysv/linux/i386/pthread_cond_signal.S | 216
-rw-r--r--  sysdeps/unix/sysv/linux/i386/pthread_cond_timedwait.S | 974
-rw-r--r--  sysdeps/unix/sysv/linux/i386/pthread_cond_wait.S | 642
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h | 31
-rw-r--r--  sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S | 177
-rw-r--r--  sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S | 161
-rw-r--r--  sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S | 623
-rw-r--r--  sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S | 555
-rw-r--r--  sysdeps/x86/bits/pthreadtypes.h | 29
49 files changed, 1671 insertions, 4621 deletions
diff --git a/ChangeLog b/ChangeLog
index bbda2a7f2c..309b13cab8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,77 @@
+2016-12-31  Torvald Riegel  <triegel@redhat.com>
+
+	[BZ #13165]
+	* nptl/pthread_cond_broadcast.c (__pthread_cond_broadcast): Rewrite to
+	use new algorithm.
+	* nptl/pthread_cond_destroy.c (__pthread_cond_destroy): Likewise.
+	* nptl/pthread_cond_init.c (__pthread_cond_init): Likewise.
+	* nptl/pthread_cond_signal.c (__pthread_cond_signal): Likewise.
+	* nptl/pthread_cond_wait.c (__pthread_cond_wait): Likewise.
+	(__pthread_cond_timedwait): Move here from pthread_cond_timedwait.c.
+	(__condvar_confirm_wakeup, __condvar_cancel_waiting,
+	__condvar_cleanup_waiting, __condvar_dec_grefs,
+	__pthread_cond_wait_common): New.
+	(__condvar_cleanup): Remove.
+	* nptl/pthread_condattr_getclock.c (pthread_condattr_getclock): Adapt.
+	* nptl/pthread_condattr_setclock.c (pthread_condattr_setclock):
+	Likewise.
+	* nptl/pthread_condattr_getpshared.c (pthread_condattr_getpshared):
+	Likewise.
+	* nptl/pthread_condattr_init.c (pthread_condattr_init): Likewise.
+	* nptl/tst-cond1.c: Add comment.
+	* nptl/tst-cond20.c (do_test): Adapt.
+	* nptl/tst-cond22.c (do_test): Likewise.
+	* sysdeps/aarch64/nptl/bits/pthreadtypes.h (pthread_cond_t): Adapt
+	structure.
+	* sysdeps/arm/nptl/bits/pthreadtypes.h (pthread_cond_t): Likewise.
+	* sysdeps/ia64/nptl/bits/pthreadtypes.h (pthread_cond_t): Likewise.
+	* sysdeps/m68k/nptl/bits/pthreadtypes.h (pthread_cond_t): Likewise.
+	* sysdeps/microblaze/nptl/bits/pthreadtypes.h (pthread_cond_t):
+	Likewise.
+	* sysdeps/mips/nptl/bits/pthreadtypes.h (pthread_cond_t): Likewise.
+	* sysdeps/nios2/nptl/bits/pthreadtypes.h (pthread_cond_t): Likewise.
+	* sysdeps/s390/nptl/bits/pthreadtypes.h (pthread_cond_t): Likewise.
+	* sysdeps/sh/nptl/bits/pthreadtypes.h (pthread_cond_t): Likewise.
+	* sysdeps/tile/nptl/bits/pthreadtypes.h (pthread_cond_t): Likewise.
+	* sysdeps/unix/sysv/linux/alpha/bits/pthreadtypes.h (pthread_cond_t):
+	Likewise.
+	* sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h (pthread_cond_t):
+	Likewise.
+	* sysdeps/x86/bits/pthreadtypes.h (pthread_cond_t): Likewise.
+	* sysdeps/nptl/internaltypes.h (COND_NWAITERS_SHIFT): Remove.
+	(COND_CLOCK_BITS): Adapt.
+	* sysdeps/nptl/pthread.h (PTHREAD_COND_INITIALIZER): Adapt.
+	* nptl/pthreadP.h (__PTHREAD_COND_CLOCK_MONOTONIC_MASK,
+	__PTHREAD_COND_SHARED_MASK): New.
+	* nptl/nptl-printers.py (CLOCK_IDS): Remove.
+	(ConditionVariablePrinter, ConditionVariableAttributesPrinter): Adapt.
+	* nptl/nptl_lock_constants.pysym: Adapt.
+	* nptl/test-cond-printers.py: Adapt.
+	* sysdeps/unix/sysv/linux/hppa/internaltypes.h (cond_compat_clear,
+	cond_compat_check_and_clear): Adapt.
+	* sysdeps/unix/sysv/linux/hppa/pthread_cond_timedwait.c: Remove file ...
+	* sysdeps/unix/sysv/linux/hppa/pthread_cond_wait.c
+	(__pthread_cond_timedwait): ... and move here.
+	* nptl/DESIGN-condvar.txt: Remove file.
+	* nptl/lowlevelcond.sym: Likewise.
+	* nptl/pthread_cond_timedwait.c: Likewise.
+	* sysdeps/unix/sysv/linux/i386/i486/pthread_cond_broadcast.S: Likewise.
+	* sysdeps/unix/sysv/linux/i386/i486/pthread_cond_signal.S: Likewise.
+	* sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S: Likewise.
+	* sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S: Likewise.
+	* sysdeps/unix/sysv/linux/i386/i586/pthread_cond_broadcast.S: Likewise.
+	* sysdeps/unix/sysv/linux/i386/i586/pthread_cond_signal.S: Likewise.
+	* sysdeps/unix/sysv/linux/i386/i586/pthread_cond_timedwait.S: Likewise.
+	* sysdeps/unix/sysv/linux/i386/i586/pthread_cond_wait.S: Likewise.
+	* sysdeps/unix/sysv/linux/i386/i686/pthread_cond_broadcast.S: Likewise.
+	* sysdeps/unix/sysv/linux/i386/i686/pthread_cond_signal.S: Likewise.
+	* sysdeps/unix/sysv/linux/i386/i686/pthread_cond_timedwait.S: Likewise.
+	* sysdeps/unix/sysv/linux/i386/i686/pthread_cond_wait.S: Likewise.
+	* sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S: Likewise.
+	* sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S: Likewise.
+	* sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S: Likewise.
+	* sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S: Likewise.
+
 2016-12-31  Joseph Myers  <joseph@codesourcery.com>
 
 	* math/bits/mathcalls.h [__GLIBC_USE (IEC_60559_BFP_EXT)]
diff --git a/nptl/DESIGN-condvar.txt b/nptl/DESIGN-condvar.txt
deleted file mode 100644
index 4845251c75..0000000000
--- a/nptl/DESIGN-condvar.txt
+++ /dev/null
@@ -1,134 +0,0 @@
-Conditional Variable pseudocode.
-================================
-
-       int pthread_cond_timedwait (pthread_cond_t *cv, pthread_mutex_t *mutex);
-       int pthread_cond_signal    (pthread_cond_t *cv);
-       int pthread_cond_broadcast (pthread_cond_t *cv);
-
-struct pthread_cond_t {
-
-   unsigned int cond_lock;
-
-         internal mutex
-
-   uint64_t total_seq;
-
-     Total number of threads using the conditional variable.
-
-   uint64_t wakeup_seq;
-
-     sequence number for next wakeup.
-
-   uint64_t woken_seq;
-
-     sequence number of last woken thread.
-
-   uint32_t broadcast_seq;
-
-}
-
-
-struct cv_data {
-
-   pthread_cond_t *cv;
-
-   uint32_t bc_seq
-
-}
-
-
-
-cleanup_handler(cv_data)
-{
-  cv = cv_data->cv;
-  lll_lock(cv->lock);
-
-  if (cv_data->bc_seq == cv->broadcast_seq) {
-    ++cv->wakeup_seq;
-    ++cv->woken_seq;
-  }
-
-  /* make sure no signal gets lost.  */
-  FUTEX_WAKE(cv->wakeup_seq, ALL);
-
-  lll_unlock(cv->lock);
-}
-
-
-cond_timedwait(cv, mutex, timeout):
-{
-   lll_lock(cv->lock);
-   mutex_unlock(mutex);
-
-   cleanup_push
-
-   ++cv->total_seq;
-   val = seq =  cv->wakeup_seq;
-   cv_data.bc = cv->broadcast_seq;
-   cv_data.cv = cv;
-
-   while (1) {
-
-     lll_unlock(cv->lock);
-
-     enable_async(&cv_data);
-
-     ret = FUTEX_WAIT(cv->wakeup_seq, val, timeout);
-
-     restore_async
-
-     lll_lock(cv->lock);
-
-     if (bc != cv->broadcast_seq)
-       goto bc_out;
-
-     val = cv->wakeup_seq;
-
-     if (val != seq && cv->woken_seq != val) {
-       ret = 0;
-       break;
-     }
-
-     if (ret == TIMEDOUT) {
-       ++cv->wakeup_seq;
-       break;
-     }
-   }
-
-   ++cv->woken_seq;
-
- bc_out:
-   lll_unlock(cv->lock);
-
-   cleanup_pop
-
-   mutex_lock(mutex);
-
-   return ret;
-}
-
-cond_signal(cv)
-{
-   lll_lock(cv->lock);
-
-   if (cv->total_seq > cv->wakeup_seq) {
-     ++cv->wakeup_seq;
-     FUTEX_WAKE(cv->wakeup_seq, 1);
-   }
-
-   lll_unlock(cv->lock);
-}
-
-cond_broadcast(cv)
-{
-   lll_lock(cv->lock);
-
-   if (cv->total_seq > cv->wakeup_seq) {
-     cv->wakeup_seq = cv->total_seq;
-     cv->woken_seq = cv->total_seq;
-     ++cv->broadcast_seq;
-     FUTEX_WAKE(cv->wakeup_seq, ALL);
-   }
-
-   lll_unlock(cv->lock);
-}
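
The algorithm that replaces this design is described in detail in comments in
pthread_cond_wait.c and in pthread_cond_common.c below.  As rough orientation
only, here is a sketch of the new signal path in the style of the removed
document; it is a simplification derived from the code added by this patch,
not a normative description:

   cond_signal (cv)
   {
     if (cv->wrefs >> 3 == 0)     /* No thread is in a wait function.  */
       return;
     acquire_internal_lock (cv);
     /* G1 is the group of waiters currently eligible to consume signals;
        G2 collects more recent waiters.  If G1 still has waiters, or if
        switching group roles turns the old G2 into a non-empty new G1: */
     if (cv->g_size[g1] != 0 || quiesce_and_switch_g1 (cv, &g1))
       {
         cv->g_signals[g1] += 2;  /* Add one signal; the LSB is a flag.  */
         cv->g_size[g1]--;
         wake = true;
       }
     release_internal_lock (cv);
     if (wake)
       FUTEX_WAKE (cv->g_signals[g1], 1);
   }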
diff --git a/nptl/Makefile b/nptl/Makefile
index bed5babfd9..62b0951ec0 100644
--- a/nptl/Makefile
+++ b/nptl/Makefile
@@ -83,7 +83,7 @@ libpthread-routines = nptl-init vars events version pt-interp \
 		      pthread_rwlockattr_getkind_np \
 		      pthread_rwlockattr_setkind_np \
 		      pthread_cond_init pthread_cond_destroy \
-		      pthread_cond_wait pthread_cond_timedwait \
+		      pthread_cond_wait \
 		      pthread_cond_signal pthread_cond_broadcast \
 		      old_pthread_cond_init old_pthread_cond_destroy \
 		      old_pthread_cond_wait old_pthread_cond_timedwait \
@@ -186,7 +186,6 @@ CFLAGS-pthread_timedjoin.c = -fexceptions -fasynchronous-unwind-tables
 CFLAGS-pthread_once.c = $(uses-callbacks) -fexceptions \
 			-fasynchronous-unwind-tables
 CFLAGS-pthread_cond_wait.c = -fexceptions -fasynchronous-unwind-tables
-CFLAGS-pthread_cond_timedwait.c = -fexceptions -fasynchronous-unwind-tables
 CFLAGS-sem_wait.c = -fexceptions -fasynchronous-unwind-tables
 CFLAGS-sem_timedwait.c = -fexceptions -fasynchronous-unwind-tables
 
@@ -307,8 +306,7 @@ test-xfail-tst-once5 = yes
 # Files which must not be linked with libpthread.
 tests-nolibpthread = tst-unload
 
-gen-as-const-headers = pthread-errnos.sym \
-		       lowlevelcond.sym lowlevelrwlock.sym \
+gen-as-const-headers = pthread-errnos.sym lowlevelrwlock.sym \
 		       unwindbuf.sym \
 		       lowlevelrobustlock.sym pthread-pi-defines.sym
 
diff --git a/nptl/lowlevelcond.sym b/nptl/lowlevelcond.sym
deleted file mode 100644
index 18e1adad43..0000000000
--- a/nptl/lowlevelcond.sym
+++ /dev/null
@@ -1,16 +0,0 @@
-#include <stddef.h>
-#include <sched.h>
-#include <bits/pthreadtypes.h>
-#include <internaltypes.h>
-
---
-
-cond_lock	offsetof (pthread_cond_t, __data.__lock)
-cond_futex	offsetof (pthread_cond_t, __data.__futex)
-cond_nwaiters	offsetof (pthread_cond_t, __data.__nwaiters)
-total_seq	offsetof (pthread_cond_t, __data.__total_seq)
-wakeup_seq	offsetof (pthread_cond_t, __data.__wakeup_seq)
-woken_seq	offsetof (pthread_cond_t, __data.__woken_seq)
-dep_mutex	offsetof (pthread_cond_t, __data.__mutex)
-broadcast_seq	offsetof (pthread_cond_t, __data.__broadcast_seq)
-nwaiters_shift	COND_NWAITERS_SHIFT
diff --git a/nptl/nptl-printers.py b/nptl/nptl-printers.py
index e402f232c7..76adaddd95 100644
--- a/nptl/nptl-printers.py
+++ b/nptl/nptl-printers.py
@@ -293,16 +293,6 @@ class MutexAttributesPrinter(object):
         elif protocol == PTHREAD_PRIO_PROTECT:
             self.values.append(('Protocol', 'Priority protect'))
 
-CLOCK_IDS = {
-    CLOCK_REALTIME: 'CLOCK_REALTIME',
-    CLOCK_MONOTONIC: 'CLOCK_MONOTONIC',
-    CLOCK_PROCESS_CPUTIME_ID: 'CLOCK_PROCESS_CPUTIME_ID',
-    CLOCK_THREAD_CPUTIME_ID: 'CLOCK_THREAD_CPUTIME_ID',
-    CLOCK_MONOTONIC_RAW: 'CLOCK_MONOTONIC_RAW',
-    CLOCK_REALTIME_COARSE: 'CLOCK_REALTIME_COARSE',
-    CLOCK_MONOTONIC_COARSE: 'CLOCK_MONOTONIC_COARSE'
-}
-
 class ConditionVariablePrinter(object):
     """Pretty printer for pthread_cond_t."""
 
@@ -313,24 +303,8 @@ class ConditionVariablePrinter(object):
             cond: A gdb.value representing a pthread_cond_t.
         """
 
-        # Since PTHREAD_COND_SHARED is an integer, we need to cast it to void *
-        # to be able to compare it to the condvar's __data.__mutex member.
-        #
-        # While it looks like self.shared_value should be a class variable,
-        # that would result in it having an incorrect size if we're loading
-        # these printers through .gdbinit for a 64-bit objfile in AMD64.
-        # This is because gdb initially assumes the pointer size to be 4 bytes,
-        # and only sets it to 8 after loading the 64-bit objfiles.  Since
-        # .gdbinit runs before any objfiles are loaded, this would effectively
-        # make self.shared_value have a size of 4, thus breaking later
-        # comparisons with pointers whose types are looked up at runtime.
-        void_ptr_type = gdb.lookup_type('void').pointer()
-        self.shared_value = gdb.Value(PTHREAD_COND_SHARED).cast(void_ptr_type)
-
         data = cond['__data']
-        self.total_seq = data['__total_seq']
-        self.mutex = data['__mutex']
-        self.nwaiters = data['__nwaiters']
+        self.wrefs = data['__wrefs']
         self.values = []
 
         self.read_values()
@@ -360,7 +334,6 @@ class ConditionVariablePrinter(object):
 
         self.read_status()
         self.read_attributes()
-        self.read_mutex_info()
 
     def read_status(self):
         """Read the status of the condvar.
@@ -369,41 +342,22 @@ class ConditionVariablePrinter(object):
         are waiting for it.
         """
 
-        if self.total_seq == PTHREAD_COND_DESTROYED:
-            self.values.append(('Status', 'Destroyed'))
-
-        self.values.append(('Threads waiting for this condvar',
-                            self.nwaiters >> COND_NWAITERS_SHIFT))
+        self.values.append(('Threads known to still execute a wait function',
+                            self.wrefs >> PTHREAD_COND_WREFS_SHIFT))
 
     def read_attributes(self):
         """Read the condvar's attributes."""
 
-        clock_id = self.nwaiters & ((1 << COND_NWAITERS_SHIFT) - 1)
-
-        # clock_id must be casted to int because it's a gdb.Value
-        self.values.append(('Clock ID', CLOCK_IDS[int(clock_id)]))
+        if (self.wrefs & PTHREAD_COND_CLOCK_MONOTONIC_MASK) != 0:
+            self.values.append(('Clock ID', 'CLOCK_MONOTONIC'))
+        else:
+            self.values.append(('Clock ID', 'CLOCK_REALTIME'))
 
-        shared = (self.mutex == self.shared_value)
-
-        if shared:
+        if (self.wrefs & PTHREAD_COND_SHARED_MASK) != 0:
             self.values.append(('Shared', 'Yes'))
         else:
             self.values.append(('Shared', 'No'))
 
-    def read_mutex_info(self):
-        """Read the data of the mutex this condvar is bound to.
-
-        A pthread_cond_t's __data.__mutex member is a void * which
-        must be casted to pthread_mutex_t *.  For shared condvars, this
-        member isn't recorded and has a special value instead.
-        """
-
-        if self.mutex and self.mutex != self.shared_value:
-            mutex_type = gdb.lookup_type('pthread_mutex_t')
-            mutex = self.mutex.cast(mutex_type.pointer()).dereference()
-
-            self.values.append(('Mutex', mutex))
-
 class ConditionVariableAttributesPrinter(object):
     """Pretty printer for pthread_condattr_t.
 
@@ -453,10 +407,12 @@ class ConditionVariableAttributesPrinter(object):
         created in self.children.
         """
 
-        clock_id = self.condattr & ((1 << COND_NWAITERS_SHIFT) - 1)
+        clock_id = (self.condattr >> 1) & ((1 << COND_CLOCK_BITS) - 1)
 
-        # clock_id must be casted to int because it's a gdb.Value
-        self.values.append(('Clock ID', CLOCK_IDS[int(clock_id)]))
+        if clock_id != 0:
+            self.values.append(('Clock ID', 'CLOCK_MONOTONIC'))
+        else:
+            self.values.append(('Clock ID', 'CLOCK_REALTIME'))
 
         if self.condattr & 1:
             self.values.append(('Shared', 'Yes'))
diff --git a/nptl/nptl_lock_constants.pysym b/nptl/nptl_lock_constants.pysym
index 303ec61213..2ab3179155 100644
--- a/nptl/nptl_lock_constants.pysym
+++ b/nptl/nptl_lock_constants.pysym
@@ -44,26 +44,13 @@ PTHREAD_PRIO_NONE
 PTHREAD_PRIO_INHERIT
 PTHREAD_PRIO_PROTECT
 
--- These values are hardcoded as well:
--- Value of __mutex for shared condvars.
-PTHREAD_COND_SHARED             (void *)~0l
-
--- Value of __total_seq for destroyed condvars.
-PTHREAD_COND_DESTROYED          -1ull
-
--- __nwaiters encodes the number of threads waiting on a condvar
--- and the clock ID.
--- __nwaiters >> COND_NWAITERS_SHIFT gives us the number of waiters.
-COND_NWAITERS_SHIFT
-
--- Condvar clock IDs
-CLOCK_REALTIME
-CLOCK_MONOTONIC
-CLOCK_PROCESS_CPUTIME_ID
-CLOCK_THREAD_CPUTIME_ID
-CLOCK_MONOTONIC_RAW
-CLOCK_REALTIME_COARSE
-CLOCK_MONOTONIC_COARSE
+-- Condition variable
+-- FIXME Why can't macros prefixed with __ be used directly?
+PTHREAD_COND_SHARED_MASK          __PTHREAD_COND_SHARED_MASK
+PTHREAD_COND_CLOCK_MONOTONIC_MASK __PTHREAD_COND_CLOCK_MONOTONIC_MASK
+COND_CLOCK_BITS
+-- These values are hardcoded:
+PTHREAD_COND_WREFS_SHIFT          3
 
 -- Rwlock attributes
 PTHREAD_RWLOCK_PREFER_READER_NP
diff --git a/nptl/pthreadP.h b/nptl/pthreadP.h
index 6e0dd09f4f..92a9992e1f 100644
--- a/nptl/pthreadP.h
+++ b/nptl/pthreadP.h
@@ -167,6 +167,13 @@ enum
 #define __PTHREAD_ONCE_FORK_GEN_INCR	4
 
 
+/* Condition variable definitions.  See __pthread_cond_wait_common.
+   Need to be defined here so there is one place from which
+   nptl_lock_constants can grab them.  */
+#define __PTHREAD_COND_CLOCK_MONOTONIC_MASK 2
+#define __PTHREAD_COND_SHARED_MASK 1
+
+
 /* Internal variables.  */
 
 
diff --git a/nptl/pthread_cond_broadcast.c b/nptl/pthread_cond_broadcast.c
index 552fd42f60..87c07552cf 100644
--- a/nptl/pthread_cond_broadcast.c
+++ b/nptl/pthread_cond_broadcast.c
@@ -19,72 +19,71 @@
 #include <endian.h>
 #include <errno.h>
 #include <sysdep.h>
-#include <lowlevellock.h>
+#include <futex-internal.h>
 #include <pthread.h>
 #include <pthreadP.h>
 #include <stap-probe.h>
+#include <atomic.h>
 
 #include <shlib-compat.h>
-#include <kernel-features.h>
 
+#include "pthread_cond_common.c"
 
+
+/* We perform the steps of __pthread_cond_signal in one critical
+   section: (1) signal all waiters in G1, (2) close G1 so that it can become
+   the new G2 and make G2 the new G1, and (3) signal all waiters in the new
+   G1.  We don't need to do all these steps if there are no waiters in G1
+   and/or G2.  See __pthread_cond_signal for further details.  */
 int
 __pthread_cond_broadcast (pthread_cond_t *cond)
 {
   LIBC_PROBE (cond_broadcast, 1, cond);
 
-  int pshared = (cond->__data.__mutex == (void *) ~0l)
-		? LLL_SHARED : LLL_PRIVATE;
-  /* Make sure we are alone.  */
-  lll_lock (cond->__data.__lock, pshared);
+  unsigned int wrefs = atomic_load_relaxed (&cond->__data.__wrefs);
+  if (wrefs >> 3 == 0)
+    return 0;
+  int private = __condvar_get_private (wrefs);
+
+  __condvar_acquire_lock (cond, private);
 
-  /* Are there any waiters to be woken?  */
-  if (cond->__data.__total_seq > cond->__data.__wakeup_seq)
+  unsigned long long int wseq = __condvar_load_wseq_relaxed (cond);
+  unsigned int g2 = wseq & 1;
+  unsigned int g1 = g2 ^ 1;
+  wseq >>= 1;
+  bool do_futex_wake = false;
+
+  /* Step (1): signal all waiters remaining in G1.  */
+  if (cond->__data.__g_size[g1] != 0)
     {
-      /* Yes.  Mark them all as woken.  */
-      cond->__data.__wakeup_seq = cond->__data.__total_seq;
-      cond->__data.__woken_seq = cond->__data.__total_seq;
-      cond->__data.__futex = (unsigned int) cond->__data.__total_seq * 2;
-      int futex_val = cond->__data.__futex;
-      /* Signal that a broadcast happened.  */
-      ++cond->__data.__broadcast_seq;
-
-      /* We are done.  */
-      lll_unlock (cond->__data.__lock, pshared);
-
-      /* Wake everybody.  */
-      pthread_mutex_t *mut = (pthread_mutex_t *) cond->__data.__mutex;
-
-      /* Do not use requeue for pshared condvars.  */
-      if (mut == (void *) ~0l
-	  || PTHREAD_MUTEX_PSHARED (mut) & PTHREAD_MUTEX_PSHARED_BIT)
-	goto wake_all;
-
-#if (defined lll_futex_cmp_requeue_pi \
-     && defined __ASSUME_REQUEUE_PI)
-      if (USE_REQUEUE_PI (mut))
-	{
-	  if (lll_futex_cmp_requeue_pi (&cond->__data.__futex, 1, INT_MAX,
-					&mut->__data.__lock, futex_val,
-					LLL_PRIVATE) == 0)
-	    return 0;
-	}
-      else
-#endif
-	/* lll_futex_requeue returns 0 for success and non-zero
-	   for errors.  */
-	if (!__builtin_expect (lll_futex_requeue (&cond->__data.__futex, 1,
-						  INT_MAX, &mut->__data.__lock,
-						  futex_val, LLL_PRIVATE), 0))
-	  return 0;
-
-wake_all:
-      lll_futex_wake (&cond->__data.__futex, INT_MAX, pshared);
-      return 0;
+      /* Add as many signals as the remaining size of the group.  */
+      atomic_fetch_add_relaxed (cond->__data.__g_signals + g1,
+				cond->__data.__g_size[g1] << 1);
+      cond->__data.__g_size[g1] = 0;
+
+      /* We need to wake G1 waiters before we quiesce G1 below.  */
+      /* TODO Only set it if there are indeed futex waiters.  We could
+	 also try to move this out of the critical section in cases when
+	 G2 is empty (and we don't need to quiesce).  */
+      futex_wake (cond->__data.__g_signals + g1, INT_MAX, private);
     }
 
-  /* We are done.  */
-  lll_unlock (cond->__data.__lock, pshared);
+  /* G1 is complete.  Step (2) is next unless there are no waiters in G2, in
+     which case we can stop.  */
+  if (__condvar_quiesce_and_switch_g1 (cond, wseq, &g1, private))
+    {
+      /* Step (3): Send signals to all waiters in the old G2 / new G1.  */
+      atomic_fetch_add_relaxed (cond->__data.__g_signals + g1,
+				cond->__data.__g_size[g1] << 1);
+      cond->__data.__g_size[g1] = 0;
+      /* TODO Only set it if there are indeed futex waiters.  */
+      do_futex_wake = true;
+    }
+
+  __condvar_release_lock (cond, private);
+
+  if (do_futex_wake)
+    futex_wake (cond->__data.__g_signals + g1, INT_MAX, private);
 
   return 0;
 }
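
As context for the ordering requirements discussed in the comments of this
patch, here is a minimal usage sketch the implementation is designed around:
waiters recheck a predicate under the mutex (wake-ups may be spurious), and
wakers publish state while holding (or after having held) the mutex.  All
names below are illustrative and not part of this patch:

   #include <pthread.h>
   #include <stdbool.h>

   static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
   static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
   static bool ready;

   void
   announce (void)
   {
     pthread_mutex_lock (&lock);
     ready = true;
     /* Broadcasting while holding the mutex establishes the happens-before
        relation with waiters that the new algorithm relies on.  */
     pthread_cond_broadcast (&cond);
     pthread_mutex_unlock (&lock);
   }

   void
   await (void)
   {
     pthread_mutex_lock (&lock);
     while (!ready)   /* Wake-ups may be spurious; always recheck.  */
       pthread_cond_wait (&cond, &lock);
     pthread_mutex_unlock (&lock);
   }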
diff --git a/nptl/pthread_cond_common.c b/nptl/pthread_cond_common.c
new file mode 100644
index 0000000000..b374396d45
--- /dev/null
+++ b/nptl/pthread_cond_common.c
@@ -0,0 +1,466 @@
+/* pthread_cond_common -- shared code for condition variables.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <atomic.h>
+#include <stdint.h>
+#include <pthread.h>
+#include <libc-internal.h>
+
+/* The 3 least-significant bits of __wrefs are used for flags, so the
+   waiter count occupies only the remaining 29 bits; this caps the size of
+   a group.  */
+#define __PTHREAD_COND_MAX_GROUP_SIZE ((unsigned) 1 << 29)
+
+#if __HAVE_64B_ATOMICS == 1
+
+static uint64_t __attribute__ ((unused))
+__condvar_load_wseq_relaxed (pthread_cond_t *cond)
+{
+  return atomic_load_relaxed (&cond->__data.__wseq);
+}
+
+static uint64_t __attribute__ ((unused))
+__condvar_fetch_add_wseq_acquire (pthread_cond_t *cond, unsigned int val)
+{
+  return atomic_fetch_add_acquire (&cond->__data.__wseq, val);
+}
+
+static uint64_t __attribute__ ((unused))
+__condvar_fetch_xor_wseq_release (pthread_cond_t *cond, unsigned int val)
+{
+  return atomic_fetch_xor_release (&cond->__data.__wseq, val);
+}
+
+static uint64_t __attribute__ ((unused))
+__condvar_load_g1_start_relaxed (pthread_cond_t *cond)
+{
+  return atomic_load_relaxed (&cond->__data.__g1_start);
+}
+
+static void __attribute__ ((unused))
+__condvar_add_g1_start_relaxed (pthread_cond_t *cond, unsigned int val)
+{
+  atomic_store_relaxed (&cond->__data.__g1_start,
+      atomic_load_relaxed (&cond->__data.__g1_start) + val);
+}
+
+#else
+
+/* We use two 64b counters: __wseq and __g1_start.  They are monotonically
+   increasing and single-writer-multiple-readers counters, so we can implement
+   load, fetch-and-add, and fetch-and-xor operations even when we just have
+   32b atomics.  Values we add or xor are less than or equal to 1<<31 (*),
+   so we only have to make overflow-and-addition atomic wrt. concurrent
+   load operations and xor operations.  To do that, we split each counter into
+   two 32b values of which we reserve the MSB of each to represent an
+   overflow from the lower-order half to the higher-order half.
+
+   In the common case, the state is (higher-order / lower-order half, and . is
+   basically concatenation of the bits):
+   0.h     / 0.l  = h.l
+
+   When we add a value of x that overflows (i.e., 0.l + x == 1.L), we run the
+   following steps S1-S4 (the values these represent are on the right-hand
+   side):
+   S1:  0.h     / 1.L == (h+1).L
+   S2:  1.(h+1) / 1.L == (h+1).L
+   S3:  1.(h+1) / 0.L == (h+1).L
+   S4:  0.(h+1) / 0.L == (h+1).L
+   If the LSB of the higher-order half is set, readers will ignore the
+   overflow bit in the lower-order half.
+
+   To get an atomic snapshot in load operations, we exploit that the
+   higher-order half is monotonically increasing; if we load a value V from
+   it, then read the lower-order half, and then read the higher-order half
+   again and see the same value V, we know that both halves have existed in
+   the sequence of values the full counter had.  This is similar to the
+   validated reads in the time-based STMs in GCC's libitm (e.g.,
+   method_ml_wt).
+
+   The xor operation needs to be an atomic read-modify-write.  The write
+   itself is not an issue as it affects just the lower-order half but not bits
+   used in the add operation.  To make the full fetch-and-xor atomic, we
+   exploit that concurrently, the value can increase by at most 1<<31 (*): The
+   xor operation is only called while having acquired the lock, so not more
+   than __PTHREAD_COND_MAX_GROUP_SIZE waiters can enter concurrently and thus
+   increment __wseq.  Therefore, if the xor operation observes a value of
+   __wseq, then the value it applies the modification to later on can be
+   derived (see below).
+
+   One benefit of this scheme is that it makes load operations
+   obstruction-free: unlike with a lock around the counter, readers can
+   almost always interpret a snapshot of the two halves.  Readers can be
+   forced to read a new snapshot only when the read is concurrent with an
+   overflow.
+   However, overflows will happen infrequently, so load operations are
+   practically lock-free.
+
+   (*) The highest value we add to __g1_start is __PTHREAD_COND_MAX_GROUP_SIZE
+   << 2 (the two extra bits are for the lock in the two LSBs of
+   __g1_start).  */
+
+typedef struct
+{
+  unsigned int low;
+  unsigned int high;
+} _condvar_lohi;
+
+static uint64_t
+__condvar_fetch_add_64_relaxed (_condvar_lohi *lh, unsigned int op)
+{
+  /* S1. Note that this is an atomic read-modify-write so it extends the
+     release sequence of release MO store at S3.  */
+  unsigned int l = atomic_fetch_add_relaxed (&lh->low, op);
+  unsigned int h = atomic_load_relaxed (&lh->high);
+  uint64_t result = ((uint64_t) h << 31) | l;
+  l += op;
+  if ((l >> 31) > 0)
+    {
+      /* Overflow.  Need to increment higher-order half.  Note that all
+	 add operations are ordered in happens-before.  */
+      h++;
+      /* S2. Release MO to synchronize with the loads of the higher-order half
+	 in the load operation.  See __condvar_load_64_relaxed.  */
+      atomic_store_release (&lh->high, h | ((unsigned int) 1 << 31));
+      l ^= (unsigned int) 1 << 31;
+      /* S3.  See __condvar_load_64_relaxed.  */
+      atomic_store_release (&lh->low, l);
+      /* S4.  Likewise.  */
+      atomic_store_release (&lh->high, h);
+    }
+  return result;
+}
+
+static uint64_t
+__condvar_load_64_relaxed (_condvar_lohi *lh)
+{
+  unsigned int h, l, h2;
+  do
+    {
+      /* This load and the second one below of the same location read from the
+	 stores in the overflow handling of the add operation or the
+	 initializing stores (which is a simple special case because
+	 initialization always completely happens before further use).
+	 Because no two stores to the higher-order half write the same value,
+	 the loop ensures that if we continue to use the snapshot, this load
+	 and the second one read from the same store operation.  All candidate
+	 store operations have release MO.
+	 If we read from S2 in the first load, then we will see the value of
+	 S1 on the next load (because we synchronize with S2), or a value
+	 later in modification order.  We correctly ignore the lower-half's
+	 overflow bit in this case.  If we read from S4, then we will see the
+	 value of S3 in the next load (or a later value), which does not have
+	 the overflow bit set anymore.
+	  */
+      h = atomic_load_acquire (&lh->high);
+      /* This will read from the release sequence of S3 (i.e, either the S3
+	 store or the read-modify-writes at S1 following S3 in modification
+	 order).  Thus, the read synchronizes with S3, and the following load
+	 of the higher-order half will read from the matching S2 (or a later
+	 value).
+	 Thus, if we read a lower-half value here that already overflowed and
+	 belongs to an increased higher-order half value, we will see the
+	 latter and h and h2 will not be equal.  */
+      l = atomic_load_acquire (&lh->low);
+      /* See above.  */
+      h2 = atomic_load_relaxed (&lh->high);
+    }
+  while (h != h2);
+  if (((l >> 31) > 0) && ((h >> 31) > 0))
+    l ^= (unsigned int) 1 << 31;
+  return ((uint64_t) (h & ~((unsigned int) 1 << 31)) << 31) + l;
+}
+
+static uint64_t __attribute__ ((unused))
+__condvar_load_wseq_relaxed (pthread_cond_t *cond)
+{
+  return __condvar_load_64_relaxed ((_condvar_lohi *) &cond->__data.__wseq32);
+}
+
+static uint64_t __attribute__ ((unused))
+__condvar_fetch_add_wseq_acquire (pthread_cond_t *cond, unsigned int val)
+{
+  uint64_t r = __condvar_fetch_add_64_relaxed
+      ((_condvar_lohi *) &cond->__data.__wseq32, val);
+  atomic_thread_fence_acquire ();
+  return r;
+}
+
+static uint64_t __attribute__ ((unused))
+__condvar_fetch_xor_wseq_release (pthread_cond_t *cond, unsigned int val)
+{
+  _condvar_lohi *lh = (_condvar_lohi *) &cond->__data.__wseq32;
+  /* First, get the current value.  See __condvar_load_64_relaxed.  */
+  unsigned int h, l, h2;
+  do
+    {
+      h = atomic_load_acquire (&lh->high);
+      l = atomic_load_acquire (&lh->low);
+      h2 = atomic_load_relaxed (&lh->high);
+    }
+  while (h != h2);
+  if (((l >> 31) > 0) && ((h >> 31) == 0))
+    h++;
+  h &= ~((unsigned int) 1 << 31);
+  l &= ~((unsigned int) 1 << 31);
+
+  /* Now modify.  Due to the coherence rules, the prior load will read a value
+     earlier in modification order than the following fetch-xor.
+     This uses release MO to make the full operation have release semantics
+     (all other operations access the lower-order half).  */
+  unsigned int l2 = atomic_fetch_xor_release (&lh->low, val)
+      & ~((unsigned int) 1 << 31);
+  if (l2 < l)
+    /* The lower-order half overflowed in the meantime.  This happened exactly
+       once due to the limit on concurrent waiters (see above).  */
+    h++;
+  return ((uint64_t) h << 31) + l2;
+}
+
+static uint64_t __attribute__ ((unused))
+__condvar_load_g1_start_relaxed (pthread_cond_t *cond)
+{
+  return __condvar_load_64_relaxed
+      ((_condvar_lohi *) &cond->__data.__g1_start32);
+}
+
+static void __attribute__ ((unused))
+__condvar_add_g1_start_relaxed (pthread_cond_t *cond, unsigned int val)
+{
+  ignore_value (__condvar_fetch_add_64_relaxed
+      ((_condvar_lohi *) &cond->__data.__g1_start32, val));
+}
+
+#endif  /* !__HAVE_64B_ATOMICS  */
+
+
+/* The lock that signalers use.  See pthread_cond_wait_common for uses.
+   The lock is our normal three-state lock: not acquired (0) / acquired (1) /
+   acquired-with-futex_wake-request (2).  However, we need to preserve the
+   other bits in the unsigned int used for the lock, and therefore it is a
+   little more complex.  */
+static void __attribute__ ((unused))
+__condvar_acquire_lock (pthread_cond_t *cond, int private)
+{
+  unsigned int s = atomic_load_relaxed (&cond->__data.__g1_orig_size);
+  while ((s & 3) == 0)
+    {
+      if (atomic_compare_exchange_weak_acquire (&cond->__data.__g1_orig_size,
+	  &s, s | 1))
+	return;
+      /* TODO Spinning and back-off.  */
+    }
+  /* The lock is currently acquired by another thread, so we could not
+     change it from not acquired to acquired.  Try to change it to
+     acquired-with-futex-wake-request instead, and do a futex wait if we
+     still cannot change it from not acquired.  */
+  while (1)
+    {
+      while ((s & 3) != 2)
+	{
+	  if (atomic_compare_exchange_weak_acquire
+	      (&cond->__data.__g1_orig_size, &s, (s & ~(unsigned int) 3) | 2))
+	    {
+	      if ((s & 3) == 0)
+		return;
+	      break;
+	    }
+	  /* TODO Back off.  */
+	}
+      futex_wait_simple (&cond->__data.__g1_orig_size,
+	  (s & ~(unsigned int) 3) | 2, private);
+      /* Reload so we see a recent value.  */
+      s = atomic_load_relaxed (&cond->__data.__g1_orig_size);
+    }
+}
+
+/* See __condvar_acquire_lock.  */
+static void __attribute__ ((unused))
+__condvar_release_lock (pthread_cond_t *cond, int private)
+{
+  if ((atomic_fetch_and_release (&cond->__data.__g1_orig_size,
+				 ~(unsigned int) 3) & 3)
+      == 2)
+    futex_wake (&cond->__data.__g1_orig_size, 1, private);
+}
+
+/* Only use this when having acquired the lock.  */
+static unsigned int __attribute__ ((unused))
+__condvar_get_orig_size (pthread_cond_t *cond)
+{
+  return atomic_load_relaxed (&cond->__data.__g1_orig_size) >> 2;
+}
+
+/* Only use this when having acquired the lock.  */
+static void __attribute__ ((unused))
+__condvar_set_orig_size (pthread_cond_t *cond, unsigned int size)
+{
+  /* We have acquired the lock, but might get one concurrent update due to a
+     lock state change from acquired to acquired-with-futex_wake-request.
+     The store with relaxed MO is fine because there will be no further
+     changes to the lock bits nor the size, and we will subsequently release
+     the lock with release MO.  */
+  unsigned int s;
+  s = (atomic_load_relaxed (&cond->__data.__g1_orig_size) & 3)
+      | (size << 2);
+  if ((atomic_exchange_relaxed (&cond->__data.__g1_orig_size, s) & 3)
+      != (s & 3))
+    atomic_store_relaxed (&cond->__data.__g1_orig_size, (size << 2) | 2);
+}
+
+/* Returns FUTEX_SHARED or FUTEX_PRIVATE based on the provided __wrefs
+   value.  */
+static int __attribute__ ((unused))
+__condvar_get_private (int flags)
+{
+  if ((flags & __PTHREAD_COND_SHARED_MASK) == 0)
+    return FUTEX_PRIVATE;
+  else
+    return FUTEX_SHARED;
+}
+
+/* This closes G1 (whose index is in G1INDEX), waits for all futex waiters to
+   leave G1, converts G1 into a fresh G2, and then switches group roles so that
+   the former G2 becomes the new G1 ending at the current __wseq value when we
+   eventually make the switch (WSEQ is just an observation of __wseq by the
+   signaler).
+   If G2 is empty, it will not switch groups because then it would create an
+   empty G1 which would require switching groups again on the next signal.
+   Returns false iff there are no waiters that need a signal, either because
+   G2 was empty (so groups were not switched) or because all waiters in the
+   new G1 had already cancelled waiting.  */
+static bool __attribute__ ((unused))
+__condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq,
+    unsigned int *g1index, int private)
+{
+  const unsigned int maxspin = 0;
+  unsigned int g1 = *g1index;
+
+  /* If there is no waiter in G2, we don't do anything.  The expression may
+     look odd but remember that __g_size might hold a negative value, so
+     putting the expression this way avoids relying on implementation-defined
+     behavior.
+     Note that this works correctly for a zero-initialized condvar too.  */
+  unsigned int old_orig_size = __condvar_get_orig_size (cond);
+  uint64_t old_g1_start = __condvar_load_g1_start_relaxed (cond) >> 1;
+  if (((unsigned) (wseq - old_g1_start - old_orig_size)
+	  + cond->__data.__g_size[g1 ^ 1]) == 0)
+	return false;
+
+  /* Now try to close and quiesce G1.  We have to consider the following kinds
+     of waiters:
+     * Waiters from less recent groups than G1 are not affected because
+       nothing will change for them apart from __g1_start getting larger.
+     * New waiters arriving concurrently with the group switching will all go
+       into G2 until we atomically make the switch.  Waiters existing in G2
+       are not affected.
+     * Waiters in G1 will be closed out immediately by setting a flag in
+       __g_signals, which will prevent waiters from blocking using a futex on
+       __g_signals and also notifies them that the group is closed.  As a
+       result, they will eventually remove their group reference, allowing us
+       to switch group roles.  */
+
+  /* First, set the closed flag on __g_signals.  This tells waiters that are
+     about to wait that they shouldn't do that anymore.  This basically
+     serves as an advance notification of the upcoming change to __g1_start;
+     waiters interpret it as if __g1_start was larger than their waiter
+     sequence position.  This allows us to change __g1_start after waiting
+     for all existing waiters with group references to leave, which in turn
+     makes recovery after stealing a signal simpler because it then can be
+     skipped if __g1_start indicates that the group is closed (otherwise,
+     we would have to recover always because waiters don't know how big their
+     groups are).  Relaxed MO is fine.  */
+  atomic_fetch_or_relaxed (cond->__data.__g_signals + g1, 1);
+
+  /* Wait until there are no group references anymore.  The fetch-or operation
+     injects us into the modification order of __g_refs; release MO ensures
+     that waiters incrementing __g_refs after our fetch-or see the previous
+     changes to __g_signals and to __g1_start that had to happen before we can
+     switch this G1 and alias with an older group (we have two groups, so
+     aliasing requires switching group roles twice).  Note that nobody else
+     can have set the wake-request flag, so we do not have to act upon it.
+
+     Also note that it is harmless if older waiters or waiters from this G1
+     get a group reference after we have quiesced the group because it will
+     remain closed for them either because of the closed flag in __g_signals
+     or the later update to __g1_start.  New waiters will never arrive here
+     but instead continue to go into the still current G2.  */
+  unsigned r = atomic_fetch_or_release (cond->__data.__g_refs + g1, 0);
+  while ((r >> 1) > 0)
+    {
+      for (unsigned int spin = maxspin; ((r >> 1) > 0) && (spin > 0); spin--)
+	{
+	  /* TODO Back off.  */
+	  r = atomic_load_relaxed (cond->__data.__g_refs + g1);
+	}
+      if ((r >> 1) > 0)
+	{
+	  /* There is still a waiter after spinning.  Set the wake-request
+	     flag and block.  Relaxed MO is fine because this is just about
+	     this futex word.  */
+	  r = atomic_fetch_or_relaxed (cond->__data.__g_refs + g1, 1);
+
+	  if ((r >> 1) > 0)
+	    futex_wait_simple (cond->__data.__g_refs + g1, r, private);
+	  /* Reload here so we eventually see the most recent value even if we
+	     do not spin.   */
+	  r = atomic_load_relaxed (cond->__data.__g_refs + g1);
+	}
+    }
+  /* Acquire MO so that we synchronize with the release operation that waiters
+     use to decrement __g_refs and thus happen after the waiters we waited
+     for.  */
+  atomic_thread_fence_acquire ();
+
+  /* Update __g1_start, which finishes closing this group.  The value we add
+     will never be negative because old_orig_size can only be zero when we
+     switch groups the first time after a condvar was initialized, in which
+     case G1 will be at index 1 and we will add a value of 1.  See above for
+     why this takes place after waiting for quiescence of the group.
+     Relaxed MO is fine because the change comes with no additional
+     constraints that others would have to observe.  */
+  __condvar_add_g1_start_relaxed (cond,
+      (old_orig_size << 1) + (g1 == 1 ? 1 : - 1));
+
+  /* Now reopen the group, thus enabling waiters to again block using the
+     futex controlled by __g_signals.  Release MO so that observers that see
+     no signals (and thus can block) also see the write __g1_start and thus
+     that this is now a new group (see __pthread_cond_wait_common for the
+     matching acquire MO loads).  */
+  atomic_store_release (cond->__data.__g_signals + g1, 0);
+
+  /* At this point, the old G1 is now a valid new G2 (but not in use yet).
+     No old waiter can grab a signal or acquire a reference without noticing
+     that __g1_start is larger.
+     We can now publish the group switch by flipping the G2 index in __wseq.
+     Release MO so that this synchronizes with the acquire MO operation
+     waiters use to obtain a position in the waiter sequence.  */
+  wseq = __condvar_fetch_xor_wseq_release (cond, 1) >> 1;
+  g1 ^= 1;
+  *g1index ^= 1;
+
+  /* These values are just observed by signalers, and thus protected by the
+     lock.  */
+  unsigned int orig_size = wseq - (old_g1_start + old_orig_size);
+  __condvar_set_orig_size (cond, orig_size);
+  /* Use an addition so that we do not lose track of cancellations in what
+     was previously G2.  */
+  cond->__data.__g_size[g1] += orig_size;
+
+  /* The new G1's size may be zero because of cancellations during its time
+     as G2.  If this happens, there are no waiters that have to receive a
+     signal, so we do not need to add any and return false.  */
+  if (cond->__data.__g_size[g1] == 0)
+    return false;
+
+  return true;
+}
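
The 32b emulation above is compact but subtle.  The following standalone
sketch isolates just the validated-read load, written with C11 atomics rather
than glibc's internal atomic_* wrappers; the add and xor paths are omitted,
and all names are illustrative:

   #include <stdatomic.h>
   #include <stdint.h>

   struct lohi
   {
     atomic_uint low;   /* 31-bit half; the MSB is the overflow flag.  */
     atomic_uint high;  /* 31-bit half; the MSB marks overflow handling.  */
   };

   static uint64_t
   load64 (struct lohi *lh)
   {
     unsigned int h, l, h2;
     do
       {
         /* Acquire MO pairs with the release stores the writer uses when
            handling an overflow of the low half (S2-S4 above).  */
         h = atomic_load_explicit (&lh->high, memory_order_acquire);
         l = atomic_load_explicit (&lh->low, memory_order_acquire);
         h2 = atomic_load_explicit (&lh->high, memory_order_relaxed);
       }
     while (h != h2);  /* Retry if the high half changed in between.  */
     /* If both halves carry their MSB flag, the high half already accounts
        for the low half's overflow bit, so drop it.  */
     if ((l >> 31) > 0 && (h >> 31) > 0)
       l ^= (unsigned int) 1 << 31;
     return ((uint64_t) (h & ~((unsigned int) 1 << 31)) << 31) + l;
   }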
diff --git a/nptl/pthread_cond_destroy.c b/nptl/pthread_cond_destroy.c
index 1acd8042d8..5845c6a7ad 100644
--- a/nptl/pthread_cond_destroy.c
+++ b/nptl/pthread_cond_destroy.c
@@ -20,66 +20,42 @@
 #include <shlib-compat.h>
 #include "pthreadP.h"
 #include <stap-probe.h>
-
-
+#include <atomic.h>
+#include <futex-internal.h>
+
+#include "pthread_cond_common.c"
+
+/* See __pthread_cond_wait for a high-level description of the algorithm.
+
+   A correct program must make sure that no waiters are blocked on the condvar
+   when it is destroyed, and that there are no concurrent signals or
+   broadcasts.  To wake waiters reliably, the program must signal or
+   broadcast while holding the mutex or after having held the mutex.  It must
+   also ensure that no signal or broadcast are still pending to unblock
+   waiters; IOW, because waiters can wake up spuriously, the program must
+   effectively ensure that destruction happens after the execution of those
+   signal or broadcast calls.
+   Thus, we can assume that all waiters that are still accessing the condvar
+   have been woken.  We wait until they have confirmed to have woken up by
+   decrementing __wrefs.  */
 int
 __pthread_cond_destroy (pthread_cond_t *cond)
 {
-  int pshared = (cond->__data.__mutex == (void *) ~0l)
-		? LLL_SHARED : LLL_PRIVATE;
-
   LIBC_PROBE (cond_destroy, 1, cond);
 
-  /* Make sure we are alone.  */
-  lll_lock (cond->__data.__lock, pshared);
-
-  if (cond->__data.__total_seq > cond->__data.__wakeup_seq)
-    {
-      /* If there are still some waiters which have not been
-	 woken up, this is an application bug.  */
-      lll_unlock (cond->__data.__lock, pshared);
-      return EBUSY;
-    }
-
-  /* Tell pthread_cond_*wait that this condvar is being destroyed.  */
-  cond->__data.__total_seq = -1ULL;
-
-  /* If there are waiters which have been already signalled or
-     broadcasted, but still are using the pthread_cond_t structure,
-     pthread_cond_destroy needs to wait for them.  */
-  unsigned int nwaiters = cond->__data.__nwaiters;
-
-  if (nwaiters >= (1 << COND_NWAITERS_SHIFT))
+  /* Set the wake request flag.  We could also spin, but destruction that is
+     concurrent with still-active waiters is probably neither common nor
+     performance critical.  Acquire MO to synchronize with waiters confirming
+     that they finished.  */
+  unsigned int wrefs = atomic_fetch_or_acquire (&cond->__data.__wrefs, 4);
+  int private = __condvar_get_private (wrefs);
+  while (wrefs >> 3 != 0)
     {
-      /* Wake everybody on the associated mutex in case there are
-	 threads that have been requeued to it.
-	 Without this, pthread_cond_destroy could block potentially
-	 for a long time or forever, as it would depend on other
-	 thread's using the mutex.
-	 When all threads waiting on the mutex are woken up, pthread_cond_wait
-	 only waits for threads to acquire and release the internal
-	 condvar lock.  */
-      if (cond->__data.__mutex != NULL
-	  && cond->__data.__mutex != (void *) ~0l)
-	{
-	  pthread_mutex_t *mut = (pthread_mutex_t *) cond->__data.__mutex;
-	  lll_futex_wake (&mut->__data.__lock, INT_MAX,
-			  PTHREAD_MUTEX_PSHARED (mut));
-	}
-
-      do
-	{
-	  lll_unlock (cond->__data.__lock, pshared);
-
-	  lll_futex_wait (&cond->__data.__nwaiters, nwaiters, pshared);
-
-	  lll_lock (cond->__data.__lock, pshared);
-
-	  nwaiters = cond->__data.__nwaiters;
-	}
-      while (nwaiters >= (1 << COND_NWAITERS_SHIFT));
+      futex_wait_simple (&cond->__data.__wrefs, wrefs, private);
+      /* See above.  */
+      wrefs = atomic_load_acquire (&cond->__data.__wrefs);
     }
-
+  /* The memory the condvar occupies can now be reused.  */
   return 0;
 }
 versioned_symbol (libpthread, __pthread_cond_destroy,
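
A hedged sketch of a destruction sequence that satisfies the requirement
described above: make the predicate final, wake every current waiter while
holding the mutex, and only then destroy.  __pthread_cond_destroy itself
waits, via __wrefs, for woken waiters that are still inside the wait
function.  All names are illustrative:

   #include <pthread.h>
   #include <stdbool.h>

   static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
   static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
   static bool finished;

   void
   shut_down (void)
   {
     pthread_mutex_lock (&lock);
     finished = true;                 /* No thread will wait again.  */
     pthread_cond_broadcast (&cond);  /* Wake all current waiters.  */
     pthread_mutex_unlock (&lock);
     /* Waiters may still be returning from pthread_cond_wait; destroy
        blocks until they have confirmed wake-up by decrementing __wrefs.  */
     pthread_cond_destroy (&cond);
   }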
diff --git a/nptl/pthread_cond_init.c b/nptl/pthread_cond_init.c
index 9023370278..c1eac5f779 100644
--- a/nptl/pthread_cond_init.c
+++ b/nptl/pthread_cond_init.c
@@ -19,25 +19,29 @@
 #include <shlib-compat.h>
 #include "pthreadP.h"
 #include <stap-probe.h>
+#include <string.h>
 
 
+/* See __pthread_cond_wait for details.  */
 int
 __pthread_cond_init (pthread_cond_t *cond, const pthread_condattr_t *cond_attr)
 {
   struct pthread_condattr *icond_attr = (struct pthread_condattr *) cond_attr;
 
-  cond->__data.__lock = LLL_LOCK_INITIALIZER;
-  cond->__data.__futex = 0;
-  cond->__data.__nwaiters = (icond_attr != NULL
-			     ? ((icond_attr->value >> 1)
-				& ((1 << COND_NWAITERS_SHIFT) - 1))
-			     : CLOCK_REALTIME);
-  cond->__data.__total_seq = 0;
-  cond->__data.__wakeup_seq = 0;
-  cond->__data.__woken_seq = 0;
-  cond->__data.__mutex = (icond_attr == NULL || (icond_attr->value & 1) == 0
-			  ? NULL : (void *) ~0l);
-  cond->__data.__broadcast_seq = 0;
+  memset (cond, 0, sizeof (pthread_cond_t));
+
+  /* Update the pretty printers if the internal representation of icond_attr
+     is changed.  */
+
+  /* If the low bit of the attribute value is set, this is a
+     process-shared condvar; record that in __wrefs.  */
+  if (icond_attr != NULL && (icond_attr->value & 1) != 0)
+    cond->__data.__wrefs |= __PTHREAD_COND_SHARED_MASK;
+  int clockid = (icond_attr != NULL
+		 ? ((icond_attr->value >> 1) & ((1 << COND_CLOCK_BITS) - 1))
+		 : CLOCK_REALTIME);
+  /* If 0, CLOCK_REALTIME is used; CLOCK_MONOTONIC otherwise.  */
+  if (clockid != CLOCK_REALTIME)
+    cond->__data.__wrefs |= __PTHREAD_COND_CLOCK_MONOTONIC_MASK;
 
   LIBC_PROBE (cond_init, 2, cond, cond_attr);
 
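
The clock choice stored in __wrefs above comes from the attribute's value
field.  A minimal sketch of the attribute API that feeds it, selecting
CLOCK_MONOTONIC (which makes __pthread_cond_init set
__PTHREAD_COND_CLOCK_MONOTONIC_MASK); the function name is illustrative:

   #include <pthread.h>
   #include <time.h>

   int
   init_monotonic_cond (pthread_cond_t *cond)
   {
     pthread_condattr_t attr;
     int err = pthread_condattr_init (&attr);
     if (err != 0)
       return err;
     err = pthread_condattr_setclock (&attr, CLOCK_MONOTONIC);
     if (err == 0)
       err = pthread_cond_init (cond, &attr);
     pthread_condattr_destroy (&attr);
     return err;
   }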
diff --git a/nptl/pthread_cond_signal.c b/nptl/pthread_cond_signal.c
index b3a6d3d2a4..a95d5690af 100644
--- a/nptl/pthread_cond_signal.c
+++ b/nptl/pthread_cond_signal.c
@@ -19,62 +19,79 @@
 #include <endian.h>
 #include <errno.h>
 #include <sysdep.h>
-#include <lowlevellock.h>
+#include <futex-internal.h>
 #include <pthread.h>
 #include <pthreadP.h>
+#include <atomic.h>
+#include <stdint.h>
 
 #include <shlib-compat.h>
-#include <kernel-features.h>
 #include <stap-probe.h>
 
+#include "pthread_cond_common.c"
 
+/* See __pthread_cond_wait for a high-level description of the algorithm.  */
 int
 __pthread_cond_signal (pthread_cond_t *cond)
 {
-  int pshared = (cond->__data.__mutex == (void *) ~0l)
-		? LLL_SHARED : LLL_PRIVATE;
-
   LIBC_PROBE (cond_signal, 1, cond);
 
-  /* Make sure we are alone.  */
-  lll_lock (cond->__data.__lock, pshared);
-
-  /* Are there any waiters to be woken?  */
-  if (cond->__data.__total_seq > cond->__data.__wakeup_seq)
+  /* First check whether there are waiters.  Relaxed MO is fine here for
+     the same reasons that relaxed MO is fine when observing __wseq (see
+     below).  */
+  unsigned int wrefs = atomic_load_relaxed (&cond->__data.__wrefs);
+  if (wrefs >> 3 == 0)
+    return 0;
+  int private = __condvar_get_private (wrefs);
+
+  __condvar_acquire_lock (cond, private);
+
+  /* Load the waiter sequence number, which represents our relative ordering
+     to any waiters.  Relaxed MO is sufficient for that because:
+     1) We can pick any position that is allowed by external happens-before
+        constraints.  In particular, if another __pthread_cond_wait call
+        happened before us, this waiter must be eligible for being woken by
+        us.  The only way to establish such a happens-before is by signaling
+        while having acquired the mutex associated with the condvar and
+        ensuring that the signal's critical section happens after the waiter.
+        Thus, the mutex ensures that we see that waiter's __wseq increase.
+     2) Once we pick a position, we do not need to communicate this to the
+        program via a happens-before that we set up: First, any wake-up could
+        be a spurious wake-up, so the program must not interpret a wake-up as
+        an indication that the waiter happened before a particular signal;
+        second, a program cannot detect whether a waiter has not yet been
+        woken (i.e., it cannot distinguish between a non-woken waiter and one
+        that has been woken but hasn't resumed execution yet), and thus it
+        cannot try to deduce that a signal happened before a particular
+        waiter.  */
+  unsigned long long int wseq = __condvar_load_wseq_relaxed (cond);
+  unsigned int g1 = (wseq & 1) ^ 1;
+  wseq >>= 1;
+  bool do_futex_wake = false;
+
+  /* If G1 is still receiving signals, we put the signal there.  If not, we
+     check if G2 has waiters, and if so, quiesce and switch G1 to the former
+     G2; if this results in a new G1 with waiters (G2 might have cancellations
+     already, see __condvar_quiesce_and_switch_g1), we put the signal in the
+     new G1.  */
+  if ((cond->__data.__g_size[g1] != 0)
+      || __condvar_quiesce_and_switch_g1 (cond, wseq, &g1, private))
     {
-      /* Yes.  Mark one of them as woken.  */
-      ++cond->__data.__wakeup_seq;
-      ++cond->__data.__futex;
-
-#if (defined lll_futex_cmp_requeue_pi \
-     && defined __ASSUME_REQUEUE_PI)
-      pthread_mutex_t *mut = cond->__data.__mutex;
-
-      if (USE_REQUEUE_PI (mut)
-	/* This can only really fail with a ENOSYS, since nobody can modify
-	   futex while we have the cond_lock.  */
-	  && lll_futex_cmp_requeue_pi (&cond->__data.__futex, 1, 0,
-				       &mut->__data.__lock,
-				       cond->__data.__futex, pshared) == 0)
-	{
-	  lll_unlock (cond->__data.__lock, pshared);
-	  return 0;
-	}
-      else
-#endif
-	/* Wake one.  */
-	if (! __builtin_expect (lll_futex_wake_unlock (&cond->__data.__futex,
-						       1, 1,
-						       &cond->__data.__lock,
-						       pshared), 0))
-	  return 0;
-
-      /* Fallback if neither of them work.  */
-      lll_futex_wake (&cond->__data.__futex, 1, pshared);
+      /* Add a signal.  Relaxed MO is fine because signaling does not need to
+	 establish a happens-before relation (see above).  We do not break the
+	 release sequence of the release-MO store that initializes a group in
+	 __condvar_quiesce_and_switch_g1, because we add the signal with an
+	 atomic read-modify-write and thus extend that store's release
+	 sequence.  */
+      atomic_fetch_add_relaxed (cond->__data.__g_signals + g1, 2);
+      cond->__data.__g_size[g1]--;
+      /* TODO Only set it if there are indeed futex waiters.  */
+      do_futex_wake = true;
     }
 
-  /* We are done.  */
-  lll_unlock (cond->__data.__lock, pshared);
+  __condvar_release_lock (cond, private);
+
+  if (do_futex_wake)
+    futex_wake (cond->__data.__g_signals + g1, 1, private);
 
   return 0;
 }
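
The happens-before argument above permits signaling either while holding the
mutex or after having held it.  A minimal sketch of the latter pattern; all
names are illustrative:

   #include <pthread.h>

   static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
   static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
   static int pending;

   void
   produce_one (void)
   {
     pthread_mutex_lock (&lock);
     pending++;
     pthread_mutex_unlock (&lock);
     /* Signaling after having held the mutex still happens after any waiter
        whose critical section completed earlier, so that waiter is eligible
        to be woken.  */
     pthread_cond_signal (&cond);
   }

   void
   consume_one (void)
   {
     pthread_mutex_lock (&lock);
     while (pending == 0)
       pthread_cond_wait (&cond, &lock);
     pending--;
     pthread_mutex_unlock (&lock);
   }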
diff --git a/nptl/pthread_cond_timedwait.c b/nptl/pthread_cond_timedwait.c
deleted file mode 100644
index 711a51de20..0000000000
--- a/nptl/pthread_cond_timedwait.c
+++ /dev/null
@@ -1,268 +0,0 @@
-/* Copyright (C) 2003-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Martin Schwidefsky <schwidefsky@de.ibm.com>, 2003.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <endian.h>
-#include <errno.h>
-#include <sysdep.h>
-#include <lowlevellock.h>
-#include <pthread.h>
-#include <pthreadP.h>
-#include <sys/time.h>
-#include <kernel-features.h>
-
-#include <shlib-compat.h>
-
-#ifndef HAVE_CLOCK_GETTIME_VSYSCALL
-# undef INTERNAL_VSYSCALL
-# define INTERNAL_VSYSCALL INTERNAL_SYSCALL
-# undef INLINE_VSYSCALL
-# define INLINE_VSYSCALL INLINE_SYSCALL
-#else
-# include <libc-vdso.h>
-#endif
-
-/* Cleanup handler, defined in pthread_cond_wait.c.  */
-extern void __condvar_cleanup (void *arg)
-     __attribute__ ((visibility ("hidden")));
-
-struct _condvar_cleanup_buffer
-{
-  int oldtype;
-  pthread_cond_t *cond;
-  pthread_mutex_t *mutex;
-  unsigned int bc_seq;
-};
-
-int
-__pthread_cond_timedwait (pthread_cond_t *cond, pthread_mutex_t *mutex,
-			  const struct timespec *abstime)
-{
-  struct _pthread_cleanup_buffer buffer;
-  struct _condvar_cleanup_buffer cbuffer;
-  int result = 0;
-
-  /* Catch invalid parameters.  */
-  if (abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000)
-    return EINVAL;
-
-  int pshared = (cond->__data.__mutex == (void *) ~0l)
-		? LLL_SHARED : LLL_PRIVATE;
-
-#if (defined lll_futex_timed_wait_requeue_pi \
-     && defined __ASSUME_REQUEUE_PI)
-  int pi_flag = 0;
-#endif
-
-  /* Make sure we are alone.  */
-  lll_lock (cond->__data.__lock, pshared);
-
-  /* Now we can release the mutex.  */
-  int err = __pthread_mutex_unlock_usercnt (mutex, 0);
-  if (err)
-    {
-      lll_unlock (cond->__data.__lock, pshared);
-      return err;
-    }
-
-  /* We have one new user of the condvar.  */
-  ++cond->__data.__total_seq;
-  ++cond->__data.__futex;
-  cond->__data.__nwaiters += 1 << COND_NWAITERS_SHIFT;
-
-  /* Work around the fact that the kernel rejects negative timeout values
-     despite them being valid.  */
-  if (__glibc_unlikely (abstime->tv_sec < 0))
-    goto timeout;
-
-  /* Remember the mutex we are using here.  If there is already a
-     different address store this is a bad user bug.  Do not store
-     anything for pshared condvars.  */
-  if (cond->__data.__mutex != (void *) ~0l)
-    cond->__data.__mutex = mutex;
-
-  /* Prepare structure passed to cancellation handler.  */
-  cbuffer.cond = cond;
-  cbuffer.mutex = mutex;
-
-  /* Before we block we enable cancellation.  Therefore we have to
-     install a cancellation handler.  */
-  __pthread_cleanup_push (&buffer, __condvar_cleanup, &cbuffer);
-
-  /* The current values of the wakeup counter.  The "woken" counter
-     must exceed this value.  */
-  unsigned long long int val;
-  unsigned long long int seq;
-  val = seq = cond->__data.__wakeup_seq;
-  /* Remember the broadcast counter.  */
-  cbuffer.bc_seq = cond->__data.__broadcast_seq;
-
-  while (1)
-    {
-#if (!defined __ASSUME_FUTEX_CLOCK_REALTIME \
-     || !defined lll_futex_timed_wait_bitset)
-      struct timespec rt;
-      {
-# ifdef __NR_clock_gettime
-	INTERNAL_SYSCALL_DECL (err);
-	(void) INTERNAL_VSYSCALL (clock_gettime, err, 2,
-				  (cond->__data.__nwaiters
-				   & ((1 << COND_NWAITERS_SHIFT) - 1)),
-				  &rt);
-	/* Convert the absolute timeout value to a relative timeout.  */
-	rt.tv_sec = abstime->tv_sec - rt.tv_sec;
-	rt.tv_nsec = abstime->tv_nsec - rt.tv_nsec;
-# else
-	/* Get the current time.  So far we support only one clock.  */
-	struct timeval tv;
-	(void) __gettimeofday (&tv, NULL);
-
-	/* Convert the absolute timeout value to a relative timeout.  */
-	rt.tv_sec = abstime->tv_sec - tv.tv_sec;
-	rt.tv_nsec = abstime->tv_nsec - tv.tv_usec * 1000;
-# endif
-      }
-      if (rt.tv_nsec < 0)
-	{
-	  rt.tv_nsec += 1000000000;
-	  --rt.tv_sec;
-	}
-      /* Did we already time out?  */
-      if (__glibc_unlikely (rt.tv_sec < 0))
-	{
-	  if (cbuffer.bc_seq != cond->__data.__broadcast_seq)
-	    goto bc_out;
-
-	  goto timeout;
-	}
-#endif
-
-      unsigned int futex_val = cond->__data.__futex;
-
-      /* Prepare to wait.  Release the condvar futex.  */
-      lll_unlock (cond->__data.__lock, pshared);
-
-      /* Enable asynchronous cancellation.  Required by the standard.  */
-      cbuffer.oldtype = __pthread_enable_asynccancel ();
-
-/* REQUEUE_PI was implemented after FUTEX_CLOCK_REALTIME, so it is sufficient
-   to check just the former.  */
-#if (defined lll_futex_timed_wait_requeue_pi \
-     && defined __ASSUME_REQUEUE_PI)
-      /* If pi_flag remained 1 then it means that we had the lock and the mutex
-	 but a spurious waker raced ahead of us.  Give back the mutex before
-	 going into wait again.  */
-      if (pi_flag)
-	{
-	  __pthread_mutex_cond_lock_adjust (mutex);
-	  __pthread_mutex_unlock_usercnt (mutex, 0);
-	}
-      pi_flag = USE_REQUEUE_PI (mutex);
-
-      if (pi_flag)
-	{
-	  unsigned int clockbit = (cond->__data.__nwaiters & 1
-				   ? 0 : FUTEX_CLOCK_REALTIME);
-	  err = lll_futex_timed_wait_requeue_pi (&cond->__data.__futex,
-						 futex_val, abstime, clockbit,
-						 &mutex->__data.__lock,
-						 pshared);
-	  pi_flag = (err == 0);
-	}
-      else
-#endif
-
-	{
-#if (!defined __ASSUME_FUTEX_CLOCK_REALTIME \
-     || !defined lll_futex_timed_wait_bitset)
-	  /* Wait until woken by signal or broadcast.  */
-	  err = lll_futex_timed_wait (&cond->__data.__futex,
-				      futex_val, &rt, pshared);
-#else
-	  unsigned int clockbit = (cond->__data.__nwaiters & 1
-				   ? 0 : FUTEX_CLOCK_REALTIME);
-	  err = lll_futex_timed_wait_bitset (&cond->__data.__futex, futex_val,
-					     abstime, clockbit, pshared);
-#endif
-	}
-
-      /* Disable asynchronous cancellation.  */
-      __pthread_disable_asynccancel (cbuffer.oldtype);
-
-      /* We are going to look at shared data again, so get the lock.  */
-      lll_lock (cond->__data.__lock, pshared);
-
-      /* If a broadcast happened, we are done.  */
-      if (cbuffer.bc_seq != cond->__data.__broadcast_seq)
-	goto bc_out;
-
-      /* Check whether we are eligible for wakeup.  */
-      val = cond->__data.__wakeup_seq;
-      if (val != seq && cond->__data.__woken_seq != val)
-	break;
-
-      /* Not woken yet.  Maybe the time expired?  */
-      if (__glibc_unlikely (err == -ETIMEDOUT))
-	{
-	timeout:
-	  /* Yep.  Adjust the counters.  */
-	  ++cond->__data.__wakeup_seq;
-	  ++cond->__data.__futex;
-
-	  /* The error value.  */
-	  result = ETIMEDOUT;
-	  break;
-	}
-    }
-
-  /* Another thread woken up.  */
-  ++cond->__data.__woken_seq;
-
- bc_out:
-
-  cond->__data.__nwaiters -= 1 << COND_NWAITERS_SHIFT;
-
-  /* If pthread_cond_destroy was called on this variable already,
-     notify the pthread_cond_destroy caller all waiters have left
-     and it can be successfully destroyed.  */
-  if (cond->__data.__total_seq == -1ULL
-      && cond->__data.__nwaiters < (1 << COND_NWAITERS_SHIFT))
-    lll_futex_wake (&cond->__data.__nwaiters, 1, pshared);
-
-  /* We are done with the condvar.  */
-  lll_unlock (cond->__data.__lock, pshared);
-
-  /* The cancellation handling is back to normal, remove the handler.  */
-  __pthread_cleanup_pop (&buffer, 0);
-
-  /* Get the mutex before returning.  */
-#if (defined lll_futex_timed_wait_requeue_pi \
-     && defined __ASSUME_REQUEUE_PI)
-  if (pi_flag)
-    {
-      __pthread_mutex_cond_lock_adjust (mutex);
-      err = 0;
-    }
-  else
-#endif
-    err = __pthread_mutex_cond_lock (mutex);
-
-  return err ?: result;
-}
-
-versioned_symbol (libpthread, __pthread_cond_timedwait, pthread_cond_timedwait,
-		  GLIBC_2_3_2);
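
The deleted fallback path above converts the caller's absolute CLOCK_REALTIME deadline into a relative timeout before calling lll_futex_timed_wait. A stand-alone sketch of that conversion (not glibc code; the helper name is invented here):

#include <time.h>

/* Hedged sketch: convert an absolute CLOCK_REALTIME deadline into the
   relative timeout the old fallback path handed to the futex call.
   Returns nonzero if the deadline has already passed, in which case the
   caller reported ETIMEDOUT.  */
static int
abstime_to_reltime (const struct timespec *abstime, struct timespec *rt)
{
  struct timespec now;
  if (clock_gettime (CLOCK_REALTIME, &now) != 0)
    return -1;
  rt->tv_sec = abstime->tv_sec - now.tv_sec;
  rt->tv_nsec = abstime->tv_nsec - now.tv_nsec;
  if (rt->tv_nsec < 0)
    {
      /* Borrow one second, as in the nanosecond underflow fixup above.  */
      rt->tv_nsec += 1000000000;
      --rt->tv_sec;
    }
  return rt->tv_sec < 0;
}
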
diff --git a/nptl/pthread_cond_wait.c b/nptl/pthread_cond_wait.c
index 3f62acc6bd..2b434026c6 100644
--- a/nptl/pthread_cond_wait.c
+++ b/nptl/pthread_cond_wait.c
@@ -19,219 +19,655 @@
 #include <endian.h>
 #include <errno.h>
 #include <sysdep.h>
-#include <lowlevellock.h>
+#include <futex-internal.h>
 #include <pthread.h>
 #include <pthreadP.h>
-#include <kernel-features.h>
+#include <sys/time.h>
+#include <atomic.h>
+#include <stdint.h>
+#include <stdbool.h>
 
 #include <shlib-compat.h>
 #include <stap-probe.h>
+#include <time.h>
+
+#include "pthread_cond_common.c"
+
 
 struct _condvar_cleanup_buffer
 {
-  int oldtype;
+  uint64_t wseq;
   pthread_cond_t *cond;
   pthread_mutex_t *mutex;
-  unsigned int bc_seq;
+  int private;
 };
 
 
-void
-__attribute__ ((visibility ("hidden")))
-__condvar_cleanup (void *arg)
+/* Decrease the waiter reference count; wake pthread_cond_destroy if we were
+   the last waiter and destruction is pending.  */
+static void
+__condvar_confirm_wakeup (pthread_cond_t *cond, int private)
 {
-  struct _condvar_cleanup_buffer *cbuffer =
-    (struct _condvar_cleanup_buffer *) arg;
-  unsigned int destroying;
-  int pshared = (cbuffer->cond->__data.__mutex == (void *) ~0l)
-		? LLL_SHARED : LLL_PRIVATE;
+  /* If destruction is pending (i.e., the wake-request flag is nonzero) and we
+     are the last waiter (i.e., the reference count stored in the bits above
+     the flags was 1), then wake any threads waiting in pthread_cond_destroy.
+     Release MO to synchronize with these threads.  Don't bother clearing the
+     wake-up request flag.  */
+  if ((atomic_fetch_add_release (&cond->__data.__wrefs, -8) >> 2) == 3)
+    futex_wake (&cond->__data.__wrefs, INT_MAX, private);
+}
+
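A minimal illustration (invented for this description, not part of the patch) of why the check above identifies the last waiter when destruction is pending; per the design comment later in this file, __wrefs keeps the pshared flag in bit 0, the clock ID in bit 1, the wake-request flag in bit 2, and the reference count in bits 3 and up:

#include <assert.h>

int
main (void)
{
  /* One waiter left, destruction pending, process-shared condvar.  */
  unsigned int wrefs = (1u << 3) | (1u << 2) | 1u;
  /* atomic_fetch_add_release (&__wrefs, -8) returns the prior value;
     (prior >> 2) == 3 holds exactly when the count was 1 and bit 2 was
     set, with the two low flag bits shifted out.  */
  assert ((wrefs >> 2) == 3);
  return 0;
}
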
 
-  /* We are going to modify shared data.  */
-  lll_lock (cbuffer->cond->__data.__lock, pshared);
+/* Cancel waiting after having registered as a waiter previously.  SEQ is our
+   position and G is our group index.
+   The goal of cancellation is to make our group smaller if that is still
+   possible.  If we are in a closed group, this is not possible anymore; in
+   this case, we need to send a replacement signal for the one we effectively
+   consumed because the signal should have gotten consumed by another waiter
+   instead; we must not both cancel waiting and consume a signal.
+
+   Must not be called while still holding a reference on the group.
+
+   If we effectively consumed a signal, we send a replacement signal.
+
+   On some kinds of timeouts, we may be able to pretend that a signal we
+   effectively consumed happened before the timeout (i.e., similarly to first
+   spinning on signals before actually checking whether the timeout has
+   passed already).  Doing this would allow us to skip sending a replacement
+   signal, but this case might happen rarely because the end of the timeout
+   must race with someone else sending a signal.  Therefore, we don't bother
+   trying to optimize this.  */
+static void
+__condvar_cancel_waiting (pthread_cond_t *cond, uint64_t seq, unsigned int g,
+			  int private)
+{
+  bool consumed_signal = false;
 
-  if (cbuffer->bc_seq == cbuffer->cond->__data.__broadcast_seq)
+  /* No deadlock with group switching is possible here because we do not
+     hold a reference on the group.  */
+  __condvar_acquire_lock (cond, private);
+
+  uint64_t g1_start = __condvar_load_g1_start_relaxed (cond) >> 1;
+  if (g1_start > seq)
+    {
+      /* Our group is closed, so someone provided enough signals for it.
+	 Thus, we effectively consumed a signal.  */
+      consumed_signal = true;
+    }
+  else
     {
-      /* This thread is not waiting anymore.  Adjust the sequence counters
-	 appropriately.  We do not increment WAKEUP_SEQ if this would
-	 bump it over the value of TOTAL_SEQ.  This can happen if a thread
-	 was woken and then canceled.  */
-      if (cbuffer->cond->__data.__wakeup_seq
-	  < cbuffer->cond->__data.__total_seq)
+      if (g1_start + __condvar_get_orig_size (cond) <= seq)
+	{
+	  /* We are in the current G2 and thus cannot have consumed a signal.
+	     Reduce its effective size or handle overflow.  Remember that in
+	     G2, the unsigned size wraps around, so it effectively holds zero
+	     or negative values.  */
+	  if (cond->__data.__g_size[g] + __PTHREAD_COND_MAX_GROUP_SIZE > 0)
+	    {
+	      cond->__data.__g_size[g]--;
+	    }
+	  else
+	    {
+	      /* Cancellations would overflow the maximum group size.  Just
+		 wake up everyone spuriously to create a clean state.  This
+		 also means we do not consume a signal someone else sent.  */
+	      __condvar_release_lock (cond, private);
+	      __pthread_cond_broadcast (cond);
+	      return;
+	    }
+	}
+      else
 	{
-	  ++cbuffer->cond->__data.__wakeup_seq;
-	  ++cbuffer->cond->__data.__futex;
+	  /* We are in current G1.  If the group's size is zero, someone put
+	     a signal in the group that nobody else but us can consume.  */
+	  if (cond->__data.__g_size[g] == 0)
+	    consumed_signal = true;
+	  else
+	    {
+	      /* Otherwise, we decrease the size of the group.  This is
+		 equivalent to atomically putting in a signal just for us and
+		 consuming it right away.  We do not consume a signal sent
+		 by someone else.  We also cannot have consumed a futex
+		 wake-up because if we were cancelled or timed out in a futex
+		 call, the futex will wake another waiter.  */
+	      cond->__data.__g_size[g]--;
+	    }
 	}
-      ++cbuffer->cond->__data.__woken_seq;
     }
 
-  cbuffer->cond->__data.__nwaiters -= 1 << COND_NWAITERS_SHIFT;
+  __condvar_release_lock (cond, private);
 
-  /* If pthread_cond_destroy was called on this variable already,
-     notify the pthread_cond_destroy caller all waiters have left
-     and it can be successfully destroyed.  */
-  destroying = 0;
-  if (cbuffer->cond->__data.__total_seq == -1ULL
-      && cbuffer->cond->__data.__nwaiters < (1 << COND_NWAITERS_SHIFT))
+  if (consumed_signal)
     {
-      lll_futex_wake (&cbuffer->cond->__data.__nwaiters, 1, pshared);
-      destroying = 1;
+      /* We effectively consumed a signal even though we didn't want to.
+	 Therefore, we need to send a replacement signal.
+	 If we would want to optimize this, we could do what
+	 pthread_cond_signal does right in the critical section above.  */
+      __pthread_cond_signal (cond);
     }
+}
 
-  /* We are done.  */
-  lll_unlock (cbuffer->cond->__data.__lock, pshared);
-
-  /* Wake everybody to make sure no condvar signal gets lost.  */
-  if (! destroying)
-    lll_futex_wake (&cbuffer->cond->__data.__futex, INT_MAX, pshared);
-
-  /* Get the mutex before returning unless asynchronous cancellation
-     is in effect.  We don't try to get the mutex if we already own it.  */
-  if (!(USE_REQUEUE_PI (cbuffer->mutex))
-      || ((cbuffer->mutex->__data.__lock & FUTEX_TID_MASK)
-	  != THREAD_GETMEM (THREAD_SELF, tid)))
-  {
-    __pthread_mutex_cond_lock (cbuffer->mutex);
-  }
-  else
-    __pthread_mutex_cond_lock_adjust (cbuffer->mutex);
+/* Drop a group reference and wake up any signalers that might be waiting
+   for the group to quiesce.  */
+static void
+__condvar_dec_grefs (pthread_cond_t *cond, unsigned int g, int private)
+{
+  /* Release MO to synchronize-with the acquire load in
+     __condvar_quiesce_and_switch_g1.  */
+  if (atomic_fetch_add_release (cond->__data.__g_refs + g, -2) == 3)
+    {
+      /* Clear the wake-up request flag before waking up.  We do not need more
+	 than relaxed MO and it doesn't matter if we apply this for an aliased
+	 group because we wake all futex waiters right after clearing the
+	 flag.  */
+      atomic_fetch_and_relaxed (cond->__data.__g_refs + g, ~(unsigned int) 1);
+      futex_wake (cond->__data.__g_refs + g, INT_MAX, private);
+    }
 }
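
The per-group __g_refs word follows the same pattern one bit lower: the futex-waiter count lives in bits 1 and up and the wake-request flag in bit 0, so the fetch-add of -2 above drops the count by one and a prior value of 3 means the last reference is gone while a signaler waits. A small illustration (not patch code):

#include <assert.h>

int
main (void)
{
  unsigned int g_refs = (1u << 1) | 1u;	/* one waiter, wake-request set */
  unsigned int prior = g_refs;		/* what the fetch-add of -2 returns */
  g_refs -= 2;
  assert (prior == 3);			/* last waiter: must futex_wake */
  g_refs &= ~(unsigned int) 1;		/* clear the flag before waking */
  assert (g_refs == 0);
  return 0;
}
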
 
+/* Clean-up for cancellation of waiters waiting for normal signals.  We cancel
+   our registration as a waiter, confirm we have woken up, and re-acquire the
+   mutex.  */
+static void
+__condvar_cleanup_waiting (void *arg)
+{
+  struct _condvar_cleanup_buffer *cbuffer =
+    (struct _condvar_cleanup_buffer *) arg;
+  pthread_cond_t *cond = cbuffer->cond;
+  unsigned g = cbuffer->wseq & 1;
 
-int
-__pthread_cond_wait (pthread_cond_t *cond, pthread_mutex_t *mutex)
+  __condvar_dec_grefs (cond, g, cbuffer->private);
+
+  __condvar_cancel_waiting (cond, cbuffer->wseq >> 1, g, cbuffer->private);
+  /* FIXME With the current cancellation implementation, it is possible that
+     a thread is cancelled after it has returned from a syscall.  This could
+     result in a cancelled waiter consuming a futex wake-up that is then
+     causing another waiter in the same group to not wake up.  To work around
+     this issue until we have fixed cancellation, just add a futex wake-up
+     conservatively.  */
+  futex_wake (cond->__data.__g_signals + g, 1, cbuffer->private);
+
+  __condvar_confirm_wakeup (cond, cbuffer->private);
+
+  /* XXX If locking the mutex fails, should we just stop execution?  This
+     might be better than silently ignoring the error.  */
+  __pthread_mutex_cond_lock (cbuffer->mutex);
+}
+
+/* This condvar implementation guarantees that all calls to signal and
+   broadcast and all of the three virtually atomic parts of each call to wait
+   (i.e., (1) releasing the mutex and blocking, (2) unblocking, and (3) re-
+   acquiring the mutex) happen in some total order that is consistent with the
+   happens-before relations in the calling program.  However, this order does
+   not necessarily result in additional happens-before relations being
+   established (which aligns well with spurious wake-ups being allowed).
+
+   All waiters acquire a certain position in a 64b waiter sequence (__wseq).
+   This sequence determines which waiters are allowed to consume signals.
+   A broadcast is equal to sending as many signals as are unblocked waiters.
+   When a signal arrives, it samples the current value of __wseq with a
+   relaxed-MO load (i.e., the position the next waiter would get).  (This is
+   sufficient because it is consistent with happens-before; the caller can
+   enforce stronger ordering constraints by calling signal while holding the
+   mutex.)  Only waiters with a position less than the __wseq value observed
+   by the signal are eligible to consume this signal.
+
+   This would be straightforward to implement if waiters would just spin, but
+   we need to let them block using futexes.  Futexes give no guarantee of
+   waking in FIFO order, so we cannot reliably wake eligible waiters if we
+   just use a single futex.  Also, futex words are 32b in size, but we need
+   to distinguish more than 1<<32 states because we need to represent the
+   order of wake-up (and thus which waiters are eligible to consume signals);
+   blocking in a futex is not atomic with a waiter determining its position in
+   the waiter sequence, so we need the futex word to reliably notify waiters
+   that they should not attempt to block anymore because they have been
+   already signaled in the meantime.  While an ABA issue on a 32b value will
+   be rare, ignoring it when we are aware of it is not the right thing to do
+   either.
+
+   Therefore, we use a 64b counter to represent the waiter sequence (on
+   architectures which only support 32b atomics, we use a few bits less).
+   To deal with the blocking using futexes, we maintain two groups of waiters:
+   * Group G1 consists of waiters that are all eligible to consume signals;
+     incoming signals will always signal waiters in this group until all
+     waiters in G1 have been signaled.
+   * Group G2 consists of waiters that arrive when a G1 is present and still
+     contains waiters that have not been signaled.  When all waiters in G1
+     are signaled and a new signal arrives, the new signal will convert G2
+     into the new G1 and create a new G2 for future waiters.
+
+   We cannot allocate new memory because of process-shared condvars, so we
+   have just two slots of groups that change their role between G1 and G2.
+   Each has a separate futex word, a number of signals available for
+   consumption, a size (number of waiters in the group that have not been
+   signaled), and a reference count.
+
+   The group reference count is used to maintain the number of waiters that
+   are using the group's futex.  Before a group can change its role, the
+   reference count must show that no waiters are using the futex anymore; this
+   prevents ABA issues on the futex word.
+
+   To represent which intervals in the waiter sequence the groups cover (and
+   thus also which group slot contains G1 or G2), we use a 64b counter to
+   designate the start position of G1 (inclusive), and a single bit in the
+   waiter sequence counter to represent which group slot currently contains
+   G2.  This allows us to switch group roles atomically wrt. waiters obtaining
+   a position in the waiter sequence.  The G1 start position allows waiters to
+   figure out whether they are in a group that has already been completely
+   signaled (i.e., if the current G1 starts at a later position than the
+   waiter's position).  Waiters cannot determine whether they are currently
+   in G2 or G1 -- but they do not have to because all they are interested in
+   is whether there are available signals, and they always start in G2 (whose
+   group slot they know because of the bit in the waiter sequence).  Signalers
+   will simply fill the right group until it is completely signaled and can
+   be closed (they do not switch group roles until they really have to, to
+   decrease the likelihood of having to wait for waiters still holding a
+   reference on the now-closed G1).
+
+   Signalers maintain the initial size of G1 to be able to determine where
+   G2 starts (G2 is always open-ended until it becomes G1).  They track the
+   remaining size of a group; when waiters cancel waiting (due to PThreads
+   cancellation or timeouts), they will decrease this remaining size as well.
+
+   To implement condvar destruction requirements (i.e., that
+   pthread_cond_destroy can be called as soon as all waiters have been
+   signaled), waiters increment a reference count before starting to wait and
+   decrement it after they stopped waiting but right before they acquire the
+   mutex associated with the condvar.
+
+   pthread_cond_t thus consists of the following (bits that are used for
+   flags and are not part of the primary value of each field but necessary
+   to make some things atomic or because there was no space for them
+   elsewhere in the data structure):
+
+   __wseq: Waiter sequence counter
+     * LSB is index of current G2.
+     * Waiters fetch-add while having acquired the mutex associated with the
+       condvar.  Signalers load it and fetch-xor it concurrently.
+   __g1_start: Starting position of G1 (inclusive)
+     * LSB is index of current G2.
+     * Modified by signalers while having acquired the condvar-internal lock
+       and observed concurrently by waiters.
+   __g1_orig_size: Initial size of G1
+     * The two least-significant bits represent the condvar-internal lock.
+     * Only accessed while having acquired the condvar-internal lock.
+   __wrefs: Waiter reference counter.
+     * Bit 2 is true if waiters should run futex_wake when they remove the
+       last reference.  pthread_cond_destroy uses this as futex word.
+     * Bit 1 is the clock ID (0 == CLOCK_REALTIME, 1 == CLOCK_MONOTONIC).
+     * Bit 0 is true iff this is a process-shared condvar.
+     * Simple reference count used by both waiters and pthread_cond_destroy.
+     (If the format of __wrefs is changed, update nptl_lock_constants.pysym
+      and the pretty printers.)
+   For each of the two groups, we have:
+   __g_refs: Futex waiter reference count.
+     * LSB is true if waiters should run futex_wake when they remove the
+       last reference.
+     * Reference count used by waiters concurrently with signalers that have
+       acquired the condvar-internal lock.
+   __g_signals: The number of signals that can still be consumed.
+     * Used as a futex word by waiters.  Used concurrently by waiters and
+       signalers.
+     * LSB is true iff this group has been completely signaled (i.e., it is
+       closed).
+   __g_size: Waiters remaining in this group (i.e., which have not been
+     signaled yet).
+     * Accessed by signalers and waiters that cancel waiting (both do so only
+       when having acquired the condvar-internal lock).
+     * The size of G2 is always zero because it cannot be determined until
+       the group becomes G1.
+     * Although this is of unsigned type, we rely on using unsigned overflow
+       rules to make this hold effectively negative values too (in
+       particular, when waiters in G2 cancel waiting).
+
+   A PTHREAD_COND_INITIALIZER condvar has all fields set to zero, which yields
+   a condvar that has G2 starting at position 0 and a G1 that is closed.
+
+   Because waiters do not claim ownership of a group right when obtaining a
+   position in __wseq but only reference count the group when using futexes
+   to block, it can happen that a group gets closed before a waiter can
+   increment the reference count.  Therefore, waiters have to check whether
+   their group is already closed using __g1_start.  They also have to perform
+   this check when spinning and when trying to grab a signal from __g_signals.
+   Note that for these checks, using relaxed MO to load __g1_start is
+   sufficient because if a waiter can see a sufficiently large value, it could
+   have also consumed a signal in the waiter's group.
+
+   Waiters try to grab a signal from __g_signals without holding a reference
+   count, which can lead to stealing a signal from a more recent group after
+   their own group was already closed.  They cannot always detect whether they
+   in fact did because they do not know when they stole, but they can
+   conservatively add a signal back to the group they stole from; if they
+   did so unnecessarily, all that happens is a spurious wake-up.  To make this
+   even less likely, __g1_start contains the index of the current G2 too,
+   which allows waiters to check if there is aliasing on the group slots; if
+   there wasn't, they didn't steal from the current G1, which means that the
+   G1 they stole from must have been already closed and they do not need to
+   fix anything.
+
+   It is essential that the last field in pthread_cond_t is __g_signals[1]:
+   The previous condvar used a pointer-sized field in pthread_cond_t, so a
+   PTHREAD_COND_INITIALIZER from that condvar implementation might only
+   initialize 4 bytes to zero instead of the 8 bytes we need (i.e., 44 bytes
+   in total instead of the 48 we need).  __g_signals[1] is not accessed before
+   the first group switch (G2 starts at index 0), which will set its value to
+   zero after a harmless fetch-or whose return value is ignored.  This
+   effectively completes initialization.
+
+
+   Limitations:
+   * This condvar isn't designed to allow for more than
+     __PTHREAD_COND_MAX_GROUP_SIZE * (1 << 31) calls to __pthread_cond_wait.
+   * More than __PTHREAD_COND_MAX_GROUP_SIZE concurrent waiters are not
+     supported.
+   * Beyond what is allowed as errors by POSIX or documented, we can also
+     return the following errors:
+     * EPERM if MUTEX is a recursive mutex and the caller doesn't own it.
+     * EOWNERDEAD or ENOTRECOVERABLE when using robust mutexes.  Unlike
+       for other errors, this can happen when we re-acquire the mutex; this
+       isn't allowed by POSIX (which requires all errors to virtually happen
+       before we release the mutex or change the condvar state), but there's
+       nothing we can really do.
+     * When using PTHREAD_MUTEX_PP_* mutexes, we can also return all errors
+       returned by __pthread_tpp_change_priority.  We will already have
+       released the mutex in such cases, so the caller cannot expect to own
+       MUTEX.
+
+   Other notes:
+   * Instead of the normal mutex unlock / lock functions, we use
+     __pthread_mutex_unlock_usercnt(m, 0) / __pthread_mutex_cond_lock(m)
+     because those will not change the mutex-internal users count, so that it
+     can be detected when a condvar is still associated with a particular
+     mutex because there is a waiter blocked on this condvar using this mutex.
+*/
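To make the encodings described above concrete, here is a hedged sketch (helper names invented for illustration) of how a waiter splits __wseq and __g1_start into their components:

#include <stdint.h>
#include <stdbool.h>

/* Illustrative decoding of the packed fields documented above.  */
static unsigned int
g2_index (uint64_t wseq)
{
  return wseq & 1;	/* LSB: group slot of the current G2.  */
}

static uint64_t
waiter_position (uint64_t wseq)
{
  return wseq >> 1;	/* Position in the waiter sequence.  */
}

static bool
group_closed (uint64_t seq, uint64_t g1_start)
{
  /* A waiter's group is closed once G1 starts past its position.  */
  return seq < (g1_start >> 1);
}
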
+static __always_inline int
+__pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
+    const struct timespec *abstime)
 {
-  struct _pthread_cleanup_buffer buffer;
-  struct _condvar_cleanup_buffer cbuffer;
+  const int maxspin = 0;
   int err;
-  int pshared = (cond->__data.__mutex == (void *) ~0l)
-		? LLL_SHARED : LLL_PRIVATE;
-
-#if (defined lll_futex_wait_requeue_pi \
-     && defined __ASSUME_REQUEUE_PI)
-  int pi_flag = 0;
-#endif
+  int result = 0;
 
   LIBC_PROBE (cond_wait, 2, cond, mutex);
 
-  /* Make sure we are alone.  */
-  lll_lock (cond->__data.__lock, pshared);
-
-  /* Now we can release the mutex.  */
+  /* Acquire a position (SEQ) in the waiter sequence (WSEQ).  We use an
+     atomic operation because signals and broadcasts may update the group
+     switch without acquiring the mutex.  We do not need release MO here
+     because we do not need to establish any happens-before relation with
+     signalers (see __pthread_cond_signal); modification order alone
+     establishes a total order of waiters/signals.  We do need acquire MO
+     to synchronize with group reinitialization in
+     __condvar_quiesce_and_switch_g1.  */
+  uint64_t wseq = __condvar_fetch_add_wseq_acquire (cond, 2);
+  /* Find our group's index.  We always go into what was G2 when we acquired
+     our position.  */
+  unsigned int g = wseq & 1;
+  uint64_t seq = wseq >> 1;
+
+  /* Increase the waiter reference count.  Relaxed MO is sufficient because
+     we only need to synchronize when decrementing the reference count.  */
+  unsigned int flags = atomic_fetch_add_relaxed (&cond->__data.__wrefs, 8);
+  int private = __condvar_get_private (flags);
+
+  /* Now that we are registered as a waiter, we can release the mutex.
+     Waiting on the condvar must be atomic with releasing the mutex, so if
+     the mutex is used to establish a happens-before relation with any
+     signaler, the waiter must be visible to the latter; thus, we release the
+     mutex after registering as waiter.
+     If releasing the mutex fails, we just cancel our registration as a
+     waiter and confirm that we have woken up.  */
   err = __pthread_mutex_unlock_usercnt (mutex, 0);
-  if (__glibc_unlikely (err))
+  if (__glibc_unlikely (err != 0))
     {
-      lll_unlock (cond->__data.__lock, pshared);
+      __condvar_cancel_waiting (cond, seq, g, private);
+      __condvar_confirm_wakeup (cond, private);
       return err;
     }
 
-  /* We have one new user of the condvar.  */
-  ++cond->__data.__total_seq;
-  ++cond->__data.__futex;
-  cond->__data.__nwaiters += 1 << COND_NWAITERS_SHIFT;
-
-  /* Remember the mutex we are using here.  If there is already a
-     different address store this is a bad user bug.  Do not store
-     anything for pshared condvars.  */
-  if (cond->__data.__mutex != (void *) ~0l)
-    cond->__data.__mutex = mutex;
-
-  /* Prepare structure passed to cancellation handler.  */
-  cbuffer.cond = cond;
-  cbuffer.mutex = mutex;
-
-  /* Before we block we enable cancellation.  Therefore we have to
-     install a cancellation handler.  */
-  __pthread_cleanup_push (&buffer, __condvar_cleanup, &cbuffer);
-
-  /* The current values of the wakeup counter.  The "woken" counter
-     must exceed this value.  */
-  unsigned long long int val;
-  unsigned long long int seq;
-  val = seq = cond->__data.__wakeup_seq;
-  /* Remember the broadcast counter.  */
-  cbuffer.bc_seq = cond->__data.__broadcast_seq;
+  /* Now wait until a signal is available in our group or it is closed.
+     Acquire MO so that if we observe a value of zero written after group
+     switching in __condvar_quiesce_and_switch_g1, we synchronize with that
+     store and will see the prior update of __g1_start done while switching
+     groups too.  */
+  unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g);
 
   do
     {
-      unsigned int futex_val = cond->__data.__futex;
-      /* Prepare to wait.  Release the condvar futex.  */
-      lll_unlock (cond->__data.__lock, pshared);
-
-      /* Enable asynchronous cancellation.  Required by the standard.  */
-      cbuffer.oldtype = __pthread_enable_asynccancel ();
-
-#if (defined lll_futex_wait_requeue_pi \
-     && defined __ASSUME_REQUEUE_PI)
-      /* If pi_flag remained 1 then it means that we had the lock and the mutex
-	 but a spurious waker raced ahead of us.  Give back the mutex before
-	 going into wait again.  */
-      if (pi_flag)
+      while (1)
 	{
-	  __pthread_mutex_cond_lock_adjust (mutex);
-	  __pthread_mutex_unlock_usercnt (mutex, 0);
+	  /* Spin-wait first.
+	     Note that spinning first without checking whether a timeout
+	     passed might lead to what looks like a spurious wake-up even
+	     though we should return ETIMEDOUT (e.g., if the caller provides
+	     an absolute timeout that is clearly in the past).  However,
+	     (1) spurious wake-ups are allowed, (2) it seems unlikely that a
+	     user will (ab)use pthread_cond_wait as a check for whether a
+	     point in time is in the past, and (3) spinning first without
+	     having to compare against the current time seems to be the right
+	     choice from a performance perspective for most use cases.  */
+	  unsigned int spin = maxspin;
+	  while (signals == 0 && spin > 0)
+	    {
+	      /* Check that we are not spinning on a group that's already
+		 closed.  */
+	      if (seq < (__condvar_load_g1_start_relaxed (cond) >> 1))
+		goto done;
+
+	      /* TODO Back off.  */
+
+	      /* Reload signals.  See above for MO.  */
+	      signals = atomic_load_acquire (cond->__data.__g_signals + g);
+	      spin--;
+	    }
+
+	  /* If our group will be closed as indicated by the flag on signals,
+	     don't bother grabbing a signal.  */
+	  if (signals & 1)
+	    goto done;
+
+	  /* If there is an available signal, don't block.  */
+	  if (signals != 0)
+	    break;
+
+	  /* No signals available after spinning, so prepare to block.
+	     We first acquire a group reference and use acquire MO for that so
+	     that we synchronize with the dummy read-modify-write in
+	     __condvar_quiesce_and_switch_g1 if we read from that.  If we do,
+	     this will make us see the closed flag on __g_signals that
+	     designates a concurrent attempt to reuse the group's slot.
+	     We use acquire MO for the __g_signals check to make the
+	     __g1_start check work (see spinning above).
+	     Note that the group reference acquisition will not mask the
+	     release MO when decrementing the reference count because we use
+	     an atomic read-modify-write operation and thus extend the release
+	     sequence.  */
+	  atomic_fetch_add_acquire (cond->__data.__g_refs + g, 2);
+	  if (((atomic_load_acquire (cond->__data.__g_signals + g) & 1) != 0)
+	      || (seq < (__condvar_load_g1_start_relaxed (cond) >> 1)))
+	    {
+	      /* Our group is closed.  Wake up any signalers that might be
+		 waiting.  */
+	      __condvar_dec_grefs (cond, g, private);
+	      goto done;
+	    }
+
+	  /* Now block.  */
+	  struct _pthread_cleanup_buffer buffer;
+	  struct _condvar_cleanup_buffer cbuffer;
+	  cbuffer.wseq = wseq;
+	  cbuffer.cond = cond;
+	  cbuffer.mutex = mutex;
+	  cbuffer.private = private;
+	  __pthread_cleanup_push (&buffer, __condvar_cleanup_waiting, &cbuffer);
+
+	  if (abstime == NULL)
+	    {
+	      /* Block without a timeout.  */
+	      err = futex_wait_cancelable (
+		  cond->__data.__g_signals + g, 0, private);
+	    }
+	  else
+	    {
+	      /* Block, but with a timeout.
+		 Work around the fact that the kernel rejects negative timeout
+		 values despite them being valid.  */
+	      if (__glibc_unlikely (abstime->tv_sec < 0))
+	        err = ETIMEDOUT;
+
+	      else if ((flags & __PTHREAD_COND_CLOCK_MONOTONIC_MASK) != 0)
+		{
+		  /* CLOCK_MONOTONIC is requested.  */
+		  struct timespec rt;
+		  if (__clock_gettime (CLOCK_MONOTONIC, &rt) != 0)
+		    __libc_fatal ("clock_gettime does not support "
+				  "CLOCK_MONOTONIC");
+		  /* Convert the absolute timeout value to a relative
+		     timeout.  */
+		  rt.tv_sec = abstime->tv_sec - rt.tv_sec;
+		  rt.tv_nsec = abstime->tv_nsec - rt.tv_nsec;
+		  if (rt.tv_nsec < 0)
+		    {
+		      rt.tv_nsec += 1000000000;
+		      --rt.tv_sec;
+		    }
+		  /* Did we already time out?  */
+		  if (__glibc_unlikely (rt.tv_sec < 0))
+		    err = ETIMEDOUT;
+		  else
+		    err = futex_reltimed_wait_cancelable
+			(cond->__data.__g_signals + g, 0, &rt, private);
+		}
+	      else
+		{
+		  /* Use CLOCK_REALTIME.  */
+		  err = futex_abstimed_wait_cancelable
+		      (cond->__data.__g_signals + g, 0, abstime, private);
+		}
+	    }
+
+	  __pthread_cleanup_pop (&buffer, 0);
+
+	  if (__glibc_unlikely (err == ETIMEDOUT))
+	    {
+	      __condvar_dec_grefs (cond, g, private);
+	      /* If we timed out, we effectively cancel waiting.  Note that
+		 we have decremented __g_refs before cancellation, so that a
+		 deadlock between waiting for quiescence of our group in
+		 __condvar_quiesce_and_switch_g1 and us trying to acquire
+		 the lock during cancellation is not possible.  */
+	      __condvar_cancel_waiting (cond, seq, g, private);
+	      result = ETIMEDOUT;
+	      goto done;
+	    }
+	  else
+	    __condvar_dec_grefs (cond, g, private);
+
+	  /* Reload signals.  See above for MO.  */
+	  signals = atomic_load_acquire (cond->__data.__g_signals + g);
 	}
-      pi_flag = USE_REQUEUE_PI (mutex);
 
-      if (pi_flag)
+    }
+  /* Try to grab a signal.  Use acquire MO so that we see an up-to-date value
+     of __g1_start below (see spinning above for a similar case).  In
+     particular, if we steal from a more recent group, we will also see a
+     more recent __g1_start below.  */
+  while (!atomic_compare_exchange_weak_acquire (cond->__data.__g_signals + g,
+						&signals, signals - 2));
+
+  /* We consumed a signal but we could have consumed from a more recent group
+     that aliased with ours due to being in the same group slot.  If this
+     might be the case our group must be closed as visible through
+     __g1_start.  */
+  uint64_t g1_start = __condvar_load_g1_start_relaxed (cond);
+  if (seq < (g1_start >> 1))
+    {
+      /* We potentially stole a signal from a more recent group but we do not
+	 know which group we really consumed from.
+	 We do not care about groups older than current G1 because they are
+	 closed; we could have stolen from these, but then we just add a
+	 spurious wake-up for the current groups.
+	 We will never steal a signal from current G2 that was really intended
+	 for G2 because G2 never receives signals (until it becomes G1).  We
+	 could have stolen a signal from G2 that was conservatively added by a
+	 previous waiter that also thought it stole a signal -- but given that
+	 that signal was added unnecessarily, it's not a problem if we steal
+	 it.
+	 Thus, the remaining case is that we could have stolen from the current
+	 G1, where "current" means the __g1_start value we observed.  However,
+	 if the current G1 does not have the same slot index as we do, we did
+	 not steal from it and do not need to undo that.  This is the reason
+	 for putting a bit with G2's index into __g1_start as well.  */
+      if (((g1_start & 1) ^ 1) == g)
 	{
-	  err = lll_futex_wait_requeue_pi (&cond->__data.__futex,
-					   futex_val, &mutex->__data.__lock,
-					   pshared);
-
-	  pi_flag = (err == 0);
+	  /* We have to conservatively undo our potential mistake of stealing
+	     a signal.  We can stop trying to do that when the current G1
+	     changes because other spinning waiters will notice this too and
+	     __condvar_quiesce_and_switch_g1 has checked that there are no
+	     futex waiters anymore before switching G1.
+	     Relaxed MO is fine for the __g1_start load because we need to
+	     merely be able to observe this fact and not have to observe
+	     something else as well.
+	     ??? Would it help to spin for a little while to see whether the
+	     current G1 gets closed?  This might be worthwhile if the group is
+	     small or close to being closed.  */
+	  unsigned int s = atomic_load_relaxed (cond->__data.__g_signals + g);
+	  while (__condvar_load_g1_start_relaxed (cond) == g1_start)
+	    {
+	      /* Try to add a signal.  We don't need to acquire the lock
+		 because at worst we can cause a spurious wake-up.  If the
+		 group is in the process of being closed (LSB is true), this
+		 has an effect similar to us adding a signal.  */
+	      if (((s & 1) != 0)
+		  || atomic_compare_exchange_weak_relaxed
+		       (cond->__data.__g_signals + g, &s, s + 2))
+		{
+		  /* If we added a signal, we also need to add a wake-up on
+		     the futex.  We also need to do that if we skipped adding
+		     a signal because the group is being closed: while
+		     __condvar_quiesce_and_switch_g1 could have closed the
+		     group, it might still be waiting for futex waiters to
+		     leave (and one of those waiters might be the one we stole
+		     the signal from, which would cause it to block using the
+		     futex).  */
+		  futex_wake (cond->__data.__g_signals + g, 1, private);
+		  break;
+		}
+	      /* TODO Back off.  */
+	    }
 	}
-      else
-#endif
-	  /* Wait until woken by signal or broadcast.  */
-	lll_futex_wait (&cond->__data.__futex, futex_val, pshared);
-
-      /* Disable asynchronous cancellation.  */
-      __pthread_disable_asynccancel (cbuffer.oldtype);
-
-      /* We are going to look at shared data again, so get the lock.  */
-      lll_lock (cond->__data.__lock, pshared);
-
-      /* If a broadcast happened, we are done.  */
-      if (cbuffer.bc_seq != cond->__data.__broadcast_seq)
-	goto bc_out;
-
-      /* Check whether we are eligible for wakeup.  */
-      val = cond->__data.__wakeup_seq;
     }
-  while (val == seq || cond->__data.__woken_seq == val);
 
-  /* Another thread woken up.  */
-  ++cond->__data.__woken_seq;
+ done:
 
- bc_out:
+  /* Confirm that we have been woken.  We do that before acquiring the mutex
+     to allow for execution of pthread_cond_destroy while having acquired the
+     mutex.  */
+  __condvar_confirm_wakeup (cond, private);
 
-  cond->__data.__nwaiters -= 1 << COND_NWAITERS_SHIFT;
-
-  /* If pthread_cond_destroy was called on this varaible already,
-     notify the pthread_cond_destroy caller all waiters have left
-     and it can be successfully destroyed.  */
-  if (cond->__data.__total_seq == -1ULL
-      && cond->__data.__nwaiters < (1 << COND_NWAITERS_SHIFT))
-    lll_futex_wake (&cond->__data.__nwaiters, 1, pshared);
+  /* Woken up; now re-acquire the mutex.  If this doesn't fail, return RESULT,
+     which is set to ETIMEDOUT if a timeout occurred, or zero otherwise.  */
+  err = __pthread_mutex_cond_lock (mutex);
+  /* XXX Abort on errors that are disallowed by POSIX?  */
+  return (err != 0) ? err : result;
+}
 
-  /* We are done with the condvar.  */
-  lll_unlock (cond->__data.__lock, pshared);
 
-  /* The cancellation handling is back to normal, remove the handler.  */
-  __pthread_cleanup_pop (&buffer, 0);
+/* See __pthread_cond_wait_common.  */
+int
+__pthread_cond_wait (pthread_cond_t *cond, pthread_mutex_t *mutex)
+{
+  return __pthread_cond_wait_common (cond, mutex, NULL);
+}
 
-  /* Get the mutex before returning.  Not needed for PI.  */
-#if (defined lll_futex_wait_requeue_pi \
-     && defined __ASSUME_REQUEUE_PI)
-  if (pi_flag)
-    {
-      __pthread_mutex_cond_lock_adjust (mutex);
-      return 0;
-    }
-  else
-#endif
-    return __pthread_mutex_cond_lock (mutex);
+/* See __pthread_cond_wait_common.  */
+int
+__pthread_cond_timedwait (pthread_cond_t *cond, pthread_mutex_t *mutex,
+    const struct timespec *abstime)
+{
+  /* Check parameter validity.  This should also tell the compiler that
+     it can assume that abstime is not NULL.  */
+  if (abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000)
+    return EINVAL;
+  return __pthread_cond_wait_common (cond, mutex, abstime);
 }
 
 versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait,
 		  GLIBC_2_3_2);
+versioned_symbol (libpthread, __pthread_cond_timedwait, pthread_cond_timedwait,
+		  GLIBC_2_3_2);
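Since the new algorithm explicitly allows spurious wake-ups (see the design notes above and the tst-cond20 change below), correct callers re-test their predicate in a loop. A minimal usage sketch with invented names:

#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int ready;

static void
wait_until_ready (void)
{
  pthread_mutex_lock (&lock);
  /* Every return from pthread_cond_wait may be spurious; re-test.  */
  while (!ready)
    pthread_cond_wait (&cond, &lock);
  pthread_mutex_unlock (&lock);
}
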
diff --git a/nptl/pthread_condattr_getclock.c b/nptl/pthread_condattr_getclock.c
index d156302ffb..cecb4aa8a5 100644
--- a/nptl/pthread_condattr_getclock.c
+++ b/nptl/pthread_condattr_getclock.c
@@ -23,6 +23,6 @@ int
 pthread_condattr_getclock (const pthread_condattr_t *attr, clockid_t *clock_id)
 {
   *clock_id = (((((const struct pthread_condattr *) attr)->value) >> 1)
-	       & ((1 << COND_NWAITERS_SHIFT) - 1));
+	       & ((1 << COND_CLOCK_BITS) - 1));
   return 0;
 }
diff --git a/nptl/pthread_condattr_getpshared.c b/nptl/pthread_condattr_getpshared.c
index 5a10f3eeb0..814796690c 100644
--- a/nptl/pthread_condattr_getpshared.c
+++ b/nptl/pthread_condattr_getpshared.c
@@ -22,7 +22,8 @@
 int
 pthread_condattr_getpshared (const pthread_condattr_t *attr, int *pshared)
 {
-  *pshared = ((const struct pthread_condattr *) attr)->value & 1;
+  *pshared = (((const struct pthread_condattr *) attr)->value & 1
+	      ? PTHREAD_PROCESS_SHARED : PTHREAD_PROCESS_PRIVATE);
 
   return 0;
 }
diff --git a/nptl/pthread_condattr_init.c b/nptl/pthread_condattr_init.c
index 0ce42e5dfa..6e5168dadd 100644
--- a/nptl/pthread_condattr_init.c
+++ b/nptl/pthread_condattr_init.c
@@ -23,7 +23,9 @@
 int
 __pthread_condattr_init (pthread_condattr_t *attr)
 {
-  memset (attr, '\0', sizeof (*attr));
+  struct pthread_condattr *iattr = (struct pthread_condattr *) attr;
+  /* Default is not pshared and CLOCK_REALTIME.  */
+  iattr->value = CLOCK_REALTIME << 1;
 
   return 0;
 }
diff --git a/nptl/pthread_condattr_setclock.c b/nptl/pthread_condattr_setclock.c
index 25e2a176a0..3cfad84cda 100644
--- a/nptl/pthread_condattr_setclock.c
+++ b/nptl/pthread_condattr_setclock.c
@@ -18,7 +18,7 @@
 
 #include <assert.h>
 #include <errno.h>
-#include <stdbool.h>
+#include <futex-internal.h>
 #include <time.h>
 #include <sysdep.h>
 #include "pthreadP.h"
@@ -33,12 +33,17 @@ pthread_condattr_setclock (pthread_condattr_t *attr, clockid_t clock_id)
        in the pthread_cond_t structure needs to be adjusted.  */
     return EINVAL;
 
+  /* If we do not support waiting using CLOCK_MONOTONIC, return an error.  */
+  if (clock_id == CLOCK_MONOTONIC
+      && !futex_supports_exact_relative_timeouts ())
+    return ENOTSUP;
+
   /* Make sure the value fits in the bits we reserved.  */
-  assert (clock_id < (1 << COND_NWAITERS_SHIFT));
+  assert (clock_id < (1 << COND_CLOCK_BITS));
 
   int *valuep = &((struct pthread_condattr *) attr)->value;
 
-  *valuep = ((*valuep & ~(((1 << COND_NWAITERS_SHIFT) - 1) << 1))
+  *valuep = ((*valuep & ~(((1 << COND_CLOCK_BITS) - 1) << 1))
 	     | (clock_id << 1));
 
   return 0;
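
The attribute word packs the process-shared flag into bit 0 and the clock ID into the COND_CLOCK_BITS bits above it, as the set/get functions in these hunks show. A compact sketch of the same pack/unpack logic (invented helper names, not glibc code):

#define COND_CLOCK_BITS 1

static int
pack_condattr (int pshared, int clock_id)
{
  return (pshared & 1) | (clock_id << 1);
}

static int
unpack_clock (int value)
{
  return (value >> 1) & ((1 << COND_CLOCK_BITS) - 1);
}

static int
unpack_pshared (int value)
{
  return value & 1;
}
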
diff --git a/nptl/test-cond-printers.py b/nptl/test-cond-printers.py
index af0e12eb97..9e807c9f2c 100644
--- a/nptl/test-cond-printers.py
+++ b/nptl/test-cond-printers.py
@@ -35,7 +35,7 @@ try:
 
     break_at(test_source, 'Test status (destroyed)')
     continue_cmd() # Go to test_status_destroyed
-    test_printer(var, to_string, {'Status': 'Destroyed'})
+    test_printer(var, to_string, {'Threads known to still execute a wait function': '0'})
 
     continue_cmd() # Exit
 
diff --git a/nptl/tst-cond1.c b/nptl/tst-cond1.c
index 75ab9c8d8a..509bbd0be4 100644
--- a/nptl/tst-cond1.c
+++ b/nptl/tst-cond1.c
@@ -73,6 +73,9 @@ do_test (void)
 
   puts ("parent: wait for condition");
 
+  /* This test will fail on spurious wake-ups, which are allowed; however,
+     the current implementation shouldn't produce spurious wake-ups in the
+     scenario we are testing here.  */
   err = pthread_cond_wait (&cond, &mut);
   if (err != 0)
     error (EXIT_FAILURE, err, "parent: cannot wait for signal");
diff --git a/nptl/tst-cond20.c b/nptl/tst-cond20.c
index 918c4adb51..665a66a92e 100644
--- a/nptl/tst-cond20.c
+++ b/nptl/tst-cond20.c
@@ -96,7 +96,10 @@ do_test (void)
 
   for (i = 0; i < ROUNDS; ++i)
     {
-      pthread_cond_wait (&cond2, &mut);
+      /* Make sure we discard spurious wake-ups.  */
+      do
+	pthread_cond_wait (&cond2, &mut);
+      while (count != N);
 
       if (i & 1)
         pthread_mutex_unlock (&mut);
diff --git a/nptl/tst-cond22.c b/nptl/tst-cond22.c
index bd978e50ca..64f19ea0a5 100644
--- a/nptl/tst-cond22.c
+++ b/nptl/tst-cond22.c
@@ -106,10 +106,11 @@ do_test (void)
       status = 1;
     }
 
-  printf ("cond = { %d, %x, %lld, %lld, %lld, %p, %u, %u }\n",
-	  c.__data.__lock, c.__data.__futex, c.__data.__total_seq,
-	  c.__data.__wakeup_seq, c.__data.__woken_seq, c.__data.__mutex,
-	  c.__data.__nwaiters, c.__data.__broadcast_seq);
+  printf ("cond = { %llu, %llu, %u/%u/%u, %u/%u/%u, %u, %u }\n",
+	  c.__data.__wseq, c.__data.__g1_start,
+	  c.__data.__g_signals[0], c.__data.__g_refs[0], c.__data.__g_size[0],
+	  c.__data.__g_signals[1], c.__data.__g_refs[1], c.__data.__g_size[1],
+	  c.__data.__g1_orig_size, c.__data.__wrefs);
 
   if (pthread_create (&th, NULL, tf, (void *) 1l) != 0)
     {
@@ -148,10 +149,11 @@ do_test (void)
       status = 1;
     }
 
-  printf ("cond = { %d, %x, %lld, %lld, %lld, %p, %u, %u }\n",
-	  c.__data.__lock, c.__data.__futex, c.__data.__total_seq,
-	  c.__data.__wakeup_seq, c.__data.__woken_seq, c.__data.__mutex,
-	  c.__data.__nwaiters, c.__data.__broadcast_seq);
+  printf ("cond = { %llu, %llu, %u/%u/%u, %u/%u/%u, %u, %u }\n",
+	  c.__data.__wseq, c.__data.__g1_start,
+	  c.__data.__g_signals[0], c.__data.__g_refs[0], c.__data.__g_size[0],
+	  c.__data.__g_signals[1], c.__data.__g_refs[1], c.__data.__g_size[1],
+	  c.__data.__g1_orig_size, c.__data.__wrefs);
 
   return status;
 }
diff --git a/sysdeps/aarch64/nptl/bits/pthreadtypes.h b/sysdeps/aarch64/nptl/bits/pthreadtypes.h
index 5bf2eb2928..c265a7c935 100644
--- a/sysdeps/aarch64/nptl/bits/pthreadtypes.h
+++ b/sysdeps/aarch64/nptl/bits/pthreadtypes.h
@@ -106,17 +106,30 @@ typedef union
 {
   struct
   {
-    int __lock;
-    unsigned int __futex;
-    __extension__ unsigned long long int __total_seq;
-    __extension__ unsigned long long int __wakeup_seq;
-    __extension__ unsigned long long int __woken_seq;
-    void *__mutex;
-    unsigned int __nwaiters;
-    unsigned int __broadcast_seq;
+    __extension__ union
+    {
+      __extension__ unsigned long long int __wseq;
+      struct {
+	unsigned int __low;
+	unsigned int __high;
+      } __wseq32;
+    };
+    __extension__ union
+    {
+      __extension__ unsigned long long int __g1_start;
+      struct {
+	unsigned int __low;
+	unsigned int __high;
+      } __g1_start32;
+    };
+    unsigned int __g_refs[2];
+    unsigned int __g_size[2];
+    unsigned int __g1_orig_size;
+    unsigned int __wrefs;
+    unsigned int __g_signals[2];
   } __data;
   char __size[__SIZEOF_PTHREAD_COND_T];
-  long int __align;
+  __extension__ long long int __align;
 } pthread_cond_t;
 
 typedef union
diff --git a/sysdeps/arm/nptl/bits/pthreadtypes.h b/sysdeps/arm/nptl/bits/pthreadtypes.h
index 645207bf41..354881d976 100644
--- a/sysdeps/arm/nptl/bits/pthreadtypes.h
+++ b/sysdeps/arm/nptl/bits/pthreadtypes.h
@@ -93,14 +93,27 @@ typedef union
 {
   struct
   {
-    int __lock;
-    unsigned int __futex;
-    __extension__ unsigned long long int __total_seq;
-    __extension__ unsigned long long int __wakeup_seq;
-    __extension__ unsigned long long int __woken_seq;
-    void *__mutex;
-    unsigned int __nwaiters;
-    unsigned int __broadcast_seq;
+    __extension__ union
+    {
+      __extension__ unsigned long long int __wseq;
+      struct {
+	unsigned int __low;
+	unsigned int __high;
+      } __wseq32;
+    };
+    __extension__ union
+    {
+      __extension__ unsigned long long int __g1_start;
+      struct {
+	unsigned int __low;
+	unsigned int __high;
+      } __g1_start32;
+    };
+    unsigned int __g_refs[2];
+    unsigned int __g_size[2];
+    unsigned int __g1_orig_size;
+    unsigned int __wrefs;
+    unsigned int __g_signals[2];
   } __data;
   char __size[__SIZEOF_PTHREAD_COND_T];
   __extension__ long long int __align;
diff --git a/sysdeps/ia64/nptl/bits/pthreadtypes.h b/sysdeps/ia64/nptl/bits/pthreadtypes.h
index 0e8388374f..13726ec8b7 100644
--- a/sysdeps/ia64/nptl/bits/pthreadtypes.h
+++ b/sysdeps/ia64/nptl/bits/pthreadtypes.h
@@ -90,17 +90,30 @@ typedef union
 {
   struct
   {
-    int __lock;
-    unsigned int __futex;
-    __extension__ unsigned long long int __total_seq;
-    __extension__ unsigned long long int __wakeup_seq;
-    __extension__ unsigned long long int __woken_seq;
-    void *__mutex;
-    unsigned int __nwaiters;
-    unsigned int __broadcast_seq;
+    __extension__ union
+    {
+      __extension__ unsigned long long int __wseq;
+      struct {
+	unsigned int __low;
+	unsigned int __high;
+      } __wseq32;
+    };
+    __extension__ union
+    {
+      __extension__ unsigned long long int __g1_start;
+      struct {
+	unsigned int __low;
+	unsigned int __high;
+      } __g1_start32;
+    };
+    unsigned int __g_refs[2];
+    unsigned int __g_size[2];
+    unsigned int __g1_orig_size;
+    unsigned int __wrefs;
+    unsigned int __g_signals[2];
   } __data;
   char __size[__SIZEOF_PTHREAD_COND_T];
-  long int __align;
+  __extension__ long long int __align;
 } pthread_cond_t;
 
 typedef union
diff --git a/sysdeps/m68k/nptl/bits/pthreadtypes.h b/sysdeps/m68k/nptl/bits/pthreadtypes.h
index d195492f35..296990814a 100644
--- a/sysdeps/m68k/nptl/bits/pthreadtypes.h
+++ b/sysdeps/m68k/nptl/bits/pthreadtypes.h
@@ -88,19 +88,33 @@ typedef union
 
 
 /* Data structure for conditional variable handling.  The structure of
-   the attribute type is deliberately not exposed.  */
+   the attribute type is not exposed on purpose.  */
 typedef union
 {
   struct
   {
-    int __lock __attribute__ ((__aligned__ (4)));
-    unsigned int __futex;
-    __extension__ unsigned long long int __total_seq;
-    __extension__ unsigned long long int __wakeup_seq;
-    __extension__ unsigned long long int __woken_seq;
-    void *__mutex;
-    unsigned int __nwaiters;
-    unsigned int __broadcast_seq;
+    __extension__ union
+    {
+      __extension__ unsigned long long int __wseq;
+      struct {
+	unsigned int __low;
+	unsigned int __high;
+      } __wseq32;
+    };
+    __extension__ union
+    {
+      __extension__ unsigned long long int __g1_start;
+      struct {
+	unsigned int __low;
+	unsigned int __high;
+      } __g1_start32;
+    };
+    /* Enforce proper alignment of fields used as futex words.  */
+    unsigned int __g_refs[2] __attribute__ ((__aligned__ (4)));
+    unsigned int __g_size[2];
+    unsigned int __g1_orig_size;
+    unsigned int __wrefs;
+    unsigned int __g_signals[2];
   } __data;
   char __size[__SIZEOF_PTHREAD_COND_T];
   __extension__ long long int __align;
diff --git a/sysdeps/microblaze/nptl/bits/pthreadtypes.h b/sysdeps/microblaze/nptl/bits/pthreadtypes.h
index 2a1ca0a191..7404241f48 100644
--- a/sysdeps/microblaze/nptl/bits/pthreadtypes.h
+++ b/sysdeps/microblaze/nptl/bits/pthreadtypes.h
@@ -91,14 +91,27 @@ typedef union
 {
   struct
   {
-    int __lock;
-    unsigned int __futex;
-    __extension__ unsigned long long int __total_seq;
-    __extension__ unsigned long long int __wakeup_seq;
-    __extension__ unsigned long long int __woken_seq;
-    void *__mutex;
-    unsigned int __nwaiters;
-    unsigned int __broadcast_seq;
+    __extension__ union
+    {
+      __extension__ unsigned long long int __wseq;
+      struct {
+	unsigned int __low;
+	unsigned int __high;
+      } __wseq32;
+    };
+    __extension__ union
+    {
+      __extension__ unsigned long long int __g1_start;
+      struct {
+	unsigned int __low;
+	unsigned int __high;
+      } __g1_start32;
+    };
+    unsigned int __g_refs[2];
+    unsigned int __g_size[2];
+    unsigned int __g1_orig_size;
+    unsigned int __wrefs;
+    unsigned int __g_signals[2];
   } __data;
   char __size[__SIZEOF_PTHREAD_COND_T];
   __extension__ long long int __align;
diff --git a/sysdeps/mips/nptl/bits/pthreadtypes.h b/sysdeps/mips/nptl/bits/pthreadtypes.h
index c7533a8e32..c21f5a7a33 100644
--- a/sysdeps/mips/nptl/bits/pthreadtypes.h
+++ b/sysdeps/mips/nptl/bits/pthreadtypes.h
@@ -117,19 +117,32 @@ typedef union
 
 
 /* Data structure for conditional variable handling.  The structure of
-   the attribute type is deliberately not exposed.  */
+   the attribute type is not exposed on purpose.  */
 typedef union
 {
   struct
   {
-    int __lock;
-    unsigned int __futex;
-    __extension__ unsigned long long int __total_seq;
-    __extension__ unsigned long long int __wakeup_seq;
-    __extension__ unsigned long long int __woken_seq;
-    void *__mutex;
-    unsigned int __nwaiters;
-    unsigned int __broadcast_seq;
+    __extension__ union
+    {
+      __extension__ unsigned long long int __wseq;
+      struct {
+	unsigned int __low;
+	unsigned int __high;
+      } __wseq32;
+    };
+    __extension__ union
+    {
+      __extension__ unsigned long long int __g1_start;
+      struct {
+	unsigned int __low;
+	unsigned int __high;
+      } __g1_start32;
+    };
+    unsigned int __g_refs[2];
+    unsigned int __g_size[2];
+    unsigned int __g1_orig_size;
+    unsigned int __wrefs;
+    unsigned int __g_signals[2];
   } __data;
   char __size[__SIZEOF_PTHREAD_COND_T];
   __extension__ long long int __align;
diff --git a/sysdeps/nios2/nptl/bits/pthreadtypes.h b/sysdeps/nios2/nptl/bits/pthreadtypes.h
index ee8b7ba762..7b49f5deeb 100644
--- a/sysdeps/nios2/nptl/bits/pthreadtypes.h
+++ b/sysdeps/nios2/nptl/bits/pthreadtypes.h
@@ -88,19 +88,32 @@ typedef union
 
 
 /* Data structure for conditional variable handling.  The structure of
-   the attribute type is deliberately not exposed.  */
+   the attribute type is not exposed on purpose.  */
 typedef union
 {
   struct
   {
-    int __lock;
-    unsigned int __futex;
-    __extension__ unsigned long long int __total_seq;
-    __extension__ unsigned long long int __wakeup_seq;
-    __extension__ unsigned long long int __woken_seq;
-    void *__mutex;
-    unsigned int __nwaiters;
-    unsigned int __broadcast_seq;
+    __extension__ union
+    {
+      __extension__ unsigned long long int __wseq;
+      struct {
+	unsigned int __low;
+	unsigned int __high;
+      } __wseq32;
+    };
+    __extension__ union
+    {
+      __extension__ unsigned long long int __g1_start;
+      struct {
+	unsigned int __low;
+	unsigned int __high;
+      } __g1_start32;
+    };
+    unsigned int __g_refs[2];
+    unsigned int __g_size[2];
+    unsigned int __g1_orig_size;
+    unsigned int __wrefs;
+    unsigned int __g_signals[2];
   } __data;
   char __size[__SIZEOF_PTHREAD_COND_T];
   __extension__ long long int __align;
diff --git a/sysdeps/nptl/internaltypes.h b/sysdeps/nptl/internaltypes.h
index 203c548550..31e5a4357a 100644
--- a/sysdeps/nptl/internaltypes.h
+++ b/sysdeps/nptl/internaltypes.h
@@ -68,20 +68,13 @@ struct pthread_condattr
 {
   /* Combination of values:
 
-     Bit 0  : flag whether conditional variable will be sharable between
-	      processes.
-
-     Bit 1-7: clock ID.  */
+     Bit 0                : flag whether conditional variable will be
+                            sharable between processes.
+     Bit 1-COND_CLOCK_BITS: Clock ID.  COND_CLOCK_BITS is the number of bits
+                            needed to represent the ID of the clock.  */
   int value;
 };
-
-
-/* The __NWAITERS field is used as a counter and to house the number
-   of bits for other purposes.  COND_CLOCK_BITS is the number
-   of bits needed to represent the ID of the clock.  COND_NWAITERS_SHIFT
-   is the number of bits reserved for other purposes like the clock.  */
-#define COND_CLOCK_BITS		1
-#define COND_NWAITERS_SHIFT	1
+#define COND_CLOCK_BITS	1
 
 
 /* Read-write lock variable attribute data structure.  */
diff --git a/sysdeps/nptl/pthread.h b/sysdeps/nptl/pthread.h
index cc6517bca1..f3295e6d40 100644
--- a/sysdeps/nptl/pthread.h
+++ b/sysdeps/nptl/pthread.h
@@ -184,7 +184,7 @@ enum
 
 
 /* Conditional variable handling.  */
-#define PTHREAD_COND_INITIALIZER { { 0, 0, 0, 0, 0, (void *) 0, 0, 0 } }
+#define PTHREAD_COND_INITIALIZER { { {0}, {0}, {0, 0}, {0, 0}, 0, 0, {0, 0} } }
 
 
 /* Cleanup buffers */
diff --git a/sysdeps/s390/nptl/bits/pthreadtypes.h b/sysdeps/s390/nptl/bits/pthreadtypes.h
index 21d13bd7e7..2f1cff0cec 100644
--- a/sysdeps/s390/nptl/bits/pthreadtypes.h
+++ b/sysdeps/s390/nptl/bits/pthreadtypes.h
@@ -142,14 +142,27 @@ typedef union
 {
   struct
   {
-    int __lock;
-    unsigned int __futex;
-    __extension__ unsigned long long int __total_seq;
-    __extension__ unsigned long long int __wakeup_seq;
-    __extension__ unsigned long long int __woken_seq;
-    void *__mutex;
-    unsigned int __nwaiters;
-    unsigned int __broadcast_seq;
+    __extension__ union
+    {
+      __extension__ unsigned long long int __wseq;
+      struct {
+	unsigned int __low;
+	unsigned int __high;
+      } __wseq32;
+    };
+    __extension__ union
+    {
+      __extension__ unsigned long long int __g1_start;
+      struct {
+	unsigned int __low;
+	unsigned int __high;
+      } __g1_start32;
+    };
+    unsigned int __g_refs[2];
+    unsigned int __g_size[2];
+    unsigned int __g1_orig_size;
+    unsigned int __wrefs;
+    unsigned int __g_signals[2];
   } __data;
   char __size[__SIZEOF_PTHREAD_COND_T];
   __extension__ long long int __align;
diff --git a/sysdeps/sh/nptl/bits/pthreadtypes.h b/sysdeps/sh/nptl/bits/pthreadtypes.h
index 5354f78676..34434277e0 100644
--- a/sysdeps/sh/nptl/bits/pthreadtypes.h
+++ b/sysdeps/sh/nptl/bits/pthreadtypes.h
@@ -93,14 +93,27 @@ typedef union
 {
   struct
   {
-    int __lock;
-    unsigned int __futex;
-    __extension__ unsigned long long int __total_seq;
-    __extension__ unsigned long long int __wakeup_seq;
-    __extension__ unsigned long long int __woken_seq;
-    void *__mutex;
-    unsigned int __nwaiters;
-    unsigned int __broadcast_seq;
+    __extension__ union
+    {
+      __extension__ unsigned long long int __wseq;
+      struct {
+	unsigned int __low;
+	unsigned int __high;
+      } __wseq32;
+    };
+    __extension__ union
+    {
+      __extension__ unsigned long long int __g1_start;
+      struct {
+	unsigned int __low;
+	unsigned int __high;
+      } __g1_start32;
+    };
+    unsigned int __g_refs[2];
+    unsigned int __g_size[2];
+    unsigned int __g1_orig_size;
+    unsigned int __wrefs;
+    unsigned int __g_signals[2];
   } __data;
   char __size[__SIZEOF_PTHREAD_COND_T];
   __extension__ long long int __align;
diff --git a/sysdeps/tile/nptl/bits/pthreadtypes.h b/sysdeps/tile/nptl/bits/pthreadtypes.h
index b14b1c6f5e..5254bf21c1 100644
--- a/sysdeps/tile/nptl/bits/pthreadtypes.h
+++ b/sysdeps/tile/nptl/bits/pthreadtypes.h
@@ -122,14 +122,27 @@ typedef union
 {
   struct
   {
-    int __lock;
-    unsigned int __futex;
-    __extension__ unsigned long long int __total_seq;
-    __extension__ unsigned long long int __wakeup_seq;
-    __extension__ unsigned long long int __woken_seq;
-    void *__mutex;
-    unsigned int __nwaiters;
-    unsigned int __broadcast_seq;
+    __extension__ union
+    {
+      __extension__ unsigned long long int __wseq;
+      struct {
+	unsigned int __low;
+	unsigned int __high;
+      } __wseq32;
+    };
+    __extension__ union
+    {
+      __extension__ unsigned long long int __g1_start;
+      struct {
+	unsigned int __low;
+	unsigned int __high;
+      } __g1_start32;
+    };
+    unsigned int __g_refs[2];
+    unsigned int __g_size[2];
+    unsigned int __g1_orig_size;
+    unsigned int __wrefs;
+    unsigned int __g_signals[2];
   } __data;
   char __size[__SIZEOF_PTHREAD_COND_T];
   __extension__ long long int __align;
diff --git a/sysdeps/unix/sysv/linux/alpha/bits/pthreadtypes.h b/sysdeps/unix/sysv/linux/alpha/bits/pthreadtypes.h
index b7af0380e9..d60dd28c54 100644
--- a/sysdeps/unix/sysv/linux/alpha/bits/pthreadtypes.h
+++ b/sysdeps/unix/sysv/linux/alpha/bits/pthreadtypes.h
@@ -84,19 +84,32 @@ typedef union
 
 
 /* Data structure for conditional variable handling.  The structure of
-   the attribute type is deliberately not exposed.  */
+   the attribute type is not exposed on purpose.  */
 typedef union
 {
   struct
   {
-    int __lock;
-    unsigned int __futex;
-    __extension__ unsigned long long int __total_seq;
-    __extension__ unsigned long long int __wakeup_seq;
-    __extension__ unsigned long long int __woken_seq;
-    void *__mutex;
-    unsigned int __nwaiters;
-    unsigned int __broadcast_seq;
+    __extension__ union
+    {
+      __extension__ unsigned long long int __wseq;
+      struct {
+	unsigned int __low;
+	unsigned int __high;
+      } __wseq32;
+    };
+    __extension__ union
+    {
+      __extension__ unsigned long long int __g1_start;
+      struct {
+	unsigned int __low;
+	unsigned int __high;
+      } __g1_start32;
+    };
+    unsigned int __g_refs[2];
+    unsigned int __g_size[2];
+    unsigned int __g1_orig_size;
+    unsigned int __wrefs;
+    unsigned int __g_signals[2];
   } __data;
   char __size[__SIZEOF_PTHREAD_COND_T];
   __extension__ long long int __align;
diff --git a/sysdeps/unix/sysv/linux/hppa/internaltypes.h b/sysdeps/unix/sysv/linux/hppa/internaltypes.h
index 651ce2ee45..d6496579da 100644
--- a/sysdeps/unix/sysv/linux/hppa/internaltypes.h
+++ b/sysdeps/unix/sysv/linux/hppa/internaltypes.h
@@ -46,32 +46,40 @@ fails because __initializer is zero, and the structure will be used as
 is correctly.  */
 
 #define cond_compat_clear(var) \
-({											\
-  int tmp = 0;										\
-  var->__data.__lock = 0;								\
-  var->__data.__futex = 0;								\
-  var->__data.__mutex = NULL;								\
-  /* Clear __initializer last, to indicate initialization is done.  */			\
-  __asm__ __volatile__ ("stw,ma %1,0(%0)"						\
-			: : "r" (&var->__data.__initializer), "r" (tmp) : "memory");	\
+({									\
+  var->__data.__wseq = 0;						\
+  var->__data.__g1_start = 0;						\
+  var->__data.__g_refs[0] = 0;						\
+  var->__data.__g_refs[1] = 0;						\
+  var->__data.__g_size[0] = 0;						\
+  var->__data.__g_size[1] = 0;						\
+  var->__data.__g1_orig_size = 0;					\
+  var->__data.__wrefs = 0;						\
+  var->__data.__g_signals[0] = 0;					\
+  var->__data.__g_signals[1] = 0;					\
+  /* Clear __initializer last, to indicate initialization is done.  */	\
+  /* This synchronizes-with the acquire load below.  */			\
+  atomic_store_release (&var->__data.__initializer, 0);			\
 })
 
 #define cond_compat_check_and_clear(var) \
 ({								\
-  int ret;							\
-  volatile int *value = &var->__data.__initializer;		\
-  if ((ret = atomic_compare_and_exchange_val_acq(value, 2, 1)))	\
+  int v;							\
+  int *value = &var->__data.__initializer;			\
+  /* This synchronizes-with the release store above.  */	\
+  while ((v = atomic_load_acquire (value)) != 0)		\
     {								\
-      if (ret == 1)						\
+      if (v == 1						\
+	  /* Relaxed MO is fine; it only matters who's first.  */        \
+	  && atomic_compare_exchange_acquire_weak_relaxed (value, 1, 2)) \
 	{							\
-	  /* Initialize structure.  */				\
+	  /* We're first; initialize structure.  */		\
 	  cond_compat_clear (var);				\
+	  break;						\
 	}							\
       else							\
-        {							\
-	  /* Yield until structure is initialized.  */		\
-	  while (*value == 2) sched_yield ();			\
-        }							\
+	/* Yield before we re-check initialization status.  */	\
+	sched_yield ();						\
     }								\
 })
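
Editor's note: the two macros above implement a small once-initialization protocol for hppa's compat condvars: __initializer is 1 for a legacy-initialized object, 2 while one thread is clearing it, and 0 once the object is ready. A minimal sketch of the same handshake in portable C11 atomics (names illustrative; the glibc macros operate on the condvar fields directly):

#include <stdatomic.h>
#include <sched.h>

static void
check_and_clear (atomic_int *initializer)
{
  int v;
  /* The acquire load synchronizes-with the release store below, so a
     thread that observes 0 also observes the cleared fields.  */
  while ((v = atomic_load_explicit (initializer,
				    memory_order_acquire)) != 0)
    {
      if (v == 1
	  && atomic_compare_exchange_weak_explicit (initializer, &v, 2,
						    memory_order_relaxed,
						    memory_order_relaxed))
	{
	  /* We won the race: clear the structure here, then publish
	     the result with a release store.  */
	  atomic_store_explicit (initializer, 0, memory_order_release);
	  break;
	}
      /* Another thread is initializing, or the weak CAS failed
	 spuriously: yield and re-check.  */
      sched_yield ();
    }
}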
 
diff --git a/sysdeps/unix/sysv/linux/hppa/pthread_cond_timedwait.c b/sysdeps/unix/sysv/linux/hppa/pthread_cond_timedwait.c
deleted file mode 100644
index ec6fd23f58..0000000000
--- a/sysdeps/unix/sysv/linux/hppa/pthread_cond_timedwait.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/* Copyright (C) 2009-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Carlos O'Donell <carlos@codesourcery.com>, 2009.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef INCLUDED_SELF
-# define INCLUDED_SELF
-# include <pthread_cond_timedwait.c>
-#else
-# include <pthread.h>
-# include <pthreadP.h>
-# include <internaltypes.h>
-# include <shlib-compat.h>
-int
-__pthread_cond_timedwait (pthread_cond_t *cond, pthread_mutex_t *mutex,
-			  const struct timespec *abstime)
-{
-  cond_compat_check_and_clear (cond);
-  return __pthread_cond_timedwait_internal (cond, mutex, abstime);
-}
-versioned_symbol (libpthread, __pthread_cond_timedwait, pthread_cond_timedwait,
-                  GLIBC_2_3_2);
-# undef versioned_symbol
-# define versioned_symbol(lib, local, symbol, version)
-# undef __pthread_cond_timedwait
-# define __pthread_cond_timedwait __pthread_cond_timedwait_internal
-# include_next <pthread_cond_timedwait.c>
-#endif
diff --git a/sysdeps/unix/sysv/linux/hppa/pthread_cond_wait.c b/sysdeps/unix/sysv/linux/hppa/pthread_cond_wait.c
index 8f028317c6..0611f7d25f 100644
--- a/sysdeps/unix/sysv/linux/hppa/pthread_cond_wait.c
+++ b/sysdeps/unix/sysv/linux/hppa/pthread_cond_wait.c
@@ -32,9 +32,20 @@ __pthread_cond_wait (pthread_cond_t *cond, pthread_mutex_t *mutex)
 }
 versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait,
                   GLIBC_2_3_2);
+int
+__pthread_cond_timedwait (pthread_cond_t *cond, pthread_mutex_t *mutex,
+			  const struct timespec *abstime)
+{
+  cond_compat_check_and_clear (cond);
+  return __pthread_cond_timedwait_internal (cond, mutex, abstime);
+}
+versioned_symbol (libpthread, __pthread_cond_timedwait, pthread_cond_timedwait,
+                  GLIBC_2_3_2);
 # undef versioned_symbol
 # define versioned_symbol(lib, local, symbol, version)
 # undef __pthread_cond_wait
 # define __pthread_cond_wait __pthread_cond_wait_internal
+# undef __pthread_cond_timedwait
+# define __pthread_cond_timedwait __pthread_cond_timedwait_internal
 # include_next <pthread_cond_wait.c>
 #endif
diff --git a/sysdeps/unix/sysv/linux/i386/i686/pthread_cond_timedwait.S b/sysdeps/unix/sysv/linux/i386/i686/pthread_cond_timedwait.S
deleted file mode 100644
index f697e5bd06..0000000000
--- a/sysdeps/unix/sysv/linux/i386/i686/pthread_cond_timedwait.S
+++ /dev/null
@@ -1,20 +0,0 @@
-/* Copyright (C) 2003-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@redhat.com>, 2003.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#define HAVE_CMOV 1
-#include "../pthread_cond_timedwait.S"
diff --git a/sysdeps/unix/sysv/linux/i386/pthread_cond_broadcast.S b/sysdeps/unix/sysv/linux/i386/pthread_cond_broadcast.S
deleted file mode 100644
index 599668830d..0000000000
--- a/sysdeps/unix/sysv/linux/i386/pthread_cond_broadcast.S
+++ /dev/null
@@ -1,241 +0,0 @@
-/* Copyright (C) 2002-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <shlib-compat.h>
-#include <lowlevellock.h>
-#include <lowlevelcond.h>
-#include <kernel-features.h>
-#include <pthread-pi-defines.h>
-#include <pthread-errnos.h>
-#include <stap-probe.h>
-
-	.text
-
-	/* int pthread_cond_broadcast (pthread_cond_t *cond) */
-	.globl	__pthread_cond_broadcast
-	.type	__pthread_cond_broadcast, @function
-	.align	16
-__pthread_cond_broadcast:
-	cfi_startproc
-	pushl	%ebx
-	cfi_adjust_cfa_offset(4)
-	cfi_rel_offset(%ebx, 0)
-	pushl	%esi
-	cfi_adjust_cfa_offset(4)
-	cfi_rel_offset(%esi, 0)
-	pushl	%edi
-	cfi_adjust_cfa_offset(4)
-	cfi_rel_offset(%edi, 0)
-	pushl	%ebp
-	cfi_adjust_cfa_offset(4)
-	cfi_rel_offset(%ebp, 0)
-	cfi_remember_state
-
-	movl	20(%esp), %ebx
-
-	LIBC_PROBE (cond_broadcast, 1, %edx)
-
-	/* Get internal lock.  */
-	movl	$1, %edx
-	xorl	%eax, %eax
-	LOCK
-#if cond_lock == 0
-	cmpxchgl %edx, (%ebx)
-#else
-	cmpxchgl %edx, cond_lock(%ebx)
-#endif
-	jnz	1f
-
-2:	addl	$cond_futex, %ebx
-	movl	total_seq+4-cond_futex(%ebx), %eax
-	movl	total_seq-cond_futex(%ebx), %ebp
-	cmpl	wakeup_seq+4-cond_futex(%ebx), %eax
-	ja	3f
-	jb	4f
-	cmpl	wakeup_seq-cond_futex(%ebx), %ebp
-	jna	4f
-
-	/* Cause all currently waiting threads to recognize they are
-	   woken up.  */
-3:	movl	%ebp, wakeup_seq-cond_futex(%ebx)
-	movl	%eax, wakeup_seq-cond_futex+4(%ebx)
-	movl	%ebp, woken_seq-cond_futex(%ebx)
-	movl	%eax, woken_seq-cond_futex+4(%ebx)
-	addl	%ebp, %ebp
-	addl	$1, broadcast_seq-cond_futex(%ebx)
-	movl	%ebp, (%ebx)
-
-	/* Get the address of the mutex used.  */
-	movl	dep_mutex-cond_futex(%ebx), %edi
-
-	/* Unlock.  */
-	LOCK
-	subl	$1, cond_lock-cond_futex(%ebx)
-	jne	7f
-
-	/* Don't use requeue for pshared condvars.  */
-8:	cmpl	$-1, %edi
-	je	9f
-
-	/* Do not use requeue for pshared condvars.  */
-	testl	$PS_BIT, MUTEX_KIND(%edi)
-	jne	9f
-
-	/* Requeue to a non-robust PI mutex if the PI bit is set and
-	   the robust bit is not set.  */
-	movl	MUTEX_KIND(%edi), %eax
-	andl	$(ROBUST_BIT|PI_BIT), %eax
-	cmpl	$PI_BIT, %eax
-	je	81f
-
-	/* Wake up all threads.  */
-#ifdef __ASSUME_PRIVATE_FUTEX
-	movl	$(FUTEX_CMP_REQUEUE|FUTEX_PRIVATE_FLAG), %ecx
-#else
-	movl	%gs:PRIVATE_FUTEX, %ecx
-	orl	$FUTEX_CMP_REQUEUE, %ecx
-#endif
-	movl	$SYS_futex, %eax
-	movl	$0x7fffffff, %esi
-	movl	$1, %edx
-	/* Get the address of the futex involved.  */
-# if MUTEX_FUTEX != 0
-	addl	$MUTEX_FUTEX, %edi
-# endif
-/* FIXME: Until Ingo fixes 4G/4G vDSO, 6 arg syscalls are broken for sysenter.
-	ENTER_KERNEL  */
-	int	$0x80
-
-	/* For any kind of error, which mainly is EAGAIN, we try again
-	   with WAKE.  The general test also covers running on old
-	   kernels.  */
-	cmpl	$0xfffff001, %eax
-	jae	9f
-
-6:	xorl	%eax, %eax
-	popl	%ebp
-	cfi_adjust_cfa_offset(-4)
-	cfi_restore(%ebp)
-	popl	%edi
-	cfi_adjust_cfa_offset(-4)
-	cfi_restore(%edi)
-	popl	%esi
-	cfi_adjust_cfa_offset(-4)
-	cfi_restore(%esi)
-	popl	%ebx
-	cfi_adjust_cfa_offset(-4)
-	cfi_restore(%ebx)
-	ret
-
-	cfi_restore_state
-
-81:	movl	$(FUTEX_CMP_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %ecx
-	movl	$SYS_futex, %eax
-	movl	$0x7fffffff, %esi
-	movl	$1, %edx
-	/* Get the address of the futex involved.  */
-# if MUTEX_FUTEX != 0
-	addl	$MUTEX_FUTEX, %edi
-# endif
-	int	$0x80
-
-	/* For any kind of error, which mainly is EAGAIN, we try again
-	with WAKE.  The general test also covers running on old
-	kernels.  */
-	cmpl	$0xfffff001, %eax
-	jb	6b
-	jmp	9f
-
-	/* Initial locking failed.  */
-1:
-#if cond_lock == 0
-	movl	%ebx, %edx
-#else
-	leal	cond_lock(%ebx), %edx
-#endif
-#if (LLL_SHARED-LLL_PRIVATE) > 255
-	xorl	%ecx, %ecx
-#endif
-	cmpl	$-1, dep_mutex(%ebx)
-	setne	%cl
-	subl	$1, %ecx
-	andl	$(LLL_SHARED-LLL_PRIVATE), %ecx
-#if LLL_PRIVATE != 0
-	addl	$LLL_PRIVATE, %ecx
-#endif
-	call	__lll_lock_wait
-	jmp	2b
-
-	.align	16
-	/* Unlock.  */
-4:	LOCK
-	subl	$1, cond_lock-cond_futex(%ebx)
-	je	6b
-
-	/* Unlock in loop requires wakeup.  */
-5:	leal	cond_lock-cond_futex(%ebx), %eax
-#if (LLL_SHARED-LLL_PRIVATE) > 255
-	xorl	%ecx, %ecx
-#endif
-	cmpl	$-1, dep_mutex-cond_futex(%ebx)
-	setne	%cl
-	subl	$1, %ecx
-	andl	$(LLL_SHARED-LLL_PRIVATE), %ecx
-#if LLL_PRIVATE != 0
-	addl	$LLL_PRIVATE, %ecx
-#endif
-	call	__lll_unlock_wake
-	jmp	6b
-
-	/* Unlock in loop requires wakeup.  */
-7:	leal	cond_lock-cond_futex(%ebx), %eax
-#if (LLL_SHARED-LLL_PRIVATE) > 255
-	xorl	%ecx, %ecx
-#endif
-	cmpl	$-1, dep_mutex-cond_futex(%ebx)
-	setne	%cl
-	subl	$1, %ecx
-	andl	$(LLL_SHARED-LLL_PRIVATE), %ecx
-#if LLL_PRIVATE != 0
-	addl	$LLL_PRIVATE, %ecx
-#endif
-	call	__lll_unlock_wake
-	jmp	8b
-
-9:	/* The futex requeue functionality is not available.  */
-	movl	$0x7fffffff, %edx
-#if FUTEX_PRIVATE_FLAG > 255
-	xorl	%ecx, %ecx
-#endif
-	cmpl	$-1, dep_mutex-cond_futex(%ebx)
-	sete	%cl
-	subl	$1, %ecx
-#ifdef __ASSUME_PRIVATE_FUTEX
-	andl	$FUTEX_PRIVATE_FLAG, %ecx
-#else
-	andl	%gs:PRIVATE_FUTEX, %ecx
-#endif
-	addl	$FUTEX_WAKE, %ecx
-	movl	$SYS_futex, %eax
-	ENTER_KERNEL
-	jmp	6b
-	cfi_endproc
-	.size	__pthread_cond_broadcast, .-__pthread_cond_broadcast
-versioned_symbol (libpthread, __pthread_cond_broadcast, pthread_cond_broadcast,
-		  GLIBC_2_3_2)
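
Editor's note, for reference while reading the deletions: when requeueing is unavailable (label 9 above), the old broadcast falls back to waking every waiter with FUTEX_WAKE and a count of 0x7fffffff, i.e. INT_MAX. The equivalent raw syscall in C looks roughly like this (a sketch; the private-futex flag selection and error handling are omitted):

#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <limits.h>
#include <stddef.h>

static long
futex_wake_all (unsigned int *futex_word)
{
  /* Wake up to INT_MAX threads blocked in FUTEX_WAIT on *futex_word;
     process-private condvars would OR FUTEX_PRIVATE_FLAG into the op.  */
  return syscall (SYS_futex, futex_word, FUTEX_WAKE, INT_MAX,
		  NULL, NULL, 0);
}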
diff --git a/sysdeps/unix/sysv/linux/i386/pthread_cond_signal.S b/sysdeps/unix/sysv/linux/i386/pthread_cond_signal.S
deleted file mode 100644
index 0038775d3f..0000000000
--- a/sysdeps/unix/sysv/linux/i386/pthread_cond_signal.S
+++ /dev/null
@@ -1,216 +0,0 @@
-/* Copyright (C) 2002-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <shlib-compat.h>
-#include <lowlevellock.h>
-#include <lowlevelcond.h>
-#include <kernel-features.h>
-#include <pthread-pi-defines.h>
-#include <pthread-errnos.h>
-#include <stap-probe.h>
-
-	.text
-
-	/* int pthread_cond_signal (pthread_cond_t *cond) */
-	.globl	__pthread_cond_signal
-	.type	__pthread_cond_signal, @function
-	.align	16
-__pthread_cond_signal:
-
-	cfi_startproc
-	pushl	%ebx
-	cfi_adjust_cfa_offset(4)
-	cfi_rel_offset(%ebx, 0)
-	pushl	%edi
-	cfi_adjust_cfa_offset(4)
-	cfi_rel_offset(%edi, 0)
-	cfi_remember_state
-
-	movl	12(%esp), %edi
-
-	LIBC_PROBE (cond_signal, 1, %edi)
-
-	/* Get internal lock.  */
-	movl	$1, %edx
-	xorl	%eax, %eax
-	LOCK
-#if cond_lock == 0
-	cmpxchgl %edx, (%edi)
-#else
-	cmpxchgl %edx, cond_lock(%edi)
-#endif
-	jnz	1f
-
-2:	leal	cond_futex(%edi), %ebx
-	movl	total_seq+4(%edi), %eax
-	movl	total_seq(%edi), %ecx
-	cmpl	wakeup_seq+4(%edi), %eax
-#if cond_lock != 0
-	/* Must use leal to preserve the flags.  */
-	leal	cond_lock(%edi), %edi
-#endif
-	ja	3f
-	jb	4f
-	cmpl	wakeup_seq-cond_futex(%ebx), %ecx
-	jbe	4f
-
-	/* Bump the wakeup number.  */
-3:	addl	$1, wakeup_seq-cond_futex(%ebx)
-	adcl	$0, wakeup_seq-cond_futex+4(%ebx)
-	addl	$1, (%ebx)
-
-	/* Wake up one thread.  */
-	pushl	%esi
-	cfi_adjust_cfa_offset(4)
-	cfi_rel_offset(%esi, 0)
-	pushl	%ebp
-	cfi_adjust_cfa_offset(4)
-	cfi_rel_offset(%ebp, 0)
-
-#if FUTEX_PRIVATE_FLAG > 255
-	xorl	%ecx, %ecx
-#endif
-	cmpl	$-1, dep_mutex-cond_futex(%ebx)
-	sete	%cl
-	je	8f
-
-	movl	dep_mutex-cond_futex(%ebx), %edx
-	/* Requeue to a non-robust PI mutex if the PI bit is set and
-	   the robust bit is not set.  */
-	movl	MUTEX_KIND(%edx), %eax
-	andl	$(ROBUST_BIT|PI_BIT), %eax
-	cmpl	$PI_BIT, %eax
-	je	9f
-
-8:	subl	$1, %ecx
-#ifdef __ASSUME_PRIVATE_FUTEX
-	andl	$FUTEX_PRIVATE_FLAG, %ecx
-#else
-	andl	%gs:PRIVATE_FUTEX, %ecx
-#endif
-	addl	$FUTEX_WAKE_OP, %ecx
-	movl	$SYS_futex, %eax
-	movl	$1, %edx
-	movl	$1, %esi
-	movl	$FUTEX_OP_CLEAR_WAKE_IF_GT_ONE, %ebp
-	/* FIXME: Until Ingo fixes 4G/4G vDSO, 6 arg syscalls are broken for
-	   sysenter.
-	ENTER_KERNEL  */
-	int	$0x80
-	popl	%ebp
-	cfi_adjust_cfa_offset(-4)
-	cfi_restore(%ebp)
-	popl	%esi
-	cfi_adjust_cfa_offset(-4)
-	cfi_restore(%esi)
-
-	/* For any kind of error, we try again with WAKE.
-	   The general test also covers running on old kernels.  */
-	cmpl	$-4095, %eax
-	jae	7f
-
-6:	xorl	%eax, %eax
-	popl	%edi
-	cfi_adjust_cfa_offset(-4)
-	cfi_restore(%edi)
-	popl	%ebx
-	cfi_adjust_cfa_offset(-4)
-	cfi_restore(%ebx)
-	ret
-
-	cfi_restore_state
-
-9:	movl	$(FUTEX_CMP_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %ecx
-	movl	$SYS_futex, %eax
-	movl	$1, %edx
-	xorl	%esi, %esi
-	movl	dep_mutex-cond_futex(%ebx), %edi
-	movl	(%ebx), %ebp
-	/* FIXME: Until Ingo fixes 4G/4G vDSO, 6 arg syscalls are broken for
-	   sysenter.
-	ENTER_KERNEL  */
-	int	$0x80
-	popl	%ebp
-	popl	%esi
-
-	leal	-cond_futex(%ebx), %edi
-
-	/* For any kind of error, we try again with WAKE.
-	   The general test also covers running on old kernels.  */
-	cmpl	$-4095, %eax
-	jb	4f
-
-7:
-#ifdef __ASSUME_PRIVATE_FUTEX
-	andl	$FUTEX_PRIVATE_FLAG, %ecx
-#else
-	andl	%gs:PRIVATE_FUTEX, %ecx
-#endif
-	orl	$FUTEX_WAKE, %ecx
-
-	movl	$SYS_futex, %eax
-	/* %edx should be 1 already from $FUTEX_WAKE_OP syscall.
-	movl	$1, %edx  */
-	ENTER_KERNEL
-
-	/* Unlock.  Note that at this point %edi always points to
-	   cond_lock.  */
-4:	LOCK
-	subl	$1, (%edi)
-	je	6b
-
-	/* Unlock in loop requires wakeup.  */
-5:	movl	%edi, %eax
-#if (LLL_SHARED-LLL_PRIVATE) > 255
-	xorl	%ecx, %ecx
-#endif
-	cmpl	$-1, dep_mutex-cond_futex(%ebx)
-	setne	%cl
-	subl	$1, %ecx
-	andl	$(LLL_SHARED-LLL_PRIVATE), %ecx
-#if LLL_PRIVATE != 0
-	addl	$LLL_PRIVATE, %ecx
-#endif
-	call	__lll_unlock_wake
-	jmp	6b
-
-	/* Initial locking failed.  */
-1:
-#if cond_lock == 0
-	movl	%edi, %edx
-#else
-	leal	cond_lock(%edi), %edx
-#endif
-#if (LLL_SHARED-LLL_PRIVATE) > 255
-	xorl	%ecx, %ecx
-#endif
-	cmpl	$-1, dep_mutex(%edi)
-	setne	%cl
-	subl	$1, %ecx
-	andl	$(LLL_SHARED-LLL_PRIVATE), %ecx
-#if LLL_PRIVATE != 0
-	addl	$LLL_PRIVATE, %ecx
-#endif
-	call	__lll_lock_wait
-	jmp	2b
-
-	cfi_endproc
-	.size	__pthread_cond_signal, .-__pthread_cond_signal
-versioned_symbol (libpthread, __pthread_cond_signal, pthread_cond_signal,
-		  GLIBC_2_3_2)
diff --git a/sysdeps/unix/sysv/linux/i386/pthread_cond_timedwait.S b/sysdeps/unix/sysv/linux/i386/pthread_cond_timedwait.S
deleted file mode 100644
index 6256376923..0000000000
--- a/sysdeps/unix/sysv/linux/i386/pthread_cond_timedwait.S
+++ /dev/null
@@ -1,974 +0,0 @@
-/* Copyright (C) 2002-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <shlib-compat.h>
-#include <lowlevellock.h>
-#include <lowlevelcond.h>
-#include <pthread-errnos.h>
-#include <pthread-pi-defines.h>
-#include <kernel-features.h>
-#include <stap-probe.h>
-
-	.text
-
-/* int pthread_cond_timedwait (pthread_cond_t *cond, pthread_mutex_t *mutex,
-			       const struct timespec *abstime)  */
-	.globl	__pthread_cond_timedwait
-	.type	__pthread_cond_timedwait, @function
-	.align	16
-__pthread_cond_timedwait:
-.LSTARTCODE:
-	cfi_startproc
-#ifdef SHARED
-	cfi_personality(DW_EH_PE_pcrel | DW_EH_PE_sdata4 | DW_EH_PE_indirect,
-			DW.ref.__gcc_personality_v0)
-	cfi_lsda(DW_EH_PE_pcrel | DW_EH_PE_sdata4, .LexceptSTART)
-#else
-	cfi_personality(DW_EH_PE_udata4, __gcc_personality_v0)
-	cfi_lsda(DW_EH_PE_udata4, .LexceptSTART)
-#endif
-
-	pushl	%ebp
-	cfi_adjust_cfa_offset(4)
-	cfi_rel_offset(%ebp, 0)
-	pushl	%edi
-	cfi_adjust_cfa_offset(4)
-	cfi_rel_offset(%edi, 0)
-	pushl	%esi
-	cfi_adjust_cfa_offset(4)
-	cfi_rel_offset(%esi, 0)
-	pushl	%ebx
-	cfi_adjust_cfa_offset(4)
-	cfi_rel_offset(%ebx, 0)
-
-	movl	20(%esp), %ebx
-	movl	28(%esp), %ebp
-
-	LIBC_PROBE (cond_timedwait, 3, %ebx, 24(%esp), %ebp)
-
-	cmpl	$1000000000, 4(%ebp)
-	movl	$EINVAL, %eax
-	jae	18f
-
-	/* Stack frame:
-
-	   esp + 32
-		    +--------------------------+
-	   esp + 24 | timeout value            |
-		    +--------------------------+
-	   esp + 20 | futex pointer            |
-		    +--------------------------+
-	   esp + 16 | pi-requeued flag         |
-		    +--------------------------+
-	   esp + 12 | old broadcast_seq value  |
-		    +--------------------------+
-	   esp +  4 | old wake_seq value       |
-		    +--------------------------+
-	   esp +  0 | old cancellation mode    |
-		    +--------------------------+
-	*/
-
-#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
-# ifdef PIC
-	LOAD_PIC_REG (cx)
-	cmpl	$0, __have_futex_clock_realtime@GOTOFF(%ecx)
-# else
-	cmpl	$0, __have_futex_clock_realtime
-# endif
-	je	.Lreltmo
-#endif
-
-	/* Get internal lock.  */
-	movl	$1, %edx
-	xorl	%eax, %eax
-	LOCK
-#if cond_lock == 0
-	cmpxchgl %edx, (%ebx)
-#else
-	cmpxchgl %edx, cond_lock(%ebx)
-#endif
-	jnz	1f
-
-	/* Store the reference to the mutex.  If there is already a
-	   different value in there this is a bad user bug.  */
-2:	cmpl	$-1, dep_mutex(%ebx)
-	movl	24(%esp), %eax
-	je	17f
-	movl	%eax, dep_mutex(%ebx)
-
-	/* Unlock the mutex.  */
-17:	xorl	%edx, %edx
-	call	__pthread_mutex_unlock_usercnt
-
-	testl	%eax, %eax
-	jne	16f
-
-	addl	$1, total_seq(%ebx)
-	adcl	$0, total_seq+4(%ebx)
-	addl	$1, cond_futex(%ebx)
-	addl	$(1 << nwaiters_shift), cond_nwaiters(%ebx)
-
-#ifdef __ASSUME_FUTEX_CLOCK_REALTIME
-# define FRAME_SIZE 24
-#else
-# define FRAME_SIZE 32
-#endif
-	subl	$FRAME_SIZE, %esp
-	cfi_adjust_cfa_offset(FRAME_SIZE)
-	cfi_remember_state
-
-	/* Get and store current wakeup_seq value.  */
-	movl	wakeup_seq(%ebx), %edi
-	movl	wakeup_seq+4(%ebx), %edx
-	movl	broadcast_seq(%ebx), %eax
-	movl	%edi, 4(%esp)
-	movl	%edx, 8(%esp)
-	movl	%eax, 12(%esp)
-
-	/* Reset the pi-requeued flag.  */
-	movl	$0, 16(%esp)
-
-	cmpl	$0, (%ebp)
-	movl	$-ETIMEDOUT, %esi
-	js	6f
-
-8:	movl	cond_futex(%ebx), %edi
-	movl	%edi, 20(%esp)
-
-	/* Unlock.  */
-	LOCK
-#if cond_lock == 0
-	subl	$1, (%ebx)
-#else
-	subl	$1, cond_lock(%ebx)
-#endif
-	jne	3f
-
-.LcleanupSTART:
-4:	call	__pthread_enable_asynccancel
-	movl	%eax, (%esp)
-
-	leal	(%ebp), %esi
-#if FUTEX_PRIVATE_FLAG > 255
-	xorl	%ecx, %ecx
-#endif
-	cmpl	$-1, dep_mutex(%ebx)
-	sete	%cl
-	je	40f
-
-	movl	dep_mutex(%ebx), %edi
-	/* Requeue to a non-robust PI mutex if the PI bit is set and
-	   the robust bit is not set.  */
-	movl	MUTEX_KIND(%edi), %eax
-	andl	$(ROBUST_BIT|PI_BIT), %eax
-	cmpl	$PI_BIT, %eax
-	jne	40f
-
-	movl	$(FUTEX_WAIT_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %ecx
-	/* The following only works like this because we only support
-	   two clocks, represented using a single bit.  */
-	testl	$1, cond_nwaiters(%ebx)
-	/* XXX Need to implement using sete instead of a jump.  */
-	jne	42f
-	orl	$FUTEX_CLOCK_REALTIME, %ecx
-
-42:	movl	20(%esp), %edx
-	addl	$cond_futex, %ebx
-.Ladd_cond_futex_pi:
-	movl	$SYS_futex, %eax
-	ENTER_KERNEL
-	subl	$cond_futex, %ebx
-.Lsub_cond_futex_pi:
-	movl	%eax, %esi
-	/* Set the pi-requeued flag only if the kernel has returned 0. The
-	   kernel does not hold the mutex on ETIMEDOUT or any other error.  */
-	cmpl	$0, %eax
-	sete	16(%esp)
-	je	41f
-
-	/* When a futex syscall with FUTEX_WAIT_REQUEUE_PI returns
-	   successfully, it has already locked the mutex for us and the
-	   pi_flag (16(%esp)) is set to denote that fact.  However, if another
-	   thread changed the futex value before we entered the wait, the
-	   syscall may return an EAGAIN and the mutex is not locked.  We go
-	   ahead with a success anyway since later we look at the pi_flag to
-	   decide if we got the mutex or not.  The sequence numbers then make
-	   sure that only one of the threads actually wake up.  We retry using
-	   normal FUTEX_WAIT only if the kernel returned ENOSYS, since normal
-	   and PI futexes don't mix.
-
-	   Note that we don't check for EAGAIN specifically; we assume that the
-	   only other error the futex function could return is EAGAIN (barring
-	   the ETIMEOUT of course, for the timeout case in futex) since
-	   anything else would mean an error in our function.  It is too
-	   expensive to do that check for every call (which is  quite common in
-	   case of a large number of threads), so it has been skipped.  */
-	cmpl	$-ENOSYS, %eax
-	jne	41f
-	xorl	%ecx, %ecx
-
-40:	subl	$1, %ecx
-	movl	$0, 16(%esp)
-#ifdef __ASSUME_PRIVATE_FUTEX
-	andl	$FUTEX_PRIVATE_FLAG, %ecx
-#else
-	andl	%gs:PRIVATE_FUTEX, %ecx
-#endif
-	addl	$FUTEX_WAIT_BITSET, %ecx
-	/* The following only works like this because we only support
-	   two clocks, represented using a single bit.  */
-	testl	$1, cond_nwaiters(%ebx)
-	jne	30f
-	orl	$FUTEX_CLOCK_REALTIME, %ecx
-30:
-	movl	20(%esp), %edx
-	movl	$0xffffffff, %ebp
-	addl	$cond_futex, %ebx
-.Ladd_cond_futex:
-	movl	$SYS_futex, %eax
-	ENTER_KERNEL
-	subl	$cond_futex, %ebx
-.Lsub_cond_futex:
-	movl	28+FRAME_SIZE(%esp), %ebp
-	movl	%eax, %esi
-
-41:	movl	(%esp), %eax
-	call	__pthread_disable_asynccancel
-.LcleanupEND:
-
-	/* Lock.  */
-	movl	$1, %edx
-	xorl	%eax, %eax
-	LOCK
-#if cond_lock == 0
-	cmpxchgl %edx, (%ebx)
-#else
-	cmpxchgl %edx, cond_lock(%ebx)
-#endif
-	jnz	5f
-
-6:	movl	broadcast_seq(%ebx), %eax
-	cmpl	12(%esp), %eax
-	jne	23f
-
-	movl	woken_seq(%ebx), %eax
-	movl	woken_seq+4(%ebx), %ecx
-
-	movl	wakeup_seq(%ebx), %edi
-	movl	wakeup_seq+4(%ebx), %edx
-
-	cmpl	8(%esp), %edx
-	jne	7f
-	cmpl	4(%esp), %edi
-	je	15f
-
-7:	cmpl	%ecx, %edx
-	jne	9f
-	cmp	%eax, %edi
-	jne	9f
-
-15:	cmpl	$-ETIMEDOUT, %esi
-	je	28f
-
-	/* We need to go back to futex_wait.  If we're using requeue_pi, then
-	   release the mutex we had acquired and go back.  */
-	movl	16(%esp), %edx
-	test	%edx, %edx
-	jz	8b
-
-	/* Adjust the mutex values first and then unlock it.  The unlock
-	   should always succeed or else the kernel did not lock the mutex
-	   correctly.  */
-	movl	dep_mutex(%ebx), %eax
-	call	__pthread_mutex_cond_lock_adjust
-	movl	dep_mutex(%ebx), %eax
-	xorl	%edx, %edx
-	call	__pthread_mutex_unlock_usercnt
-	jmp	8b
-
-28:	addl	$1, wakeup_seq(%ebx)
-	adcl	$0, wakeup_seq+4(%ebx)
-	addl	$1, cond_futex(%ebx)
-	movl	$ETIMEDOUT, %esi
-	jmp	14f
-
-23:	xorl	%esi, %esi
-	jmp	24f
-
-9:	xorl	%esi, %esi
-14:	addl	$1, woken_seq(%ebx)
-	adcl	$0, woken_seq+4(%ebx)
-
-24:	subl	$(1 << nwaiters_shift), cond_nwaiters(%ebx)
-
-	/* Wake up a thread which wants to destroy the condvar object.  */
-	movl	total_seq(%ebx), %eax
-	andl	total_seq+4(%ebx), %eax
-	cmpl	$0xffffffff, %eax
-	jne	25f
-	movl	cond_nwaiters(%ebx), %eax
-	andl	$~((1 << nwaiters_shift) - 1), %eax
-	jne	25f
-
-	addl	$cond_nwaiters, %ebx
-	movl	$SYS_futex, %eax
-#if FUTEX_PRIVATE_FLAG > 255
-	xorl	%ecx, %ecx
-#endif
-	cmpl	$-1, dep_mutex-cond_nwaiters(%ebx)
-	sete	%cl
-	subl	$1, %ecx
-#ifdef __ASSUME_PRIVATE_FUTEX
-	andl	$FUTEX_PRIVATE_FLAG, %ecx
-#else
-	andl	%gs:PRIVATE_FUTEX, %ecx
-#endif
-	addl	$FUTEX_WAKE, %ecx
-	movl	$1, %edx
-	ENTER_KERNEL
-	subl	$cond_nwaiters, %ebx
-
-25:	LOCK
-#if cond_lock == 0
-	subl	$1, (%ebx)
-#else
-	subl	$1, cond_lock(%ebx)
-#endif
-	jne	10f
-
-11:	movl	24+FRAME_SIZE(%esp), %eax
-	/* With requeue_pi, the mutex lock is held in the kernel.  */
-	movl	16(%esp), %ecx
-	testl	%ecx, %ecx
-	jnz	27f
-
-	call	__pthread_mutex_cond_lock
-26:	addl	$FRAME_SIZE, %esp
-	cfi_adjust_cfa_offset(-FRAME_SIZE)
-
-	/* We return the result of the mutex_lock operation if it failed.  */
-	testl	%eax, %eax
-#ifdef HAVE_CMOV
-	cmovel	%esi, %eax
-#else
-	jne	22f
-	movl	%esi, %eax
-22:
-#endif
-
-18:	popl	%ebx
-	cfi_adjust_cfa_offset(-4)
-	cfi_restore(%ebx)
-	popl	%esi
-	cfi_adjust_cfa_offset(-4)
-	cfi_restore(%esi)
-	popl	%edi
-	cfi_adjust_cfa_offset(-4)
-	cfi_restore(%edi)
-	popl	%ebp
-	cfi_adjust_cfa_offset(-4)
-	cfi_restore(%ebp)
-
-	ret
-
-	cfi_restore_state
-
-27:	call	__pthread_mutex_cond_lock_adjust
-	xorl	%eax, %eax
-	jmp	26b
-
-	cfi_adjust_cfa_offset(-FRAME_SIZE);
-	/* Initial locking failed.  */
-1:
-#if cond_lock == 0
-	movl	%ebx, %edx
-#else
-	leal	cond_lock(%ebx), %edx
-#endif
-#if (LLL_SHARED-LLL_PRIVATE) > 255
-	xorl	%ecx, %ecx
-#endif
-	cmpl	$-1, dep_mutex(%ebx)
-	setne	%cl
-	subl	$1, %ecx
-	andl	$(LLL_SHARED-LLL_PRIVATE), %ecx
-#if LLL_PRIVATE != 0
-	addl	$LLL_PRIVATE, %ecx
-#endif
-	call	__lll_lock_wait
-	jmp	2b
-
-	/* The initial unlocking of the mutex failed.  */
-16:
-	LOCK
-#if cond_lock == 0
-	subl	$1, (%ebx)
-#else
-	subl	$1, cond_lock(%ebx)
-#endif
-	jne	18b
-
-	movl	%eax, %esi
-#if cond_lock == 0
-	movl	%ebx, %eax
-#else
-	leal	cond_lock(%ebx), %eax
-#endif
-#if (LLL_SHARED-LLL_PRIVATE) > 255
-	xorl	%ecx, %ecx
-#endif
-	cmpl	$-1, dep_mutex(%ebx)
-	setne	%cl
-	subl	$1, %ecx
-	andl	$(LLL_SHARED-LLL_PRIVATE), %ecx
-#if LLL_PRIVATE != 0
-	addl	$LLL_PRIVATE, %ecx
-#endif
-	call	__lll_unlock_wake
-
-	movl	%esi, %eax
-	jmp	18b
-
-	cfi_adjust_cfa_offset(FRAME_SIZE)
-
-	/* Unlock in loop requires wakeup.  */
-3:
-#if cond_lock == 0
-	movl	%ebx, %eax
-#else
-	leal	cond_lock(%ebx), %eax
-#endif
-#if (LLL_SHARED-LLL_PRIVATE) > 255
-	xorl	%ecx, %ecx
-#endif
-	cmpl	$-1, dep_mutex(%ebx)
-	setne	%cl
-	subl	$1, %ecx
-	andl	$(LLL_SHARED-LLL_PRIVATE), %ecx
-#if LLL_PRIVATE != 0
-	addl	$LLL_PRIVATE, %ecx
-#endif
-	call	__lll_unlock_wake
-	jmp	4b
-
-	/* Locking in loop failed.  */
-5:
-#if cond_lock == 0
-	movl	%ebx, %edx
-#else
-	leal	cond_lock(%ebx), %edx
-#endif
-#if (LLL_SHARED-LLL_PRIVATE) > 255
-	xorl	%ecx, %ecx
-#endif
-	cmpl	$-1, dep_mutex(%ebx)
-	setne	%cl
-	subl	$1, %ecx
-	andl	$(LLL_SHARED-LLL_PRIVATE), %ecx
-#if LLL_PRIVATE != 0
-	addl	$LLL_PRIVATE, %ecx
-#endif
-	call	__lll_lock_wait
-	jmp	6b
-
-	/* Unlock after loop requires wakeup.  */
-10:
-#if cond_lock == 0
-	movl	%ebx, %eax
-#else
-	leal	cond_lock(%ebx), %eax
-#endif
-#if (LLL_SHARED-LLL_PRIVATE) > 255
-	xorl	%ecx, %ecx
-#endif
-	cmpl	$-1, dep_mutex(%ebx)
-	setne	%cl
-	subl	$1, %ecx
-	andl	$(LLL_SHARED-LLL_PRIVATE), %ecx
-#if LLL_PRIVATE != 0
-	addl	$LLL_PRIVATE, %ecx
-#endif
-	call	__lll_unlock_wake
-	jmp	11b
-
-#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
-	cfi_adjust_cfa_offset(-FRAME_SIZE)
-.Lreltmo:
-	/* Get internal lock.  */
-	movl	$1, %edx
-	xorl	%eax, %eax
-	LOCK
-# if cond_lock == 0
-	cmpxchgl %edx, (%ebx)
-# else
-	cmpxchgl %edx, cond_lock(%ebx)
-# endif
-	jnz	101f
-
-	/* Store the reference to the mutex.  If there is already a
-	   different value in there this is a bad user bug.  */
-102:	cmpl	$-1, dep_mutex(%ebx)
-	movl	24(%esp), %eax
-	je	117f
-	movl	%eax, dep_mutex(%ebx)
-
-	/* Unlock the mutex.  */
-117:	xorl	%edx, %edx
-	call	__pthread_mutex_unlock_usercnt
-
-	testl	%eax, %eax
-	jne	16b
-
-	addl	$1, total_seq(%ebx)
-	adcl	$0, total_seq+4(%ebx)
-	addl	$1, cond_futex(%ebx)
-	addl	$(1 << nwaiters_shift), cond_nwaiters(%ebx)
-
-	subl	$FRAME_SIZE, %esp
-	cfi_adjust_cfa_offset(FRAME_SIZE)
-
-	/* Get and store current wakeup_seq value.  */
-	movl	wakeup_seq(%ebx), %edi
-	movl	wakeup_seq+4(%ebx), %edx
-	movl	broadcast_seq(%ebx), %eax
-	movl	%edi, 4(%esp)
-	movl	%edx, 8(%esp)
-	movl	%eax, 12(%esp)
-
-	/* Reset the pi-requeued flag.  */
-	movl	$0, 16(%esp)
-
-	/* Get the current time.  */
-108:	movl	%ebx, %edx
-# ifdef __NR_clock_gettime
-	/* Get the clock number.  */
-	movl	cond_nwaiters(%ebx), %ebx
-	andl	$((1 << nwaiters_shift) - 1), %ebx
-	/* Only clocks 0 and 1 are allowed so far.  Both are handled in the
-	   kernel.  */
-	leal	24(%esp), %ecx
-	movl	$__NR_clock_gettime, %eax
-	ENTER_KERNEL
-	movl	%edx, %ebx
-
-	/* Compute relative timeout.  */
-	movl	(%ebp), %ecx
-	movl	4(%ebp), %edx
-	subl	24(%esp), %ecx
-	subl	28(%esp), %edx
-# else
-	/* Get the current time.  */
-	leal	24(%esp), %ebx
-	xorl	%ecx, %ecx
-	movl	$__NR_gettimeofday, %eax
-	ENTER_KERNEL
-	movl	%edx, %ebx
-
-	/* Compute relative timeout.  */
-	movl	28(%esp), %eax
-	movl	$1000, %edx
-	mul	%edx		/* Milli seconds to nano seconds.  */
-	movl	(%ebp), %ecx
-	movl	4(%ebp), %edx
-	subl	24(%esp), %ecx
-	subl	%eax, %edx
-# endif
-	jns	112f
-	addl	$1000000000, %edx
-	subl	$1, %ecx
-112:	testl	%ecx, %ecx
-	movl	$-ETIMEDOUT, %esi
-	js	106f
-
-	/* Store relative timeout.  */
-121:	movl	%ecx, 24(%esp)
-	movl	%edx, 28(%esp)
-
-	movl	cond_futex(%ebx), %edi
-	movl	%edi, 20(%esp)
-
-	/* Unlock.  */
-	LOCK
-# if cond_lock == 0
-	subl	$1, (%ebx)
-# else
-	subl	$1, cond_lock(%ebx)
-# endif
-	jne	103f
-
-.LcleanupSTART2:
-104:	call	__pthread_enable_asynccancel
-	movl	%eax, (%esp)
-
-	leal	24(%esp), %esi
-# if FUTEX_PRIVATE_FLAG > 255
-	xorl	%ecx, %ecx
-# endif
-	cmpl	$-1, dep_mutex(%ebx)
-	sete	%cl
-	subl	$1, %ecx
-# ifdef __ASSUME_PRIVATE_FUTEX
-	andl	$FUTEX_PRIVATE_FLAG, %ecx
-# else
-	andl	%gs:PRIVATE_FUTEX, %ecx
-# endif
-# if FUTEX_WAIT != 0
-	addl	$FUTEX_WAIT, %ecx
-# endif
-	movl	20(%esp), %edx
-	addl	$cond_futex, %ebx
-.Ladd_cond_futex2:
-	movl	$SYS_futex, %eax
-	ENTER_KERNEL
-	subl	$cond_futex, %ebx
-.Lsub_cond_futex2:
-	movl	%eax, %esi
-
-141:	movl	(%esp), %eax
-	call	__pthread_disable_asynccancel
-.LcleanupEND2:
-
-
-	/* Lock.  */
-	movl	$1, %edx
-	xorl	%eax, %eax
-	LOCK
-# if cond_lock == 0
-	cmpxchgl %edx, (%ebx)
-# else
-	cmpxchgl %edx, cond_lock(%ebx)
-# endif
-	jnz	105f
-
-106:	movl	broadcast_seq(%ebx), %eax
-	cmpl	12(%esp), %eax
-	jne	23b
-
-	movl	woken_seq(%ebx), %eax
-	movl	woken_seq+4(%ebx), %ecx
-
-	movl	wakeup_seq(%ebx), %edi
-	movl	wakeup_seq+4(%ebx), %edx
-
-	cmpl	8(%esp), %edx
-	jne	107f
-	cmpl	4(%esp), %edi
-	je	115f
-
-107:	cmpl	%ecx, %edx
-	jne	9b
-	cmp	%eax, %edi
-	jne	9b
-
-115:	cmpl	$-ETIMEDOUT, %esi
-	je	28b
-
-	jmp	8b
-
-	cfi_adjust_cfa_offset(-FRAME_SIZE)
-	/* Initial locking failed.  */
-101:
-# if cond_lock == 0
-	movl	%ebx, %edx
-# else
-	leal	cond_lock(%ebx), %edx
-# endif
-# if (LLL_SHARED-LLL_PRIVATE) > 255
-	xorl	%ecx, %ecx
-# endif
-	cmpl	$-1, dep_mutex(%ebx)
-	setne	%cl
-	subl	$1, %ecx
-	andl	$(LLL_SHARED-LLL_PRIVATE), %ecx
-# if LLL_PRIVATE != 0
-	addl	$LLL_PRIVATE, %ecx
-# endif
-	call	__lll_lock_wait
-	jmp	102b
-
-	cfi_adjust_cfa_offset(FRAME_SIZE)
-
-	/* Unlock in loop requires wakeup.  */
-103:
-# if cond_lock == 0
-	movl	%ebx, %eax
-# else
-	leal	cond_lock(%ebx), %eax
-# endif
-# if (LLL_SHARED-LLL_PRIVATE) > 255
-	xorl	%ecx, %ecx
-# endif
-	cmpl	$-1, dep_mutex(%ebx)
-	setne	%cl
-	subl	$1, %ecx
-	andl	$(LLL_SHARED-LLL_PRIVATE), %ecx
-# if LLL_PRIVATE != 0
-	addl	$LLL_PRIVATE, %ecx
-# endif
-	call	__lll_unlock_wake
-	jmp	104b
-
-	/* Locking in loop failed.  */
-105:
-# if cond_lock == 0
-	movl	%ebx, %edx
-# else
-	leal	cond_lock(%ebx), %edx
-# endif
-# if (LLL_SHARED-LLL_PRIVATE) > 255
-	xorl	%ecx, %ecx
-# endif
-	cmpl	$-1, dep_mutex(%ebx)
-	setne	%cl
-	subl	$1, %ecx
-	andl	$(LLL_SHARED-LLL_PRIVATE), %ecx
-# if LLL_PRIVATE != 0
-	addl	$LLL_PRIVATE, %ecx
-# endif
-	call	__lll_lock_wait
-	jmp	106b
-#endif
-
-	.size	__pthread_cond_timedwait, .-__pthread_cond_timedwait
-versioned_symbol (libpthread, __pthread_cond_timedwait, pthread_cond_timedwait,
-		  GLIBC_2_3_2)
-
-
-	.type	__condvar_tw_cleanup2, @function
-__condvar_tw_cleanup2:
-	subl	$cond_futex, %ebx
-	.size	__condvar_tw_cleanup2, .-__condvar_tw_cleanup2
-	.type	__condvar_tw_cleanup, @function
-__condvar_tw_cleanup:
-	movl	%eax, %esi
-
-	/* Get internal lock.  */
-	movl	$1, %edx
-	xorl	%eax, %eax
-	LOCK
-#if cond_lock == 0
-	cmpxchgl %edx, (%ebx)
-#else
-	cmpxchgl %edx, cond_lock(%ebx)
-#endif
-	jz	1f
-
-#if cond_lock == 0
-	movl	%ebx, %edx
-#else
-	leal	cond_lock(%ebx), %edx
-#endif
-#if (LLL_SHARED-LLL_PRIVATE) > 255
-	xorl	%ecx, %ecx
-#endif
-	cmpl	$-1, dep_mutex(%ebx)
-	setne	%cl
-	subl	$1, %ecx
-	andl	$(LLL_SHARED-LLL_PRIVATE), %ecx
-#if LLL_PRIVATE != 0
-	addl	$LLL_PRIVATE, %ecx
-#endif
-	call	__lll_lock_wait
-
-1:	movl	broadcast_seq(%ebx), %eax
-	cmpl	12(%esp), %eax
-	jne	3f
-
-	/* We increment the wakeup_seq counter only if it is lower than
-	   total_seq.  If this is not the case the thread was woken and
-	   then canceled.  In this case we ignore the signal.  */
-	movl	total_seq(%ebx), %eax
-	movl	total_seq+4(%ebx), %edi
-	cmpl	wakeup_seq+4(%ebx), %edi
-	jb	6f
-	ja	7f
-	cmpl	wakeup_seq(%ebx), %eax
-	jbe	7f
-
-6:	addl	$1, wakeup_seq(%ebx)
-	adcl	$0, wakeup_seq+4(%ebx)
-	addl	$1, cond_futex(%ebx)
-
-7:	addl	$1, woken_seq(%ebx)
-	adcl	$0, woken_seq+4(%ebx)
-
-3:	subl	$(1 << nwaiters_shift), cond_nwaiters(%ebx)
-
-	/* Wake up a thread which wants to destroy the condvar object.  */
-	xorl	%edi, %edi
-	movl	total_seq(%ebx), %eax
-	andl	total_seq+4(%ebx), %eax
-	cmpl	$0xffffffff, %eax
-	jne	4f
-	movl	cond_nwaiters(%ebx), %eax
-	andl	$~((1 << nwaiters_shift) - 1), %eax
-	jne	4f
-
-	addl	$cond_nwaiters, %ebx
-	movl	$SYS_futex, %eax
-#if FUTEX_PRIVATE_FLAG > 255
-	xorl	%ecx, %ecx
-#endif
-	cmpl	$-1, dep_mutex-cond_nwaiters(%ebx)
-	sete	%cl
-	subl	$1, %ecx
-#ifdef __ASSUME_PRIVATE_FUTEX
-	andl	$FUTEX_PRIVATE_FLAG, %ecx
-#else
-	andl	%gs:PRIVATE_FUTEX, %ecx
-#endif
-	addl	$FUTEX_WAKE, %ecx
-	movl	$1, %edx
-	ENTER_KERNEL
-	subl	$cond_nwaiters, %ebx
-	movl	$1, %edi
-
-4:	LOCK
-#if cond_lock == 0
-	subl	$1, (%ebx)
-#else
-	subl	$1, cond_lock(%ebx)
-#endif
-	je	2f
-
-#if cond_lock == 0
-	movl	%ebx, %eax
-#else
-	leal	cond_lock(%ebx), %eax
-#endif
-#if (LLL_SHARED-LLL_PRIVATE) > 255
-	xorl	%ecx, %ecx
-#endif
-	cmpl	$-1, dep_mutex(%ebx)
-	setne	%cl
-	subl	$1, %ecx
-	andl	$(LLL_SHARED-LLL_PRIVATE), %ecx
-#if LLL_PRIVATE != 0
-	addl	$LLL_PRIVATE, %ecx
-#endif
-	call	__lll_unlock_wake
-
-	/* Wake up all waiters to make sure no signal gets lost.  */
-2:	testl	%edi, %edi
-	jnz	5f
-	addl	$cond_futex, %ebx
-#if FUTEX_PRIVATE_FLAG > 255
-	xorl	%ecx, %ecx
-#endif
-	cmpl	$-1, dep_mutex-cond_futex(%ebx)
-	sete	%cl
-	subl	$1, %ecx
-#ifdef __ASSUME_PRIVATE_FUTEX
-	andl	$FUTEX_PRIVATE_FLAG, %ecx
-#else
-	andl	%gs:PRIVATE_FUTEX, %ecx
-#endif
-	addl	$FUTEX_WAKE, %ecx
-	movl	$SYS_futex, %eax
-	movl	$0x7fffffff, %edx
-	ENTER_KERNEL
-
-	/* Lock the mutex only if we don't own it already.  This only happens
-	   in case of PI mutexes, if we got cancelled after a successful
-	   return of the futex syscall and before disabling async
-	   cancellation.  */
-5:	movl	24+FRAME_SIZE(%esp), %eax
-	movl	MUTEX_KIND(%eax), %ebx
-	andl	$(ROBUST_BIT|PI_BIT), %ebx
-	cmpl	$PI_BIT, %ebx
-	jne	8f
-
-	movl	(%eax), %ebx
-	andl	$TID_MASK, %ebx
-	cmpl	%ebx, %gs:TID
-	jne	8f
-	/* We managed to get the lock.  Fix it up before returning.  */
-	call	__pthread_mutex_cond_lock_adjust
-	jmp	9f
-
-8:	call	__pthread_mutex_cond_lock
-
-9:	movl	%esi, (%esp)
-.LcallUR:
-	call	_Unwind_Resume
-	hlt
-.LENDCODE:
-	cfi_endproc
-	.size	__condvar_tw_cleanup, .-__condvar_tw_cleanup
-
-
-	.section .gcc_except_table,"a",@progbits
-.LexceptSTART:
-	.byte	DW_EH_PE_omit			# @LPStart format (omit)
-	.byte	DW_EH_PE_omit			# @TType format (omit)
-	.byte	DW_EH_PE_sdata4			# call-site format
-						# DW_EH_PE_sdata4
-	.uleb128 .Lcstend-.Lcstbegin
-.Lcstbegin:
-	.long	.LcleanupSTART-.LSTARTCODE
-	.long	.Ladd_cond_futex_pi-.LcleanupSTART
-	.long	__condvar_tw_cleanup-.LSTARTCODE
-	.uleb128  0
-	.long	.Ladd_cond_futex_pi-.LSTARTCODE
-	.long	.Lsub_cond_futex_pi-.Ladd_cond_futex_pi
-	.long	__condvar_tw_cleanup2-.LSTARTCODE
-	.uleb128  0
-	.long	.Lsub_cond_futex_pi-.LSTARTCODE
-	.long	.Ladd_cond_futex-.Lsub_cond_futex_pi
-	.long	__condvar_tw_cleanup-.LSTARTCODE
-	.uleb128  0
-	.long	.Ladd_cond_futex-.LSTARTCODE
-	.long	.Lsub_cond_futex-.Ladd_cond_futex
-	.long	__condvar_tw_cleanup2-.LSTARTCODE
-	.uleb128  0
-	.long	.Lsub_cond_futex-.LSTARTCODE
-	.long	.LcleanupEND-.Lsub_cond_futex
-	.long	__condvar_tw_cleanup-.LSTARTCODE
-	.uleb128  0
-#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
-	.long	.LcleanupSTART2-.LSTARTCODE
-	.long	.Ladd_cond_futex2-.LcleanupSTART2
-	.long	__condvar_tw_cleanup-.LSTARTCODE
-	.uleb128  0
-	.long	.Ladd_cond_futex2-.LSTARTCODE
-	.long	.Lsub_cond_futex2-.Ladd_cond_futex2
-	.long	__condvar_tw_cleanup2-.LSTARTCODE
-	.uleb128  0
-	.long	.Lsub_cond_futex2-.LSTARTCODE
-	.long	.LcleanupEND2-.Lsub_cond_futex2
-	.long	__condvar_tw_cleanup-.LSTARTCODE
-	.uleb128  0
-#endif
-	.long	.LcallUR-.LSTARTCODE
-	.long	.LENDCODE-.LcallUR
-	.long	0
-	.uleb128  0
-.Lcstend:
-
-
-#ifdef SHARED
-	.hidden DW.ref.__gcc_personality_v0
-	.weak	DW.ref.__gcc_personality_v0
-	.section .gnu.linkonce.d.DW.ref.__gcc_personality_v0,"aw",@progbits
-	.align	4
-	.type	DW.ref.__gcc_personality_v0, @object
-	.size	DW.ref.__gcc_personality_v0, 4
-DW.ref.__gcc_personality_v0:
-	.long   __gcc_personality_v0
-#endif
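
Editor's note: the timed-wait path removed above blocks with FUTEX_WAIT_BITSET so it can pass an absolute timeout, ORing in FUTEX_CLOCK_REALTIME when the condvar's clock is CLOCK_REALTIME and passing the 0xffffffff bitset (FUTEX_BITSET_MATCH_ANY) that the assembly loads into %ebp. A rough C rendering of that syscall (a sketch; error handling omitted):

#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <time.h>
#include <stddef.h>

static long
futex_abstimed_wait (unsigned int *futex_word, unsigned int expected,
		     const struct timespec *abstime)
{
  /* Block while *futex_word == expected, until the absolute time
     ABSTIME on CLOCK_REALTIME is reached.  */
  return syscall (SYS_futex, futex_word,
		  FUTEX_WAIT_BITSET | FUTEX_CLOCK_REALTIME, expected,
		  abstime, NULL, FUTEX_BITSET_MATCH_ANY);
}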
diff --git a/sysdeps/unix/sysv/linux/i386/pthread_cond_wait.S b/sysdeps/unix/sysv/linux/i386/pthread_cond_wait.S
deleted file mode 100644
index 5016718801..0000000000
--- a/sysdeps/unix/sysv/linux/i386/pthread_cond_wait.S
+++ /dev/null
@@ -1,642 +0,0 @@
-/* Copyright (C) 2002-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <shlib-compat.h>
-#include <lowlevellock.h>
-#include <lowlevelcond.h>
-#include <tcb-offsets.h>
-#include <pthread-errnos.h>
-#include <pthread-pi-defines.h>
-#include <kernel-features.h>
-#include <stap-probe.h>
-
-
-	.text
-
-/* int pthread_cond_wait (pthread_cond_t *cond, pthread_mutex_t *mutex)  */
-	.globl	__pthread_cond_wait
-	.type	__pthread_cond_wait, @function
-	.align	16
-__pthread_cond_wait:
-.LSTARTCODE:
-	cfi_startproc
-#ifdef SHARED
-	cfi_personality(DW_EH_PE_pcrel | DW_EH_PE_sdata4 | DW_EH_PE_indirect,
-			DW.ref.__gcc_personality_v0)
-	cfi_lsda(DW_EH_PE_pcrel | DW_EH_PE_sdata4, .LexceptSTART)
-#else
-	cfi_personality(DW_EH_PE_udata4, __gcc_personality_v0)
-	cfi_lsda(DW_EH_PE_udata4, .LexceptSTART)
-#endif
-
-	pushl	%ebp
-	cfi_adjust_cfa_offset(4)
-	cfi_rel_offset(%ebp, 0)
-	pushl	%edi
-	cfi_adjust_cfa_offset(4)
-	cfi_rel_offset(%edi, 0)
-	pushl	%esi
-	cfi_adjust_cfa_offset(4)
-	cfi_rel_offset(%esi, 0)
-	pushl	%ebx
-	cfi_adjust_cfa_offset(4)
-	cfi_rel_offset(%ebx, 0)
-
-	xorl	%esi, %esi
-	movl	20(%esp), %ebx
-
-	LIBC_PROBE (cond_wait, 2, 24(%esp), %ebx)
-
-	/* Get internal lock.  */
-	movl	$1, %edx
-	xorl	%eax, %eax
-	LOCK
-#if cond_lock == 0
-	cmpxchgl %edx, (%ebx)
-#else
-	cmpxchgl %edx, cond_lock(%ebx)
-#endif
-	jnz	1f
-
-	/* Store the reference to the mutex.  If there is already a
-	   different value in there this is a bad user bug.  */
-2:	cmpl	$-1, dep_mutex(%ebx)
-	movl	24(%esp), %eax
-	je	15f
-	movl	%eax, dep_mutex(%ebx)
-
-	/* Unlock the mutex.  */
-15:	xorl	%edx, %edx
-	call	__pthread_mutex_unlock_usercnt
-
-	testl	%eax, %eax
-	jne	12f
-
-	addl	$1, total_seq(%ebx)
-	adcl	$0, total_seq+4(%ebx)
-	addl	$1, cond_futex(%ebx)
-	addl	$(1 << nwaiters_shift), cond_nwaiters(%ebx)
-
-#define FRAME_SIZE 20
-	subl	$FRAME_SIZE, %esp
-	cfi_adjust_cfa_offset(FRAME_SIZE)
-	cfi_remember_state
-
-	/* Get and store current wakeup_seq value.  */
-	movl	wakeup_seq(%ebx), %edi
-	movl	wakeup_seq+4(%ebx), %edx
-	movl	broadcast_seq(%ebx), %eax
-	movl	%edi, 4(%esp)
-	movl	%edx, 8(%esp)
-	movl	%eax, 12(%esp)
-
-	/* Reset the pi-requeued flag.  */
-8:	movl	$0, 16(%esp)
-	movl	cond_futex(%ebx), %ebp
-
-	/* Unlock.  */
-	LOCK
-#if cond_lock == 0
-	subl	$1, (%ebx)
-#else
-	subl	$1, cond_lock(%ebx)
-#endif
-	jne	3f
-
-.LcleanupSTART:
-4:	call	__pthread_enable_asynccancel
-	movl	%eax, (%esp)
-
-	xorl	%ecx, %ecx
-	cmpl	$-1, dep_mutex(%ebx)
-	sete	%cl
-	je	18f
-
-	movl	dep_mutex(%ebx), %edi
-	/* Requeue to a non-robust PI mutex if the PI bit is set and
-	   the robust bit is not set.  */
-	movl	MUTEX_KIND(%edi), %eax
-	andl	$(ROBUST_BIT|PI_BIT), %eax
-	cmpl	$PI_BIT, %eax
-	jne	18f
-
-	movl	$(FUTEX_WAIT_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %ecx
-	movl	%ebp, %edx
-	xorl	%esi, %esi
-	addl	$cond_futex, %ebx
-.Ladd_cond_futex_pi:
-	movl	$SYS_futex, %eax
-	ENTER_KERNEL
-	subl	$cond_futex, %ebx
-.Lsub_cond_futex_pi:
-	/* Set the pi-requeued flag only if the kernel has returned 0. The
-	   kernel does not hold the mutex on error.  */
-	cmpl	$0, %eax
-	sete	16(%esp)
-	je	19f
-
-	/* When a futex syscall with FUTEX_WAIT_REQUEUE_PI returns
-	   successfully, it has already locked the mutex for us and the
-	   pi_flag (16(%esp)) is set to denote that fact.  However, if another
-	   thread changed the futex value before we entered the wait, the
-	   syscall may return an EAGAIN and the mutex is not locked.  We go
-	   ahead with a success anyway since later we look at the pi_flag to
-	   decide if we got the mutex or not.  The sequence numbers then make
-	   sure that only one of the threads actually wake up.  We retry using
-	   normal FUTEX_WAIT only if the kernel returned ENOSYS, since normal
-	   and PI futexes don't mix.
-
-	   Note that we don't check for EAGAIN specifically; we assume that the
-	   only other error the futex function could return is EAGAIN since
-	   anything else would mean an error in our function.  It is too
-	   expensive to do that check for every call (which is 	quite common in
-	   case of a large number of threads), so it has been skipped.  */
-	cmpl	$-ENOSYS, %eax
-	jne	19f
-	xorl	%ecx, %ecx
-
-18:	subl	$1, %ecx
-#ifdef __ASSUME_PRIVATE_FUTEX
-	andl	$FUTEX_PRIVATE_FLAG, %ecx
-#else
-	andl	%gs:PRIVATE_FUTEX, %ecx
-#endif
-#if FUTEX_WAIT != 0
-	addl	$FUTEX_WAIT, %ecx
-#endif
-	movl	%ebp, %edx
-	addl	$cond_futex, %ebx
-.Ladd_cond_futex:
-	movl	$SYS_futex, %eax
-	ENTER_KERNEL
-	subl	$cond_futex, %ebx
-.Lsub_cond_futex:
-
-19:	movl	(%esp), %eax
-	call	__pthread_disable_asynccancel
-.LcleanupEND:
-
-	/* Lock.  */
-	movl	$1, %edx
-	xorl	%eax, %eax
-	LOCK
-#if cond_lock == 0
-	cmpxchgl %edx, (%ebx)
-#else
-	cmpxchgl %edx, cond_lock(%ebx)
-#endif
-	jnz	5f
-
-6:	movl	broadcast_seq(%ebx), %eax
-	cmpl	12(%esp), %eax
-	jne	16f
-
-	movl	woken_seq(%ebx), %eax
-	movl	woken_seq+4(%ebx), %ecx
-
-	movl	wakeup_seq(%ebx), %edi
-	movl	wakeup_seq+4(%ebx), %edx
-
-	cmpl	8(%esp), %edx
-	jne	7f
-	cmpl	4(%esp), %edi
-	je	22f
-
-7:	cmpl	%ecx, %edx
-	jne	9f
-	cmp	%eax, %edi
-	je	22f
-
-9:	addl	$1, woken_seq(%ebx)
-	adcl	$0, woken_seq+4(%ebx)
-
-	/* Unlock */
-16:	subl	$(1 << nwaiters_shift), cond_nwaiters(%ebx)
-
-	/* Wake up a thread which wants to destroy the condvar object.  */
-	movl	total_seq(%ebx), %eax
-	andl	total_seq+4(%ebx), %eax
-	cmpl	$0xffffffff, %eax
-	jne	17f
-	movl	cond_nwaiters(%ebx), %eax
-	andl	$~((1 << nwaiters_shift) - 1), %eax
-	jne	17f
-
-	addl	$cond_nwaiters, %ebx
-	movl	$SYS_futex, %eax
-#if FUTEX_PRIVATE_FLAG > 255
-	xorl	%ecx, %ecx
-#endif
-	cmpl	$-1, dep_mutex-cond_nwaiters(%ebx)
-	sete	%cl
-	subl	$1, %ecx
-#ifdef __ASSUME_PRIVATE_FUTEX
-	andl	$FUTEX_PRIVATE_FLAG, %ecx
-#else
-	andl	%gs:PRIVATE_FUTEX, %ecx
-#endif
-	addl	$FUTEX_WAKE, %ecx
-	movl	$1, %edx
-	ENTER_KERNEL
-	subl	$cond_nwaiters, %ebx
-
-17:	LOCK
-#if cond_lock == 0
-	subl	$1, (%ebx)
-#else
-	subl	$1, cond_lock(%ebx)
-#endif
-	jne	10f
-
-	/* With requeue_pi, the mutex lock is held in the kernel.  */
-11:	movl	24+FRAME_SIZE(%esp), %eax
-	movl	16(%esp), %ecx
-	testl	%ecx, %ecx
-	jnz	21f
-
-	call	__pthread_mutex_cond_lock
-20:	addl	$FRAME_SIZE, %esp
-	cfi_adjust_cfa_offset(-FRAME_SIZE);
-
-14:	popl	%ebx
-	cfi_adjust_cfa_offset(-4)
-	cfi_restore(%ebx)
-	popl	%esi
-	cfi_adjust_cfa_offset(-4)
-	cfi_restore(%esi)
-	popl	%edi
-	cfi_adjust_cfa_offset(-4)
-	cfi_restore(%edi)
-	popl	%ebp
-	cfi_adjust_cfa_offset(-4)
-	cfi_restore(%ebp)
-
-	/* We return the result of the mutex_lock operation.  */
-	ret
-
-	cfi_restore_state
-
-21:	call	__pthread_mutex_cond_lock_adjust
-	xorl	%eax, %eax
-	jmp	20b
-
-	cfi_adjust_cfa_offset(-FRAME_SIZE);
-
-	/* We need to go back to futex_wait.  If we're using requeue_pi, then
-	   release the mutex we had acquired and go back.  */
-22:	movl	16(%esp), %edx
-	test	%edx, %edx
-	jz	8b
-
-	/* Adjust the mutex values first and then unlock it.  The unlock
-	   should always succeed or else the kernel did not lock the mutex
-	   correctly.  */
-	movl	dep_mutex(%ebx), %eax
-	call    __pthread_mutex_cond_lock_adjust
-	movl	dep_mutex(%ebx), %eax
-	xorl	%edx, %edx
-	call	__pthread_mutex_unlock_usercnt
-	jmp	8b
-
-	/* Initial locking failed.  */
-1:
-#if cond_lock == 0
-	movl	%ebx, %edx
-#else
-	leal	cond_lock(%ebx), %edx
-#endif
-#if (LLL_SHARED-LLL_PRIVATE) > 255
-	xorl	%ecx, %ecx
-#endif
-	cmpl	$-1, dep_mutex(%ebx)
-	setne	%cl
-	subl	$1, %ecx
-	andl	$(LLL_SHARED-LLL_PRIVATE), %ecx
-#if LLL_PRIVATE != 0
-	addl	$LLL_PRIVATE, %ecx
-#endif
-	call	__lll_lock_wait
-	jmp	2b
-
-	/* The initial unlocking of the mutex failed.  */
-12:
-	LOCK
-#if cond_lock == 0
-	subl	$1, (%ebx)
-#else
-	subl	$1, cond_lock(%ebx)
-#endif
-	jne	14b
-
-	movl	%eax, %esi
-#if cond_lock == 0
-	movl	%ebx, %eax
-#else
-	leal	cond_lock(%ebx), %eax
-#endif
-#if (LLL_SHARED-LLL_PRIVATE) > 255
-	xorl	%ecx, %ecx
-#endif
-	cmpl	$-1, dep_mutex(%ebx)
-	setne	%cl
-	subl	$1, %ecx
-	andl	$(LLL_SHARED-LLL_PRIVATE), %ecx
-#if LLL_PRIVATE != 0
-	addl	$LLL_PRIVATE, %ecx
-#endif
-	call	__lll_unlock_wake
-
-	movl	%esi, %eax
-	jmp	14b
-
-	cfi_adjust_cfa_offset(FRAME_SIZE)
-
-	/* Unlock in loop requires wakeup.  */
-3:
-#if cond_lock == 0
-	movl	%ebx, %eax
-#else
-	leal	cond_lock(%ebx), %eax
-#endif
-#if (LLL_SHARED-LLL_PRIVATE) > 255
-	xorl	%ecx, %ecx
-#endif
-	cmpl	$-1, dep_mutex(%ebx)
-	setne	%cl
-	subl	$1, %ecx
-	andl	$(LLL_SHARED-LLL_PRIVATE), %ecx
-#if LLL_PRIVATE != 0
-	addl	$LLL_PRIVATE, %ecx
-#endif
-	call	__lll_unlock_wake
-	jmp	4b
-
-	/* Locking in loop failed.  */
-5:
-#if cond_lock == 0
-	movl	%ebx, %edx
-#else
-	leal	cond_lock(%ebx), %edx
-#endif
-#if (LLL_SHARED-LLL_PRIVATE) > 255
-	xorl	%ecx, %ecx
-#endif
-	cmpl	$-1, dep_mutex(%ebx)
-	setne	%cl
-	subl	$1, %ecx
-	andl	$(LLL_SHARED-LLL_PRIVATE), %ecx
-#if LLL_PRIVATE != 0
-	addl	$LLL_PRIVATE, %ecx
-#endif
-	call	__lll_lock_wait
-	jmp	6b
-
-	/* Unlock after loop requires wakeup.  */
-10:
-#if cond_lock == 0
-	movl	%ebx, %eax
-#else
-	leal	cond_lock(%ebx), %eax
-#endif
-#if (LLL_SHARED-LLL_PRIVATE) > 255
-	xorl	%ecx, %ecx
-#endif
-	cmpl	$-1, dep_mutex(%ebx)
-	setne	%cl
-	subl	$1, %ecx
-	andl	$(LLL_SHARED-LLL_PRIVATE), %ecx
-#if LLL_PRIVATE != 0
-	addl	$LLL_PRIVATE, %ecx
-#endif
-	call	__lll_unlock_wake
-	jmp	11b
-
-	.size	__pthread_cond_wait, .-__pthread_cond_wait
-versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait,
-		  GLIBC_2_3_2)
-
-
-	.type	__condvar_w_cleanup2, @function
-__condvar_w_cleanup2:
-	subl	$cond_futex, %ebx
-	.size	__condvar_w_cleanup2, .-__condvar_w_cleanup2
-.LSbl4:
-	.type	__condvar_w_cleanup, @function
-__condvar_w_cleanup:
-	movl	%eax, %esi
-
-	/* Get internal lock.  */
-	movl	$1, %edx
-	xorl	%eax, %eax
-	LOCK
-#if cond_lock == 0
-	cmpxchgl %edx, (%ebx)
-#else
-	cmpxchgl %edx, cond_lock(%ebx)
-#endif
-	jz	1f
-
-#if cond_lock == 0
-	movl	%ebx, %edx
-#else
-	leal	cond_lock(%ebx), %edx
-#endif
-#if (LLL_SHARED-LLL_PRIVATE) > 255
-	xorl	%ecx, %ecx
-#endif
-	cmpl	$-1, dep_mutex(%ebx)
-	setne	%cl
-	subl	$1, %ecx
-	andl	$(LLL_SHARED-LLL_PRIVATE), %ecx
-#if LLL_PRIVATE != 0
-	addl	$LLL_PRIVATE, %ecx
-#endif
-	call	__lll_lock_wait
-
-1:	movl	broadcast_seq(%ebx), %eax
-	cmpl	12(%esp), %eax
-	jne	3f
-
-	/* We increment the wakeup_seq counter only if it is lower than
-	   total_seq.  If this is not the case the thread was woken and
-	   then canceled.  In this case we ignore the signal.  */
-	movl	total_seq(%ebx), %eax
-	movl	total_seq+4(%ebx), %edi
-	cmpl	wakeup_seq+4(%ebx), %edi
-	jb	6f
-	ja	7f
-	cmpl	wakeup_seq(%ebx), %eax
-	jbe	7f
-
-6:	addl	$1, wakeup_seq(%ebx)
-	adcl	$0, wakeup_seq+4(%ebx)
-	addl	$1, cond_futex(%ebx)
-
-7:	addl	$1, woken_seq(%ebx)
-	adcl	$0, woken_seq+4(%ebx)
-
-3:	subl	$(1 << nwaiters_shift), cond_nwaiters(%ebx)
-
-	/* Wake up a thread which wants to destroy the condvar object.  */
-	xorl	%edi, %edi
-	movl	total_seq(%ebx), %eax
-	andl	total_seq+4(%ebx), %eax
-	cmpl	$0xffffffff, %eax
-	jne	4f
-	movl	cond_nwaiters(%ebx), %eax
-	andl	$~((1 << nwaiters_shift) - 1), %eax
-	jne	4f
-
-	addl	$cond_nwaiters, %ebx
-	movl	$SYS_futex, %eax
-#if FUTEX_PRIVATE_FLAG > 255
-	xorl	%ecx, %ecx
-#endif
-	cmpl	$-1, dep_mutex-cond_nwaiters(%ebx)
-	sete	%cl
-	subl	$1, %ecx
-#ifdef __ASSUME_PRIVATE_FUTEX
-	andl	$FUTEX_PRIVATE_FLAG, %ecx
-#else
-	andl	%gs:PRIVATE_FUTEX, %ecx
-#endif
-	addl	$FUTEX_WAKE, %ecx
-	movl	$1, %edx
-	ENTER_KERNEL
-	subl	$cond_nwaiters, %ebx
-	movl	$1, %edi
-
-4:	LOCK
-#if cond_lock == 0
-	subl	$1, (%ebx)
-#else
-	subl	$1, cond_lock(%ebx)
-#endif
-	je	2f
-
-#if cond_lock == 0
-	movl	%ebx, %eax
-#else
-	leal	cond_lock(%ebx), %eax
-#endif
-#if (LLL_SHARED-LLL_PRIVATE) > 255
-	xorl	%ecx, %ecx
-#endif
-	cmpl	$-1, dep_mutex(%ebx)
-	setne	%cl
-	subl	$1, %ecx
-	andl	$(LLL_SHARED-LLL_PRIVATE), %ecx
-#if LLL_PRIVATE != 0
-	addl	$LLL_PRIVATE, %ecx
-#endif
-	call	__lll_unlock_wake
-
-	/* Wake up all waiters to make sure no signal gets lost.  */
-2:	testl	%edi, %edi
-	jnz	5f
-	addl	$cond_futex, %ebx
-#if FUTEX_PRIVATE_FLAG > 255
-	xorl	%ecx, %ecx
-#endif
-	cmpl	$-1, dep_mutex-cond_futex(%ebx)
-	sete	%cl
-	subl	$1, %ecx
-#ifdef __ASSUME_PRIVATE_FUTEX
-	andl	$FUTEX_PRIVATE_FLAG, %ecx
-#else
-	andl	%gs:PRIVATE_FUTEX, %ecx
-#endif
-	addl	$FUTEX_WAKE, %ecx
-	movl	$SYS_futex, %eax
-	movl	$0x7fffffff, %edx
-	ENTER_KERNEL
-
-	/* Lock the mutex only if we don't own it already.  This only happens
-	   in case of PI mutexes, if we got cancelled after a successful
-	   return of the futex syscall and before disabling async
-	   cancellation.  */
-5:	movl	24+FRAME_SIZE(%esp), %eax
-	movl	MUTEX_KIND(%eax), %ebx
-	andl	$(ROBUST_BIT|PI_BIT), %ebx
-	cmpl	$PI_BIT, %ebx
-	jne	8f
-
-	movl	(%eax), %ebx
-	andl	$TID_MASK, %ebx
-	cmpl	%ebx, %gs:TID
-	jne	8f
-	/* We managed to get the lock.  Fix it up before returning.  */
-	call	__pthread_mutex_cond_lock_adjust
-	jmp	9f
-
-8:	call	__pthread_mutex_cond_lock
-
-9:	movl	%esi, (%esp)
-.LcallUR:
-	call	_Unwind_Resume
-	hlt
-.LENDCODE:
-	cfi_endproc
-	.size	__condvar_w_cleanup, .-__condvar_w_cleanup
-
-
-	.section .gcc_except_table,"a",@progbits
-.LexceptSTART:
-	.byte	DW_EH_PE_omit			# @LPStart format (omit)
-	.byte	DW_EH_PE_omit			# @TType format (omit)
-	.byte	DW_EH_PE_sdata4			# call-site format
-						# DW_EH_PE_sdata4
-	.uleb128 .Lcstend-.Lcstbegin
-.Lcstbegin:
-	.long	.LcleanupSTART-.LSTARTCODE
-	.long	.Ladd_cond_futex_pi-.LcleanupSTART
-	.long	__condvar_w_cleanup-.LSTARTCODE
-	.uleb128  0
-	.long	.Ladd_cond_futex_pi-.LSTARTCODE
-	.long	.Lsub_cond_futex_pi-.Ladd_cond_futex_pi
-	.long	__condvar_w_cleanup2-.LSTARTCODE
-	.uleb128  0
-	.long	.Lsub_cond_futex_pi-.LSTARTCODE
-	.long	.Ladd_cond_futex-.Lsub_cond_futex_pi
-	.long	__condvar_w_cleanup-.LSTARTCODE
-	.uleb128  0
-	.long	.Ladd_cond_futex-.LSTARTCODE
-	.long	.Lsub_cond_futex-.Ladd_cond_futex
-	.long	__condvar_w_cleanup2-.LSTARTCODE
-	.uleb128  0
-	.long	.Lsub_cond_futex-.LSTARTCODE
-	.long	.LcleanupEND-.Lsub_cond_futex
-	.long	__condvar_w_cleanup-.LSTARTCODE
-	.uleb128  0
-	.long	.LcallUR-.LSTARTCODE
-	.long	.LENDCODE-.LcallUR
-	.long	0
-	.uleb128  0
-.Lcstend:
-
-#ifdef SHARED
-	.hidden DW.ref.__gcc_personality_v0
-	.weak   DW.ref.__gcc_personality_v0
-	.section .gnu.linkonce.d.DW.ref.__gcc_personality_v0,"aw",@progbits
-	.align 4
-	.type   DW.ref.__gcc_personality_v0, @object
-	.size   DW.ref.__gcc_personality_v0, 4
-DW.ref.__gcc_personality_v0:
-	.long   __gcc_personality_v0
-#endif
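
For reference while reading the removal above: __condvar_w_cleanup is the cancellation handler of the old algorithm, and its bookkeeping translates roughly into the C sketch below.  All names (old_cond_t, ll_lock, futex_wake, NWAITERS_SHIFT) are illustrative stand-ins rather than glibc internals, and the clock bits kept in the low end of nwaiters are glossed over.

#include <limits.h>
#include <linux/futex.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

typedef struct
{
  int lock;                  /* cond_lock: 0 free, 1 taken */
  unsigned int futex;        /* cond_futex */
  uint64_t total_seq;        /* waiters that ever blocked */
  uint64_t wakeup_seq;       /* wakeups granted */
  uint64_t woken_seq;        /* wakeups consumed */
  unsigned int nwaiters;     /* waiter count << NWAITERS_SHIFT */
  unsigned int broadcast_seq;
} old_cond_t;

#define NWAITERS_SHIFT 1

static void
futex_wake (unsigned int *addr, int nr)
{
  syscall (SYS_futex, addr, FUTEX_WAKE, nr, NULL, NULL, 0);
}

/* The real low-level lock futex-waits instead of spinning.  */
static void ll_lock (int *l) { while (__sync_lock_test_and_set (l, 1)) ; }
static void ll_unlock (int *l) { __sync_lock_release (l); }

/* bc_seq is the broadcast_seq value recorded before blocking.  */
static void
condvar_cleanup_sketch (old_cond_t *c, unsigned int bc_seq)
{
  int destroy_wake = 0;

  ll_lock (&c->lock);
  if (c->broadcast_seq == bc_seq)
    {
      /* Consume a wakeup slot only if one is still outstanding;
         otherwise we were woken first and cancelled afterwards.  */
      if (c->wakeup_seq < c->total_seq)
        {
          ++c->wakeup_seq;
          ++c->futex;
        }
      ++c->woken_seq;
    }
  c->nwaiters -= 1 << NWAITERS_SHIFT;

  /* A condvar being destroyed has total_seq set to -1; if we were its
     last waiter, wake the thread blocked in pthread_cond_destroy.  */
  if (c->total_seq == UINT64_MAX
      && (c->nwaiters >> NWAITERS_SHIFT) == 0)
    {
      futex_wake (&c->nwaiters, 1);
      destroy_wake = 1;
    }
  ll_unlock (&c->lock);

  /* Otherwise wake everyone so a signal sent concurrently with the
     cancellation cannot be lost.  */
  if (!destroy_wake)
    futex_wake (&c->futex, INT_MAX);
}
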
diff --git a/sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h b/sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h
index d230ac9678..387bbca78a 100644
--- a/sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h
+++ b/sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h
@@ -123,19 +123,32 @@ typedef union
 
 
 /* Data structure for conditional variable handling.  The structure of
-   the attribute type is deliberately not exposed.  */
+   the attribute type is not exposed on purpose.  */
 typedef union
 {
   struct
   {
-    int __lock;
-    unsigned int __futex;
-    __extension__ unsigned long long int __total_seq;
-    __extension__ unsigned long long int __wakeup_seq;
-    __extension__ unsigned long long int __woken_seq;
-    void *__mutex;
-    unsigned int __nwaiters;
-    unsigned int __broadcast_seq;
+    __extension__ union
+    {
+      __extension__ unsigned long long int __wseq;
+      struct {
+	unsigned int __low;
+	unsigned int __high;
+      } __wseq32;
+    };
+    __extension__ union
+    {
+      __extension__ unsigned long long int __g1_start;
+      struct {
+	unsigned int __low;
+	unsigned int __high;
+      } __g1_start32;
+    };
+    unsigned int __g_refs[2];
+    unsigned int __g_size[2];
+    unsigned int __g1_orig_size;
+    unsigned int __wrefs;
+    unsigned int __g_signals[2];
   } __data;
   char __size[__SIZEOF_PTHREAD_COND_T];
   __extension__ long long int __align;
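
The replacement layout above encodes the new design directly in the type: __wseq is the waiter sequence counter, __g1_start records where the group currently being signalled (G1) begins, and the two-element arrays hold per-group signal counts, references, and sizes.  As a hedged illustration (the authoritative description lives in the rewritten nptl/pthread_cond_wait.c; the helper below is not glibc code), the low bit of __wseq indexes the group slot that incoming waiters join:

#include <stdatomic.h>
#include <stdint.h>

/* Illustrative: a waiter registers by adding 2 to __wseq, which keeps
   the low bit (the index of the current G2 slot) intact; the remaining
   bits are the waiter's position in the total order of waiters.  */
static unsigned int
waiter_register_sketch (_Atomic uint64_t *wseq, uint64_t *position)
{
  uint64_t w = atomic_fetch_add (wseq, 2);
  *position = w >> 1;              /* this waiter's sequence number */
  return (unsigned int) (w & 1);   /* group slot the waiter joins */
}

Keeping waiters in two generations is what prevents a signal from being consumed by a waiter that arrived after the signal was sent, which is the correctness problem this rewrite addresses.
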
diff --git a/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S b/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S
deleted file mode 100644
index de455dda4f..0000000000
--- a/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S
+++ /dev/null
@@ -1,177 +0,0 @@
-/* Copyright (C) 2002-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <shlib-compat.h>
-#include <lowlevellock.h>
-#include <lowlevelcond.h>
-#include <kernel-features.h>
-#include <pthread-pi-defines.h>
-#include <pthread-errnos.h>
-#include <stap-probe.h>
-
-	.text
-
-	/* int pthread_cond_broadcast (pthread_cond_t *cond) */
-ENTRY(__pthread_cond_broadcast)
-
-	LIBC_PROBE (cond_broadcast, 1, %rdi)
-
-	/* Get internal lock.  */
-	movl	$1, %esi
-	xorl	%eax, %eax
-	LOCK
-#if cond_lock == 0
-	cmpxchgl %esi, (%rdi)
-#else
-	cmpxchgl %esi, cond_lock(%rdi)
-#endif
-	jnz	1f
-
-2:	addq	$cond_futex, %rdi
-	movq	total_seq-cond_futex(%rdi), %r9
-	cmpq	wakeup_seq-cond_futex(%rdi), %r9
-	jna	4f
-
-	/* Cause all currently waiting threads to recognize they are
-	   woken up.  */
-	movq	%r9, wakeup_seq-cond_futex(%rdi)
-	movq	%r9, woken_seq-cond_futex(%rdi)
-	addq	%r9, %r9
-	movl	%r9d, (%rdi)
-	incl	broadcast_seq-cond_futex(%rdi)
-
-	/* Get the address of the mutex used.  */
-	mov	dep_mutex-cond_futex(%rdi), %R8_LP
-
-	/* Unlock.  */
-	LOCK
-	decl	cond_lock-cond_futex(%rdi)
-	jne	7f
-
-8:	cmp	$-1, %R8_LP
-	je	9f
-
-	/* Do not use requeue for pshared condvars.  */
-	testl	$PS_BIT, MUTEX_KIND(%r8)
-	jne	9f
-
-	/* Requeue to a PI mutex if the PI bit is set.  */
-	movl	MUTEX_KIND(%r8), %eax
-	andl	$(ROBUST_BIT|PI_BIT), %eax
-	cmpl	$PI_BIT, %eax
-	je	81f
-
-	/* Wake up all threads.  */
-#ifdef __ASSUME_PRIVATE_FUTEX
-	movl	$(FUTEX_CMP_REQUEUE|FUTEX_PRIVATE_FLAG), %esi
-#else
-	movl	%fs:PRIVATE_FUTEX, %esi
-	orl	$FUTEX_CMP_REQUEUE, %esi
-#endif
-	movl	$SYS_futex, %eax
-	movl	$1, %edx
-	movl	$0x7fffffff, %r10d
-	syscall
-
-	/* For any kind of error, which is mainly EAGAIN, we try again
-	   with WAKE.  The general test also covers running on old
-	   kernels.  */
-	cmpq	$-4095, %rax
-	jae	9f
-
-10:	xorl	%eax, %eax
-	retq
-
-	/* Wake up all threads.  */
-81:	movl	$(FUTEX_CMP_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %esi
-	movl	$SYS_futex, %eax
-	movl	$1, %edx
-	movl	$0x7fffffff, %r10d
-	syscall
-
-	/* For any kind of error, which is mainly EAGAIN, we try again
-	   with WAKE.  The general test also covers running on old
-	   kernels.  */
-	cmpq	$-4095, %rax
-	jb	10b
-	jmp	9f
-
-	.align	16
-	/* Unlock.  */
-4:	LOCK
-	decl	cond_lock-cond_futex(%rdi)
-	jne	5f
-
-6:	xorl	%eax, %eax
-	retq
-
-	/* Initial locking failed.  */
-1:
-#if cond_lock != 0
-	addq	$cond_lock, %rdi
-#endif
-	LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi)
-	movl	$LLL_PRIVATE, %eax
-	movl	$LLL_SHARED, %esi
-	cmovne	%eax, %esi
-	callq	__lll_lock_wait
-#if cond_lock != 0
-	subq	$cond_lock, %rdi
-#endif
-	jmp	2b
-
-	/* Unlock in loop requires wakeup.  */
-5:	addq	$cond_lock-cond_futex, %rdi
-	LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi)
-	movl	$LLL_PRIVATE, %eax
-	movl	$LLL_SHARED, %esi
-	cmovne	%eax, %esi
-	callq	__lll_unlock_wake
-	jmp	6b
-
-	/* Unlock before the requeue/wake syscall requires wakeup.  */
-7:	addq	$cond_lock-cond_futex, %rdi
-	cmp	$-1, %R8_LP
-	movl	$LLL_PRIVATE, %eax
-	movl	$LLL_SHARED, %esi
-	cmovne	%eax, %esi
-	callq	__lll_unlock_wake
-	subq	$cond_lock-cond_futex, %rdi
-	jmp	8b
-
-9:	/* The futex requeue functionality is not available.  */
-	cmp	$-1, %R8_LP
-	movl	$0x7fffffff, %edx
-#ifdef __ASSUME_PRIVATE_FUTEX
-	movl	$FUTEX_WAKE, %eax
-	movl	$(FUTEX_WAKE|FUTEX_PRIVATE_FLAG), %esi
-	cmove	%eax, %esi
-#else
-	movl	$0, %eax
-	movl	%fs:PRIVATE_FUTEX, %esi
-	cmove	%eax, %esi
-	orl	$FUTEX_WAKE, %esi
-#endif
-	movl	$SYS_futex, %eax
-	syscall
-	jmp	10b
-END(__pthread_cond_broadcast)
-
-versioned_symbol (libpthread, __pthread_cond_broadcast, pthread_cond_broadcast,
-		  GLIBC_2_3_2)
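
Summarizing the file just deleted: the old broadcast took the internal lock, declared every current waiter woken by setting wakeup_seq and woken_seq to total_seq (and the futex word to twice that value), bumped broadcast_seq, and then tried to requeue all but one waiter onto the mutex futex, waking everybody only as the fallback.  A hedged C sketch with illustrative names, omitting the PI and process-shared special cases:

#include <limits.h>
#include <linux/futex.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

typedef struct
{
  int lock;
  unsigned int futex;
  uint64_t total_seq, wakeup_seq, woken_seq;
  unsigned int broadcast_seq;
  unsigned int *mutex_futex;   /* futex word of the dep_mutex */
} old_cond_t;

static void ll_lock (int *l) { while (__sync_lock_test_and_set (l, 1)) ; }
static void ll_unlock (int *l) { __sync_lock_release (l); }

static int
cond_broadcast_sketch (old_cond_t *c)
{
  ll_lock (&c->lock);
  if (c->total_seq == c->wakeup_seq)
    {
      ll_unlock (&c->lock);            /* no waiters */
      return 0;
    }
  c->wakeup_seq = c->total_seq;
  c->woken_seq = c->total_seq;
  c->futex = (unsigned int) (2 * c->total_seq);
  ++c->broadcast_seq;
  ll_unlock (&c->lock);

  /* Wake one waiter and move the rest onto the mutex futex so they do
     not stampede; the last argument makes the kernel recheck the futex
     word.  On any error (e.g. EAGAIN), fall back to waking everyone.  */
  if (syscall (SYS_futex, &c->futex, FUTEX_CMP_REQUEUE, 1,
               (unsigned long) INT_MAX, c->mutex_futex, c->futex) < 0)
    syscall (SYS_futex, &c->futex, FUTEX_WAKE, INT_MAX, NULL, NULL, 0);
  return 0;
}

The requeue is the reason the old code stored the mutex pointer in dep_mutex: the kernel needs the mutex futex address at broadcast time, which in turn is why requeueing had to be skipped for process-shared condvars, whose mutex address differs between processes.
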
diff --git a/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S b/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S
deleted file mode 100644
index da14bc30bc..0000000000
--- a/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S
+++ /dev/null
@@ -1,161 +0,0 @@
-/* Copyright (C) 2002-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <shlib-compat.h>
-#include <lowlevellock.h>
-#include <lowlevelcond.h>
-#include <pthread-pi-defines.h>
-#include <kernel-features.h>
-#include <pthread-errnos.h>
-#include <stap-probe.h>
-
-
-	.text
-
-ENTRY(__pthread_cond_signal)
-
-	LIBC_PROBE (cond_signal, 1, %rdi)
-
-	/* Get internal lock.  */
-	movq	%rdi, %r8
-	movl	$1, %esi
-	xorl	%eax, %eax
-	LOCK
-#if cond_lock == 0
-	cmpxchgl %esi, (%rdi)
-#else
-	cmpxchgl %esi, cond_lock(%rdi)
-#endif
-	jnz	1f
-
-2:	addq	$cond_futex, %rdi
-	movq	total_seq(%r8), %rcx
-	cmpq	wakeup_seq(%r8), %rcx
-	jbe	4f
-
-	/* Bump the wakeup number.  */
-	addq	$1, wakeup_seq(%r8)
-	addl	$1, (%rdi)
-
-	/* Wake up one thread.  */
-	LP_OP(cmp) $-1, dep_mutex(%r8)
-	movl	$FUTEX_WAKE_OP, %esi
-	movl	$1, %edx
-	movl	$SYS_futex, %eax
-	je	8f
-
-	/* Get the address of the mutex used.  */
-	mov     dep_mutex(%r8), %RCX_LP
-	movl	MUTEX_KIND(%rcx), %r11d
-	andl	$(ROBUST_BIT|PI_BIT), %r11d
-	cmpl	$PI_BIT, %r11d
-	je	9f
-
-#ifdef __ASSUME_PRIVATE_FUTEX
-	movl	$(FUTEX_WAKE_OP|FUTEX_PRIVATE_FLAG), %esi
-#else
-	orl	%fs:PRIVATE_FUTEX, %esi
-#endif
-
-8:	movl	$1, %r10d
-#if cond_lock != 0
-	addq	$cond_lock, %r8
-#endif
-	movl	$FUTEX_OP_CLEAR_WAKE_IF_GT_ONE, %r9d
-	syscall
-#if cond_lock != 0
-	subq	$cond_lock, %r8
-#endif
-	/* For any kind of error, we try again with WAKE.
-	   The general test also covers running on old kernels.  */
-	cmpq	$-4095, %rax
-	jae	7f
-
-	xorl	%eax, %eax
-	retq
-
-	/* Wake up one thread and requeue none in the PI Mutex case.  */
-9:	movl	$(FUTEX_CMP_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %esi
-	movq	%rcx, %r8
-	xorq	%r10, %r10
-	movl	(%rdi), %r9d	// XXX Can this be right?
-	syscall
-
-	leaq	-cond_futex(%rdi), %r8
-
-	/* For any kind of error, we try again with WAKE.
-	   The general test also covers running on old kernels.  */
-	cmpq	$-4095, %rax
-	jb	4f
-
-7:
-#ifdef __ASSUME_PRIVATE_FUTEX
-	andl	$FUTEX_PRIVATE_FLAG, %esi
-#else
-	andl	%fs:PRIVATE_FUTEX, %esi
-#endif
-	orl	$FUTEX_WAKE, %esi
-	movl	$SYS_futex, %eax
-	/* %rdx should be 1 already from $FUTEX_WAKE_OP syscall.
-	movl	$1, %edx  */
-	syscall
-
-	/* Unlock.  */
-4:	LOCK
-#if cond_lock == 0
-	decl	(%r8)
-#else
-	decl	cond_lock(%r8)
-#endif
-	jne	5f
-
-6:	xorl	%eax, %eax
-	retq
-
-	/* Initial locking failed.  */
-1:
-#if cond_lock != 0
-	addq	$cond_lock, %rdi
-#endif
-	LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi)
-	movl	$LLL_PRIVATE, %eax
-	movl	$LLL_SHARED, %esi
-	cmovne	%eax, %esi
-	callq	__lll_lock_wait
-#if cond_lock != 0
-	subq	$cond_lock, %rdi
-#endif
-	jmp	2b
-
-	/* Unlock in loop requires wakeup.  */
-5:
-	movq	%r8, %rdi
-#if cond_lock != 0
-	addq	$cond_lock, %rdi
-#endif
-	LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi)
-	movl	$LLL_PRIVATE, %eax
-	movl	$LLL_SHARED, %esi
-	cmovne	%eax, %esi
-	callq	__lll_unlock_wake
-	jmp	6b
-END(__pthread_cond_signal)
-
-versioned_symbol (libpthread, __pthread_cond_signal, pthread_cond_signal,
-		  GLIBC_2_3_2)
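
The deleted signal path is the same idea for a single wakeup: if a waiter is eligible (total_seq > wakeup_seq), grant one wakeup and wake one thread.  The assembly fuses releasing the internal lock with the wakeup in a single FUTEX_WAKE_OP syscall; the hedged sketch below (illustrative names) shows only the plain FUTEX_WAKE fallback path:

#include <linux/futex.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

typedef struct
{
  int lock;
  unsigned int futex;
  uint64_t total_seq, wakeup_seq;
} old_cond_t;

static void ll_lock (int *l) { while (__sync_lock_test_and_set (l, 1)) ; }
static void ll_unlock (int *l) { __sync_lock_release (l); }

static int
cond_signal_sketch (old_cond_t *c)
{
  ll_lock (&c->lock);
  if (c->total_seq > c->wakeup_seq)
    {
      /* Grant one wakeup.  Bumping the futex word makes a concurrent
         FUTEX_WAIT on the old value return EAGAIN instead of sleeping
         through the signal.  */
      ++c->wakeup_seq;
      ++c->futex;
      ll_unlock (&c->lock);
      syscall (SYS_futex, &c->futex, FUTEX_WAKE, 1, NULL, NULL, 0);
    }
  else
    ll_unlock (&c->lock);
  return 0;
}
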
diff --git a/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S b/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
deleted file mode 100644
index 82ffa1a37e..0000000000
--- a/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
+++ /dev/null
@@ -1,623 +0,0 @@
-/* Copyright (C) 2002-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <shlib-compat.h>
-#include <lowlevellock.h>
-#include <lowlevelcond.h>
-#include <pthread-pi-defines.h>
-#include <pthread-errnos.h>
-#include <stap-probe.h>
-
-#include <kernel-features.h>
-
-
-	.text
-
-
-/* int pthread_cond_timedwait (pthread_cond_t *cond, pthread_mutex_t *mutex,
-			       const struct timespec *abstime)  */
-	.globl	__pthread_cond_timedwait
-	.type	__pthread_cond_timedwait, @function
-	.align	16
-__pthread_cond_timedwait:
-.LSTARTCODE:
-	cfi_startproc
-#ifdef SHARED
-	cfi_personality(DW_EH_PE_pcrel | DW_EH_PE_sdata4 | DW_EH_PE_indirect,
-			DW.ref.__gcc_personality_v0)
-	cfi_lsda(DW_EH_PE_pcrel | DW_EH_PE_sdata4, .LexceptSTART)
-#else
-	cfi_personality(DW_EH_PE_udata4, __gcc_personality_v0)
-	cfi_lsda(DW_EH_PE_udata4, .LexceptSTART)
-#endif
-
-	pushq	%r12
-	cfi_adjust_cfa_offset(8)
-	cfi_rel_offset(%r12, 0)
-	pushq	%r13
-	cfi_adjust_cfa_offset(8)
-	cfi_rel_offset(%r13, 0)
-	pushq	%r14
-	cfi_adjust_cfa_offset(8)
-	cfi_rel_offset(%r14, 0)
-	pushq	%r15
-	cfi_adjust_cfa_offset(8)
-	cfi_rel_offset(%r15, 0)
-#define FRAME_SIZE (32+8)
-	subq	$FRAME_SIZE, %rsp
-	cfi_adjust_cfa_offset(FRAME_SIZE)
-	cfi_remember_state
-
-	LIBC_PROBE (cond_timedwait, 3, %rdi, %rsi, %rdx)
-
-	cmpq	$1000000000, 8(%rdx)
-	movl	$EINVAL, %eax
-	jae	48f
-
-	/* Stack frame:
-
-	   rsp + 48
-		    +--------------------------+
-	   rsp + 32 | timeout value            |
-		    +--------------------------+
-	   rsp + 24 | old wake_seq value       |
-		    +--------------------------+
-	   rsp + 16 | mutex pointer            |
-		    +--------------------------+
-	   rsp +  8 | condvar pointer          |
-		    +--------------------------+
-	   rsp +  4 | old broadcast_seq value  |
-		    +--------------------------+
-	   rsp +  0 | old cancellation mode    |
-		    +--------------------------+
-	*/
-
-	LP_OP(cmp) $-1, dep_mutex(%rdi)
-
-	/* Prepare structure passed to cancellation handler.  */
-	movq	%rdi, 8(%rsp)
-	movq	%rsi, 16(%rsp)
-	movq	%rdx, %r13
-
-	je	22f
-	mov	%RSI_LP, dep_mutex(%rdi)
-
-22:
-	xorb	%r15b, %r15b
-
-	/* Get internal lock.  */
-	movl	$1, %esi
-	xorl	%eax, %eax
-	LOCK
-#if cond_lock == 0
-	cmpxchgl %esi, (%rdi)
-#else
-	cmpxchgl %esi, cond_lock(%rdi)
-#endif
-	jnz	31f
-
-	/* Unlock the mutex.  */
-32:	movq	16(%rsp), %rdi
-	xorl	%esi, %esi
-	callq	__pthread_mutex_unlock_usercnt
-
-	testl	%eax, %eax
-	jne	46f
-
-	movq	8(%rsp), %rdi
-	incq	total_seq(%rdi)
-	incl	cond_futex(%rdi)
-	addl	$(1 << nwaiters_shift), cond_nwaiters(%rdi)
-
-	/* Get and store current wakeup_seq value.  */
-	movq	8(%rsp), %rdi
-	movq	wakeup_seq(%rdi), %r9
-	movl	broadcast_seq(%rdi), %edx
-	movq	%r9, 24(%rsp)
-	movl	%edx, 4(%rsp)
-
-	cmpq	$0, (%r13)
-	movq	$-ETIMEDOUT, %r14
-	js	36f
-
-38:	movl	cond_futex(%rdi), %r12d
-
-	/* Unlock.  */
-	LOCK
-#if cond_lock == 0
-	decl	(%rdi)
-#else
-	decl	cond_lock(%rdi)
-#endif
-	jne	33f
-
-.LcleanupSTART1:
-34:	callq	__pthread_enable_asynccancel
-	movl	%eax, (%rsp)
-
-	movq	%r13, %r10
-	movl	$FUTEX_WAIT_BITSET, %esi
-	LP_OP(cmp) $-1, dep_mutex(%rdi)
-	je	60f
-
-	mov	dep_mutex(%rdi), %R8_LP
-	/* Requeue to a non-robust PI mutex if the PI bit is set and
-	   the robust bit is not set.  */
-	movl	MUTEX_KIND(%r8), %eax
-	andl	$(ROBUST_BIT|PI_BIT), %eax
-	cmpl	$PI_BIT, %eax
-	jne	61f
-
-	movl	$(FUTEX_WAIT_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %esi
-	xorl	%eax, %eax
-	/* The following only works like this because we only support
-	   two clocks, represented using a single bit.  */
-	testl	$1, cond_nwaiters(%rdi)
-	movl	$FUTEX_CLOCK_REALTIME, %edx
-	cmove	%edx, %eax
-	orl	%eax, %esi
-	movq	%r12, %rdx
-	addq	$cond_futex, %rdi
-	movl	$SYS_futex, %eax
-	syscall
-
-	cmpl	$0, %eax
-	sete	%r15b
-
-#ifdef __ASSUME_REQUEUE_PI
-	jmp	62f
-#else
-	je	62f
-
-	/* When a futex syscall with FUTEX_WAIT_REQUEUE_PI returns
-	   successfully, it has already locked the mutex for us and the
-	   pi_flag (%r15b) is set to denote that fact.  However, if another
-	   thread changed the futex value before we entered the wait, the
-	   syscall may return an EAGAIN and the mutex is not locked.  We go
-	   ahead with a success anyway since later we look at the pi_flag to
-	   decide if we got the mutex or not.  The sequence numbers then make
-	   sure that only one of the threads actually wakes up.  We retry using
-	   normal FUTEX_WAIT only if the kernel returned ENOSYS, since normal
-	   and PI futexes don't mix.
-
-	   Note that we don't check for EAGAIN specifically; we assume that the
-	   only other error the futex function could return is EAGAIN (barring
-	   the ETIMEDOUT of course, for the timeout case in futex) since
-	   anything else would mean an error in our function.  It is too
-	   expensive to do that check for every call (which is quite common in
-	   case of a large number of threads), so it has been skipped.  */
-	cmpl    $-ENOSYS, %eax
-	jne     62f
-
-	subq	$cond_futex, %rdi
-#endif
-
-61:	movl	$(FUTEX_WAIT_BITSET|FUTEX_PRIVATE_FLAG), %esi
-60:	xorb	%r15b, %r15b
-	xorl	%eax, %eax
-	/* The following only works like this because we only support
-	   two clocks, represented using a single bit.  */
-	testl	$1, cond_nwaiters(%rdi)
-	movl	$FUTEX_CLOCK_REALTIME, %edx
-	movl	$0xffffffff, %r9d
-	cmove	%edx, %eax
-	orl	%eax, %esi
-	movq	%r12, %rdx
-	addq	$cond_futex, %rdi
-	movl	$SYS_futex, %eax
-	syscall
-62:	movq	%rax, %r14
-
-	movl	(%rsp), %edi
-	callq	__pthread_disable_asynccancel
-.LcleanupEND1:
-
-	/* Lock.  */
-	movq	8(%rsp), %rdi
-	movl	$1, %esi
-	xorl	%eax, %eax
-	LOCK
-#if cond_lock == 0
-	cmpxchgl %esi, (%rdi)
-#else
-	cmpxchgl %esi, cond_lock(%rdi)
-#endif
-	jne	35f
-
-36:	movl	broadcast_seq(%rdi), %edx
-
-	movq	woken_seq(%rdi), %rax
-
-	movq	wakeup_seq(%rdi), %r9
-
-	cmpl	4(%rsp), %edx
-	jne	53f
-
-	cmpq	24(%rsp), %r9
-	jbe	45f
-
-	cmpq	%rax, %r9
-	ja	39f
-
-45:	cmpq	$-ETIMEDOUT, %r14
-	je	99f
-
-	/* We need to go back to futex_wait.  If we're using requeue_pi, then
-	   release the mutex we had acquired and go back.  */
-	test	%r15b, %r15b
-	jz	38b
-
-	/* Adjust the mutex values first and then unlock it.  The unlock
-	   should always succeed or else the kernel did not lock the
-	   mutex correctly.  */
-	movq	%r8, %rdi
-	callq	__pthread_mutex_cond_lock_adjust
-	xorl	%esi, %esi
-	callq	__pthread_mutex_unlock_usercnt
-	/* Reload cond_var.  */
-	movq	8(%rsp), %rdi
-	jmp	38b
-
-99:	incq	wakeup_seq(%rdi)
-	incl	cond_futex(%rdi)
-	movl	$ETIMEDOUT, %r14d
-	jmp	44f
-
-53:	xorq	%r14, %r14
-	jmp	54f
-
-39:	xorq	%r14, %r14
-44:	incq	woken_seq(%rdi)
-
-54:	subl	$(1 << nwaiters_shift), cond_nwaiters(%rdi)
-
-	/* Wake up a thread which wants to destroy the condvar object.  */
-	cmpq	$0xffffffffffffffff, total_seq(%rdi)
-	jne	55f
-	movl	cond_nwaiters(%rdi), %eax
-	andl	$~((1 << nwaiters_shift) - 1), %eax
-	jne	55f
-
-	addq	$cond_nwaiters, %rdi
-	LP_OP(cmp) $-1, dep_mutex-cond_nwaiters(%rdi)
-	movl	$1, %edx
-#ifdef __ASSUME_PRIVATE_FUTEX
-	movl	$FUTEX_WAKE, %eax
-	movl	$(FUTEX_WAKE|FUTEX_PRIVATE_FLAG), %esi
-	cmove	%eax, %esi
-#else
-	movl	$0, %eax
-	movl	%fs:PRIVATE_FUTEX, %esi
-	cmove	%eax, %esi
-	orl	$FUTEX_WAKE, %esi
-#endif
-	movl	$SYS_futex, %eax
-	syscall
-	subq	$cond_nwaiters, %rdi
-
-55:	LOCK
-#if cond_lock == 0
-	decl	(%rdi)
-#else
-	decl	cond_lock(%rdi)
-#endif
-	jne	40f
-
-	/* If requeue_pi is used the kernel performs the locking of the
-	   mutex. */
-41:	movq	16(%rsp), %rdi
-	testb	%r15b, %r15b
-	jnz	64f
-
-	callq	__pthread_mutex_cond_lock
-
-63:	testq	%rax, %rax
-	cmoveq	%r14, %rax
-
-48:	addq	$FRAME_SIZE, %rsp
-	cfi_adjust_cfa_offset(-FRAME_SIZE)
-	popq	%r15
-	cfi_adjust_cfa_offset(-8)
-	cfi_restore(%r15)
-	popq	%r14
-	cfi_adjust_cfa_offset(-8)
-	cfi_restore(%r14)
-	popq	%r13
-	cfi_adjust_cfa_offset(-8)
-	cfi_restore(%r13)
-	popq	%r12
-	cfi_adjust_cfa_offset(-8)
-	cfi_restore(%r12)
-
-	retq
-
-	cfi_restore_state
-
-64:	callq	__pthread_mutex_cond_lock_adjust
-	movq	%r14, %rax
-	jmp	48b
-
-	/* Initial locking failed.  */
-31:
-#if cond_lock != 0
-	addq	$cond_lock, %rdi
-#endif
-	LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi)
-	movl	$LLL_PRIVATE, %eax
-	movl	$LLL_SHARED, %esi
-	cmovne	%eax, %esi
-	callq	__lll_lock_wait
-	jmp	32b
-
-	/* Unlock in loop requires wakeup.  */
-33:
-#if cond_lock != 0
-	addq	$cond_lock, %rdi
-#endif
-	LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi)
-	movl	$LLL_PRIVATE, %eax
-	movl	$LLL_SHARED, %esi
-	cmovne	%eax, %esi
-	callq	__lll_unlock_wake
-	jmp	34b
-
-	/* Locking in loop failed.  */
-35:
-#if cond_lock != 0
-	addq	$cond_lock, %rdi
-#endif
-	LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi)
-	movl	$LLL_PRIVATE, %eax
-	movl	$LLL_SHARED, %esi
-	cmovne	%eax, %esi
-	callq	__lll_lock_wait
-#if cond_lock != 0
-	subq	$cond_lock, %rdi
-#endif
-	jmp	36b
-
-	/* Unlock after loop requires wakeup.  */
-40:
-#if cond_lock != 0
-	addq	$cond_lock, %rdi
-#endif
-	LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi)
-	movl	$LLL_PRIVATE, %eax
-	movl	$LLL_SHARED, %esi
-	cmovne	%eax, %esi
-	callq	__lll_unlock_wake
-	jmp	41b
-
-	/* The initial unlocking of the mutex failed.  */
-46:	movq	8(%rsp), %rdi
-	movq	%rax, (%rsp)
-	LOCK
-#if cond_lock == 0
-	decl	(%rdi)
-#else
-	decl	cond_lock(%rdi)
-#endif
-	jne	47f
-
-#if cond_lock != 0
-	addq	$cond_lock, %rdi
-#endif
-	LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi)
-	movl	$LLL_PRIVATE, %eax
-	movl	$LLL_SHARED, %esi
-	cmovne	%eax, %esi
-	callq	__lll_unlock_wake
-
-47:	movq	(%rsp), %rax
-	jmp	48b
-
-	.size	__pthread_cond_timedwait, .-__pthread_cond_timedwait
-versioned_symbol (libpthread, __pthread_cond_timedwait, pthread_cond_timedwait,
-		  GLIBC_2_3_2)
-
-
-	.align	16
-	.type	__condvar_cleanup2, @function
-__condvar_cleanup2:
-	/* Stack frame:
-
-	   rsp + 72
-		    +--------------------------+
-	   rsp + 64 | %r12                     |
-		    +--------------------------+
-	   rsp + 56 | %r13                     |
-		    +--------------------------+
-	   rsp + 48 | %r14                     |
-		    +--------------------------+
-	   rsp + 24 | unused                   |
-		    +--------------------------+
-	   rsp + 16 | mutex pointer            |
-		    +--------------------------+
-	   rsp +  8 | condvar pointer          |
-		    +--------------------------+
-	   rsp +  4 | old broadcast_seq value  |
-		    +--------------------------+
-	   rsp +  0 | old cancellation mode    |
-		    +--------------------------+
-	*/
-
-	movq	%rax, 24(%rsp)
-
-	/* Get internal lock.  */
-	movq	8(%rsp), %rdi
-	movl	$1, %esi
-	xorl	%eax, %eax
-	LOCK
-#if cond_lock == 0
-	cmpxchgl %esi, (%rdi)
-#else
-	cmpxchgl %esi, cond_lock(%rdi)
-#endif
-	jz	1f
-
-#if cond_lock != 0
-	addq	$cond_lock, %rdi
-#endif
-	LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi)
-	movl	$LLL_PRIVATE, %eax
-	movl	$LLL_SHARED, %esi
-	cmovne	%eax, %esi
-	callq	__lll_lock_wait
-#if cond_lock != 0
-	subq	$cond_lock, %rdi
-#endif
-
-1:	movl	broadcast_seq(%rdi), %edx
-	cmpl	4(%rsp), %edx
-	jne	3f
-
-	/* We increment the wakeup_seq counter only if it is lower than
-	   total_seq.  If this is not the case, the thread was woken and
-	   then canceled.  In this case we ignore the signal.  */
-	movq	total_seq(%rdi), %rax
-	cmpq	wakeup_seq(%rdi), %rax
-	jbe	6f
-	incq	wakeup_seq(%rdi)
-	incl	cond_futex(%rdi)
-6:	incq	woken_seq(%rdi)
-
-3:	subl	$(1 << nwaiters_shift), cond_nwaiters(%rdi)
-
-	/* Wake up a thread which wants to destroy the condvar object.  */
-	xorq	%r12, %r12
-	cmpq	$0xffffffffffffffff, total_seq(%rdi)
-	jne	4f
-	movl	cond_nwaiters(%rdi), %eax
-	andl	$~((1 << nwaiters_shift) - 1), %eax
-	jne	4f
-
-	LP_OP(cmp) $-1, dep_mutex(%rdi)
-	leaq	cond_nwaiters(%rdi), %rdi
-	movl	$1, %edx
-#ifdef __ASSUME_PRIVATE_FUTEX
-	movl	$FUTEX_WAKE, %eax
-	movl	$(FUTEX_WAKE|FUTEX_PRIVATE_FLAG), %esi
-	cmove	%eax, %esi
-#else
-	movl	$0, %eax
-	movl	%fs:PRIVATE_FUTEX, %esi
-	cmove	%eax, %esi
-	orl	$FUTEX_WAKE, %esi
-#endif
-	movl	$SYS_futex, %eax
-	syscall
-	subq	$cond_nwaiters, %rdi
-	movl	$1, %r12d
-
-4:	LOCK
-#if cond_lock == 0
-	decl	(%rdi)
-#else
-	decl	cond_lock(%rdi)
-#endif
-	je	2f
-#if cond_lock != 0
-	addq	$cond_lock, %rdi
-#endif
-	LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi)
-	movl	$LLL_PRIVATE, %eax
-	movl	$LLL_SHARED, %esi
-	cmovne	%eax, %esi
-	callq	__lll_unlock_wake
-
-	/* Wake up all waiters to make sure no signal gets lost.  */
-2:	testq	%r12, %r12
-	jnz	5f
-	addq	$cond_futex, %rdi
-	LP_OP(cmp) $-1, dep_mutex-cond_futex(%rdi)
-	movl	$0x7fffffff, %edx
-#ifdef __ASSUME_PRIVATE_FUTEX
-	movl	$FUTEX_WAKE, %eax
-	movl	$(FUTEX_WAKE|FUTEX_PRIVATE_FLAG), %esi
-	cmove	%eax, %esi
-#else
-	movl	$0, %eax
-	movl	%fs:PRIVATE_FUTEX, %esi
-	cmove	%eax, %esi
-	orl	$FUTEX_WAKE, %esi
-#endif
-	movl	$SYS_futex, %eax
-	syscall
-
-	/* Lock the mutex only if we don't own it already.  This only happens
-	   in case of PI mutexes, if we got cancelled after a successful
-	   return of the futex syscall and before disabling async
-	   cancellation.  */
-5:	movq	16(%rsp), %rdi
-	movl	MUTEX_KIND(%rdi), %eax
-	andl	$(ROBUST_BIT|PI_BIT), %eax
-	cmpl	$PI_BIT, %eax
-	jne	7f
-
-	movl	(%rdi), %eax
-	andl	$TID_MASK, %eax
-	cmpl	%eax, %fs:TID
-	jne	7f
-	/* We managed to get the lock.  Fix it up before returning.  */
-	callq	__pthread_mutex_cond_lock_adjust
-	jmp	8f
-
-7:	callq	__pthread_mutex_cond_lock
-
-8:	movq	24(%rsp), %rdi
-	movq	FRAME_SIZE(%rsp), %r15
-	movq	FRAME_SIZE+8(%rsp), %r14
-	movq	FRAME_SIZE+16(%rsp), %r13
-	movq	FRAME_SIZE+24(%rsp), %r12
-.LcallUR:
-	call	_Unwind_Resume
-	hlt
-.LENDCODE:
-	cfi_endproc
-	.size	__condvar_cleanup2, .-__condvar_cleanup2
-
-
-	.section .gcc_except_table,"a",@progbits
-.LexceptSTART:
-	.byte	DW_EH_PE_omit			# @LPStart format
-	.byte	DW_EH_PE_omit			# @TType format
-	.byte	DW_EH_PE_uleb128		# call-site format
-	.uleb128 .Lcstend-.Lcstbegin
-.Lcstbegin:
-	.uleb128 .LcleanupSTART1-.LSTARTCODE
-	.uleb128 .LcleanupEND1-.LcleanupSTART1
-	.uleb128 __condvar_cleanup2-.LSTARTCODE
-	.uleb128  0
-	.uleb128 .LcallUR-.LSTARTCODE
-	.uleb128 .LENDCODE-.LcallUR
-	.uleb128 0
-	.uleb128  0
-.Lcstend:
-
-
-#ifdef SHARED
-	.hidden	DW.ref.__gcc_personality_v0
-	.weak	DW.ref.__gcc_personality_v0
-	.section .gnu.linkonce.d.DW.ref.__gcc_personality_v0,"aw",@progbits
-	.align	LP_SIZE
-	.type	DW.ref.__gcc_personality_v0, @object
-	.size	DW.ref.__gcc_personality_v0, LP_SIZE
-DW.ref.__gcc_personality_v0:
-	ASM_ADDR __gcc_personality_v0
-#endif
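
One mechanism worth noting in the timedwait just removed: the caller's abstime is handed to the kernel unchanged via FUTEX_WAIT_BITSET, which takes an absolute timeout (plain FUTEX_WAIT takes a relative one) and measures it against CLOCK_REALTIME only when FUTEX_CLOCK_REALTIME is set, which the assembly derives from the clock bit kept in cond_nwaiters.  A minimal hedged sketch of that syscall, with an illustrative wrapper name:

#include <errno.h>
#include <linux/futex.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>

static int
futex_abstimed_wait (unsigned int *addr, unsigned int expected,
                     const struct timespec *abstime, int clock_realtime)
{
  int op = FUTEX_WAIT_BITSET | (clock_realtime ? FUTEX_CLOCK_REALTIME : 0);
  if (syscall (SYS_futex, addr, op, expected, abstime,
               NULL, FUTEX_BITSET_MATCH_ANY) == 0)
    return 0;
  return errno;   /* ETIMEDOUT, EAGAIN if *addr != expected, or EINTR */
}

The 0xffffffff loaded into %r9d in the deleted code is exactly FUTEX_BITSET_MATCH_ANY.
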
diff --git a/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S b/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
deleted file mode 100644
index c82f37baab..0000000000
--- a/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
+++ /dev/null
@@ -1,555 +0,0 @@
-/* Copyright (C) 2002-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <shlib-compat.h>
-#include <lowlevellock.h>
-#include <lowlevelcond.h>
-#include <tcb-offsets.h>
-#include <pthread-pi-defines.h>
-#include <pthread-errnos.h>
-#include <stap-probe.h>
-
-#include <kernel-features.h>
-
-
-	.text
-
-/* int pthread_cond_wait (pthread_cond_t *cond, pthread_mutex_t *mutex)  */
-	.globl	__pthread_cond_wait
-	.type	__pthread_cond_wait, @function
-	.align	16
-__pthread_cond_wait:
-.LSTARTCODE:
-	cfi_startproc
-#ifdef SHARED
-	cfi_personality(DW_EH_PE_pcrel | DW_EH_PE_sdata4 | DW_EH_PE_indirect,
-			DW.ref.__gcc_personality_v0)
-	cfi_lsda(DW_EH_PE_pcrel | DW_EH_PE_sdata4, .LexceptSTART)
-#else
-	cfi_personality(DW_EH_PE_udata4, __gcc_personality_v0)
-	cfi_lsda(DW_EH_PE_udata4, .LexceptSTART)
-#endif
-
-#define FRAME_SIZE (32+8)
-	leaq	-FRAME_SIZE(%rsp), %rsp
-	cfi_adjust_cfa_offset(FRAME_SIZE)
-
-	/* Stack frame:
-
-	   rsp + 32
-		    +--------------------------+
-	   rsp + 24 | old wake_seq value       |
-		    +--------------------------+
-	   rsp + 16 | mutex pointer            |
-		    +--------------------------+
-	   rsp +  8 | condvar pointer          |
-		    +--------------------------+
-	   rsp +  4 | old broadcast_seq value  |
-		    +--------------------------+
-	   rsp +  0 | old cancellation mode    |
-		    +--------------------------+
-	*/
-
-	LIBC_PROBE (cond_wait, 2, %rdi, %rsi)
-
-	LP_OP(cmp) $-1, dep_mutex(%rdi)
-
-	/* Prepare structure passed to cancellation handler.  */
-	movq	%rdi, 8(%rsp)
-	movq	%rsi, 16(%rsp)
-
-	je	15f
-	mov	%RSI_LP, dep_mutex(%rdi)
-
-	/* Get internal lock.  */
-15:	movl	$1, %esi
-	xorl	%eax, %eax
-	LOCK
-#if cond_lock == 0
-	cmpxchgl %esi, (%rdi)
-#else
-	cmpxchgl %esi, cond_lock(%rdi)
-#endif
-	jne	1f
-
-	/* Unlock the mutex.  */
-2:	movq	16(%rsp), %rdi
-	xorl	%esi, %esi
-	callq	__pthread_mutex_unlock_usercnt
-
-	testl	%eax, %eax
-	jne	12f
-
-	movq	8(%rsp), %rdi
-	incq	total_seq(%rdi)
-	incl	cond_futex(%rdi)
-	addl	$(1 << nwaiters_shift), cond_nwaiters(%rdi)
-
-	/* Get and store current wakeup_seq value.  */
-	movq	8(%rsp), %rdi
-	movq	wakeup_seq(%rdi), %r9
-	movl	broadcast_seq(%rdi), %edx
-	movq	%r9, 24(%rsp)
-	movl	%edx, 4(%rsp)
-
-	/* Unlock.  */
-8:	movl	cond_futex(%rdi), %edx
-	LOCK
-#if cond_lock == 0
-	decl	(%rdi)
-#else
-	decl	cond_lock(%rdi)
-#endif
-	jne	3f
-
-.LcleanupSTART:
-4:	callq	__pthread_enable_asynccancel
-	movl	%eax, (%rsp)
-
-	xorq	%r10, %r10
-	LP_OP(cmp) $-1, dep_mutex(%rdi)
-	leaq	cond_futex(%rdi), %rdi
-	movl	$FUTEX_WAIT, %esi
-	je	60f
-
-	mov	dep_mutex-cond_futex(%rdi), %R8_LP
-	/* Requeue to a non-robust PI mutex if the PI bit is set and
-	   the robust bit is not set.  */
-	movl	MUTEX_KIND(%r8), %eax
-	andl	$(ROBUST_BIT|PI_BIT), %eax
-	cmpl	$PI_BIT, %eax
-	jne	61f
-
-	movl	$(FUTEX_WAIT_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %esi
-	movl	$SYS_futex, %eax
-	syscall
-
-	cmpl	$0, %eax
-	sete	%r8b
-
-#ifdef __ASSUME_REQUEUE_PI
-	jmp	62f
-#else
-	je	62f
-
-	/* When a futex syscall with FUTEX_WAIT_REQUEUE_PI returns
-	   successfully, it has already locked the mutex for us and the
-	   pi_flag (%r8b) is set to denote that fact.  However, if another
-	   thread changed the futex value before we entered the wait, the
-	   syscall may return an EAGAIN and the mutex is not locked.  We go
-	   ahead with a success anyway since later we look at the pi_flag to
-	   decide if we got the mutex or not.  The sequence numbers then make
-	   sure that only one of the threads actually wakes up.  We retry using
-	   normal FUTEX_WAIT only if the kernel returned ENOSYS, since normal
-	   and PI futexes don't mix.
-
-	   Note that we don't check for EAGAIN specifically; we assume that the
-	   only other error the futex function could return is EAGAIN since
-	   anything else would mean an error in our function.  It is too
-	   expensive to do that check for every call (which is quite common in
-	   case of a large number of threads), so it has been skipped.  */
-	cmpl	$-ENOSYS, %eax
-	jne	62f
-
-# ifndef __ASSUME_PRIVATE_FUTEX
-	movl	$FUTEX_WAIT, %esi
-# endif
-#endif
-
-61:
-#ifdef __ASSUME_PRIVATE_FUTEX
-	movl	$(FUTEX_WAIT|FUTEX_PRIVATE_FLAG), %esi
-#else
-	orl	%fs:PRIVATE_FUTEX, %esi
-#endif
-60:	xorb	%r8b, %r8b
-	movl	$SYS_futex, %eax
-	syscall
-
-62:	movl	(%rsp), %edi
-	callq	__pthread_disable_asynccancel
-.LcleanupEND:
-
-	/* Lock.  */
-	movq	8(%rsp), %rdi
-	movl	$1, %esi
-	xorl	%eax, %eax
-	LOCK
-#if cond_lock == 0
-	cmpxchgl %esi, (%rdi)
-#else
-	cmpxchgl %esi, cond_lock(%rdi)
-#endif
-	jnz	5f
-
-6:	movl	broadcast_seq(%rdi), %edx
-
-	movq	woken_seq(%rdi), %rax
-
-	movq	wakeup_seq(%rdi), %r9
-
-	cmpl	4(%rsp), %edx
-	jne	16f
-
-	cmpq	24(%rsp), %r9
-	jbe	19f
-
-	cmpq	%rax, %r9
-	jna	19f
-
-	incq	woken_seq(%rdi)
-
-	/* Unlock */
-16:	subl	$(1 << nwaiters_shift), cond_nwaiters(%rdi)
-
-	/* Wake up a thread which wants to destroy the condvar object.  */
-	cmpq	$0xffffffffffffffff, total_seq(%rdi)
-	jne	17f
-	movl	cond_nwaiters(%rdi), %eax
-	andl	$~((1 << nwaiters_shift) - 1), %eax
-	jne	17f
-
-	addq	$cond_nwaiters, %rdi
-	LP_OP(cmp) $-1, dep_mutex-cond_nwaiters(%rdi)
-	movl	$1, %edx
-#ifdef __ASSUME_PRIVATE_FUTEX
-	movl	$FUTEX_WAKE, %eax
-	movl	$(FUTEX_WAKE|FUTEX_PRIVATE_FLAG), %esi
-	cmove	%eax, %esi
-#else
-	movl	$0, %eax
-	movl	%fs:PRIVATE_FUTEX, %esi
-	cmove	%eax, %esi
-	orl	$FUTEX_WAKE, %esi
-#endif
-	movl	$SYS_futex, %eax
-	syscall
-	subq	$cond_nwaiters, %rdi
-
-17:	LOCK
-#if cond_lock == 0
-	decl	(%rdi)
-#else
-	decl	cond_lock(%rdi)
-#endif
-	jne	10f
-
-	/* If requeue_pi is used the kernel performs the locking of the
-	   mutex. */
-11:	movq	16(%rsp), %rdi
-	testb	%r8b, %r8b
-	jnz	18f
-
-	callq	__pthread_mutex_cond_lock
-
-14:	leaq	FRAME_SIZE(%rsp), %rsp
-	cfi_adjust_cfa_offset(-FRAME_SIZE)
-
-	/* We return the result of the mutex_lock operation.  */
-	retq
-
-	cfi_adjust_cfa_offset(FRAME_SIZE)
-
-18:	callq	__pthread_mutex_cond_lock_adjust
-	xorl	%eax, %eax
-	jmp	14b
-
-	/* We need to go back to futex_wait.  If we're using requeue_pi, then
-	   release the mutex we had acquired and go back.  */
-19:	testb	%r8b, %r8b
-	jz	8b
-
-	/* Adjust the mutex values first and then unlock it.  The unlock
-	   should always succeed or else the kernel did not lock the mutex
-	   correctly.  */
-	movq	16(%rsp), %rdi
-	callq	__pthread_mutex_cond_lock_adjust
-	movq	%rdi, %r8
-	xorl	%esi, %esi
-	callq	__pthread_mutex_unlock_usercnt
-	/* Reload cond_var.  */
-	movq	8(%rsp), %rdi
-	jmp	8b
-
-	/* Initial locking failed.  */
-1:
-#if cond_lock != 0
-	addq	$cond_lock, %rdi
-#endif
-	LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi)
-	movl	$LLL_PRIVATE, %eax
-	movl	$LLL_SHARED, %esi
-	cmovne	%eax, %esi
-	callq	__lll_lock_wait
-	jmp	2b
-
-	/* Unlock in loop requires wakeup.  */
-3:
-#if cond_lock != 0
-	addq	$cond_lock, %rdi
-#endif
-	LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi)
-	movl	$LLL_PRIVATE, %eax
-	movl	$LLL_SHARED, %esi
-	cmovne	%eax, %esi
-	/* The call preserves %rdx.  */
-	callq	__lll_unlock_wake
-#if cond_lock != 0
-	subq	$cond_lock, %rdi
-#endif
-	jmp	4b
-
-	/* Locking in loop failed.  */
-5:
-#if cond_lock != 0
-	addq	$cond_lock, %rdi
-#endif
-	LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi)
-	movl	$LLL_PRIVATE, %eax
-	movl	$LLL_SHARED, %esi
-	cmovne	%eax, %esi
-	callq	__lll_lock_wait
-#if cond_lock != 0
-	subq	$cond_lock, %rdi
-#endif
-	jmp	6b
-
-	/* Unlock after loop requires wakeup.  */
-10:
-#if cond_lock != 0
-	addq	$cond_lock, %rdi
-#endif
-	LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi)
-	movl	$LLL_PRIVATE, %eax
-	movl	$LLL_SHARED, %esi
-	cmovne	%eax, %esi
-	callq	__lll_unlock_wake
-	jmp	11b
-
-	/* The initial unlocking of the mutex failed.  */
-12:	movq	%rax, %r10
-	movq	8(%rsp), %rdi
-	LOCK
-#if cond_lock == 0
-	decl	(%rdi)
-#else
-	decl	cond_lock(%rdi)
-#endif
-	je	13f
-
-#if cond_lock != 0
-	addq	$cond_lock, %rdi
-#endif
-	LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi)
-	movl	$LLL_PRIVATE, %eax
-	movl	$LLL_SHARED, %esi
-	cmovne	%eax, %esi
-	callq	__lll_unlock_wake
-
-13:	movq	%r10, %rax
-	jmp	14b
-
-	.size	__pthread_cond_wait, .-__pthread_cond_wait
-versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait,
-		  GLIBC_2_3_2)
-
-
-	.align	16
-	.type	__condvar_cleanup1, @function
-	.globl	__condvar_cleanup1
-	.hidden	__condvar_cleanup1
-__condvar_cleanup1:
-	/* Stack frame:
-
-	   rsp + 32
-		    +--------------------------+
-	   rsp + 24 | unused                   |
-		    +--------------------------+
-	   rsp + 16 | mutex pointer            |
-		    +--------------------------+
-	   rsp +  8 | condvar pointer          |
-		    +--------------------------+
-	   rsp +  4 | old broadcast_seq value  |
-		    +--------------------------+
-	   rsp +  0 | old cancellation mode    |
-		    +--------------------------+
-	*/
-
-	movq	%rax, 24(%rsp)
-
-	/* Get internal lock.  */
-	movq	8(%rsp), %rdi
-	movl	$1, %esi
-	xorl	%eax, %eax
-	LOCK
-#if cond_lock == 0
-	cmpxchgl %esi, (%rdi)
-#else
-	cmpxchgl %esi, cond_lock(%rdi)
-#endif
-	jz	1f
-
-#if cond_lock != 0
-	addq	$cond_lock, %rdi
-#endif
-	LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi)
-	movl	$LLL_PRIVATE, %eax
-	movl	$LLL_SHARED, %esi
-	cmovne	%eax, %esi
-	callq	__lll_lock_wait
-#if cond_lock != 0
-	subq	$cond_lock, %rdi
-#endif
-
-1:	movl	broadcast_seq(%rdi), %edx
-	cmpl	4(%rsp), %edx
-	jne	3f
-
-	/* We increment the wakeup_seq counter only if it is lower than
-	   total_seq.  If this is not the case, the thread was woken and
-	   then canceled.  In this case we ignore the signal.  */
-	movq	total_seq(%rdi), %rax
-	cmpq	wakeup_seq(%rdi), %rax
-	jbe	6f
-	incq	wakeup_seq(%rdi)
-	incl	cond_futex(%rdi)
-6:	incq	woken_seq(%rdi)
-
-3:	subl	$(1 << nwaiters_shift), cond_nwaiters(%rdi)
-
-	/* Wake up a thread which wants to destroy the condvar object.  */
-	xorl	%ecx, %ecx
-	cmpq	$0xffffffffffffffff, total_seq(%rdi)
-	jne	4f
-	movl	cond_nwaiters(%rdi), %eax
-	andl	$~((1 << nwaiters_shift) - 1), %eax
-	jne	4f
-
-	LP_OP(cmp) $-1, dep_mutex(%rdi)
-	leaq	cond_nwaiters(%rdi), %rdi
-	movl	$1, %edx
-#ifdef __ASSUME_PRIVATE_FUTEX
-	movl	$FUTEX_WAKE, %eax
-	movl	$(FUTEX_WAKE|FUTEX_PRIVATE_FLAG), %esi
-	cmove	%eax, %esi
-#else
-	movl	$0, %eax
-	movl	%fs:PRIVATE_FUTEX, %esi
-	cmove	%eax, %esi
-	orl	$FUTEX_WAKE, %esi
-#endif
-	movl	$SYS_futex, %eax
-	syscall
-	subq	$cond_nwaiters, %rdi
-	movl	$1, %ecx
-
-4:	LOCK
-#if cond_lock == 0
-	decl	(%rdi)
-#else
-	decl	cond_lock(%rdi)
-#endif
-	je	2f
-#if cond_lock != 0
-	addq	$cond_lock, %rdi
-#endif
-	LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi)
-	movl	$LLL_PRIVATE, %eax
-	movl	$LLL_SHARED, %esi
-	cmovne	%eax, %esi
-	/* The call preserves %rcx.  */
-	callq	__lll_unlock_wake
-
-	/* Wake up all waiters to make sure no signal gets lost.  */
-2:	testl	%ecx, %ecx
-	jnz	5f
-	addq	$cond_futex, %rdi
-	LP_OP(cmp) $-1, dep_mutex-cond_futex(%rdi)
-	movl	$0x7fffffff, %edx
-#ifdef __ASSUME_PRIVATE_FUTEX
-	movl	$FUTEX_WAKE, %eax
-	movl	$(FUTEX_WAKE|FUTEX_PRIVATE_FLAG), %esi
-	cmove	%eax, %esi
-#else
-	movl	$0, %eax
-	movl	%fs:PRIVATE_FUTEX, %esi
-	cmove	%eax, %esi
-	orl	$FUTEX_WAKE, %esi
-#endif
-	movl	$SYS_futex, %eax
-	syscall
-
-	/* Lock the mutex only if we don't own it already.  This only happens
-	   in case of PI mutexes, if we got cancelled after a successful
-	   return of the futex syscall and before disabling async
-	   cancellation.  */
-5:	movq	16(%rsp), %rdi
-	movl	MUTEX_KIND(%rdi), %eax
-	andl	$(ROBUST_BIT|PI_BIT), %eax
-	cmpl	$PI_BIT, %eax
-	jne	7f
-
-	movl	(%rdi), %eax
-	andl	$TID_MASK, %eax
-	cmpl	%eax, %fs:TID
-	jne	7f
-	/* We managed to get the lock.  Fix it up before returning.  */
-	callq	__pthread_mutex_cond_lock_adjust
-	jmp	8f
-
-
-7:	callq	__pthread_mutex_cond_lock
-
-8:	movq	24(%rsp), %rdi
-.LcallUR:
-	call	_Unwind_Resume
-	hlt
-.LENDCODE:
-	cfi_endproc
-	.size	__condvar_cleanup1, .-__condvar_cleanup1
-
-
-	.section .gcc_except_table,"a",@progbits
-.LexceptSTART:
-	.byte	DW_EH_PE_omit			# @LPStart format
-	.byte	DW_EH_PE_omit			# @TType format
-	.byte	DW_EH_PE_uleb128		# call-site format
-	.uleb128 .Lcstend-.Lcstbegin
-.Lcstbegin:
-	.uleb128 .LcleanupSTART-.LSTARTCODE
-	.uleb128 .LcleanupEND-.LcleanupSTART
-	.uleb128 __condvar_cleanup1-.LSTARTCODE
-	.uleb128 0
-	.uleb128 .LcallUR-.LSTARTCODE
-	.uleb128 .LENDCODE-.LcallUR
-	.uleb128 0
-	.uleb128 0
-.Lcstend:
-
-
-#ifdef SHARED
-	.hidden	DW.ref.__gcc_personality_v0
-	.weak	DW.ref.__gcc_personality_v0
-	.section .gnu.linkonce.d.DW.ref.__gcc_personality_v0,"aw",@progbits
-	.align	LP_SIZE
-	.type	DW.ref.__gcc_personality_v0, @object
-	.size	DW.ref.__gcc_personality_v0, LP_SIZE
-DW.ref.__gcc_personality_v0:
-	ASM_ADDR __gcc_personality_v0
-#endif
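
With both wait variants now gone, the core loop they shared can be summarized in C.  The hedged sketch below uses illustrative names and leaves out mutex handling, cancellation, the waiter accounting in nwaiters, and the PI requeue path:

#include <linux/futex.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

typedef struct
{
  int lock;
  unsigned int futex;
  uint64_t total_seq, wakeup_seq, woken_seq;
  unsigned int broadcast_seq;
} old_cond_t;

static void ll_lock (int *l) { while (__sync_lock_test_and_set (l, 1)) ; }
static void ll_unlock (int *l) { __sync_lock_release (l); }

/* Block until a wakeup this waiter is entitled to consume arrives.  */
static void
cond_wait_loop_sketch (old_cond_t *c)
{
  ll_lock (&c->lock);
  ++c->total_seq;
  ++c->futex;
  uint64_t seq = c->wakeup_seq;       /* wakeups granted so far */
  unsigned int bc = c->broadcast_seq;

  for (;;)
    {
      unsigned int fval = c->futex;
      ll_unlock (&c->lock);
      /* Returns immediately if the futex word already changed; may
         also wake spuriously, hence the recheck under the lock.  */
      syscall (SYS_futex, &c->futex, FUTEX_WAIT, fval, NULL, NULL, 0);
      ll_lock (&c->lock);
      if (c->broadcast_seq != bc)
        break;                        /* a broadcast woke everyone */
      if (c->wakeup_seq > seq && c->woken_seq < c->wakeup_seq)
        {
          ++c->woken_seq;             /* consume one wakeup */
          break;
        }
    }
  ll_unlock (&c->lock);
}
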
diff --git a/sysdeps/x86/bits/pthreadtypes.h b/sysdeps/x86/bits/pthreadtypes.h
index c641a405e3..38bc454682 100644
--- a/sysdeps/x86/bits/pthreadtypes.h
+++ b/sysdeps/x86/bits/pthreadtypes.h
@@ -140,14 +140,27 @@ typedef union
 {
   struct
   {
-    int __lock;
-    unsigned int __futex;
-    __extension__ unsigned long long int __total_seq;
-    __extension__ unsigned long long int __wakeup_seq;
-    __extension__ unsigned long long int __woken_seq;
-    void *__mutex;
-    unsigned int __nwaiters;
-    unsigned int __broadcast_seq;
+    __extension__ union
+    {
+      __extension__ unsigned long long int __wseq;
+      struct {
+	unsigned int __low;
+	unsigned int __high;
+      } __wseq32;
+    };
+    __extension__ union
+    {
+      __extension__ unsigned long long int __g1_start;
+      struct {
+	unsigned int __low;
+	unsigned int __high;
+      } __g1_start32;
+    };
+    unsigned int __g_refs[2];
+    unsigned int __g_size[2];
+    unsigned int __g1_orig_size;
+    unsigned int __wrefs;
+    unsigned int __g_signals[2];
   } __data;
   char __size[__SIZEOF_PTHREAD_COND_T];
   __extension__ long long int __align;
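
As in the powerpc variant earlier in this patch, the anonymous unions expose __wseq and __g1_start as 32-bit halves because targets without 64-bit atomics cannot operate on the full counters directly (the actual accessors live in nptl/pthread_cond_common.c).  One common, purely illustrative way to read such a split counter consistently is to retry until the high half is stable; the real glibc scheme is tailored to the algorithm's constraints and differs in detail:

#include <stdatomic.h>
#include <stdint.h>

struct split64 { _Atomic uint32_t low, high; };

static uint64_t
load_split64 (struct split64 *v)
{
  uint32_t hi, lo;
  do
    {
      /* If the high half moved while we read the low half, a carry
         may have crossed the word boundary: read again.  */
      hi = atomic_load (&v->high);
      lo = atomic_load (&v->low);
    }
  while (atomic_load (&v->high) != hi);
  return ((uint64_t) hi << 32) | lo;
}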