about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog4
-rw-r--r--nptl/ChangeLog92
-rw-r--r--nptl/Makefile2
-rw-r--r--nptl/pthread_cond_init.c1
-rw-r--r--nptl/sysdeps/pthread/pthread_cond_broadcast.c22
-rw-r--r--nptl/sysdeps/pthread/pthread_cond_signal.c15
-rw-r--r--nptl/sysdeps/pthread/pthread_cond_timedwait.c26
-rw-r--r--nptl/sysdeps/pthread/pthread_cond_wait.c28
-rw-r--r--nptl/sysdeps/unix/sysv/linux/alpha/bits/pthreadtypes.h3
-rw-r--r--nptl/sysdeps/unix/sysv/linux/alpha/lowlevellock.h12
-rw-r--r--nptl/sysdeps/unix/sysv/linux/i386/bits/pthreadtypes.h3
-rw-r--r--nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_broadcast.S53
-rw-r--r--nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_signal.S11
-rw-r--r--nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S29
-rw-r--r--nptl/sysdeps/unix/sysv/linux/ia64/bits/pthreadtypes.h3
-rw-r--r--nptl/sysdeps/unix/sysv/linux/ia64/lowlevellock.h11
-rw-r--r--nptl/sysdeps/unix/sysv/linux/lowlevelcond.sym1
-rw-r--r--nptl/sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h3
-rw-r--r--nptl/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h12
-rw-r--r--nptl/sysdeps/unix/sysv/linux/s390/bits/pthreadtypes.h3
-rw-r--r--nptl/sysdeps/unix/sysv/linux/s390/lowlevellock.h24
-rw-r--r--nptl/sysdeps/unix/sysv/linux/sh/lowlevellock.h3
-rw-r--r--nptl/sysdeps/unix/sysv/linux/sparc/bits/pthreadtypes.h3
-rw-r--r--nptl/sysdeps/unix/sysv/linux/sparc/lowlevellock.h12
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/bits/pthreadtypes.h3
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S38
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S13
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S16
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S26
-rw-r--r--sysdeps/i386/i486/bits/atomic.h64
30 files changed, 327 insertions, 209 deletions
diff --git a/ChangeLog b/ChangeLog
index 4f38604c52..1fd3646e3f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2004-06-03  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/i386/i486/bits/atomic.h: Optimize a bit.
+
 2004-05-07  Dmitry V. Levin  <ldv@altlinux.org>
 
 	* argp/argp-help.c (__argp_error, __argp_failure): Check result
diff --git a/nptl/ChangeLog b/nptl/ChangeLog
index 071e7048cc..1ff058e21b 100644
--- a/nptl/ChangeLog
+++ b/nptl/ChangeLog
@@ -1,3 +1,95 @@
+2004-05-21  Jakub Jelinek  <jakub@redhat.com>
+
+	* Makefile (tests): Add tst-cond16.
+	* sysdeps/unix/sysv/linux/lowlevelcond.sym (cond_futex): Add.
+	* pthread_cond_init.c (__pthread_cond_init): Clear __data.__futex.
+	* sysdeps/unix/sysv/linux/i386/bits/pthreadtypes.h (pthread_cond_t):
+	Add __data.__futex field, reshuffle __data.__clock.
+	* sysdeps/unix/sysv/linux/i386/i486/pthread_cond_signal.S
+	(__pthread_cond_signal): Increment __futex at the same time as
+	__wakeup_seq or __total_seq.  Pass address of __futex instead of
+	address of low 32-bits of __wakeup_seq to futex syscall.
+	* sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S
+	(__pthread_cond_wait): Likewise.  Pass __futex value from before
+	releasing internal lock to FUTEX_WAIT.
+	* sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S
+	(__pthread_cond_timedwait): Likewise.
+	* sysdeps/unix/sysv/linux/i386/i486/pthread_cond_broadcast.S
+	(FUTEX_CMP_REQUEUE): Define.
+	(__pthread_cond_broadcast): Set __futex to 2 * __total_seq.
+	Use FUTEX_CMP_REQUEUE operation instead of FUTEX_REQUEUE.
+	Pass __futex value from before the unlock and __futex address instead
+	of address of low 32-bits of __wakeup_seq to futex syscall.
+	Fall back to FUTEX_WAKE of all waiters on any error.
+	* sysdeps/unix/sysv/linux/alpha/lowlevellock.h (FUTEX_CMP_REQUEUE):
+	Define.
+	(lll_futex_requeue): Add val argument, use FUTEX_CMP_REQUEUE
+	internally.  Return non-zero if error, zero if success.
+	* sysdeps/unix/sysv/linux/alpha/bits/pthreadtypes.h (pthread_cond_t):
+	Add __data.__futex field, reshuffle __data.__clock.
+	* sysdeps/unix/sysv/linux/s390/lowlevellock.h (FUTEX_CMP_REQUEUE):
+	Define.
+	(lll_futex_requeue): Add val argument, return 1 unconditionally
+	for the time being.
+	* sysdeps/unix/sysv/linux/s390/bits/pthreadtypes.h (pthread_cond_t):
+	Add __data.__futex field, reshuffle __data.__clock.
+	* sysdeps/unix/sysv/linux/powerpc/lowlevellock.h (FUTEX_CMP_REQUEUE):
+	Define.
+	(lll_futex_requeue): Add val argument, use FUTEX_CMP_REQUEUE
+	internally.  Return non-zero if error, zero if success.
+	* sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h
+	(pthread_cond_t): Add __data.__futex field, reshuffle __data.__clock.
+	* sysdeps/unix/sysv/linux/sparc/lowlevellock.h (FUTEX_CMP_REQUEUE):
+	Define.
+	(lll_futex_requeue): Add val argument, use FUTEX_CMP_REQUEUE
+	internally.  Return non-zero if error, zero if success.
+	* sysdeps/unix/sysv/linux/sparc/bits/pthreadtypes.h (pthread_cond_t):
+	Add __data.__futex field, reshuffle __data.__clock.
+	* sysdeps/unix/sysv/linux/ia64/lowlevellock.h (FUTEX_CMP_REQUEUE):
+	Define.
+	(lll_futex_requeue): Add val argument, use FUTEX_CMP_REQUEUE
+	internally.  Return non-zero if error, zero if success.
+	* sysdeps/unix/sysv/linux/ia64/bits/pthreadtypes.h (pthread_cond_t):
+	Add __data.__futex field, reshuffle __data.__clock.
+	* sysdeps/unix/sysv/linux/x86_64/bits/pthreadtypes.h (pthread_cond_t):
+	Add __data.__futex field, reshuffle __data.__clock.
+	* sysdeps/pthread/pthread_cond_signal.c (__pthread_cond_signal):
+	Increment __futex at the same time as __wakeup_seq or __total_seq.
+	Pass address of __futex instead of address of low 32-bits of
+	__wakeup_seq to futex syscall.
+	* sysdeps/pthread/pthread_cond_wait.c (__pthread_cond_wait): Likewise.
+	Pass __futex value from before releasing internal lock
+	to FUTEX_WAIT.
+	* sysdeps/pthread/pthread_cond_timedwait.c (__pthread_cond_timedwait):
+	Likewise.  Avoid unnecessary shadowing of variables.
+	* sysdeps/pthread/pthread_cond_broadcast.c (__pthread_cond_broadcast):
+	Set __futex to 2 * __total_seq.  Pass __futex value from before the
+	unlock and __futex address instead of address of low 32-bits of
+	__wakeup_seq to futex_requeue macro, adjust for new return value
+	meaning.
+	* sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S
+	(__pthread_cond_signal): Increment __futex at the same time as
+	__wakeup_seq or __total_seq.  Pass address of __futex instead of
+	address of low 32-bits of __wakeup_seq to futex syscall.
+	* sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
+	(__pthread_cond_wait): Likewise.  Pass __futex value from before
+	releasing internal lock to FUTEX_WAIT.
+	* sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
+	(__pthread_cond_timedwait): Likewise.
+	* sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S
+	(FUTEX_CMP_REQUEUE): Define.
+	(__pthread_cond_broadcast): Set __futex to 2 * __total_seq.
+	Use FUTEX_CMP_REQUEUE operation instead of FUTEX_REQUEUE.
+	Pass __futex value from before the unlock and __futex address instead
+	of address of low 32-bits of __wakeup_seq to futex syscall.
+	Fall back to FUTEX_WAKE of all waiters on any error.
+
+2004-06-03  Kaz Kojima  <kkojima@rr.iij4u.or.jp>
+
+	* sysdeps/unix/sysv/linux/sh/lowlevellock.h (lll_mutex_lock):
+	Add nop to align the end of critical section.
+	(lll_mutex_cond_lock, lll_mutex_timedlock): Likewise.
+
 2004-06-01  Kaz Kojima  <kkojima@rr.iij4u.or.jp>
 
 	* sysdeps/unix/sysv/linux/sh/bits/pthreadtypes.h (pthread_cond_t):
diff --git a/nptl/Makefile b/nptl/Makefile
index 687a6a0cfc..fd3ed9f699 100644
--- a/nptl/Makefile
+++ b/nptl/Makefile
@@ -194,7 +194,7 @@ tests = tst-attr1 tst-attr2 tst-attr3 \
 	tst-spin1 tst-spin2 tst-spin3 \
 	tst-cond1 tst-cond2 tst-cond3 tst-cond4 tst-cond5 tst-cond6 tst-cond7 \
 	tst-cond8 tst-cond9 tst-cond10 tst-cond11 tst-cond12 tst-cond13 \
-	tst-cond14 tst-cond15 tst-cond17 tst-cond18 \
+	tst-cond14 tst-cond15 tst-cond16 tst-cond17 tst-cond18 \
 	tst-rwlock1 tst-rwlock2 tst-rwlock3 tst-rwlock4 tst-rwlock5 \
 	tst-rwlock6 tst-rwlock7 tst-rwlock8 tst-rwlock9 tst-rwlock10 \
 	tst-rwlock11 tst-rwlock12 tst-rwlock13 \
diff --git a/nptl/pthread_cond_init.c b/nptl/pthread_cond_init.c
index d059da2315..f5fbd64e96 100644
--- a/nptl/pthread_cond_init.c
+++ b/nptl/pthread_cond_init.c
@@ -31,6 +31,7 @@ __pthread_cond_init (cond, cond_attr)
   struct pthread_condattr *icond_attr = (struct pthread_condattr *) cond_attr;
 
   cond->__data.__lock = LLL_MUTEX_LOCK_INITIALIZER;
+  cond->__data.__futex = 0;
   cond->__data.__clock = (icond_attr == NULL
 			  ? CLOCK_REALTIME : (icond_attr->value & 0xfe) >> 1);
   cond->__data.__total_seq = 0;
diff --git a/nptl/sysdeps/pthread/pthread_cond_broadcast.c b/nptl/sysdeps/pthread/pthread_cond_broadcast.c
index a42c579658..1eac8ecf83 100644
--- a/nptl/sysdeps/pthread/pthread_cond_broadcast.c
+++ b/nptl/sysdeps/pthread/pthread_cond_broadcast.c
@@ -41,35 +41,29 @@ __pthread_cond_broadcast (cond)
       /* Yes.  Mark them all as woken.  */
       cond->__data.__wakeup_seq = cond->__data.__total_seq;
       cond->__data.__woken_seq = cond->__data.__total_seq;
+      cond->__data.__futex = (unsigned int) cond->__data.__total_seq * 2;
+      int futex_val = cond->__data.__futex;
       /* Signal that a broadcast happened.  */
       ++cond->__data.__broadcast_seq;
 
       /* We are done.  */
       lll_mutex_unlock (cond->__data.__lock);
 
-      /* The futex syscall operates on a 32-bit word.  That is fine,
-	 we just use the low 32 bits of the sequence counter.  */
-#if BYTE_ORDER == LITTLE_ENDIAN
-      int *futex = ((int *) (&cond->__data.__wakeup_seq));
-#elif BYTE_ORDER == BIG_ENDIAN
-      int *futex = ((int *) (&cond->__data.__wakeup_seq)) + 1;
-#else
-# error "No valid byte order"
-#endif
-
       /* Do not use requeue for pshared condvars.  */
       if (cond->__data.__mutex == (void *) ~0l)
 	goto wake_all;
 
       /* Wake everybody.  */
       pthread_mutex_t *mut = (pthread_mutex_t *) cond->__data.__mutex;
-      if (__builtin_expect (lll_futex_requeue (futex, 1, INT_MAX,
-					       &mut->__data.__lock) == -EINVAL,
-			    0))
+      /* lll_futex_requeue returns 0 for success and non-zero
+	 for errors.  */
+      if (__builtin_expect (lll_futex_requeue (&cond->__data.__futex, 1,
+					       INT_MAX, &mut->__data.__lock,
+					       futex_val), 0))
 	{
 	  /* The requeue functionality is not available.  */
 	wake_all:
-	  lll_futex_wake (futex, INT_MAX);
+	  lll_futex_wake (&cond->__data.__futex, INT_MAX);
 	}
 
       /* That's all.  */
diff --git a/nptl/sysdeps/pthread/pthread_cond_signal.c b/nptl/sysdeps/pthread/pthread_cond_signal.c
index 76203ac2cc..f5623480f8 100644
--- a/nptl/sysdeps/pthread/pthread_cond_signal.c
+++ b/nptl/sysdeps/pthread/pthread_cond_signal.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003 Free Software Foundation, Inc.
+/* Copyright (C) 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Martin Schwidefsky <schwidefsky@de.ibm.com>, 2003.
 
@@ -40,19 +40,10 @@ __pthread_cond_signal (cond)
     {
       /* Yes.  Mark one of them as woken.  */
       ++cond->__data.__wakeup_seq;
-
-      /* The futex syscall operates on a 32-bit word.  That is fine,
-	 we just use the low 32 bits of the sequence counter.  */
-#if BYTE_ORDER == LITTLE_ENDIAN
-      int *futex = ((int *) (&cond->__data.__wakeup_seq));
-#elif BYTE_ORDER == BIG_ENDIAN
-      int *futex = ((int *) (&cond->__data.__wakeup_seq)) + 1;
-#else
-# error "No valid byte order"
-#endif
+      ++cond->__data.__futex;
 
       /* Wake one.  */
-      lll_futex_wake (futex, 1);
+      lll_futex_wake (&cond->__data.__futex, 1);
     }
 
   /* We are done.  */
diff --git a/nptl/sysdeps/pthread/pthread_cond_timedwait.c b/nptl/sysdeps/pthread/pthread_cond_timedwait.c
index 96b1029cf5..940b51b4be 100644
--- a/nptl/sysdeps/pthread/pthread_cond_timedwait.c
+++ b/nptl/sysdeps/pthread/pthread_cond_timedwait.c
@@ -66,6 +66,7 @@ __pthread_cond_timedwait (cond, mutex, abstime)
 
   /* We have one new user of the condvar.  */
   ++cond->__data.__total_seq;
+  ++cond->__data.__futex;
 
   /* Remember the mutex we are using here.  If there is already a
      different address store this is a bad user bug.  Do not store
@@ -89,27 +90,17 @@ __pthread_cond_timedwait (cond, mutex, abstime)
   /* Remember the broadcast counter.  */
   cbuffer.bc_seq = cond->__data.__broadcast_seq;
 
-  /* The futex syscall operates on a 32-bit word.  That is fine, we
-     just use the low 32 bits of the sequence counter.  */
-#if BYTE_ORDER == LITTLE_ENDIAN
-  int *futex = ((int *) (&cond->__data.__wakeup_seq));
-#elif BYTE_ORDER == BIG_ENDIAN
-  int *futex = ((int *) (&cond->__data.__wakeup_seq)) + 1;
-#else
-# error "No valid byte order"
-#endif
-
   while (1)
     {
       struct timespec rt;
       {
 #ifdef __NR_clock_gettime
 	INTERNAL_SYSCALL_DECL (err);
-	int val;
-	val = INTERNAL_SYSCALL (clock_gettime, err, 2,
+	int ret;
+	ret = INTERNAL_SYSCALL (clock_gettime, err, 2,
 				cond->__data.__clock, &rt);
 # ifndef __ASSUME_POSIX_TIMERS
-	if (__builtin_expect (INTERNAL_SYSCALL_ERROR_P (val, err), 0))
+	if (__builtin_expect (INTERNAL_SYSCALL_ERROR_P (ret, err), 0))
 	  {
 	    struct timeval tv;
 	    (void) gettimeofday (&tv, NULL);
@@ -149,15 +140,17 @@ __pthread_cond_timedwait (cond, mutex, abstime)
 	  goto timeout;
 	}
 
+      unsigned int futex_val = cond->__data.__futex;
+
       /* Prepare to wait.  Release the condvar futex.  */
       lll_mutex_unlock (cond->__data.__lock);
 
       /* Enable asynchronous cancellation.  Required by the standard.  */
       cbuffer.oldtype = __pthread_enable_asynccancel ();
 
-      /* Wait until woken by signal or broadcast.  Note that we
-	 truncate the 'val' value to 32 bits.  */
-      err = lll_futex_timed_wait (futex, (unsigned int) val, &rt);
+      /* Wait until woken by signal or broadcast.  */
+      err = lll_futex_timed_wait (&cond->__data.__futex,
+				  futex_val, &rt);
 
       /* Disable asynchronous cancellation.  */
       __pthread_disable_asynccancel (cbuffer.oldtype);
@@ -180,6 +173,7 @@ __pthread_cond_timedwait (cond, mutex, abstime)
 	timeout:
 	  /* Yep.  Adjust the counters.  */
 	  ++cond->__data.__wakeup_seq;
+	  ++cond->__data.__futex;
 
 	  /* The error value.  */
 	  result = ETIMEDOUT;
diff --git a/nptl/sysdeps/pthread/pthread_cond_wait.c b/nptl/sysdeps/pthread/pthread_cond_wait.c
index 128f5a2d99..45187b5240 100644
--- a/nptl/sysdeps/pthread/pthread_cond_wait.c
+++ b/nptl/sysdeps/pthread/pthread_cond_wait.c
@@ -52,20 +52,14 @@ __condvar_cleanup (void *arg)
 	 appropriately.  */
       ++cbuffer->cond->__data.__wakeup_seq;
       ++cbuffer->cond->__data.__woken_seq;
+      ++cbuffer->cond->__data.__futex;
     }
 
   /* We are done.  */
   lll_mutex_unlock (cbuffer->cond->__data.__lock);
 
   /* Wake everybody to make sure no condvar signal gets lost.  */
-#if BYTE_ORDER == LITTLE_ENDIAN
-  int *futex = ((int *) (&cbuffer->cond->__data.__wakeup_seq));
-#elif BYTE_ORDER == BIG_ENDIAN
-  int *futex = ((int *) (&cbuffer->cond->__data.__wakeup_seq)) + 1;
-#else
-# error "No valid byte order"
-#endif
-  lll_futex_wake (futex, INT_MAX);
+  lll_futex_wake (&cbuffer->cond->__data.__futex, INT_MAX);
 
   /* Get the mutex before returning unless asynchronous cancellation
      is in effect.  */
@@ -95,6 +89,7 @@ __pthread_cond_wait (cond, mutex)
 
   /* We have one new user of the condvar.  */
   ++cond->__data.__total_seq;
+  ++cond->__data.__futex;
 
   /* Remember the mutex we are using here.  If there is already a
      different address store this is a bad user bug.  Do not store
@@ -118,27 +113,18 @@ __pthread_cond_wait (cond, mutex)
   /* Remember the broadcast counter.  */
   cbuffer.bc_seq = cond->__data.__broadcast_seq;
 
-  /* The futex syscall operates on a 32-bit word.  That is fine, we
-     just use the low 32 bits of the sequence counter.  */
-#if BYTE_ORDER == LITTLE_ENDIAN
-  int *futex = ((int *) (&cond->__data.__wakeup_seq));
-#elif BYTE_ORDER == BIG_ENDIAN
-  int *futex = ((int *) (&cond->__data.__wakeup_seq)) + 1;
-#else
-# error "No valid byte order"
-#endif
-
   do
     {
+      unsigned int futex_val = cond->__data.__futex;
+
       /* Prepare to wait.  Release the condvar futex.  */
       lll_mutex_unlock (cond->__data.__lock);
 
       /* Enable asynchronous cancellation.  Required by the standard.  */
       cbuffer.oldtype = __pthread_enable_asynccancel ();
 
-      /* Wait until woken by signal or broadcast.  Note that we
-	 truncate the 'val' value to 32 bits.  */
-      lll_futex_wait (futex, (unsigned int) val);
+      /* Wait until woken by signal or broadcast.  */
+      lll_futex_wait (&cond->__data.__futex, futex_val);
 
       /* Disable asynchronous cancellation.  */
       __pthread_disable_asynccancel (cbuffer.oldtype);
diff --git a/nptl/sysdeps/unix/sysv/linux/alpha/bits/pthreadtypes.h b/nptl/sysdeps/unix/sysv/linux/alpha/bits/pthreadtypes.h
index 1bb2968120..62c853cee9 100644
--- a/nptl/sysdeps/unix/sysv/linux/alpha/bits/pthreadtypes.h
+++ b/nptl/sysdeps/unix/sysv/linux/alpha/bits/pthreadtypes.h
@@ -76,11 +76,12 @@ typedef union
   struct
   {
     int __lock;
-    int __clock;
+    unsigned int __futex;
     unsigned long long int __total_seq;
     unsigned long long int __wakeup_seq;
     unsigned long long int __woken_seq;
     void *__mutex;
+    int __clock;
     unsigned int __broadcast_seq;
   } __data;
   char __size[__SIZEOF_PTHREAD_COND_T];
diff --git a/nptl/sysdeps/unix/sysv/linux/alpha/lowlevellock.h b/nptl/sysdeps/unix/sysv/linux/alpha/lowlevellock.h
index a7079a8490..fd4a7ca4bb 100644
--- a/nptl/sysdeps/unix/sysv/linux/alpha/lowlevellock.h
+++ b/nptl/sysdeps/unix/sysv/linux/alpha/lowlevellock.h
@@ -30,6 +30,7 @@
 #define FUTEX_WAIT		0
 #define FUTEX_WAKE		1
 #define FUTEX_REQUEUE		3
+#define FUTEX_CMP_REQUEUE	4
 
 /* Initializer for compatibility lock.	*/
 #define LLL_MUTEX_LOCK_INITIALIZER (0)
@@ -61,14 +62,15 @@
     INTERNAL_SYSCALL_ERROR_P (__ret, __err)? -__ret : __ret;		      \
   })
 
-#define lll_futex_requeue(futexp, nr_wake, nr_move, mutex) \
+/* Returns non-zero if error happened, zero if success.  */
+#define lll_futex_requeue(futexp, nr_wake, nr_move, mutex, val) \
   ({									      \
     INTERNAL_SYSCALL_DECL (__err);					      \
     long int __ret;							      \
-    __ret = INTERNAL_SYSCALL (futex, __err, 5,				      \
-			      (futexp), FUTEX_REQUEUE, (nr_wake), (nr_move),  \
-			      (mutex));					      \
-    INTERNAL_SYSCALL_ERROR_P (__ret, __err)? -__ret : __ret;		      \
+    __ret = INTERNAL_SYSCALL (futex, __err, 6,				      \
+			      (futexp), FUTEX_CMP_REQUEUE, (nr_wake),	      \
+			      (nr_move), (mutex), (val));		      \
+    INTERNAL_SYSCALL_ERROR_P (__ret, __err);				      \
   })
 
 
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/bits/pthreadtypes.h b/nptl/sysdeps/unix/sysv/linux/i386/bits/pthreadtypes.h
index 9da84c66d6..8d7858a071 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/bits/pthreadtypes.h
+++ b/nptl/sysdeps/unix/sysv/linux/i386/bits/pthreadtypes.h
@@ -76,11 +76,12 @@ typedef union
   struct
   {
     int __lock;
-    int __clock;
+    unsigned int __futex;
     unsigned long long int __total_seq;
     unsigned long long int __wakeup_seq;
     unsigned long long int __woken_seq;
     void *__mutex;
+    int __clock;
     unsigned int __broadcast_seq;
   } __data;
   char __size[__SIZEOF_PTHREAD_COND_T];
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_broadcast.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_broadcast.S
index 456f3dbfb1..5471c1c927 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_broadcast.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_broadcast.S
@@ -32,6 +32,7 @@
 #define FUTEX_WAIT		0
 #define FUTEX_WAKE		1
 #define FUTEX_REQUEUE		3
+#define FUTEX_CMP_REQUEUE	4
 
 #define EINVAL			22
 
@@ -47,8 +48,9 @@ __pthread_cond_broadcast:
 	pushl	%ebx
 	pushl	%esi
 	pushl	%edi
+	pushl	%ebp
 
-	movl	16(%esp), %ebx
+	movl	20(%esp), %ebx
 
 	/* Get internal lock.  */
 	movl	$1, %edx
@@ -61,29 +63,31 @@ __pthread_cond_broadcast:
 #endif
 	jnz	1f
 
-2:	addl	$wakeup_seq, %ebx
-	movl	total_seq+4-wakeup_seq(%ebx), %eax
-	movl	total_seq-wakeup_seq(%ebx), %ecx
-	cmpl	4(%ebx), %eax
+2:	addl	$cond_futex, %ebx
+	movl	total_seq+4-cond_futex(%ebx), %eax
+	movl	total_seq-cond_futex(%ebx), %ebp
+	cmpl	wakeup_seq+4-cond_futex(%ebx), %eax
 	ja	3f
 	jb	4f
-	cmpl	(%ebx), %ecx
+	cmpl	wakeup_seq-cond_futex(%ebx), %ebp
 	jna	4f
 
 	/* Cause all currently waiting threads to recognize they are
 	   woken up.  */
-3:	movl	%ecx, (%ebx)
-	movl	%eax, 4(%ebx)
-	movl	%ecx, woken_seq-wakeup_seq(%ebx)
-	movl	%eax, woken_seq-wakeup_seq+4(%ebx)
-	addl	$1, broadcast_seq-wakeup_seq(%ebx)
+3:	movl	%ebp, wakeup_seq-cond_futex(%ebx)
+	movl	%eax, wakeup_seq-cond_futex+4(%ebx)
+	movl	%ebp, woken_seq-cond_futex(%ebx)
+	movl	%eax, woken_seq-cond_futex+4(%ebx)
+	addl	%ebp, %ebp
+	addl	$1, broadcast_seq-cond_futex(%ebx)
+	movl	%ebp, (%ebx)
 
 	/* Get the address of the mutex used.  */
-	movl	dep_mutex-wakeup_seq(%ebx), %edi
+	movl	dep_mutex-cond_futex(%ebx), %edi
 
 	/* Unlock.  */
 	LOCK
-	subl	$1, cond_lock-wakeup_seq(%ebx)
+	subl	$1, cond_lock-cond_futex(%ebx)
 	jne	7f
 
 	/* Don't use requeue for pshared condvars.  */
@@ -91,7 +95,7 @@ __pthread_cond_broadcast:
 	je	9f
 
 	/* Wake up all threads.  */
-	movl	$FUTEX_REQUEUE, %ecx
+	movl	$FUTEX_CMP_REQUEUE, %ecx
 	movl	$SYS_futex, %eax
 	movl	$0x7fffffff, %esi
 	movl	$1, %edx
@@ -99,14 +103,18 @@ __pthread_cond_broadcast:
 # if MUTEX_FUTEX != 0
 	addl	$MUTEX_FUTEX, %edi
 # endif
-	ENTER_KERNEL
+/* FIXME: Until Ingo fixes the 4G/4G vDSO, 6-argument syscalls are broken for
+   sysenter, so int $0x80 is used below instead of ENTER_KERNEL.  */
+	int	$0x80
 
-#ifndef __ASSUME_FUTEX_REQUEUE
-	cmpl	$-EINVAL, %eax
-	je	9f
-#endif
+	/* For any kind of error, which mainly is EAGAIN, we try again
+	   with WAKE.  The general test also covers running on old
+	   kernels.  */
+	cmpl	$0xfffff001, %eax
+	jae	9f
 
 10:	xorl	%eax, %eax
+	popl	%ebp
 	popl	%edi
 	popl	%esi
 	popl	%ebx
@@ -115,10 +123,11 @@ __pthread_cond_broadcast:
 	.align	16
 	/* Unlock.  */
 4:	LOCK
-	subl	$1, cond_lock-wakeup_seq(%ebx)
+	subl	$1, cond_lock-cond_futex(%ebx)
 	jne	5f
 
 6:	xorl	%eax, %eax
+	popl	%ebp
 	popl	%edi
 	popl	%esi
 	popl	%ebx
@@ -135,12 +144,12 @@ __pthread_cond_broadcast:
 	jmp	2b
 
 	/* Unlock in loop requires waekup.  */
-5:	leal	cond_lock-wakeup_seq(%ebx), %eax
+5:	leal	cond_lock-cond_futex(%ebx), %eax
 	call	__lll_mutex_unlock_wake
 	jmp	6b
 
 	/* Unlock in loop requires waekup.  */
-7:	leal	cond_lock-wakeup_seq(%ebx), %eax
+7:	leal	cond_lock-cond_futex(%ebx), %eax
 	call	__lll_mutex_unlock_wake
 	jmp	8b
 
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_signal.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_signal.S
index 4722b4c0e0..3c5a1db59c 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_signal.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_signal.S
@@ -60,22 +60,23 @@ __pthread_cond_signal:
 #endif
 	jnz	1f
 
-2:	leal	wakeup_seq(%edi), %ebx
+2:	leal	cond_futex(%edi), %ebx
 	movl	total_seq+4(%edi), %eax
 	movl	total_seq(%edi), %ecx
-	cmpl	4(%ebx), %eax
+	cmpl	wakeup_seq+4(%edi), %eax
 #if cond_lock != 0
 	/* Must use leal to preserve the flags.  */
 	leal	cond_lock(%edi), %edi
 #endif
 	ja	3f
 	jb	4f
-	cmpl	(%ebx), %ecx
+	cmpl	wakeup_seq-cond_futex(%ebx), %ecx
 	jbe	4f
 
 	/* Bump the wakeup number.  */
-3:	addl	$1, (%ebx)
-	adcl	$0, 4(%ebx)
+3:	addl	$1, wakeup_seq-cond_futex(%ebx)
+	adcl	$0, wakeup_seq-cond_futex+4(%ebx)
+	addl	$1, (%ebx)
 
 	/* Wake up one thread.  */
 	movl	$FUTEX_WAKE, %ecx
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S
index 74e3172ab0..3fe7f8c17a 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S
@@ -79,6 +79,7 @@ __pthread_cond_wait:
 
 	addl	$1, total_seq(%ebx)
 	adcl	$0, total_seq+4(%ebx)
+	addl	$1, cond_futex(%ebx)
 
 #define FRAME_SIZE 16
 	subl	$FRAME_SIZE, %esp
@@ -92,8 +93,10 @@ __pthread_cond_wait:
 	movl	%edx, 8(%esp)
 	movl	%eax, 12(%esp)
 
+8:	movl	cond_futex(%ebx), %edi
+
 	/* Unlock.  */
-8:	LOCK
+	LOCK
 #if cond_lock == 0
 	subl	$1, (%ebx)
 #else
@@ -107,12 +110,12 @@ __pthread_cond_wait:
 
 	movl	%esi, %ecx	/* movl $FUTEX_WAIT, %ecx */
 	movl	%edi, %edx
-	addl	$wakeup_seq, %ebx
-.Ladd_wakeup:
+	addl	$cond_futex, %ebx
+.Ladd_cond_futex:
 	movl	$SYS_futex, %eax
 	ENTER_KERNEL
-	subl	$wakeup_seq, %ebx
-.Lsub_wakeup:
+	subl	$cond_futex, %ebx
+.Lsub_cond_futex:
 
 	movl	(%esp), %eax
 	call	__pthread_disable_asynccancel
@@ -246,7 +249,7 @@ versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait,
 
 	.type	__condvar_w_cleanup2, @function
 __condvar_w_cleanup2:
-	subl	$wakeup_seq, %ebx
+	subl	$cond_futex, %ebx
 	.size	__condvar_w_cleanup2, .-__condvar_w_cleanup2
 .LSbl4:
 	.type	__condvar_w_cleanup, @function
@@ -278,6 +281,8 @@ __condvar_w_cleanup:
 	addl	$1, wakeup_seq(%ebx)
 	adcl	$0, wakeup_seq+4(%ebx)
 
+	addl	$1, cond_futex(%ebx)
+
 	addl	$1, woken_seq(%ebx)
 	adcl	$0, woken_seq+4(%ebx)
 
@@ -297,7 +302,7 @@ __condvar_w_cleanup:
 	call	__lll_mutex_unlock_wake
 
 	/* Wake up all waiters to make sure no signal gets lost.  */
-2:	addl	$wakeup_seq, %ebx
+2:	addl	$cond_futex, %ebx
 	movl	$FUTEX_WAKE, %ecx
 	movl	$SYS_futex, %eax
 	movl	$0x7fffffff, %edx
@@ -323,15 +328,15 @@ __condvar_w_cleanup:
 	.uleb128 .Lcstend-.Lcstbegin
 .Lcstbegin:
 	.long	.LcleanupSTART-.LSTARTCODE
-	.long	.Ladd_wakeup-.LcleanupSTART
+	.long	.Ladd_cond_futex-.LcleanupSTART
 	.long	__condvar_w_cleanup-.LSTARTCODE
 	.uleb128  0
-	.long	.Ladd_wakeup-.LSTARTCODE
-	.long	.Lsub_wakeup-.Ladd_wakeup
+	.long	.Ladd_cond_futex-.LSTARTCODE
+	.long	.Lsub_cond_futex-.Ladd_cond_futex
 	.long	__condvar_w_cleanup2-.LSTARTCODE
 	.uleb128  0
-	.long	.Lsub_wakeup-.LSTARTCODE
-	.long	.LcleanupEND-.Lsub_wakeup
+	.long	.Lsub_cond_futex-.LSTARTCODE
+	.long	.LcleanupEND-.Lsub_cond_futex
 	.long	__condvar_w_cleanup-.LSTARTCODE
 	.uleb128  0
 	.long	.LcallUR-.LSTARTCODE
diff --git a/nptl/sysdeps/unix/sysv/linux/ia64/bits/pthreadtypes.h b/nptl/sysdeps/unix/sysv/linux/ia64/bits/pthreadtypes.h
index bdf3ee741b..958af47162 100644
--- a/nptl/sysdeps/unix/sysv/linux/ia64/bits/pthreadtypes.h
+++ b/nptl/sysdeps/unix/sysv/linux/ia64/bits/pthreadtypes.h
@@ -76,11 +76,12 @@ typedef union
   struct
   {
     int __lock;
-    int __clock;
+    unsigned int __futex;
     unsigned long long int __total_seq;
     unsigned long long int __wakeup_seq;
     unsigned long long int __woken_seq;
     void *__mutex;
+    int __clock;
     unsigned int __broadcast_seq;
   } __data;
   char __size[__SIZEOF_PTHREAD_COND_T];
diff --git a/nptl/sysdeps/unix/sysv/linux/ia64/lowlevellock.h b/nptl/sysdeps/unix/sysv/linux/ia64/lowlevellock.h
index 6ed21fdc14..e13358ffef 100644
--- a/nptl/sysdeps/unix/sysv/linux/ia64/lowlevellock.h
+++ b/nptl/sysdeps/unix/sysv/linux/ia64/lowlevellock.h
@@ -30,6 +30,7 @@
 #define FUTEX_WAIT		0
 #define FUTEX_WAKE		1
 #define FUTEX_REQUEUE		3
+#define FUTEX_CMP_REQUEUE	4
 
 /* Delay in spinlock loop.  */
 #define BUSY_WAIT_NOP          asm ("hint @pause")
@@ -52,11 +53,13 @@
    _r10 == -1 ? -_retval : _retval;					\
 })
 
-#define lll_futex_requeue(ftx, nr_wake, nr_move, mutex)			     \
+/* Returns non-zero if error happened, zero if success.  */
+#define lll_futex_requeue(ftx, nr_wake, nr_move, mutex, val)		     \
 ({									     \
-   DO_INLINE_SYSCALL(futex, 5, (long) (ftx), FUTEX_REQUEUE, (int) (nr_wake), \
-		     (int) (nr_move), (long) (mutex));			     \
-   _r10 == -1 ? -_retval : _retval;					     \
+   DO_INLINE_SYSCALL(futex, 6, (long) (ftx), FUTEX_CMP_REQUEUE,		     \
+		     (int) (nr_wake), (int) (nr_move), (long) (mutex),	     \
+		     (int) val);					     \
+   _r10 == -1;								     \
 })
 
 
diff --git a/nptl/sysdeps/unix/sysv/linux/lowlevelcond.sym b/nptl/sysdeps/unix/sysv/linux/lowlevelcond.sym
index d0559da86f..2e193e6d2e 100644
--- a/nptl/sysdeps/unix/sysv/linux/lowlevelcond.sym
+++ b/nptl/sysdeps/unix/sysv/linux/lowlevelcond.sym
@@ -4,6 +4,7 @@
 --
 
 cond_lock	offsetof (pthread_cond_t, __data.__lock)
+cond_futex	offsetof (pthread_cond_t, __data.__futex)
 cond_clock	offsetof (pthread_cond_t, __data.__clock)
 total_seq	offsetof (pthread_cond_t, __data.__total_seq)
 wakeup_seq	offsetof (pthread_cond_t, __data.__wakeup_seq)
diff --git a/nptl/sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h b/nptl/sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h
index 794d888a13..5a000d30af 100644
--- a/nptl/sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h
+++ b/nptl/sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h
@@ -96,11 +96,12 @@ typedef union
   struct
   {
     int __lock;
-    int __clock;
+    unsigned int __futex;
     unsigned long long int __total_seq;
     unsigned long long int __wakeup_seq;
     unsigned long long int __woken_seq;
     void *__mutex;
+    int __clock;
     unsigned int __broadcast_seq;
   } __data;
   char __size[__SIZEOF_PTHREAD_COND_T];
diff --git a/nptl/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h b/nptl/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h
index 8e3f46d534..ead89ba931 100644
--- a/nptl/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h
+++ b/nptl/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h
@@ -30,6 +30,7 @@
 #define FUTEX_WAIT		0
 #define FUTEX_WAKE		1
 #define FUTEX_REQUEUE		3
+#define FUTEX_CMP_REQUEUE	4
 
 /* Initializer for compatibility lock.	*/
 #define LLL_MUTEX_LOCK_INITIALIZER (0)
@@ -64,15 +65,16 @@
     INTERNAL_SYSCALL_ERROR_P (__ret, __err) ? -__ret : __ret;		      \
   })
 
-#define lll_futex_requeue(futexp, nr_wake, nr_move, mutex) \
+/* Returns non-zero if error happened, zero if success.  */
+#define lll_futex_requeue(futexp, nr_wake, nr_move, mutex, val) \
   ({									      \
     INTERNAL_SYSCALL_DECL (__err);					      \
     long int __ret;							      \
 									      \
-    __ret = INTERNAL_SYSCALL (futex, __err, 5,				      \
-			      (futexp), FUTEX_REQUEUE, (nr_wake), (nr_move),  \
-			      (mutex));					      \
-    INTERNAL_SYSCALL_ERROR_P (__ret, __err) ? -__ret : __ret;		      \
+    __ret = INTERNAL_SYSCALL (futex, __err, 6,				      \
+			      (futexp), FUTEX_CMP_REQUEUE, (nr_wake),	      \
+			      (nr_move), (mutex), (val));		      \
+    INTERNAL_SYSCALL_ERROR_P (__ret, __err);				      \
   })
 
 #ifdef UP
diff --git a/nptl/sysdeps/unix/sysv/linux/s390/bits/pthreadtypes.h b/nptl/sysdeps/unix/sysv/linux/s390/bits/pthreadtypes.h
index 02b061b05c..142286451c 100644
--- a/nptl/sysdeps/unix/sysv/linux/s390/bits/pthreadtypes.h
+++ b/nptl/sysdeps/unix/sysv/linux/s390/bits/pthreadtypes.h
@@ -95,11 +95,12 @@ typedef union
   struct
   {
     int __lock;
-    int __clock;
+    unsigned int __futex;
     unsigned long long int __total_seq;
     unsigned long long int __wakeup_seq;
     unsigned long long int __woken_seq;
     void *__mutex;
+    int __clock;
     unsigned int __broadcast_seq;
   } __data;
   char __size[__SIZEOF_PTHREAD_COND_T];
diff --git a/nptl/sysdeps/unix/sysv/linux/s390/lowlevellock.h b/nptl/sysdeps/unix/sysv/linux/s390/lowlevellock.h
index 8b460eb98f..60679cca86 100644
--- a/nptl/sysdeps/unix/sysv/linux/s390/lowlevellock.h
+++ b/nptl/sysdeps/unix/sysv/linux/s390/lowlevellock.h
@@ -29,6 +29,7 @@
 #define FUTEX_WAIT		0
 #define FUTEX_WAKE		1
 #define FUTEX_REQUEUE		3
+#define FUTEX_CMP_REQUEUE	4
 
 /* Initializer for compatibility lock.	*/
 #define LLL_MUTEX_LOCK_INITIALIZER (0)
@@ -82,22 +83,29 @@
   })
 
 
-#define lll_futex_requeue(futex, nr_wake, nr_move, mutex) \
+/* Returns non-zero if error happened, zero if success.  */
+#if 0
+/* FIXME: s390 only supports up to 5 argument syscalls.  Once FUTEX_CMP_REQUEUE
+   kernel interface for s390 is finalized, adjust this.  */
+#define lll_futex_requeue(futex, nr_wake, nr_move, mutex, val) \
   ({									      \
-     register unsigned long int __r2 asm ("2") = (unsigned long int) (futex); \
-     register unsigned long int __r3 asm ("3") = FUTEX_REQUEUE;		      \
-     register unsigned long int __r4 asm ("4") = (long int) (nr_wake);	      \
-     register unsigned long int __r5 asm ("5") = (long int) (nr_move);	      \
-     register unsigned long int __r6 asm ("6") = (unsigned long int) (mutex); \
-     register unsigned long __result asm ("2");				      \
+    register unsigned long int __r2 asm ("2") = (unsigned long int) (futex);  \
+    register unsigned long int __r3 asm ("3") = FUTEX_CMP_REQUEUE;	      \
+    register unsigned long int __r4 asm ("4") = (long int) (nr_wake);	      \
+    register unsigned long int __r5 asm ("5") = (long int) (nr_move);	      \
+    register unsigned long int __r6 asm ("6") = (unsigned long int) (mutex);  \
+    register unsigned long __result asm ("2");				      \
 									      \
     __asm __volatile ("svc %b1"						      \
 		      : "=d" (__result)					      \
 		      : "i" (SYS_futex), "0" (__r2), "d" (__r3),	      \
 			"d" (__r4), "d" (__r5), "d" (__r6)		      \
 		      : "cc", "memory" );				      \
-    __result;								      \
+    __result > -4096UL;							      \
   })
+#else
+#define lll_futex_requeue(futex, nr_wake, nr_move, mutex, val) 1
+#endif
 
 
 #define lll_compare_and_swap(futex, oldval, newval, operation) \
diff --git a/nptl/sysdeps/unix/sysv/linux/sh/lowlevellock.h b/nptl/sysdeps/unix/sysv/linux/sh/lowlevellock.h
index 00ff1b20fb..d9376d45a0 100644
--- a/nptl/sysdeps/unix/sysv/linux/sh/lowlevellock.h
+++ b/nptl/sysdeps/unix/sysv/linux/sh/lowlevellock.h
@@ -89,6 +89,7 @@ extern int __lll_mutex_unlock_wake (int *__futex) attribute_hidden;
 	    __asm __volatile ("\
 		.align 2\n\
 		mova 1f,r0\n\
+		nop\n\
 		mov r15,r1\n\
 		mov #-8,r15\n\
 	     0: mov.l @%2,%0\n\
@@ -108,6 +109,7 @@ extern int __lll_mutex_unlock_wake (int *__futex) attribute_hidden;
 	    __asm __volatile ("\
 		.align 2\n\
 		mova 1f,r0\n\
+		nop\n\
 		mov r15,r1\n\
 		mov #-8,r15\n\
 	     0: mov.l @%2,%0\n\
@@ -125,6 +127,7 @@ extern int __lll_mutex_unlock_wake (int *__futex) attribute_hidden;
      __asm __volatile ("\
 	.align 2\n\
 	mova 1f,r0\n\
+	nop\n\
 	mov r15,r1\n\
 	mov #-8,r15\n\
      0: mov.l @%2,%0\n\
diff --git a/nptl/sysdeps/unix/sysv/linux/sparc/bits/pthreadtypes.h b/nptl/sysdeps/unix/sysv/linux/sparc/bits/pthreadtypes.h
index b0c1372bba..e86dd000a1 100644
--- a/nptl/sysdeps/unix/sysv/linux/sparc/bits/pthreadtypes.h
+++ b/nptl/sysdeps/unix/sysv/linux/sparc/bits/pthreadtypes.h
@@ -96,11 +96,12 @@ typedef union
   struct
   {
     int __lock;
-    int __clock;
+    unsigned int __futex;
     unsigned long long int __total_seq;
     unsigned long long int __wakeup_seq;
     unsigned long long int __woken_seq;
     void *__mutex;
+    int __clock;
     unsigned int __broadcast_seq;
   } __data;
   char __size[__SIZEOF_PTHREAD_COND_T];
diff --git a/nptl/sysdeps/unix/sysv/linux/sparc/lowlevellock.h b/nptl/sysdeps/unix/sysv/linux/sparc/lowlevellock.h
index 05bc5a2974..4626aec524 100644
--- a/nptl/sysdeps/unix/sysv/linux/sparc/lowlevellock.h
+++ b/nptl/sysdeps/unix/sysv/linux/sparc/lowlevellock.h
@@ -29,6 +29,7 @@
 #define FUTEX_WAIT		0
 #define FUTEX_WAKE		1
 #define FUTEX_REQUEUE		3
+#define FUTEX_CMP_REQUEUE	4
 
 /* Initializer for compatibility lock.	*/
 #define LLL_MUTEX_LOCK_INITIALIZER (0)
@@ -63,15 +64,16 @@
     __ret;								      \
   })
 
-#define lll_futex_requeue(futexp, nr_wake, nr_move, mutex) \
+/* Returns non-zero if error happened, zero if success.  */
+#define lll_futex_requeue(futexp, nr_wake, nr_move, mutex, val) \
   ({									      \
     INTERNAL_SYSCALL_DECL (__err);					      \
     long int __ret;							      \
 									      \
-    __ret = INTERNAL_SYSCALL (futex, __err, 5,				      \
-			      (futexp), FUTEX_REQUEUE, (nr_wake), (nr_move),  \
-			      (mutex));					      \
-    __ret;								      \
+    __ret = INTERNAL_SYSCALL (futex, __err, 6,				      \
+			      (futexp), FUTEX_CMP_REQUEUE, (nr_wake),	      \
+			      (nr_move), (mutex), (val));		      \
+    INTERNAL_SYSCALL_ERROR_P (__ret, __err);				      \
   })
 
 #ifdef __sparc32_atomic_do_lock
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/bits/pthreadtypes.h b/nptl/sysdeps/unix/sysv/linux/x86_64/bits/pthreadtypes.h
index e29c77529e..661e476aa7 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/bits/pthreadtypes.h
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/bits/pthreadtypes.h
@@ -76,11 +76,12 @@ typedef union
   struct
   {
     int __lock;
-    int __clock;
+    unsigned int __futex;
     unsigned long long int __total_seq;
     unsigned long long int __wakeup_seq;
     unsigned long long int __woken_seq;
     void *__mutex;
+    int __clock;
     unsigned int __broadcast_seq;
   } __data;
   char __size[__SIZEOF_PTHREAD_COND_T];
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S
index 6d29d761fa..e8d7bd9bb6 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S
@@ -32,6 +32,7 @@
 #define FUTEX_WAIT		0
 #define FUTEX_WAKE		1
 #define FUTEX_REQUEUE		3
+#define FUTEX_CMP_REQUEUE	4
 
 #define EINVAL			22
 
@@ -55,39 +56,42 @@ __pthread_cond_broadcast:
 #endif
 	jnz	1f
 
-2:	addq	$wakeup_seq, %rdi
-	movq	total_seq-wakeup_seq(%rdi), %rcx
-	cmpq	(%rdi), %rcx
+2:	addq	$cond_futex, %rdi
+	movq	total_seq-cond_futex(%rdi), %r9
+	cmpq	wakeup_seq-cond_futex(%rdi), %r9
 	jna	4f
 
 	/* Cause all currently waiting threads to recognize they are
 	   woken up.  */
-	movq	%rcx, (%rdi)
-	movq	%rcx, woken_seq-wakeup_seq(%rdi)
-	incl	broadcast_seq-wakeup_seq(%rdi)
+	movq	%r9, wakeup_seq-cond_futex(%rdi)
+	movq	%r9, woken_seq-cond_futex(%rdi)
+	addq	%r9, %r9
+	movl	%r9d, (%rdi)
+	incl	broadcast_seq-cond_futex(%rdi)
 
 	/* Get the address of the mutex used.  */
-	movq	dep_mutex-wakeup_seq(%rdi), %r8
+	movq	dep_mutex-cond_futex(%rdi), %r8
 
 	/* Unlock.  */
 	LOCK
-	decl	cond_lock-wakeup_seq(%rdi)
+	decl	cond_lock-cond_futex(%rdi)
 	jne	7f
 
 8:	cmpq	$-1, %r8
 	je	9f
 
 	/* Wake up all threads.  */
-	movq	$FUTEX_REQUEUE, %rsi
+	movq	$FUTEX_CMP_REQUEUE, %rsi
 	movq	$SYS_futex, %rax
 	movl	$1, %edx
 	movq	$0x7fffffff, %r10
 	syscall
 
-#ifndef __ASSUME_FUTEX_REQUEUE
-	cmpq	$-EINVAL, %rax
-	je	9f
-#endif
+	/* For any kind of error, which mainly is EAGAIN, we try again
+	   with WAKE.  The general test also covers running on old
+	   kernels.  */
+	cmpq	$-4095, %rax
+	jae	9f
 
 10:	xorl	%eax, %eax
 	retq
@@ -95,7 +99,7 @@ __pthread_cond_broadcast:
 	.align	16
 	/* Unlock.  */
 4:	LOCK
-	decl	cond_lock-wakeup_seq(%rdi)
+	decl	cond_lock-cond_futex(%rdi)
 	jne	5f
 
 6:	xorl	%eax, %eax
@@ -113,14 +117,14 @@ __pthread_cond_broadcast:
 	jmp	2b
 
 	/* Unlock in loop requires wakeup.  */
-5:	addq	$cond_lock-wakeup_seq, %rdi
+5:	addq	$cond_lock-cond_futex, %rdi
 	callq	__lll_mutex_unlock_wake
 	jmp	6b
 
 	/* Unlock in loop requires wakeup.  */
-7:	addq	$cond_lock-wakeup_seq, %rdi
+7:	addq	$cond_lock-cond_futex, %rdi
 	callq	__lll_mutex_unlock_wake
-	subq	$cond_lock-wakeup_seq, %rdi
+	subq	$cond_lock-cond_futex, %rdi
 	jmp	8b
 
 9:	/* The futex requeue functionality is not available.  */
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S
index 92eadfc433..62bb74cc1a 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
 
@@ -56,13 +56,14 @@ __pthread_cond_signal:
 #endif
 	jnz	1f
 
-2:	addq	$wakeup_seq, %rdi
+2:	addq	$cond_futex, %rdi
 	movq	total_seq(%r8), %rcx
-	cmpq	(%rdi), %rcx
+	cmpq	wakeup_seq(%r8), %rcx
 	jbe	4f
 
 	/* Bump the wakeup number.  */
-	addq	$1, (%rdi)
+	addq	$1, wakeup_seq(%r8)
+	addl	$1, (%rdi)
 
 	/* Wake up one thread.  */
 	movq	$FUTEX_WAKE, %rsi
@@ -95,11 +96,7 @@ __pthread_cond_signal:
 
 	/* Unlock in loop requires wakeup.  */
 5:
-#if cond_lock != 0
-	addq	$cond_lock-wakeup_seq, %rdi
-#else
 	movq	%r8, %rdi
-#endif
 	callq	__lll_mutex_unlock_wake
 	jmp	6b
 	.size	__pthread_cond_signal, .-__pthread_cond_signal
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
index 80cbf7e430..e75f05e07f 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
@@ -105,6 +105,7 @@ __pthread_cond_timedwait:
 
 	movq	8(%rsp), %rdi
 	incq	total_seq(%rdi)
+	incl	cond_futex(%rdi)
 
 	/* Install cancellation handler.  */
 #ifdef PIC
@@ -118,9 +119,9 @@ __pthread_cond_timedwait:
 
 	/* Get and store current wakeup_seq value.  */
 	movq	8(%rsp), %rdi
-	movq	wakeup_seq(%rdi), %r12
+	movq	wakeup_seq(%rdi), %r9
 	movl	broadcast_seq(%rdi), %edx
-	movq	%r12, 40(%rsp)
+	movq	%r9, 40(%rsp)
 	movl	%edx, 4(%rsp)
 
 	/* Get the current time.  */
@@ -172,6 +173,8 @@ __pthread_cond_timedwait:
 21:	movq	%rcx, 24(%rsp)
 	movq	%rdx, 32(%rsp)
 
+	movl	cond_futex(%rdi), %r12d
+
 	/* Unlock.  */
 	LOCK
 #if cond_lock == 0
@@ -187,7 +190,7 @@ __pthread_cond_timedwait:
 	leaq	24(%rsp), %r10
 	xorq	%rsi, %rsi	/* movq $FUTEX_WAIT, %rsi */
 	movq	%r12, %rdx
-	addq	$wakeup_seq-cond_lock, %rdi
+	addq	$cond_futex, %rdi
 	movq	$SYS_futex, %rax
 	syscall
 	movq	%rax, %r14
@@ -211,21 +214,22 @@ __pthread_cond_timedwait:
 
 	movq	woken_seq(%rdi), %rax
 
-	movq	wakeup_seq(%rdi), %r12
+	movq	wakeup_seq(%rdi), %r9
 
 	cmpl	4(%rsp), %edx
 	jne	23f
 
-	cmpq	40(%rsp), %r12
+	cmpq	40(%rsp), %r9
 	jbe	15f
 
-	cmpq	%rax, %r12
+	cmpq	%rax, %r9
 	ja	9f
 
 15:	cmpq	$-ETIMEDOUT, %r14
 	jne	8b
 
 13:	incq	wakeup_seq(%rdi)
+	incl	cond_futex(%rdi)
 	movq	$ETIMEDOUT, %r14
 	jmp	14f
 
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
index 065eb11813..9e7da301d3 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
 
@@ -69,6 +69,8 @@ __condvar_cleanup:
 
 	incq	woken_seq(%rdi)
 
+	incl	cond_futex(%rdi)
+
 3:	LOCK
 #if cond_lock == 0
 	decl	(%rdi)
@@ -82,7 +84,7 @@ __condvar_cleanup:
 	callq	__lll_mutex_unlock_wake
 
 	/* Wake up all waiters to make sure no signal gets lost.  */
-2:	addq	$wakeup_seq, %rdi
+2:	addq	$cond_futex, %rdi
 	movq	$FUTEX_WAKE, %rsi
 	movl	$0x7fffffff, %edx
 	movq	$SYS_futex, %rax
@@ -154,6 +156,7 @@ __pthread_cond_wait:
 
 	movq	8(%rsp), %rdi
 	incq	total_seq(%rdi)
+	incl	cond_futex(%rdi)
 
 	/* Install cancellation handler.  */
 #ifdef PIC
@@ -167,13 +170,14 @@ __pthread_cond_wait:
 
 	/* Get and store current wakeup_seq value.  */
 	movq	8(%rsp), %rdi
-	movq	wakeup_seq(%rdi), %r12
+	movq	wakeup_seq(%rdi), %r9
 	movl	broadcast_seq(%rdi), %edx
-	movq	%r12, 24(%rsp)
+	movq	%r9, 24(%rsp)
 	movl	%edx, 4(%rsp)
 
 	/* Unlock.  */
-8:	LOCK
+8:	movl	cond_futex(%rdi), %r12d
+	LOCK
 #if cond_lock == 0
 	decl	(%rdi)
 #else
@@ -187,7 +191,7 @@ __pthread_cond_wait:
 	movq	8(%rsp), %rdi
 	xorq	%r10, %r10
 	movq	%r12, %rdx
-	addq	$wakeup_seq-cond_lock, %rdi
+	addq	$cond_futex-cond_lock, %rdi
 	movq	$SYS_futex, %rax
 	movq	%r10, %rsi	/* movq $FUTEX_WAIT, %rsi */
 	syscall
@@ -211,15 +215,15 @@ __pthread_cond_wait:
 
 	movq	woken_seq(%rdi), %rax
 
-	movq	wakeup_seq(%rdi), %r12
+	movq	wakeup_seq(%rdi), %r9
 
 	cmpl	4(%rsp), %edx
 	jne	16f
 
-	cmpq	24(%rsp), %r12
+	cmpq	24(%rsp), %r9
 	jbe	8b
 
-	cmpq	%rax, %r12
+	cmpq	%rax, %r9
 	jna	8b
 
 	incq	woken_seq(%rdi)
@@ -359,8 +363,8 @@ versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait,
 	.byte	0x40+.Lsubq-.Lpush_r12		# DW_CFA_advance_loc+N
 	.byte	14				# DW_CFA_def_cfa_offset
 	.uleb128 16+FRAME_SIZE
-	.byte	2				# DW_CFA_advance_loc1
-	.byte	.Laddq-.Lsubq
+	.byte	3				# DW_CFA_advance_loc2
+	.2byte	.Laddq-.Lsubq
 	.byte	14				# DW_CFA_def_cfa_offset
 	.uleb128 16
 	.byte	0x40+.Lpop_r12-.Laddq		# DW_CFA_advance_loc+N
diff --git a/sysdeps/i386/i486/bits/atomic.h b/sysdeps/i386/i486/bits/atomic.h
index 9587eaa87a..7f3d534483 100644
--- a/sysdeps/i386/i486/bits/atomic.h
+++ b/sysdeps/i386/i486/bits/atomic.h
@@ -140,30 +140,31 @@ typedef uintmax_t uatomic_max_t;
 
 
 #define atomic_exchange_and_add(mem, value) \
-  ({ __typeof (*mem) result;						      \
-     __typeof (value) addval = (value);					      \
+  ({ __typeof (*mem) __result;						      \
+     __typeof (value) __addval = (value);				      \
      if (sizeof (*mem) == 1)						      \
        __asm __volatile (LOCK_PREFIX "xaddb %b0, %1"			      \
-			 : "=r" (result), "=m" (*mem)			      \
-			 : "0" (addval), "m" (*mem));			      \
+			 : "=r" (__result), "=m" (*mem)			      \
+			 : "0" (__addval), "m" (*mem));			      \
      else if (sizeof (*mem) == 2)					      \
        __asm __volatile (LOCK_PREFIX "xaddw %w0, %1"			      \
-			 : "=r" (result), "=m" (*mem)			      \
-			 : "0" (addval), "m" (*mem));			      \
+			 : "=r" (__result), "=m" (*mem)			      \
+			 : "0" (__addval), "m" (*mem));			      \
      else if (sizeof (*mem) == 4)					      \
        __asm __volatile (LOCK_PREFIX "xaddl %0, %1"			      \
-			 : "=r" (result), "=m" (*mem)			      \
-			 : "0" (addval), "m" (*mem));			      \
+			 : "=r" (__result), "=m" (*mem)			      \
+			 : "0" (__addval), "m" (*mem));			      \
      else								      \
        {								      \
-	 __typeof (mem) memp = (mem);					      \
+	 __typeof (mem) __memp = (mem);					      \
+	 __typeof (*mem) __tmpval;					      \
+	 __result = *__memp;						      \
 	 do								      \
-	   result = *memp;						      \
-	 while (__arch_compare_and_exchange_val_64_acq (memp,		      \
-							result + addval,      \
-							result) == result);   \
+	   __tmpval = __result;						      \
+	 while ((__result = __arch_compare_and_exchange_val_64_acq	      \
+		 (__memp, __result + __addval, __result)) == __tmpval);	      \
        }								      \
-     result; })
+     __result; })
 
 
 #define atomic_add(mem, value) \
@@ -185,13 +186,14 @@ typedef uintmax_t uatomic_max_t;
 				: "ir" (value), "m" (*mem));		      \
 	    else							      \
 	      {								      \
-		__typeof (value) addval = (value);			      \
-		__typeof (*mem) oldval;					      \
-		__typeof (mem) memp = (mem);				      \
+		__typeof (value) __addval = (value);			      \
+		__typeof (mem) __memp = (mem);				      \
+		__typeof (*mem) __oldval = *__memp;			      \
+		__typeof (*mem) __tmpval;				      \
 		do							      \
-		  oldval = *memp;					      \
-		while (__arch_compare_and_exchange_val_64_acq		      \
-		       (memp, oldval + addval, oldval) == oldval);	      \
+		  __tmpval = __oldval;					      \
+		while ((__oldval = __arch_compare_and_exchange_val_64_acq     \
+		       (__memp, __oldval + __addval, __oldval)) == __tmpval); \
 	      }								      \
 	    })
 
@@ -249,12 +251,13 @@ typedef uintmax_t uatomic_max_t;
 				: "m" (*mem));				      \
 	    else							      \
 	      {								      \
-		__typeof (*mem) oldval;					      \
-		__typeof (mem) memp = (mem);				      \
+		__typeof (mem) __memp = (mem);				      \
+		__typeof (*mem) __oldval = *__memp;			      \
+		__typeof (*mem) __tmpval;				      \
 		do							      \
-		  oldval = *memp;					      \
-		while (__arch_compare_and_exchange_val_64_acq		      \
-		       (memp, oldval + 1, oldval) == oldval);		      \
+		  __tmpval = __oldval;					      \
+		while ((__oldval = __arch_compare_and_exchange_val_64_acq     \
+		       (__memp, __oldval + 1, __oldval)) == __tmpval);	      \
 	      }								      \
 	    })
 
@@ -293,12 +296,13 @@ typedef uintmax_t uatomic_max_t;
 				: "m" (*mem));				      \
 	    else							      \
 	      {								      \
-		__typeof (*mem) oldval;					      \
-		__typeof (mem) memp = (mem);				      \
+		__typeof (mem) __memp = (mem);				      \
+		__typeof (*mem) __oldval = *__memp;			      \
+		__typeof (*mem) __tmpval;				      \
 		do							      \
-		  oldval = *memp;					      \
-		while (__arch_compare_and_exchange_val_64_acq		      \
-		       (memp, oldval - 1, oldval) == oldval); 		      \
+		  __tmpval = __oldval;					      \
+		while ((__oldval = __arch_compare_and_exchange_val_64_acq     \
+		       (__memp, __oldval - 1, __oldval)) == __tmpval); 	      \
 	      }								      \
 	    })