about summary refs log tree commit diff
path: root/nptl/sysdeps/unix
diff options
context:
space:
mode:
Diffstat (limited to 'nptl/sysdeps/unix')
-rw-r--r--nptl/sysdeps/unix/sysv/linux/alpha/lowlevellock.h142
-rw-r--r--nptl/sysdeps/unix/sysv/linux/fork.c2
-rw-r--r--nptl/sysdeps/unix/sysv/linux/fork.h4
-rw-r--r--nptl/sysdeps/unix/sysv/linux/i386/i486/libc-lowlevellock.S15
-rw-r--r--nptl/sysdeps/unix/sysv/linux/i386/i486/lowlevellock.S169
-rw-r--r--nptl/sysdeps/unix/sysv/linux/i386/i486/lowlevelrobustlock.S60
-rw-r--r--nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_barrier_wait.S36
-rw-r--r--nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_broadcast.S35
-rw-r--r--nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_signal.S31
-rw-r--r--nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S56
-rw-r--r--nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S51
-rw-r--r--nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_rwlock_rdlock.S42
-rw-r--r--nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_rwlock_timedrdlock.S49
-rw-r--r--nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_rwlock_timedwrlock.S47
-rw-r--r--nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_rwlock_unlock.S25
-rw-r--r--nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_rwlock_wrlock.S40
-rw-r--r--nptl/sysdeps/unix/sysv/linux/i386/i486/sem_post.S10
-rw-r--r--nptl/sysdeps/unix/sysv/linux/i386/i486/sem_timedwait.S13
-rw-r--r--nptl/sysdeps/unix/sysv/linux/i386/i486/sem_trywait.S9
-rw-r--r--nptl/sysdeps/unix/sysv/linux/i386/i486/sem_wait.S10
-rw-r--r--nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h549
-rw-r--r--nptl/sysdeps/unix/sysv/linux/i386/pthread_once.S12
-rw-r--r--nptl/sysdeps/unix/sysv/linux/ia64/lowlevellock.h244
-rw-r--r--nptl/sysdeps/unix/sysv/linux/lowlevellock.c29
-rw-r--r--nptl/sysdeps/unix/sysv/linux/lowlevelrobustlock.c13
-rw-r--r--nptl/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h103
-rw-r--r--nptl/sysdeps/unix/sysv/linux/powerpc/sem_post.c32
-rw-r--r--nptl/sysdeps/unix/sysv/linux/pthread_mutex_cond_lock.c6
-rw-r--r--nptl/sysdeps/unix/sysv/linux/register-atfork.c12
-rw-r--r--nptl/sysdeps/unix/sysv/linux/s390/lowlevellock.h158
-rw-r--r--nptl/sysdeps/unix/sysv/linux/sem_post.c3
-rw-r--r--nptl/sysdeps/unix/sysv/linux/sem_timedwait.c3
-rw-r--r--nptl/sysdeps/unix/sysv/linux/sem_wait.c3
-rw-r--r--nptl/sysdeps/unix/sysv/linux/sparc/internaltypes.h18
-rw-r--r--nptl/sysdeps/unix/sysv/linux/sparc/lowlevellock.h133
-rw-r--r--nptl/sysdeps/unix/sysv/linux/sparc/pthread_barrier_destroy.c45
-rw-r--r--nptl/sysdeps/unix/sysv/linux/sparc/pthread_barrier_init.c55
-rw-r--r--nptl/sysdeps/unix/sysv/linux/sparc/pthread_barrier_wait.c78
-rw-r--r--nptl/sysdeps/unix/sysv/linux/sparc/sparc32/lowlevellock.c25
-rw-r--r--nptl/sysdeps/unix/sysv/linux/sparc/sparc32/pthread_barrier_wait.c34
-rw-r--r--nptl/sysdeps/unix/sysv/linux/sparc/sparc32/sparcv9/pthread_barrier_wait.c2
-rw-r--r--nptl/sysdeps/unix/sysv/linux/unregister-atfork.c4
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/libc-lowlevellock.S15
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S161
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h555
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/lowlevelrobustlock.S75
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/pthread_barrier_wait.S35
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S31
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S27
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S31
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S39
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/pthread_once.S11
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_rdlock.S29
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_timedrdlock.S30
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_timedwrlock.S29
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_unlock.S25
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_wrlock.S29
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/sem_post.S10
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S10
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/sem_trywait.S7
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S10
61 files changed, 1742 insertions, 1824 deletions
diff --git a/nptl/sysdeps/unix/sysv/linux/alpha/lowlevellock.h b/nptl/sysdeps/unix/sysv/linux/alpha/lowlevellock.h
index 5f08673c43..f3f291979a 100644
--- a/nptl/sysdeps/unix/sysv/linux/alpha/lowlevellock.h
+++ b/nptl/sysdeps/unix/sysv/linux/alpha/lowlevellock.h
@@ -70,9 +70,6 @@
 #endif
 
 
-/* Initializer for compatibility lock.	*/
-#define LLL_MUTEX_LOCK_INITIALIZER (0)
-
 #define lll_futex_wait(futexp, val, private) \
   lll_futex_timed_wait (futexp, val, NULL, private)
 
@@ -96,7 +93,7 @@
     INTERNAL_SYSCALL_ERROR_P (__ret, __err)? -__ret : __ret;		      \
   })
 
-#define lll_robust_mutex_dead(futexv) \
+#define lll_robust_dead(futexv) \
   do									      \
     {									      \
       int *__futexp = &(futexv);					      \
@@ -132,149 +129,130 @@
 
 
 static inline int __attribute__((always_inline))
-__lll_mutex_trylock(int *futex)
+__lll_trylock(int *futex)
 {
   return atomic_compare_and_exchange_val_acq (futex, 1, 0) != 0;
 }
-#define lll_mutex_trylock(lock)	__lll_mutex_trylock (&(lock))
+#define lll_trylock(lock)	__lll_trylock (&(lock))
 
 
 static inline int __attribute__((always_inline))
-__lll_mutex_cond_trylock(int *futex)
+__lll_cond_trylock(int *futex)
 {
   return atomic_compare_and_exchange_val_acq (futex, 2, 0) != 0;
 }
-#define lll_mutex_cond_trylock(lock)	__lll_mutex_cond_trylock (&(lock))
+#define lll_cond_trylock(lock)	__lll_cond_trylock (&(lock))
 
 
 static inline int __attribute__((always_inline))
-__lll_robust_mutex_trylock(int *futex, int id)
+__lll_robust_trylock(int *futex, int id)
 {
   return atomic_compare_and_exchange_val_acq (futex, id, 0) != 0;
 }
-#define lll_robust_mutex_trylock(lock, id) \
-  __lll_robust_mutex_trylock (&(lock), id)
+#define lll_robust_trylock(lock, id) \
+  __lll_robust_trylock (&(lock), id)
 
-extern void __lll_lock_wait (int *futex) attribute_hidden;
-extern int __lll_robust_lock_wait (int *futex) attribute_hidden;
+extern void __lll_lock_wait_private (int *futex) attribute_hidden;
+extern void __lll_lock_wait (int *futex, int private) attribute_hidden;
+extern int __lll_robust_lock_wait (int *futex, int private) attribute_hidden;
 
 static inline void __attribute__((always_inline))
-__lll_mutex_lock(int *futex)
+__lll_lock(int *futex, int private)
 {
   if (atomic_compare_and_exchange_bool_acq (futex, 1, 0) != 0)
-    __lll_lock_wait (futex);
+    {
+      if (__builtin_constant_p (private) && private == LLL_PRIVATE)
+	__lll_lock_wait_private (futex);
+      else
+	__lll_lock_wait (futex, private);
+    }
 }
-#define lll_mutex_lock(futex) __lll_mutex_lock (&(futex))
+#define lll_lock(futex, private) __lll_lock (&(futex), private)
 
 
 static inline int __attribute__ ((always_inline))
-__lll_robust_mutex_lock (int *futex, int id)
+__lll_robust_lock (int *futex, int id, int private)
 {
   int result = 0;
   if (atomic_compare_and_exchange_bool_acq (futex, id, 0) != 0)
-    result = __lll_robust_lock_wait (futex);
+    result = __lll_robust_lock_wait (futex, private);
   return result;
 }
-#define lll_robust_mutex_lock(futex, id) \
-  __lll_robust_mutex_lock (&(futex), id)
+#define lll_robust_lock(futex, id, private) \
+  __lll_robust_lock (&(futex), id, private)
 
 
 static inline void __attribute__ ((always_inline))
-__lll_mutex_cond_lock (int *futex)
+__lll_cond_lock (int *futex, int private)
 {
   if (atomic_compare_and_exchange_bool_acq (futex, 2, 0) != 0)
-    __lll_lock_wait (futex);
+    __lll_lock_wait (futex, private);
 }
-#define lll_mutex_cond_lock(futex) __lll_mutex_cond_lock (&(futex))
+#define lll_cond_lock(futex, private) __lll_cond_lock (&(futex), private)
 
 
-#define lll_robust_mutex_cond_lock(futex, id) \
-  __lll_robust_mutex_lock (&(futex), (id) | FUTEX_WAITERS)
+#define lll_robust_cond_lock(futex, id, private) \
+  __lll_robust_lock (&(futex), (id) | FUTEX_WAITERS, private)
 
 
-extern int __lll_timedlock_wait (int *futex, const struct timespec *)
-	attribute_hidden;
-extern int __lll_robust_timedlock_wait (int *futex, const struct timespec *)
-	attribute_hidden;
+extern int __lll_timedlock_wait (int *futex, const struct timespec *,
+				 int private) attribute_hidden;
+extern int __lll_robust_timedlock_wait (int *futex, const struct timespec *,
+					int private) attribute_hidden;
 
 static inline int __attribute__ ((always_inline))
-__lll_mutex_timedlock (int *futex, const struct timespec *abstime)
+__lll_timedlock (int *futex, const struct timespec *abstime, int private)
 {
   int result = 0;
   if (atomic_compare_and_exchange_bool_acq (futex, 1, 0) != 0)
-    result = __lll_timedlock_wait (futex, abstime);
+    result = __lll_timedlock_wait (futex, abstime, private);
   return result;
 }
-#define lll_mutex_timedlock(futex, abstime) \
-  __lll_mutex_timedlock (&(futex), abstime)
+#define lll_timedlock(futex, abstime, private) \
+  __lll_timedlock (&(futex), abstime, private)
 
 
 static inline int __attribute__ ((always_inline))
-__lll_robust_mutex_timedlock (int *futex, const struct timespec *abstime,
-			      int id)
+__lll_robust_timedlock (int *futex, const struct timespec *abstime,
+			int id, int private)
 {
   int result = 0;
   if (atomic_compare_and_exchange_bool_acq (futex, id, 0) != 0)
     result = __lll_robust_timedlock_wait (futex, abstime);
   return result;
 }
-#define lll_robust_mutex_timedlock(futex, abstime, id) \
-  __lll_robust_mutex_timedlock (&(futex), abstime, id)
+#define lll_robust_timedlock(futex, abstime, id, private) \
+  __lll_robust_timedlock (&(futex), abstime, id, private)
 
 
 static inline void __attribute__ ((always_inline))
-__lll_mutex_unlock (int *futex)
+__lll_unlock (int *futex, int private)
 {
   int val = atomic_exchange_rel (futex, 0);
   if (__builtin_expect (val > 1, 0))
-    lll_futex_wake (futex, 1, LLL_SHARED);
+    lll_futex_wake (futex, 1, private);
 }
-#define lll_mutex_unlock(futex) __lll_mutex_unlock(&(futex))
+#define lll_unlock(futex, private) __lll_unlock(&(futex), private)
 
 
 static inline void __attribute__ ((always_inline))
-__lll_robust_mutex_unlock (int *futex, int mask)
+__lll_robust_unlock (int *futex, int private)
 {
   int val = atomic_exchange_rel (futex, 0);
-  if (__builtin_expect (val & mask, 0))
-    lll_futex_wake (futex, 1, LLL_SHARED);
+  if (__builtin_expect (val & FUTEX_WAITERS, 0))
+    lll_futex_wake (futex, 1, private);
 }
-#define lll_robust_mutex_unlock(futex) \
-  __lll_robust_mutex_unlock(&(futex), FUTEX_WAITERS)
-
+#define lll_robust_unlock(futex, private) \
+  __lll_robust_unlock(&(futex), private)
 
-static inline void __attribute__ ((always_inline))
-__lll_mutex_unlock_force (int *futex)
-{
-  (void) atomic_exchange_rel (futex, 0);
-  lll_futex_wake (futex, 1, LLL_SHARED);
-}
-#define lll_mutex_unlock_force(futex) __lll_mutex_unlock_force(&(futex))
 
-
-#define lll_mutex_islocked(futex) \
+#define lll_islocked(futex) \
   (futex != 0)
 
-
-/* Our internal lock implementation is identical to the binary-compatible
-   mutex implementation. */
-
-/* Type for lock object.  */
-typedef int lll_lock_t;
-
 /* Initializers for lock.  */
 #define LLL_LOCK_INITIALIZER		(0)
 #define LLL_LOCK_INITIALIZER_LOCKED	(1)
 
-/* The states of a lock are:
-    0  -  untaken
-    1  -  taken by one user
-   >1  -  taken by more users */
-
-#define lll_trylock(lock)	lll_mutex_trylock (lock)
-#define lll_lock(lock)		lll_mutex_lock (lock)
-#define lll_unlock(lock)	lll_mutex_unlock (lock)
-#define lll_islocked(lock)	lll_mutex_islocked (lock)
 
 /* The kernel notifies a process which uses CLONE_CLEARTID via futex
    wakeup when the clone terminates.  The memory location contains the
@@ -298,26 +276,4 @@ extern int __lll_timedwait_tid (int *, const struct timespec *)
     __res;						\
   })
 
-
-/* Conditional variable handling.  */
-
-extern void __lll_cond_wait (pthread_cond_t *cond)
-     attribute_hidden;
-extern int __lll_cond_timedwait (pthread_cond_t *cond,
-				 const struct timespec *abstime)
-     attribute_hidden;
-extern void __lll_cond_wake (pthread_cond_t *cond)
-     attribute_hidden;
-extern void __lll_cond_broadcast (pthread_cond_t *cond)
-     attribute_hidden;
-
-#define lll_cond_wait(cond) \
-  __lll_cond_wait (cond)
-#define lll_cond_timedwait(cond, abstime) \
-  __lll_cond_timedwait (cond, abstime)
-#define lll_cond_wake(cond) \
-  __lll_cond_wake (cond)
-#define lll_cond_broadcast(cond) \
-  __lll_cond_broadcast (cond)
-
 #endif	/* lowlevellock.h */
diff --git a/nptl/sysdeps/unix/sysv/linux/fork.c b/nptl/sysdeps/unix/sysv/linux/fork.c
index f9913c343e..1683de80fd 100644
--- a/nptl/sysdeps/unix/sysv/linux/fork.c
+++ b/nptl/sysdeps/unix/sysv/linux/fork.c
@@ -183,7 +183,7 @@ __libc_fork (void)
 	}
 
       /* Initialize the fork lock.  */
-      __fork_lock = (lll_lock_t) LLL_LOCK_INITIALIZER;
+      __fork_lock = LLL_LOCK_INITIALIZER;
     }
   else
     {
diff --git a/nptl/sysdeps/unix/sysv/linux/fork.h b/nptl/sysdeps/unix/sysv/linux/fork.h
index 6458977b99..032b68f083 100644
--- a/nptl/sysdeps/unix/sysv/linux/fork.h
+++ b/nptl/sysdeps/unix/sysv/linux/fork.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003, 2006 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2006, 2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
 
@@ -26,7 +26,7 @@ extern unsigned long int __fork_generation attribute_hidden;
 extern unsigned long int *__fork_generation_pointer attribute_hidden;
 
 /* Lock to protect allocation and deallocation of fork handlers.  */
-extern lll_lock_t __fork_lock attribute_hidden;
+extern int __fork_lock attribute_hidden;
 
 /* Elements of the fork handler lists.  */
 struct fork_handler
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/libc-lowlevellock.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/libc-lowlevellock.S
index 830f628578..ce8ad27aa7 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/libc-lowlevellock.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/libc-lowlevellock.S
@@ -17,19 +17,4 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
-#include <kernel-features.h>
-
-/* All locks in libc are private.  Use the kernel feature if possible.  */
-#define FUTEX_PRIVATE_FLAG	128
-#ifdef __ASSUME_PRIVATE_FUTEX
-# define FUTEX_WAIT		(0 | FUTEX_PRIVATE_FLAG)
-# define FUTEX_WAKE		(1 | FUTEX_PRIVATE_FLAG)
-#else
-# define LOAD_FUTEX_WAIT(reg) \
-	movl	%gs:PRIVATE_FUTEX, reg
-# define LOAD_FUTEX_WAKE(reg) \
-	movl	%gs:PRIVATE_FUTEX, reg ; \
-	orl	$FUTEX_WAKE, reg
-#endif
-
 #include "lowlevellock.S"
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/lowlevellock.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/lowlevellock.S
index cfcc7dafc4..745ab91239 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/lowlevellock.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/lowlevellock.S
@@ -19,42 +19,53 @@
 
 #include <sysdep.h>
 #include <pthread-errnos.h>
+#include <kernel-features.h>
+#include <lowlevellock.h>
 
 	.text
 
-#ifndef LOCK
-# ifdef UP
-#  define LOCK
+#ifdef __ASSUME_PRIVATE_FUTEX
+# define LOAD_PRIVATE_FUTEX_WAIT(reg) \
+	movl	$(FUTEX_WAIT | FUTEX_PRIVATE_FLAG), reg
+# define LOAD_PRIVATE_FUTEX_WAKE(reg) \
+	movl	$(FUTEX_WAKE | FUTEX_PRIVATE_FLAG), reg
+# define LOAD_FUTEX_WAIT(reg) \
+	xorl	$(FUTEX_WAIT | FUTEX_PRIVATE_FLAG), reg
+# define LOAD_FUTEX_WAKE(reg) \
+	xorl	$(FUTEX_WAKE | FUTEX_PRIVATE_FLAG), reg
+#else
+# if FUTEX_WAIT == 0
+#  define LOAD_PRIVATE_FUTEX_WAIT(reg) \
+	movl	%gs:PRIVATE_FUTEX, reg
 # else
-#  define LOCK lock
+#  define LOAD_PRIVATE_FUTEX_WAIT(reg) \
+	movl	%gs:PRIVATE_FUTEX, reg ; \
+	orl	$FUTEX_WAIT, reg
 # endif
-#endif
-
-#define SYS_gettimeofday	__NR_gettimeofday
-#define SYS_futex		240
-#ifndef FUTEX_WAIT
-# define FUTEX_WAIT		0
-# define FUTEX_WAKE		1
-#endif
-
-#ifndef LOAD_FUTEX_WAIT
+# define LOAD_PRIVATE_FUTEX_WAKE(reg) \
+	movl	%gs:PRIVATE_FUTEX, reg ; \
+	orl	$FUTEX_WAKE, reg
 # if FUTEX_WAIT == 0
 #  define LOAD_FUTEX_WAIT(reg) \
-	xorl	reg, reg
+	xorl	$FUTEX_PRIVATE_FLAG, reg ; \
+	andl	%gs:PRIVATE_FUTEX, reg
 # else
 #  define LOAD_FUTEX_WAIT(reg) \
-	movl	$FUTEX_WAIT, reg
+	xorl	$FUTEX_PRIVATE_FLAG, reg ; \
+	andl	%gs:PRIVATE_FUTEX, reg ; \
+	orl	$FUTEX_WAIT, reg
 # endif
 # define LOAD_FUTEX_WAKE(reg) \
-	movl	$FUTEX_WAKE, reg
+	xorl	$FUTEX_PRIVATE_FLAG, reg ; \
+	andl	%gs:PRIVATE_FUTEX, reg ; \
+	orl	$FUTEX_WAKE, reg
 #endif
 
-
-	.globl	__lll_mutex_lock_wait
-	.type	__lll_mutex_lock_wait,@function
-	.hidden	__lll_mutex_lock_wait
+	.globl	__lll_lock_wait_private
+	.type	__lll_lock_wait_private,@function
+	.hidden	__lll_lock_wait_private
 	.align	16
-__lll_mutex_lock_wait:
+__lll_lock_wait_private:
 	cfi_startproc
 	pushl	%edx
 	cfi_adjust_cfa_offset(4)
@@ -69,7 +80,7 @@ __lll_mutex_lock_wait:
 	movl	$2, %edx
 	movl	%ecx, %ebx
 	xorl	%esi, %esi	/* No timeout.  */
-	LOAD_FUTEX_WAIT (%ecx)
+	LOAD_PRIVATE_FUTEX_WAIT (%ecx)
 
 	cmpl	%edx, %eax	/* NB:	 %edx == 2 */
 	jne 2f
@@ -94,15 +105,60 @@ __lll_mutex_lock_wait:
 	cfi_restore(%edx)
 	ret
 	cfi_endproc
-	.size	__lll_mutex_lock_wait,.-__lll_mutex_lock_wait
-
+	.size	__lll_lock_wait_private,.-__lll_lock_wait_private
 
 #ifdef NOT_IN_libc
-	.globl	__lll_mutex_timedlock_wait
-	.type	__lll_mutex_timedlock_wait,@function
-	.hidden	__lll_mutex_timedlock_wait
+	.globl	__lll_lock_wait
+	.type	__lll_lock_wait,@function
+	.hidden	__lll_lock_wait
 	.align	16
-__lll_mutex_timedlock_wait:
+__lll_lock_wait:
+	cfi_startproc
+	pushl	%edx
+	cfi_adjust_cfa_offset(4)
+	pushl	%ebx
+	cfi_adjust_cfa_offset(4)
+	pushl	%esi
+	cfi_adjust_cfa_offset(4)
+	cfi_offset(%edx, -8)
+	cfi_offset(%ebx, -12)
+	cfi_offset(%esi, -16)
+
+	movl	%edx, %ebx
+	movl	$2, %edx
+	xorl	%esi, %esi	/* No timeout.  */
+	LOAD_FUTEX_WAIT (%ecx)
+
+	cmpl	%edx, %eax	/* NB:	 %edx == 2 */
+	jne 2f
+
+1:	movl	$SYS_futex, %eax
+	ENTER_KERNEL
+
+2:	movl	%edx, %eax
+	xchgl	%eax, (%ebx)	/* NB:	 lock is implied */
+
+	testl	%eax, %eax
+	jnz	1b
+
+	popl	%esi
+	cfi_adjust_cfa_offset(-4)
+	cfi_restore(%esi)
+	popl	%ebx
+	cfi_adjust_cfa_offset(-4)
+	cfi_restore(%ebx)
+	popl	%edx
+	cfi_adjust_cfa_offset(-4)
+	cfi_restore(%edx)
+	ret
+	cfi_endproc
+	.size	__lll_lock_wait,.-__lll_lock_wait
+
+	.globl	__lll_timedlock_wait
+	.type	__lll_timedlock_wait,@function
+	.hidden	__lll_timedlock_wait
+	.align	16
+__lll_timedlock_wait:
 	cfi_startproc
 	/* Check for a valid timeout value.  */
 	cmpl	$1000000000, 4(%edx)
@@ -132,7 +188,7 @@ __lll_mutex_timedlock_wait:
 	/* Get current time.  */
 	movl	%esp, %ebx
 	xorl	%ecx, %ecx
-	movl	$SYS_gettimeofday, %eax
+	movl	$__NR_gettimeofday, %eax
 	ENTER_KERNEL
 
 	/* Compute relative timeout.  */
@@ -165,6 +221,7 @@ __lll_mutex_timedlock_wait:
 
 	/* Futex call.  */
 	movl	%esp, %esi
+	movl	16(%esp), %ecx
 	LOAD_FUTEX_WAIT (%ecx)
 	movl	$SYS_futex, %eax
 	ENTER_KERNEL
@@ -215,15 +272,51 @@ __lll_mutex_timedlock_wait:
 5:	movl	$ETIMEDOUT, %eax
 	jmp	6b
 	cfi_endproc
-	.size	__lll_mutex_timedlock_wait,.-__lll_mutex_timedlock_wait
+	.size	__lll_timedlock_wait,.-__lll_timedlock_wait
 #endif
 
+	.globl	__lll_unlock_wake_private
+	.type	__lll_unlock_wake_private,@function
+	.hidden	__lll_unlock_wake_private
+	.align	16
+__lll_unlock_wake_private:
+	cfi_startproc
+	pushl	%ebx
+	cfi_adjust_cfa_offset(4)
+	pushl	%ecx
+	cfi_adjust_cfa_offset(4)
+	pushl	%edx
+	cfi_adjust_cfa_offset(4)
+	cfi_offset(%ebx, -8)
+	cfi_offset(%ecx, -12)
+	cfi_offset(%edx, -16)
+
+	movl	%eax, %ebx
+	movl	$0, (%eax)
+	LOAD_PRIVATE_FUTEX_WAKE (%ecx)
+	movl	$1, %edx	/* Wake one thread.  */
+	movl	$SYS_futex, %eax
+	ENTER_KERNEL
+
+	popl	%edx
+	cfi_adjust_cfa_offset(-4)
+	cfi_restore(%edx)
+	popl	%ecx
+	cfi_adjust_cfa_offset(-4)
+	cfi_restore(%ecx)
+	popl	%ebx
+	cfi_adjust_cfa_offset(-4)
+	cfi_restore(%ebx)
+	ret
+	cfi_endproc
+	.size	__lll_unlock_wake_private,.-__lll_unlock_wake_private
 
-	.globl	__lll_mutex_unlock_wake
-	.type	__lll_mutex_unlock_wake,@function
-	.hidden	__lll_mutex_unlock_wake
+#ifdef NOT_IN_libc
+	.globl	__lll_unlock_wake
+	.type	__lll_unlock_wake,@function
+	.hidden	__lll_unlock_wake
 	.align	16
-__lll_mutex_unlock_wake:
+__lll_unlock_wake:
 	cfi_startproc
 	pushl	%ebx
 	cfi_adjust_cfa_offset(4)
@@ -253,10 +346,8 @@ __lll_mutex_unlock_wake:
 	cfi_restore(%ebx)
 	ret
 	cfi_endproc
-	.size	__lll_mutex_unlock_wake,.-__lll_mutex_unlock_wake
+	.size	__lll_unlock_wake,.-__lll_unlock_wake
 
-
-#ifdef NOT_IN_libc
 	.globl	__lll_timedwait_tid
 	.type	__lll_timedwait_tid,@function
 	.hidden	__lll_timedwait_tid
@@ -274,7 +365,7 @@ __lll_timedwait_tid:
 	/* Get current time.  */
 2:	movl	%esp, %ebx
 	xorl	%ecx, %ecx
-	movl	$SYS_gettimeofday, %eax
+	movl	$__NR_gettimeofday, %eax
 	ENTER_KERNEL
 
 	/* Compute relative timeout.  */
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/lowlevelrobustlock.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/lowlevelrobustlock.S
index 73d8bc4ccc..7c2e1d135c 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/lowlevelrobustlock.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/lowlevelrobustlock.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003, 2004, 2006 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2004, 2006, 2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
 
@@ -19,31 +19,36 @@
 
 #include <sysdep.h>
 #include <pthread-errnos.h>
+#include <lowlevellock.h>
 #include <lowlevelrobustlock.h>
+#include <kernel-features.h>
 
 	.text
 
-#ifndef LOCK
-# ifdef UP
-#  define LOCK
-# else
-#  define LOCK lock
-# endif
-#endif
-
-#define SYS_gettimeofday	__NR_gettimeofday
-#define SYS_futex		240
-#define FUTEX_WAIT		0
-#define FUTEX_WAKE		1
 #define FUTEX_WAITERS		0x80000000
 #define FUTEX_OWNER_DIED	0x40000000
 
+#ifdef __ASSUME_PRIVATE_FUTEX
+# define LOAD_FUTEX_WAIT(reg) \
+	xorl	$(FUTEX_WAIT | FUTEX_PRIVATE_FLAG), reg
+#else
+# if FUTEX_WAIT == 0
+#  define LOAD_FUTEX_WAIT(reg) \
+	xorl	$FUTEX_PRIVATE_FLAG, reg ; \
+	andl	%gs:PRIVATE_FUTEX, reg
+# else
+#  define LOAD_FUTEX_WAIT(reg) \
+	xorl	$FUTEX_PRIVATE_FLAG, reg ; \
+	andl	%gs:PRIVATE_FUTEX, reg ; \
+	orl	$FUTEX_WAIT, reg
+# endif
+#endif
 
-	.globl	__lll_robust_mutex_lock_wait
-	.type	__lll_robust_mutex_lock_wait,@function
-	.hidden	__lll_robust_mutex_lock_wait
+	.globl	__lll_robust_lock_wait
+	.type	__lll_robust_lock_wait,@function
+	.hidden	__lll_robust_lock_wait
 	.align	16
-__lll_robust_mutex_lock_wait:
+__lll_robust_lock_wait:
 	cfi_startproc
 	pushl	%edx
 	cfi_adjust_cfa_offset(4)
@@ -55,9 +60,9 @@ __lll_robust_mutex_lock_wait:
 	cfi_offset(%ebx, -12)
 	cfi_offset(%esi, -16)
 
-	movl	%ecx, %ebx
+	movl	%edx, %ebx
 	xorl	%esi, %esi	/* No timeout.  */
-	xorl	%ecx, %ecx	/* movl $FUTEX_WAIT, %ecx */
+	LOAD_FUTEX_WAIT (%ecx)
 
 4:	movl	%eax, %edx
 	orl	$FUTEX_WAITERS, %edx
@@ -98,14 +103,14 @@ __lll_robust_mutex_lock_wait:
 	cfi_restore(%edx)
 	ret
 	cfi_endproc
-	.size	__lll_robust_mutex_lock_wait,.-__lll_robust_mutex_lock_wait
+	.size	__lll_robust_lock_wait,.-__lll_robust_lock_wait
 
 
-	.globl	__lll_robust_mutex_timedlock_wait
-	.type	__lll_robust_mutex_timedlock_wait,@function
-	.hidden	__lll_robust_mutex_timedlock_wait
+	.globl	__lll_robust_timedlock_wait
+	.type	__lll_robust_timedlock_wait,@function
+	.hidden	__lll_robust_timedlock_wait
 	.align	16
-__lll_robust_mutex_timedlock_wait:
+__lll_robust_timedlock_wait:
 	cfi_startproc
 	/* Check for a valid timeout value.  */
 	cmpl	$1000000000, 4(%edx)
@@ -136,7 +141,7 @@ __lll_robust_mutex_timedlock_wait:
 	/* Get current time.  */
 	movl	%esp, %ebx
 	xorl	%ecx, %ecx
-	movl	$SYS_gettimeofday, %eax
+	movl	$__NR_gettimeofday, %eax
 	ENTER_KERNEL
 
 	/* Compute relative timeout.  */
@@ -177,7 +182,8 @@ __lll_robust_mutex_timedlock_wait:
 2:
 	/* Futex call.  */
 	movl	%esp, %esi
-	xorl	%ecx, %ecx	/* movl $FUTEX_WAIT, %ecx */
+	movl	20(%esp), %ecx
+	LOAD_FUTEX_WAIT (%ecx)
 	movl	$SYS_futex, %eax
 	ENTER_KERNEL
 	movl	%eax, %ecx
@@ -224,4 +230,4 @@ __lll_robust_mutex_timedlock_wait:
 8:	movl	$ETIMEDOUT, %eax
 	jmp	6b
 	cfi_endproc
-	.size	__lll_robust_mutex_timedlock_wait,.-__lll_robust_mutex_timedlock_wait
+	.size	__lll_robust_timedlock_wait,.-__lll_robust_timedlock_wait
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_barrier_wait.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_barrier_wait.S
index 29857195f0..77d252de8f 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_barrier_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_barrier_wait.S
@@ -18,19 +18,9 @@
    02111-1307 USA.  */
 
 #include <sysdep.h>
+#include <lowlevellock.h>
 #include <lowlevelbarrier.h>
 
-#define SYS_futex	240
-#define FUTEX_WAIT	0
-#define FUTEX_WAKE	1
-
-#ifndef UP
-# define LOCK lock
-#else
-# define LOCK
-#endif
-
-
 	.text
 
 	.globl	pthread_barrier_wait
@@ -152,19 +142,27 @@ pthread_barrier_wait:
 	popl	%ebx
 	ret
 
-1:	leal	MUTEX(%ebx), %ecx
-	call	__lll_mutex_lock_wait
+1:	movl	PRIVATE(%ebx), %ecx
+	leal	MUTEX(%ebx), %edx
+	xorl	$LLL_SHARED, %ecx
+	call	__lll_lock_wait
 	jmp	2b
 
-4:	leal	MUTEX(%ebx), %eax
-	call	__lll_mutex_unlock_wake
+4:	movl	PRIVATE(%ebx), %ecx
+	leal	MUTEX(%ebx), %eax
+	xorl	$LLL_SHARED, %ecx
+	call	__lll_unlock_wake
 	jmp	5b
 
-6:	leal	MUTEX(%ebx), %eax
-	call	__lll_mutex_unlock_wake
+6:	movl	PRIVATE(%ebx), %ecx
+	leal	MUTEX(%ebx), %eax
+	xorl	$LLL_SHARED, %ecx
+	call	__lll_unlock_wake
 	jmp	7b
 
-9:	leal	MUTEX(%ebx), %eax
-	call	__lll_mutex_unlock_wake
+9:	movl	PRIVATE(%ebx), %ecx
+	leal	MUTEX(%ebx), %eax
+	xorl	$LLL_SHARED, %ecx
+	call	__lll_unlock_wake
 	jmp	10b
 	.size	pthread_barrier_wait,.-pthread_barrier_wait
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_broadcast.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_broadcast.S
index 56f7be8246..122d83afee 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_broadcast.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_broadcast.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003, 2004, 2006 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2004, 2006, 2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
 
@@ -19,24 +19,11 @@
 
 #include <sysdep.h>
 #include <shlib-compat.h>
+#include <lowlevellock.h>
 #include <lowlevelcond.h>
 #include <kernel-features.h>
 #include <pthread-pi-defines.h>
-
-#ifdef UP
-# define LOCK
-#else
-# define LOCK lock
-#endif
-
-#define SYS_futex		240
-#define FUTEX_WAIT		0
-#define FUTEX_WAKE		1
-#define FUTEX_REQUEUE		3
-#define FUTEX_CMP_REQUEUE	4
-
-#define EINVAL			22
-
+#include <pthread-errnos.h>
 
 	.text
 
@@ -141,21 +128,27 @@ __pthread_cond_broadcast:
 	/* Initial locking failed.  */
 1:
 #if cond_lock == 0
-	movl	%ebx, %ecx
+	movl	%ebx, %edx
 #else
-	leal	cond_lock(%ebx), %ecx
+	leal	cond_lock(%ebx), %edx
 #endif
-	call	__lll_mutex_lock_wait
+	/* XYZ */
+	movl	$LLL_SHARED, %ecx
+	call	__lll_lock_wait
 	jmp	2b
 
 	/* Unlock in loop requires waekup.  */
 5:	leal	cond_lock-cond_futex(%ebx), %eax
-	call	__lll_mutex_unlock_wake
+	/* XYZ */
+	movl	$LLL_SHARED, %ecx
+	call	__lll_unlock_wake
 	jmp	6b
 
 	/* Unlock in loop requires waekup.  */
 7:	leal	cond_lock-cond_futex(%ebx), %eax
-	call	__lll_mutex_unlock_wake
+	/* XYZ */
+	movl	$LLL_SHARED, %ecx
+	call	__lll_unlock_wake
 	jmp	8b
 
 9:	/* The futex requeue functionality is not available.  */
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_signal.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_signal.S
index d0f931ff15..e3510c8ab1 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_signal.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_signal.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
 
@@ -19,23 +19,10 @@
 
 #include <sysdep.h>
 #include <shlib-compat.h>
+#include <lowlevellock.h>
 #include <lowlevelcond.h>
 #include <kernel-features.h>
-
-#ifdef UP
-# define LOCK
-#else
-# define LOCK lock
-#endif
-
-#define SYS_futex		240
-#define FUTEX_WAIT		0
-#define FUTEX_WAKE		1
-#define FUTEX_WAKE_OP		5
-
-#define FUTEX_OP_CLEAR_WAKE_IF_GT_ONE	((4 << 24) | 1)
-
-#define EINVAL			22
+#include <pthread-errnos.h>
 
 
 	.text
@@ -119,17 +106,21 @@ __pthread_cond_signal:
 
 	/* Unlock in loop requires wakeup.  */
 5:	movl	%edi, %eax
-	call	__lll_mutex_unlock_wake
+	/* XYZ */
+	movl	$LLL_SHARED, %ecx
+	call	__lll_unlock_wake
 	jmp	6b
 
 	/* Initial locking failed.  */
 1:
 #if cond_lock == 0
-	movl	%edi, %ecx
+	movl	%edi, %edx
 #else
-	leal	cond_lock(%edi), %ecx
+	leal	cond_lock(%edi), %edx
 #endif
-	call	__lll_mutex_lock_wait
+	/* XYZ */
+	movl	$LLL_SHARED, %ecx
+	call	__lll_lock_wait
 	jmp	2b
 
 	.size	__pthread_cond_signal, .-__pthread_cond_signal
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S
index 93f4d56b32..79a7497e8c 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S
@@ -19,20 +19,10 @@
 
 #include <sysdep.h>
 #include <shlib-compat.h>
+#include <lowlevellock.h>
 #include <lowlevelcond.h>
 #include <pthread-errnos.h>
 
-#ifdef UP
-# define LOCK
-#else
-# define LOCK lock
-#endif
-
-#define SYS_gettimeofday	__NR_gettimeofday
-#define SYS_futex		240
-#define FUTEX_WAIT		0
-#define FUTEX_WAKE		1
-
 
 	.text
 
@@ -127,7 +117,7 @@ __pthread_cond_timedwait:
 	/* Get the current time.  */
 	leal	4(%esp), %ebx
 	xorl	%ecx, %ecx
-	movl	$SYS_gettimeofday, %eax
+	movl	$__NR_gettimeofday, %eax
 	ENTER_KERNEL
 	movl	%edx, %ebx
 
@@ -285,11 +275,13 @@ __pthread_cond_timedwait:
 1:
 .LSbl1:
 #if cond_lock == 0
-	movl	%ebx, %ecx
+	movl	%ebx, %edx
 #else
-	leal	cond_lock(%ebx), %ecx
+	leal	cond_lock(%ebx), %edx
 #endif
-	call	__lll_mutex_lock_wait
+	/* XYZ */
+	movl	$LLL_SHARED, %ecx
+	call	__lll_lock_wait
 	jmp	2b
 
 	/* Unlock in loop requires wakeup.  */
@@ -300,17 +292,21 @@ __pthread_cond_timedwait:
 #else
 	leal	cond_lock(%ebx), %eax
 #endif
-	call	__lll_mutex_unlock_wake
+	/* XYZ */
+	movl	$LLL_SHARED, %ecx
+	call	__lll_unlock_wake
 	jmp	4b
 
 	/* Locking in loop failed.  */
 5:
 #if cond_lock == 0
-	movl	%ebx, %ecx
+	movl	%ebx, %edx
 #else
-	leal	cond_lock(%ebx), %ecx
+	leal	cond_lock(%ebx), %edx
 #endif
-	call	__lll_mutex_lock_wait
+	/* XYZ */
+	movl	$LLL_SHARED, %ecx
+	call	__lll_lock_wait
 	jmp	6b
 
 	/* Unlock after loop requires wakeup.  */
@@ -320,7 +316,9 @@ __pthread_cond_timedwait:
 #else
 	leal	cond_lock(%ebx), %eax
 #endif
-	call	__lll_mutex_unlock_wake
+	/* XYZ */
+	movl	$LLL_SHARED, %ecx
+	call	__lll_unlock_wake
 	jmp	11b
 
 	/* The initial unlocking of the mutex failed.  */
@@ -340,7 +338,9 @@ __pthread_cond_timedwait:
 #else
 	leal	cond_lock(%ebx), %eax
 #endif
-	call	__lll_mutex_unlock_wake
+	/* XYZ */
+	movl	$LLL_SHARED, %ecx
+	call	__lll_unlock_wake
 
 	movl	%esi, %eax
 	jmp	18b
@@ -350,7 +350,7 @@ __pthread_cond_timedwait:
 .LSbl4:
 19:	leal	4(%esp), %ebx
 	xorl	%ecx, %ecx
-	movl	$SYS_gettimeofday, %eax
+	movl	$__NR_gettimeofday, %eax
 	ENTER_KERNEL
 	movl	%edx, %ebx
 
@@ -396,11 +396,13 @@ __condvar_tw_cleanup:
 	jz	1f
 
 #if cond_lock == 0
-	movl	%ebx, %ecx
+	movl	%ebx, %edx
 #else
-	leal	cond_lock(%ebx), %ecx
+	leal	cond_lock(%ebx), %edx
 #endif
-	call	__lll_mutex_lock_wait
+	/* XYZ */
+	movl	$LLL_SHARED, %ecx
+	call	__lll_lock_wait
 
 1:	movl	broadcast_seq(%ebx), %eax
 	cmpl	20(%esp), %eax
@@ -457,7 +459,9 @@ __condvar_tw_cleanup:
 #else
 	leal	cond_lock(%ebx), %eax
 #endif
-	call	__lll_mutex_unlock_wake
+	/* XYZ */
+	movl	$LLL_SHARED, %ecx
+	call	__lll_unlock_wake
 
 	/* Wake up all waiters to make sure no signal gets lost.  */
 2:	testl	%edi, %edi
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S
index c92cfbc718..68741bffe8 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S
@@ -19,19 +19,10 @@
 
 #include <sysdep.h>
 #include <shlib-compat.h>
+#include <lowlevellock.h>
 #include <lowlevelcond.h>
 #include <tcb-offsets.h>
 
-#ifdef UP
-# define LOCK
-#else
-# define LOCK lock
-#endif
-
-#define SYS_futex		240
-#define FUTEX_WAIT		0
-#define FUTEX_WAKE		1
-
 
 	.text
 
@@ -202,11 +193,13 @@ __pthread_cond_wait:
 1:
 .LSbl1:
 #if cond_lock == 0
-	movl	%ebx, %ecx
+	movl	%ebx, %edx
 #else
-	leal	cond_lock(%ebx), %ecx
+	leal	cond_lock(%ebx), %edx
 #endif
-	call	__lll_mutex_lock_wait
+	/* XYZ */
+	movl	$LLL_SHARED, %ecx
+	call	__lll_lock_wait
 	jmp	2b
 
 	/* Unlock in loop requires waekup.  */
@@ -217,17 +210,21 @@ __pthread_cond_wait:
 #else
 	leal	cond_lock(%ebx), %eax
 #endif
-	call	__lll_mutex_unlock_wake
+	/* XYZ */
+	movl	$LLL_SHARED, %ecx
+	call	__lll_unlock_wake
 	jmp	4b
 
 	/* Locking in loop failed.  */
 5:
 #if cond_lock == 0
-	movl	%ebx, %ecx
+	movl	%ebx, %edx
 #else
-	leal	cond_lock(%ebx), %ecx
+	leal	cond_lock(%ebx), %edx
 #endif
-	call	__lll_mutex_lock_wait
+	/* XYZ */
+	movl	$LLL_SHARED, %ecx
+	call	__lll_lock_wait
 	jmp	6b
 
 	/* Unlock after loop requires wakeup.  */
@@ -237,7 +234,9 @@ __pthread_cond_wait:
 #else
 	leal	cond_lock(%ebx), %eax
 #endif
-	call	__lll_mutex_unlock_wake
+	/* XYZ */
+	movl	$LLL_SHARED, %ecx
+	call	__lll_unlock_wake
 	jmp	11b
 
 	/* The initial unlocking of the mutex failed.  */
@@ -257,7 +256,9 @@ __pthread_cond_wait:
 #else
 	leal	cond_lock(%ebx), %eax
 #endif
-	call	__lll_mutex_unlock_wake
+	/* XYZ */
+	movl	$LLL_SHARED, %ecx
+	call	__lll_unlock_wake
 
 	movl	%esi, %eax
 	jmp	14b
@@ -287,11 +288,13 @@ __condvar_w_cleanup:
 	jz	1f
 
 #if cond_lock == 0
-	movl	%ebx, %ecx
+	movl	%ebx, %edx
 #else
-	leal	cond_lock(%ebx), %ecx
+	leal	cond_lock(%ebx), %edx
 #endif
-	call	__lll_mutex_lock_wait
+	/* XYZ */
+	movl	$LLL_SHARED, %ecx
+	call	__lll_lock_wait
 
 1:	movl	broadcast_seq(%ebx), %eax
 	cmpl	12(%esp), %eax
@@ -348,7 +351,9 @@ __condvar_w_cleanup:
 #else
 	leal	cond_lock(%ebx), %eax
 #endif
-	call	__lll_mutex_unlock_wake
+	/* XYZ */
+	movl	$LLL_SHARED, %ecx
+	call	__lll_unlock_wake
 
 	/* Wake up all waiters to make sure no signal gets lost.  */
 2:	testl	%edi, %edi
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_rwlock_rdlock.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_rwlock_rdlock.S
index c61c697985..d8a62ed2b7 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_rwlock_rdlock.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_rwlock_rdlock.S
@@ -18,21 +18,11 @@
    02111-1307 USA.  */
 
 #include <sysdep.h>
+#include <lowlevellock.h>
 #include <lowlevelrwlock.h>
 #include <pthread-errnos.h>
 
 
-#define SYS_futex		240
-#define FUTEX_WAIT		0
-#define FUTEX_WAKE		1
-
-#ifndef UP
-# define LOCK lock
-#else
-# define LOCK
-#endif
-
-
 	.text
 
 	.globl	__pthread_rwlock_rdlock
@@ -108,7 +98,7 @@ __pthread_rwlock_rdlock:
 13:	subl	$1, READERS_QUEUED(%ebx)
 	jmp	2b
 
-5:	xorl	%ecx, %ecx
+5:	xorl	%edx, %edx
 	addl	$1, NR_READERS(%ebx)
 	je	8f
 9:	LOCK
@@ -120,24 +110,25 @@ __pthread_rwlock_rdlock:
 	jne	6f
 7:
 
-	movl	%ecx, %eax
+	movl	%edx, %eax
 	popl	%ebx
 	popl	%esi
 	ret
 
 1:
 #if MUTEX == 0
-	movl	%ebx, %ecx
+	movl	%ebx, %edx
 #else
-	leal	MUTEX(%ebx), %ecx
+	leal	MUTEX(%ebx), %edx
 #endif
-	call	__lll_mutex_lock_wait
+	movl	PSHARED(%ebx), %ecx
+	call	__lll_lock_wait
 	jmp	2b
 
 14:	cmpl	%gs:TID, %eax
 	jne	3b
 	/* Deadlock detected.  */
-	movl	$EDEADLK, %ecx
+	movl	$EDEADLK, %edx
 	jmp	9b
 
 6:
@@ -146,17 +137,18 @@ __pthread_rwlock_rdlock:
 #else
 	leal	MUTEX(%ebx), %eax
 #endif
-	call	__lll_mutex_unlock_wake
+	movl	PSHARED(%ebx), %ecx
+	call	__lll_unlock_wake
 	jmp	7b
 
 	/* Overflow.  */
 8:	subl	$1, NR_READERS(%ebx)
-	movl	$EAGAIN, %ecx
+	movl	$EAGAIN, %edx
 	jmp	9b
 
 	/* Overflow.  */
 4:	subl	$1, READERS_QUEUED(%ebx)
-	movl	$EAGAIN, %ecx
+	movl	$EAGAIN, %edx
 	jmp	9b
 
 10:
@@ -165,16 +157,18 @@ __pthread_rwlock_rdlock:
 #else
 	leal	MUTEX(%ebx), %eax
 #endif
-	call	__lll_mutex_unlock_wake
+	movl	PSHARED(%ebx), %ecx
+	call	__lll_unlock_wake
 	jmp	11b
 
 12:
 #if MUTEX == 0
-	movl	%ebx, %ecx
+	movl	%ebx, %edx
 #else
-	leal	MUTEX(%ebx), %ecx
+	leal	MUTEX(%ebx), %edx
 #endif
-	call	__lll_mutex_lock_wait
+	movl	PSHARED(%ebx), %ecx
+	call	__lll_lock_wait
 	jmp	13b
 	.size	__pthread_rwlock_rdlock,.-__pthread_rwlock_rdlock
 
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_rwlock_timedrdlock.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_rwlock_timedrdlock.S
index 6bb67b7262..71b97c60f5 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_rwlock_timedrdlock.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_rwlock_timedrdlock.S
@@ -18,22 +18,11 @@
    02111-1307 USA.  */
 
 #include <sysdep.h>
+#include <lowlevellock.h>
 #include <lowlevelrwlock.h>
 #include <pthread-errnos.h>
 
 
-#define SYS_gettimeofday	__NR_gettimeofday
-#define SYS_futex		240
-#define FUTEX_WAIT		0
-#define FUTEX_WAKE		1
-
-#ifndef UP
-# define LOCK lock
-#else
-# define LOCK
-#endif
-
-
 	.text
 
 	.globl	pthread_rwlock_timedrdlock
@@ -88,7 +77,7 @@ pthread_rwlock_timedrdlock:
 	/* Get current time.  */
 11:	movl	%esp, %ebx
 	xorl	%ecx, %ecx
-	movl	$SYS_gettimeofday, %eax
+	movl	$__NR_gettimeofday, %eax
 	ENTER_KERNEL
 
 	/* Compute relative timeout.  */
@@ -142,11 +131,11 @@ pthread_rwlock_timedrdlock:
 	cmpl	$-ETIMEDOUT, %esi
 	jne	2b
 
-18:	movl	$ETIMEDOUT, %ecx
+18:	movl	$ETIMEDOUT, %edx
 	jmp	9f
 
 
-5:	xorl	%ecx, %ecx
+5:	xorl	%edx, %edx
 	addl	$1, NR_READERS(%ebp)
 	je	8f
 9:	LOCK
@@ -157,7 +146,7 @@ pthread_rwlock_timedrdlock:
 #endif
 	jne	6f
 
-7:	movl	%ecx, %eax
+7:	movl	%edx, %eax
 
 	addl	$8, %esp
 	popl	%ebp
@@ -168,16 +157,17 @@ pthread_rwlock_timedrdlock:
 
 1:
 #if MUTEX == 0
-	movl	%ebp, %ecx
+	movl	%ebp, %edx
 #else
-	leal	MUTEX(%ebp), %ecx
+	leal	MUTEX(%ebp), %edx
 #endif
-	call	__lll_mutex_lock_wait
+	movl	PSHARED(%ebp), %ecx
+	call	__lll_lock_wait
 	jmp	2b
 
 14:	cmpl	%gs:TID, %eax
 	jne	3b
-	movl	$EDEADLK, %ecx
+	movl	$EDEADLK, %edx
 	jmp	9b
 
 6:
@@ -186,17 +176,18 @@ pthread_rwlock_timedrdlock:
 #else
 	leal	MUTEX(%ebp), %eax
 #endif
-	call	__lll_mutex_unlock_wake
+	movl	PSHARED(%ebp), %ecx
+	call	__lll_unlock_wake
 	jmp	7b
 
 	/* Overflow.  */
 8:	subl	$1, NR_READERS(%ebp)
-	movl	$EAGAIN, %ecx
+	movl	$EAGAIN, %edx
 	jmp	9b
 
 	/* Overflow.  */
 4:	subl	$1, READERS_QUEUED(%ebp)
-	movl	$EAGAIN, %ecx
+	movl	$EAGAIN, %edx
 	jmp	9b
 
 10:
@@ -205,21 +196,23 @@ pthread_rwlock_timedrdlock:
 #else
 	leal	MUTEX(%ebp), %eax
 #endif
-	call	__lll_mutex_unlock_wake
+	movl	PSHARED(%ebp), %ecx
+	call	__lll_unlock_wake
 	jmp	11b
 
 12:
 #if MUTEX == 0
-	movl	%ebp, %ecx
+	movl	%ebp, %edx
 #else
-	leal	MUTEX(%ebp), %ecx
+	leal	MUTEX(%ebp), %edx
 #endif
-	call	__lll_mutex_lock_wait
+	movl	PSHARED(%ebp), %ecx
+	call	__lll_lock_wait
 	jmp	13b
 
 16:	movl	$-ETIMEDOUT, %esi
 	jmp	17b
 
-19:	movl	$EINVAL, %ecx
+19:	movl	$EINVAL, %edx
 	jmp	9b
 	.size	pthread_rwlock_timedrdlock,.-pthread_rwlock_timedrdlock
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_rwlock_timedwrlock.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_rwlock_timedwrlock.S
index cb5bceefec..c002472085 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_rwlock_timedwrlock.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_rwlock_timedwrlock.S
@@ -18,22 +18,11 @@
    02111-1307 USA.  */
 
 #include <sysdep.h>
+#include <lowlevellock.h>
 #include <lowlevelrwlock.h>
 #include <pthread-errnos.h>
 
 
-#define SYS_gettimeofday	__NR_gettimeofday
-#define SYS_futex		240
-#define FUTEX_WAIT		0
-#define FUTEX_WAKE		1
-
-#ifndef UP
-# define LOCK lock
-#else
-# define LOCK
-#endif
-
-
 	.text
 
 	.globl	pthread_rwlock_timedwrlock
@@ -86,7 +75,7 @@ pthread_rwlock_timedwrlock:
 	/* Get current time.  */
 11:	movl	%esp, %ebx
 	xorl	%ecx, %ecx
-	movl	$SYS_gettimeofday, %eax
+	movl	$__NR_gettimeofday, %eax
 	ENTER_KERNEL
 
 	/* Compute relative timeout.  */
@@ -140,11 +129,11 @@ pthread_rwlock_timedwrlock:
 	cmpl	$-ETIMEDOUT, %esi
 	jne	2b
 
-18:	movl	$ETIMEDOUT, %ecx
+18:	movl	$ETIMEDOUT, %edx
 	jmp	9f
 
 
-5:	xorl	%ecx, %ecx
+5:	xorl	%edx, %edx
 	movl	%gs:TID, %eax
 	movl	%eax, WRITER(%ebp)
 9:	LOCK
@@ -155,7 +144,7 @@ pthread_rwlock_timedwrlock:
 #endif
 	jne	6f
 
-7:	movl	%ecx, %eax
+7:	movl	%edx, %eax
 
 	addl	$8, %esp
 	popl	%ebp
@@ -166,16 +155,17 @@ pthread_rwlock_timedwrlock:
 
 1:
 #if MUTEX == 0
-	movl	%ebp, %ecx
+	movl	%ebp, %edx
 #else
-	leal	MUTEX(%ebp), %ecx
+	leal	MUTEX(%ebp), %edx
 #endif
-	call	__lll_mutex_lock_wait
+	movl	PSHARED(%ebp), %ecx
+	call	__lll_lock_wait
 	jmp	2b
 
 14:	cmpl	%gs:TID, %eax
 	jne	3b
-20:	movl	$EDEADLK, %ecx
+20:	movl	$EDEADLK, %edx
 	jmp	9b
 
 6:
@@ -184,12 +174,13 @@ pthread_rwlock_timedwrlock:
 #else
 	leal	MUTEX(%ebp), %eax
 #endif
-	call	__lll_mutex_unlock_wake
+	movl	PSHARED(%ebp), %ecx
+	call	__lll_unlock_wake
 	jmp	7b
 
 	/* Overflow.  */
 4:	subl	$1, WRITERS_QUEUED(%ebp)
-	movl	$EAGAIN, %ecx
+	movl	$EAGAIN, %edx
 	jmp	9b
 
 10:
@@ -198,21 +189,23 @@ pthread_rwlock_timedwrlock:
 #else
 	leal	MUTEX(%ebp), %eax
 #endif
-	call	__lll_mutex_unlock_wake
+	movl	PSHARED(%ebp), %ecx
+	call	__lll_unlock_wake
 	jmp	11b
 
 12:
 #if MUTEX == 0
-	movl	%ebp, %ecx
+	movl	%ebp, %edx
 #else
-	leal	MUTEX(%ebp), %ecx
+	leal	MUTEX(%ebp), %edx
 #endif
-	call	__lll_mutex_lock_wait
+	movl	PSHARED(%ebp), %ecx
+	call	__lll_lock_wait
 	jmp	13b
 
 16:	movl	$-ETIMEDOUT, %esi
 	jmp	17b
 
-19:	movl	$EINVAL, %ecx
+19:	movl	$EINVAL, %edx
 	jmp	9b
 	.size	pthread_rwlock_timedwrlock,.-pthread_rwlock_timedwrlock
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_rwlock_unlock.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_rwlock_unlock.S
index ed977aa141..fdad432e30 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_rwlock_unlock.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_rwlock_unlock.S
@@ -18,20 +18,10 @@
    02111-1307 USA.  */
 
 #include <sysdep.h>
+#include <lowlevellock.h>
 #include <lowlevelrwlock.h>
 
 
-#define SYS_futex		240
-#define FUTEX_WAIT		0
-#define FUTEX_WAKE		1
-
-#ifndef UP
-# define LOCK lock
-#else
-# define LOCK
-#endif
-
-
 	.text
 
 	.globl	__pthread_rwlock_unlock
@@ -115,11 +105,12 @@ __pthread_rwlock_unlock:
 
 1:
 #if MUTEX == 0
-	movl	%edi, %ecx
+	movl	%edi, %edx
 #else
-	leal	MUTEX(%edi), %ecx
+	leal	MUTEX(%edi), %edx
 #endif
-	call	__lll_mutex_lock_wait
+	movl	PSHARED(%edi), %ecx
+	call	__lll_lock_wait
 	jmp	2b
 
 3:
@@ -128,7 +119,8 @@ __pthread_rwlock_unlock:
 #else
 	leal	MUTEX(%edi), %eax
 #endif
-	call	__lll_mutex_unlock_wake
+	movl	PSHARED(%edi), %ecx
+	call	__lll_unlock_wake
 	jmp	4b
 
 7:
@@ -137,7 +129,8 @@ __pthread_rwlock_unlock:
 #else
 	leal	MUTEX(%edi), %eax
 #endif
-	call	__lll_mutex_unlock_wake
+	movl	PSHARED(%edi), %ecx
+	call	__lll_unlock_wake
 	jmp	8b
 
 	.size	__pthread_rwlock_unlock,.-__pthread_rwlock_unlock
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_rwlock_wrlock.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_rwlock_wrlock.S
index 88044c040b..3f55c82930 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_rwlock_wrlock.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_rwlock_wrlock.S
@@ -18,21 +18,11 @@
    02111-1307 USA.  */
 
 #include <sysdep.h>
+#include <lowlevellock.h>
 #include <lowlevelrwlock.h>
 #include <pthread-errnos.h>
 
 
-#define SYS_futex		240
-#define FUTEX_WAIT		0
-#define FUTEX_WAKE		1
-
-#ifndef UP
-# define LOCK lock
-#else
-# define LOCK
-#endif
-
-
 	.text
 
 	.globl	__pthread_rwlock_wrlock
@@ -106,7 +96,7 @@ __pthread_rwlock_wrlock:
 13:	subl	$1, WRITERS_QUEUED(%ebx)
 	jmp	2b
 
-5:	xorl	%ecx, %ecx
+5:	xorl	%edx, %edx
 	movl	%gs:TID, %eax
 	movl	%eax, WRITER(%ebx)
 9:	LOCK
@@ -118,23 +108,24 @@ __pthread_rwlock_wrlock:
 	jne	6f
 7:
 
-	movl	%ecx, %eax
+	movl	%edx, %eax
 	popl	%ebx
 	popl	%esi
 	ret
 
 1:
 #if MUTEX == 0
-	movl	%ebx, %ecx
+	movl	%ebx, %edx
 #else
-	leal	MUTEX(%ebx), %ecx
+	leal	MUTEX(%ebx), %edx
 #endif
-	call	__lll_mutex_lock_wait
+	movl	PSHARED(%ebx), %ecx
+	call	__lll_lock_wait
 	jmp	2b
 
 14:	cmpl	%gs:TID	, %eax
 	jne	3b
-	movl	$EDEADLK, %ecx
+	movl	$EDEADLK, %edx
 	jmp	9b
 
 6:
@@ -143,11 +134,12 @@ __pthread_rwlock_wrlock:
 #else
 	leal	MUTEX(%ebx), %eax
 #endif
-	call	__lll_mutex_unlock_wake
+	movl	PSHARED(%ebx), %ecx
+	call	__lll_unlock_wake
 	jmp	7b
 
 4:	subl	$1, WRITERS_QUEUED(%ebx)
-	movl	$EAGAIN, %ecx
+	movl	$EAGAIN, %edx
 	jmp	9b
 
 10:
@@ -156,16 +148,18 @@ __pthread_rwlock_wrlock:
 #else
 	leal	MUTEX(%ebx), %eax
 #endif
-	call	__lll_mutex_unlock_wake
+	movl	PSHARED(%ebx), %ecx
+	call	__lll_unlock_wake
 	jmp	11b
 
 12:
 #if MUTEX == 0
-	movl	%ebx, %ecx
+	movl	%ebx, %edx
 #else
-	leal	MUTEX(%ebx), %ecx
+	leal	MUTEX(%ebx), %edx
 #endif
-	call	__lll_mutex_lock_wait
+	movl	PSHARED(%ebx), %ecx
+	call	__lll_lock_wait
 	jmp	13b
 	.size	__pthread_rwlock_wrlock,.-__pthread_rwlock_wrlock
 
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_post.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_post.S
index 280dc2fe27..8f656b4e0f 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_post.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_post.S
@@ -21,15 +21,7 @@
 #include <shlib-compat.h>
 #include <pthread-errnos.h>
 #include <structsem.h>
-
-#ifndef UP
-# define LOCK lock
-#else
-# define
-#endif
-
-#define SYS_futex		240
-#define FUTEX_WAKE		1
+#include <lowlevellock.h>
 
 
 	.text
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_timedwait.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_timedwait.S
index 57b5b58186..13a36d64cc 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_timedwait.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_timedwait.S
@@ -21,16 +21,7 @@
 #include <shlib-compat.h>
 #include <pthread-errnos.h>
 #include <structsem.h>
-
-#ifndef UP
-# define LOCK lock
-#else
-# define
-#endif
-
-#define SYS_gettimeofday	__NR_gettimeofday
-#define SYS_futex		240
-#define FUTEX_WAIT		0
+#include <lowlevellock.h>
 
 
 #if VALUE != 0
@@ -82,7 +73,7 @@ sem_timedwait:
 7:	xorl	%ecx, %ecx
 	movl	%esp, %ebx
 	movl	%ecx, %edx
-	movl	$SYS_gettimeofday, %eax
+	movl	$__NR_gettimeofday, %eax
 	ENTER_KERNEL
 
 	/* Compute relative timeout.  */
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_trywait.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_trywait.S
index fbc3b3c932..2d49934f02 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_trywait.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_trywait.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2005, 2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
 
@@ -20,12 +20,7 @@
 #include <sysdep.h>
 #include <shlib-compat.h>
 #include <pthread-errnos.h>
-
-#ifndef UP
-# define LOCK lock
-#else
-# define
-#endif
+#include <lowlevellock.h>
 
 	.text
 
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_wait.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_wait.S
index d0eef75144..93c0a64a9e 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_wait.S
@@ -21,15 +21,7 @@
 #include <shlib-compat.h>
 #include <pthread-errnos.h>
 #include <structsem.h>
-
-#ifndef UP
-# define LOCK lock
-#else
-# define
-#endif
-
-#define SYS_futex		240
-#define FUTEX_WAIT		0
+#include <lowlevellock.h>
 
 
 #if VALUE != 0
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h b/nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h
index 56f63a4623..2f663aa68b 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h
+++ b/nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h
@@ -20,28 +20,41 @@
 #ifndef _LOWLEVELLOCK_H
 #define _LOWLEVELLOCK_H	1
 
-#include <time.h>
-#include <sys/param.h>
-#include <bits/pthreadtypes.h>
-#include <kernel-features.h>
-#include <tcb-offsets.h>
-
-#ifndef LOCK_INSTR
-# ifdef UP
-#  define LOCK_INSTR	/* nothing */
-# else
-#  define LOCK_INSTR "lock;"
+#ifndef __ASSEMBLER__
+# include <time.h>
+# include <sys/param.h>
+# include <bits/pthreadtypes.h>
+# include <kernel-features.h>
+# include <tcb-offsets.h>
+
+# ifndef LOCK_INSTR
+#  ifdef UP
+#   define LOCK_INSTR	/* nothing */
+#  else
+#   define LOCK_INSTR "lock;"
+#  endif
+# endif
+#else
+# ifndef LOCK
+#  ifdef UP
+#   define LOCK
+#  else
+#   define LOCK lock
+#  endif
 # endif
 #endif
 
 #define SYS_futex		240
 #define FUTEX_WAIT		0
 #define FUTEX_WAKE		1
+#define FUTEX_CMP_REQUEUE	4
+#define FUTEX_WAKE_OP		5
 #define FUTEX_LOCK_PI		6
 #define FUTEX_UNLOCK_PI		7
 #define FUTEX_TRYLOCK_PI	8
 #define FUTEX_PRIVATE_FLAG	128
 
+#define FUTEX_OP_CLEAR_WAKE_IF_GT_ONE	((4 << 24) | 1)
 
 /* Values for 'private' parameter of locking macros.  Yes, the
    definition seems to be backwards.  But it is not.  The bit will be
@@ -76,11 +89,12 @@
 # endif	      
 #endif
 
+#ifndef __ASSEMBLER__
 
 /* Initializer for compatibility lock.  */
-#define LLL_MUTEX_LOCK_INITIALIZER		(0)
-#define LLL_MUTEX_LOCK_INITIALIZER_LOCKED	(1)
-#define LLL_MUTEX_LOCK_INITIALIZER_WAITERS	(2)
+#define LLL_LOCK_INITIALIZER		(0)
+#define LLL_LOCK_INITIALIZER_LOCKED	(1)
+#define LLL_LOCK_INITIALIZER_WAITERS	(2)
 
 
 #ifdef PIC
@@ -102,7 +116,7 @@
 #endif
 
 /* Delay in spinlock loop.  */
-#define BUSY_WAIT_NOP          asm ("rep; nop")
+#define BUSY_WAIT_NOP	asm ("rep; nop")
 
 
 #define LLL_STUB_UNWIND_INFO_START \
@@ -217,332 +231,309 @@ LLL_STUB_UNWIND_INFO_END
   } while (0)
 
 
-/* Does not preserve %eax and %ecx.  */
-extern int __lll_mutex_lock_wait (int val, int *__futex)
-     __attribute ((regparm (2))) attribute_hidden;
-/* Does not preserve %eax, %ecx, and %edx.  */
-extern int __lll_mutex_timedlock_wait (int val, int *__futex,
-				       const struct timespec *abstime)
-     __attribute ((regparm (3))) attribute_hidden;
-/* Preserves all registers but %eax.  */
-extern int __lll_mutex_unlock_wake (int *__futex)
-     __attribute ((regparm (1))) attribute_hidden;
-
-
-/* NB: in the lll_mutex_trylock macro we simply return the value in %eax
+/* NB: in the lll_trylock macro we simply return the value in %eax
    after the cmpxchg instruction.  In case the operation succeded this
    value is zero.  In case the operation failed, the cmpxchg instruction
    has loaded the current value of the memory work which is guaranteed
    to be nonzero.  */
-#define lll_mutex_trylock(futex) \
+#if defined NOT_IN_libc || defined UP
+# define __lll_trylock_asm LOCK_INSTR "cmpxchgl %2, %1"
+#else
+# define __lll_trylock_asm "cmpl $0, %%gs:%P5\n\t" \
+			   "je 0f\n\t"					      \
+			   "lock\n"					      \
+			   "0:\tcmpxchgl %2, %1"
+#endif
+
+#define lll_trylock(futex) \
   ({ int ret;								      \
-     __asm __volatile (LOCK_INSTR "cmpxchgl %2, %1"			      \
+     __asm __volatile (__lll_trylock_asm				      \
 		       : "=a" (ret), "=m" (futex)			      \
-		       : "r" (LLL_MUTEX_LOCK_INITIALIZER_LOCKED), "m" (futex),\
-			 "0" (LLL_MUTEX_LOCK_INITIALIZER)		      \
+		       : "r" (LLL_LOCK_INITIALIZER_LOCKED), "m" (futex),      \
+			 "0" (LLL_LOCK_INITIALIZER),			      \
+			 "i" (MULTIPLE_THREADS_OFFSET)			      \
 		       : "memory");					      \
      ret; })
 
-
-#define lll_robust_mutex_trylock(futex, id) \
+#define lll_robust_trylock(futex, id) \
   ({ int ret;								      \
      __asm __volatile (LOCK_INSTR "cmpxchgl %2, %1"			      \
 		       : "=a" (ret), "=m" (futex)			      \
 		       : "r" (id), "m" (futex),				      \
-			 "0" (LLL_MUTEX_LOCK_INITIALIZER)		      \
+			 "0" (LLL_LOCK_INITIALIZER)			      \
 		       : "memory");					      \
      ret; })
 
 
-#define lll_mutex_cond_trylock(futex) \
+#define lll_cond_trylock(futex) \
   ({ int ret;								      \
      __asm __volatile (LOCK_INSTR "cmpxchgl %2, %1"			      \
 		       : "=a" (ret), "=m" (futex)			      \
-		       : "r" (LLL_MUTEX_LOCK_INITIALIZER_WAITERS),	      \
-			  "m" (futex), "0" (LLL_MUTEX_LOCK_INITIALIZER)	      \
+		       : "r" (LLL_LOCK_INITIALIZER_WAITERS),		      \
+			 "m" (futex), "0" (LLL_LOCK_INITIALIZER)	      \
 		       : "memory");					      \
      ret; })
 
+#if defined NOT_IN_libc || defined UP
+# define __lll_lock_asm_start LOCK_INSTR "cmpxchgl %1, %2\n\t"
+#else
+# define __lll_lock_asm_start "cmpl $0, %%gs:%P6\n\t"			      \
+			      "je 0f\n\t"				      \
+			      "lock\n"					      \
+			      "0:\tcmpxchgl %1, %2\n\t"
+#endif
 
-#define lll_mutex_lock(futex) \
-  (void) ({ int ignore1, ignore2;					      \
-	    __asm __volatile (LOCK_INSTR "cmpxchgl %1, %2\n\t"		      \
-			      "jnz _L_mutex_lock_%=\n\t"		      \
-			      ".subsection 1\n\t"			      \
-			      ".type _L_mutex_lock_%=,@function\n"	      \
-			      "_L_mutex_lock_%=:\n"			      \
-			      "1:\tleal %2, %%ecx\n"			      \
-			      "2:\tcall __lll_mutex_lock_wait\n"	      \
-			      "3:\tjmp 18f\n"				      \
-			      "4:\t.size _L_mutex_lock_%=, 4b-1b\n\t"	      \
-			      ".previous\n"				      \
-			      LLL_STUB_UNWIND_INFO_3			      \
-			      "18:"					      \
-			      : "=a" (ignore1), "=c" (ignore2), "=m" (futex)  \
-			      : "0" (0), "1" (1), "m" (futex)		      \
-			      : "memory"); })
-
-
-#define lll_robust_mutex_lock(futex, id) \
-  ({ int result, ignore;						      \
+#define lll_lock(futex, private) \
+  (void)								      \
+    ({ int ignore1, ignore2;						      \
+       if (__builtin_constant_p (private) && (private) == LLL_PRIVATE)	      \
+	 __asm __volatile (__lll_lock_asm_start				      \
+			   "jnz _L_lock_%=\n\t"				      \
+			   ".subsection 1\n\t"				      \
+			   ".type _L_lock_%=,@function\n"		      \
+			   "_L_lock_%=:\n"				      \
+			   "1:\tleal %2, %%ecx\n"			      \
+			   "2:\tcall __lll_lock_wait_private\n" 	      \
+			   "3:\tjmp 18f\n"				      \
+			   "4:\t.size _L_lock_%=, 4b-1b\n\t"		      \
+			   ".previous\n"				      \
+			   LLL_STUB_UNWIND_INFO_3			      \
+			   "18:"					      \
+			   : "=a" (ignore1), "=c" (ignore2), "=m" (futex)     \
+			   : "0" (0), "1" (1), "m" (futex),		      \
+			     "i" (MULTIPLE_THREADS_OFFSET)		      \
+			   : "memory");					      \
+       else								      \
+	 {								      \
+	   int ignore3;							      \
+	   __asm __volatile (__lll_lock_asm_start			      \
+			     "jnz _L_lock_%=\n\t"			      \
+			     ".subsection 1\n\t"			      \
+			     ".type _L_lock_%=,@function\n"		      \
+			     "_L_lock_%=:\n"				      \
+			     "1:\tleal %2, %%edx\n"			      \
+			     "0:\tmovl %8, %%ecx\n"			      \
+			     "2:\tcall __lll_lock_wait\n"		      \
+			     "3:\tjmp 18f\n"				      \
+			     "4:\t.size _L_lock_%=, 4b-1b\n\t"		      \
+			     ".previous\n"				      \
+			     LLL_STUB_UNWIND_INFO_4			      \
+			     "18:"					      \
+			     : "=a" (ignore1), "=c" (ignore2),		      \
+			       "=m" (futex), "=&d" (ignore3) 		      \
+			     : "1" (1), "m" (futex),			      \
+			       "i" (MULTIPLE_THREADS_OFFSET), "0" (0),	      \
+			       "g" (private)				      \
+			     : "memory");				      \
+	 }								      \
+    })
+
+#define lll_robust_lock(futex, id, private) \
+  ({ int result, ignore1, ignore2;					      \
      __asm __volatile (LOCK_INSTR "cmpxchgl %1, %2\n\t"			      \
-		       "jnz _L_robust_mutex_lock_%=\n\t"		      \
+		       "jnz _L_robust_lock_%=\n\t"			      \
 		       ".subsection 1\n\t"				      \
-		       ".type _L_robust_mutex_lock_%=,@function\n"	      \
-		       "_L_robust_mutex_lock_%=:\n"			      \
-		       "1:\tleal %2, %%ecx\n"				      \
-		       "2:\tcall __lll_robust_mutex_lock_wait\n"	      \
+		       ".type _L_robust_lock_%=,@function\n"		      \
+		       "_L_robust_lock_%=:\n"				      \
+		       "1:\tleal %2, %%edx\n"				      \
+		       "0:\tmovl %7, %%ecx\n"				      \
+		       "2:\tcall __lll_robust_lock_wait\n"		      \
 		       "3:\tjmp 18f\n"					      \
-		       "4:\t.size _L_robust_mutex_lock_%=, 4b-1b\n\t"	      \
+		       "4:\t.size _L_robust_lock_%=, 4b-1b\n\t"		      \
 		       ".previous\n"					      \
-		       LLL_STUB_UNWIND_INFO_3				      \
+		       LLL_STUB_UNWIND_INFO_4				      \
 		       "18:"						      \
-		       : "=a" (result), "=c" (ignore), "=m" (futex)	      \
-		       : "0" (0), "1" (id), "m" (futex)			      \
+		       : "=a" (result), "=c" (ignore1), "=m" (futex),	      \
+			 "=&d" (ignore2)				      \
+		       : "0" (0), "1" (id), "m" (futex), "g" (private)	      \
 		       : "memory");					      \
      result; })
 
 
-/* Special version of lll_mutex_lock which causes the unlock function to
+/* Special version of lll_lock which causes the unlock function to
    always wakeup waiters.  */
-#define lll_mutex_cond_lock(futex) \
-  (void) ({ int ignore1, ignore2;					      \
-	    __asm __volatile (LOCK_INSTR "cmpxchgl %1, %2\n\t"		      \
-			      "jnz _L_mutex_cond_lock_%=\n\t"		      \
-			      ".subsection 1\n\t"			      \
-			      ".type _L_mutex_cond_lock_%=,@function\n"	      \
-			      "_L_mutex_cond_lock_%=:\n"		      \
-			      "1:\tleal %2, %%ecx\n"			      \
-			      "2:\tcall __lll_mutex_lock_wait\n"	      \
-			      "3:\tjmp 18f\n"				      \
-			      "4:\t.size _L_mutex_cond_lock_%=, 4b-1b\n\t"    \
-			      ".previous\n"				      \
-			      LLL_STUB_UNWIND_INFO_3			      \
-			      "18:"					      \
-			      : "=a" (ignore1), "=c" (ignore2), "=m" (futex)  \
-			      : "0" (0), "1" (2), "m" (futex)		      \
-			      : "memory"); })
-
-
-#define lll_robust_mutex_cond_lock(futex, id) \
-  ({ int result, ignore;						      \
+#define lll_cond_lock(futex, private) \
+  (void)								      \
+    ({ int ignore1, ignore2, ignore3;					      \
+       __asm __volatile (LOCK_INSTR "cmpxchgl %1, %2\n\t"		      \
+			 "jnz _L_cond_lock_%=\n\t"			      \
+			 ".subsection 1\n\t"				      \
+			 ".type _L_cond_lock_%=,@function\n"		      \
+			 "_L_cond_lock_%=:\n"				      \
+			 "1:\tleal %2, %%edx\n"				      \
+			 "0:\tmovl %7, %%ecx\n"				      \
+			 "2:\tcall __lll_lock_wait\n"			      \
+			 "3:\tjmp 18f\n"				      \
+			 "4:\t.size _L_cond_lock_%=, 4b-1b\n\t"		      \
+			 ".previous\n"					      \
+			 LLL_STUB_UNWIND_INFO_4				      \
+			 "18:"						      \
+			 : "=a" (ignore1), "=c" (ignore2), "=m" (futex),      \
+			   "=&d" (ignore3)				      \
+			 : "0" (0), "1" (2), "m" (futex), "g" (private)	      \
+			 : "memory");					      \
+    })
+
+
+#define lll_robust_cond_lock(futex, id, private) \
+  ({ int result, ignore1, ignore2;					      \
      __asm __volatile (LOCK_INSTR "cmpxchgl %1, %2\n\t"			      \
-		       "jnz _L_robust_mutex_cond_lock_%=\n\t"		      \
+		       "jnz _L_robust_cond_lock_%=\n\t"			      \
 		       ".subsection 1\n\t"				      \
-		       ".type _L_robust_mutex_cond_lock_%=,@function\n"	      \
-		       "_L_robust_mutex_cond_lock_%=:\n"		      \
-		       "1:\tleal %2, %%ecx\n"				      \
-		       "2:\tcall __lll_robust_mutex_lock_wait\n"	      \
+		       ".type _L_robust_cond_lock_%=,@function\n"	      \
+		       "_L_robust_cond_lock_%=:\n"			      \
+		       "1:\tleal %2, %%edx\n"				      \
+		       "0:\tmovl %7, %%ecx\n"				      \
+		       "2:\tcall __lll_robust_lock_wait\n"		      \
 		       "3:\tjmp 18f\n"					      \
-		       "4:\t.size _L_robust_mutex_cond_lock_%=, 4b-1b\n\t"    \
+		       "4:\t.size _L_robust_cond_lock_%=, 4b-1b\n\t"	      \
 		       ".previous\n"					      \
-		       LLL_STUB_UNWIND_INFO_3				      \
+		       LLL_STUB_UNWIND_INFO_4				      \
 		       "18:"						      \
-		       : "=a" (result), "=c" (ignore), "=m" (futex)	      \
-		       : "0" (0), "1" (id | FUTEX_WAITERS), "m" (futex)	      \
+		       : "=a" (result), "=c" (ignore1), "=m" (futex),	      \
+			 "=&d" (ignore2)				      \
+		       : "0" (0), "1" (id | FUTEX_WAITERS), "m" (futex),      \
+			 "g" (private)					      \
 		       : "memory");					      \
      result; })
 
 
-#define lll_mutex_timedlock(futex, timeout) \
-  ({ int result, ignore1, ignore2;					      \
+#define lll_timedlock(futex, timeout, private) \
+  ({ int result, ignore1, ignore2, ignore3;				      \
      __asm __volatile (LOCK_INSTR "cmpxchgl %1, %3\n\t"			      \
-		       "jnz _L_mutex_timedlock_%=\n\t"			      \
+		       "jnz _L_timedlock_%=\n\t"			      \
 		       ".subsection 1\n\t"				      \
-		       ".type _L_mutex_timedlock_%=,@function\n"	      \
-		       "_L_mutex_timedlock_%=:\n"			      \
+		       ".type _L_timedlock_%=,@function\n"		      \
+		       "_L_timedlock_%=:\n"				      \
 		       "1:\tleal %3, %%ecx\n"				      \
-		       "0:\tmovl %7, %%edx\n"				      \
-		       "2:\tcall __lll_mutex_timedlock_wait\n"		      \
+		       "0:\tmovl %8, %%edx\n"				      \
+		       "2:\tcall __lll_timedlock_wait\n"		      \
 		       "3:\tjmp 18f\n"					      \
-		       "4:\t.size _L_mutex_timedlock_%=, 4b-1b\n\t"	      \
+		       "4:\t.size _L_timedlock_%=, 4b-1b\n\t"		      \
 		       ".previous\n"					      \
 		       LLL_STUB_UNWIND_INFO_4				      \
 		       "18:"						      \
 		       : "=a" (result), "=c" (ignore1), "=&d" (ignore2),      \
-			 "=m" (futex)					      \
-		       : "0" (0), "1" (1), "m" (futex), "m" (timeout)	      \
+			 "=m" (futex), "=S" (ignore3)			      \
+		       : "0" (0), "1" (1), "m" (futex), "m" (timeout),	      \
+			 "4" (private)					      \
 		       : "memory");					      \
      result; })
 
 
-#define lll_robust_mutex_timedlock(futex, timeout, id) \
-  ({ int result, ignore1, ignore2;					      \
+#define lll_robust_timedlock(futex, timeout, id, private) \
+  ({ int result, ignore1, ignore2, ignore3;				      \
      __asm __volatile (LOCK_INSTR "cmpxchgl %1, %3\n\t"			      \
-		       "jnz _L_robust_mutex_timedlock_%=\n\t"		      \
+		       "jnz _L_robust_timedlock_%=\n\t"			      \
 		       ".subsection 1\n\t"				      \
-		       ".type _L_robust_mutex_timedlock_%=,@function\n"	      \
-		       "_L_robust_mutex_timedlock_%=:\n"		      \
+		       ".type _L_robust_timedlock_%=,@function\n"	      \
+		       "_L_robust_timedlock_%=:\n"			      \
 		       "1:\tleal %3, %%ecx\n"				      \
-		       "0:\tmovl %7, %%edx\n"				      \
-		       "2:\tcall __lll_robust_mutex_timedlock_wait\n"	      \
+		       "0:\tmovl %8, %%edx\n"				      \
+		       "2:\tcall __lll_robust_timedlock_wait\n"		      \
 		       "3:\tjmp 18f\n"					      \
-		       "4:\t.size _L_robust_mutex_timedlock_%=, 4b-1b\n\t"    \
+		       "4:\t.size _L_robust_timedlock_%=, 4b-1b\n\t"	      \
 		       ".previous\n"					      \
 		       LLL_STUB_UNWIND_INFO_4				      \
 		       "18:"						      \
 		       : "=a" (result), "=c" (ignore1), "=&d" (ignore2),      \
-			 "=m" (futex)					      \
-		       : "0" (0), "1" (id), "m" (futex), "m" (timeout)	      \
+			 "=m" (futex), "=S" (ignore3)			      \
+		       : "0" (0), "1" (id), "m" (futex), "m" (timeout),	      \
+			 "4" (private)					      \
 		       : "memory");					      \
      result; })
 
-
-#define lll_mutex_unlock(futex) \
-  (void) ({ int ignore;							      \
-            __asm __volatile (LOCK_INSTR "subl $1, %0\n\t"		      \
-			      "jne _L_mutex_unlock_%=\n\t"		      \
-			      ".subsection 1\n\t"			      \
-			      ".type _L_mutex_unlock_%=,@function\n"	      \
-			      "_L_mutex_unlock_%=:\n"			      \
-			      "1:\tleal %0, %%eax\n"			      \
-			      "2:\tcall __lll_mutex_unlock_wake\n"	      \
-			      "3:\tjmp 18f\n"				      \
-			      "4:\t.size _L_mutex_unlock_%=, 4b-1b\n\t"	      \
-			      ".previous\n"				      \
-			      LLL_STUB_UNWIND_INFO_3			      \
-			      "18:"					      \
-			      : "=m" (futex), "=&a" (ignore)		      \
-			      : "m" (futex)				      \
-			      : "memory"); })
-
-
-#define lll_robust_mutex_unlock(futex) \
-  (void) ({ int ignore;							      \
-            __asm __volatile (LOCK_INSTR "andl %2, %0\n\t"		      \
-			      "jne _L_robust_mutex_unlock_%=\n\t"	      \
-			      ".subsection 1\n\t"			      \
-			      ".type _L_robust_mutex_unlock_%=,@function\n"   \
-			      "_L_robust_mutex_unlock_%=:\n\t"		      \
-			      "1:\tleal %0, %%eax\n"			      \
-			      "2:\tcall __lll_mutex_unlock_wake\n"	      \
-			      "3:\tjmp 18f\n"				      \
-			      "4:\t.size _L_robust_mutex_unlock_%=, 4b-1b\n\t"\
-			      ".previous\n"				      \
-			      LLL_STUB_UNWIND_INFO_3			      \
-			      "18:"					      \
-			      : "=m" (futex), "=&a" (ignore)		      \
-			      : "i" (FUTEX_WAITERS), "m" (futex)	      \
-			      : "memory"); })
-
-
-#define lll_robust_mutex_dead(futex) \
-  (void) ({ int __ignore;						      \
-	    register int _nr asm ("edx") = 1;				      \
-	    __asm __volatile (LOCK_INSTR "orl %5, (%2)\n\t"		      \
-			      LLL_EBX_LOAD				      \
-			      LLL_ENTER_KERNEL				      \
-			      LLL_EBX_LOAD				      \
-			      : "=a" (__ignore)				      \
-			      : "0" (SYS_futex), LLL_EBX_REG (&(futex)),      \
-				"c" (FUTEX_WAKE), "d" (_nr),		      \
-				"i" (FUTEX_OWNER_DIED),			      \
-				"i" (offsetof (tcbhead_t, sysinfo))); })
-
-
-#define lll_mutex_islocked(futex) \
-  (futex != 0)
-
-
-/* We have a separate internal lock implementation which is not tied
-   to binary compatibility.  */
-
-/* Type for lock object.  */
-typedef int lll_lock_t;
-
-/* Initializers for lock.  */
-#define LLL_LOCK_INITIALIZER		(0)
-#define LLL_LOCK_INITIALIZER_LOCKED	(1)
-
-
-extern int __lll_lock_wait (int val, int *__futex)
-     __attribute ((regparm (2))) attribute_hidden;
-extern int __lll_unlock_wake (int *__futex)
-     __attribute ((regparm (1))) attribute_hidden;
-
-
-/* The states of a lock are:
-    0  -  untaken
-    1  -  taken by one user
-    2  -  taken by more users */
-
-
 #if defined NOT_IN_libc || defined UP
-# define lll_trylock(futex) lll_mutex_trylock (futex)
-# define lll_lock(futex) lll_mutex_lock (futex)
-# define lll_unlock(futex) lll_mutex_unlock (futex)
+# define __lll_unlock_asm LOCK_INSTR "subl $1, %0\n\t"
 #else
-/* Special versions of the macros for use in libc itself.  They avoid
-   the lock prefix when the thread library is not used.
-
-   XXX In future we might even want to avoid it on UP machines.  */
-# include <tls.h>
-
-# define lll_trylock(futex) \
-  ({ unsigned char ret;							      \
-     __asm __volatile ("cmpl $0, %%gs:%P5\n\t"				      \
-		       "je 0f\n\t"					      \
-		       "lock\n"						      \
-		       "0:\tcmpxchgl %2, %1; setne %0"			      \
-		       : "=a" (ret), "=m" (futex)			      \
-		       : "r" (LLL_MUTEX_LOCK_INITIALIZER_LOCKED), "m" (futex),\
-			 "0" (LLL_MUTEX_LOCK_INITIALIZER),		      \
-		         "i" (offsetof (tcbhead_t, multiple_threads))	      \
-		       : "memory");					      \
-     ret; })
-
-
-# define lll_lock(futex) \
-  (void) ({ int ignore1, ignore2;					      \
-	    __asm __volatile ("cmpl $0, %%gs:%P6\n\t"			      \
-			      "je 0f\n\t"				      \
-			      "lock\n"					      \
-			      "0:\tcmpxchgl %1, %2\n\t"			      \
-			      "jnz _L_lock_%=\n\t"			      \
-			      ".subsection 1\n\t"			      \
-			      ".type _L_lock_%=,@function\n"		      \
-			      "_L_lock_%=:\n"				      \
-			      "1:\tleal %2, %%ecx\n"			      \
-			      "2:\tcall __lll_mutex_lock_wait\n"	      \
-			      "3:\tjmp 18f\n"				      \
-			      "4:\t.size _L_lock_%=, 4b-1b\n\t"		      \
-			      ".previous\n"				      \
-			      LLL_STUB_UNWIND_INFO_3			      \
-			      "18:"					      \
-			      : "=a" (ignore1), "=c" (ignore2), "=m" (futex)  \
-			      : "0" (0), "1" (1), "m" (futex),		      \
-		                "i" (offsetof (tcbhead_t, multiple_threads))  \
-			      : "memory"); })
-
-
-# define lll_unlock(futex) \
-  (void) ({ int ignore;							      \
-            __asm __volatile ("cmpl $0, %%gs:%P3\n\t"			      \
-			      "je 0f\n\t"				      \
-			      "lock\n"					      \
-			      "0:\tsubl $1,%0\n\t"			      \
-			      "jne _L_unlock_%=\n\t"			      \
-			      ".subsection 1\n\t"			      \
-			      ".type _L_unlock_%=,@function\n"		      \
-			      "_L_unlock_%=:\n"				      \
-			      "1:\tleal %0, %%eax\n"			      \
-			      "2:\tcall __lll_mutex_unlock_wake\n"	      \
-			      "3:\tjmp 18f\n\t"				      \
-			      "4:\t.size _L_unlock_%=, 4b-1b\n\t"	      \
-			      ".previous\n"				      \
-			      LLL_STUB_UNWIND_INFO_3			      \
-			      "18:"					      \
-			      : "=m" (futex), "=&a" (ignore)		      \
-			      : "m" (futex),				      \
-				"i" (offsetof (tcbhead_t, multiple_threads))  \
-			      : "memory"); })
+# define __lll_unlock_asm "cmpl $0, %%gs:%P3\n\t"			      \
+			  "je 0f\n\t"					      \
+			  "lock\n"					      \
+			  "0:\tsubl $1,%0\n\t"
 #endif
 
+#define lll_unlock(futex, private) \
+  (void)								      \
+    ({ int ignore;							      \
+       if (__builtin_constant_p (private) && (private) == LLL_PRIVATE)	      \
+	 __asm __volatile (__lll_unlock_asm				      \
+			   "jne _L_unlock_%=\n\t"			      \
+			   ".subsection 1\n\t"				      \
+			   ".type _L_unlock_%=,@function\n"		      \
+			   "_L_unlock_%=:\n"				      \
+			   "1:\tleal %0, %%eax\n"			      \
+			   "2:\tcall __lll_unlock_wake_private\n"	      \
+			   "3:\tjmp 18f\n"				      \
+			   "4:\t.size _L_unlock_%=, 4b-1b\n\t"		      \
+			   ".previous\n"				      \
+			   LLL_STUB_UNWIND_INFO_3			      \
+			   "18:"					      \
+			   : "=m" (futex), "=&a" (ignore)		      \
+			   : "m" (futex), "i" (MULTIPLE_THREADS_OFFSET)	      \
+			   : "memory");					      \
+       else								      \
+	 {								      \
+	   int ignore2;							      \
+	   __asm __volatile (__lll_unlock_asm				      \
+			     "jne _L_unlock_%=\n\t"			      \
+			     ".subsection 1\n\t"			      \
+			     ".type _L_unlock_%=,@function\n"		      \
+			     "_L_unlock_%=:\n"				      \
+			     "1:\tleal %0, %%eax\n"			      \
+			     "0:\tmovl %5, %%ecx\n"			      \
+			     "2:\tcall __lll_unlock_wake\n"		      \
+			     "3:\tjmp 18f\n"				      \
+			     "4:\t.size _L_unlock_%=, 4b-1b\n\t"	      \
+			     ".previous\n"				      \
+			     LLL_STUB_UNWIND_INFO_4			      \
+			     "18:"					      \
+			     : "=m" (futex), "=&a" (ignore), "=&c" (ignore2)  \
+			     : "i" (MULTIPLE_THREADS_OFFSET), "m" (futex),    \
+			       "g" (private)				      \
+			     : "memory");				      \
+	 }								      \
+    })
+
+#define lll_robust_unlock(futex, private) \
+  (void)								      \
+    ({ int ignore, ignore2;						      \
+       __asm __volatile (LOCK_INSTR "andl %3, %0\n\t"			      \
+			 "jne _L_robust_unlock_%=\n\t"			      \
+			 ".subsection 1\n\t"				      \
+			 ".type _L_robust_unlock_%=,@function\n"	      \
+			 "_L_robust_unlock_%=:\n\t"			      \
+			 "1:\tleal %0, %%eax\n"				      \
+			 "0:\tmovl %5, %%ecx\n"				      \
+			 "2:\tcall __lll_unlock_wake\n"			      \
+			 "3:\tjmp 18f\n"				      \
+			 "4:\t.size _L_robust_unlock_%=, 4b-1b\n\t"	      \
+			 ".previous\n"					      \
+			 LLL_STUB_UNWIND_INFO_4				      \
+			 "18:"						      \
+			 : "=m" (futex), "=&a" (ignore), "=&c" (ignore2)      \
+			 : "i" (FUTEX_WAITERS), "m" (futex), "g" (private)    \
+			 : "memory");					      \
+    })
+
+
+#define lll_robust_dead(futex, private) \
+  (void)								      \
+    ({ int __ignore;							      \
+       register int _nr asm ("edx") = 1;				      \
+       __asm __volatile (LOCK_INSTR "orl %5, (%2)\n\t"			      \
+			 LLL_EBX_LOAD					      \
+			 LLL_ENTER_KERNEL				      \
+			 LLL_EBX_LOAD					      \
+			 : "=a" (__ignore)				      \
+			 : "0" (SYS_futex), LLL_EBX_REG (&(futex)),	      \
+			   "c" (__lll_private_flag (FUTEX_WAKE, private)),    \
+			   "d" (_nr), "i" (FUTEX_OWNER_DIED),		      \
+			   "i" (offsetof (tcbhead_t, sysinfo)));	      \
+    })
 
 #define lll_islocked(futex) \
   (futex != LLL_LOCK_INITIALIZER)
 
-
 /* The kernel notifies a process with uses CLONE_CLEARTID via futex
    wakeup when the clone terminates.  The memory location contains the
    thread ID while the clone is running and is reset to zero
@@ -581,28 +572,6 @@ extern int __lll_timedwait_tid (int *tid, const struct timespec *abstime)
       }									      \
     __result; })
 
-
-/* Conditional variable handling.  */
-
-extern void __lll_cond_wait (pthread_cond_t *cond)
-     __attribute ((regparm (1))) attribute_hidden;
-extern int __lll_cond_timedwait (pthread_cond_t *cond,
-				 const struct timespec *abstime)
-     __attribute ((regparm (2))) attribute_hidden;
-extern void __lll_cond_wake (pthread_cond_t *cond)
-     __attribute ((regparm (1))) attribute_hidden;
-extern void __lll_cond_broadcast (pthread_cond_t *cond)
-     __attribute ((regparm (1))) attribute_hidden;
-
-
-#define lll_cond_wait(cond) \
-  __lll_cond_wait (cond)
-#define lll_cond_timedwait(cond, abstime) \
-  __lll_cond_timedwait (cond, abstime)
-#define lll_cond_wake(cond) \
-  __lll_cond_wake (cond)
-#define lll_cond_broadcast(cond) \
-  __lll_cond_broadcast (cond)
-
+#endif  /* !__ASSEMBLER__ */
 
 #endif	/* lowlevellock.h */
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/pthread_once.S b/nptl/sysdeps/unix/sysv/linux/i386/pthread_once.S
index 8ff0dad27f..f31d968bc5 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/pthread_once.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/pthread_once.S
@@ -20,19 +20,9 @@
 #include <unwindbuf.h>
 #include <sysdep.h>
 #include <kernel-features.h>
+#include <lowlevellock.h>
 
 
-#ifndef UP
-# define LOCK lock
-#else
-# define LOCK
-#endif
-
-#define SYS_futex		240
-#define FUTEX_WAIT		0
-#define FUTEX_WAKE		1
-#define FUTEX_PRIVATE_FLAG	128
-
 	.comm	__fork_generation, 4, 4
 
 	.text
diff --git a/nptl/sysdeps/unix/sysv/linux/ia64/lowlevellock.h b/nptl/sysdeps/unix/sysv/linux/ia64/lowlevellock.h
index 095f0e8aca..3c28a397ea 100644
--- a/nptl/sysdeps/unix/sysv/linux/ia64/lowlevellock.h
+++ b/nptl/sysdeps/unix/sysv/linux/ia64/lowlevellock.h
@@ -73,9 +73,6 @@
 /* Delay in spinlock loop.  */
 #define BUSY_WAIT_NOP          asm ("hint @pause")
 
-/* Initializer for compatibility lock.	*/
-#define LLL_MUTEX_LOCK_INITIALIZER (0)
-
 #define lll_futex_wait(futex, val, private) \
   lll_futex_timed_wait (futex, val, NULL, private)
 
@@ -95,12 +92,13 @@
    _r10 == -1 ? -_retval : _retval;					\
 })
 
-#define lll_robust_mutex_dead(futexv)					\
+#define lll_robust_dead(futexv, private)				\
 do									\
   {									\
     int *__futexp = &(futexv);						\
     atomic_or (__futexp, FUTEX_OWNER_DIED);				\
-    DO_INLINE_SYSCALL(futex, 3, (long) __futexp, FUTEX_WAKE, 1);	\
+    DO_INLINE_SYSCALL(futex, 3, (long) __futexp,			\
+		      __lll_private_flag (FUTEX_WAKE, private), 1);	\
   }									\
 while (0)
 
@@ -123,156 +121,144 @@ while (0)
 })
 
 
-#define __lll_mutex_trylock(futex) \
+#define __lll_trylock(futex) \
   (atomic_compare_and_exchange_val_acq (futex, 1, 0) != 0)
-#define lll_mutex_trylock(futex) __lll_mutex_trylock (&(futex))
+#define lll_trylock(futex) __lll_trylock (&(futex))
 
 
-#define __lll_robust_mutex_trylock(futex, id) \
+#define __lll_robust_trylock(futex, id) \
   (atomic_compare_and_exchange_val_acq (futex, id, 0) != 0)
-#define lll_robust_mutex_trylock(futex, id) \
-  __lll_robust_mutex_trylock (&(futex), id)
+#define lll_robust_trylock(futex, id) \
+  __lll_robust_trylock (&(futex), id)
 
 
-#define __lll_mutex_cond_trylock(futex) \
+#define __lll_cond_trylock(futex) \
   (atomic_compare_and_exchange_val_acq (futex, 2, 0) != 0)
-#define lll_mutex_cond_trylock(futex) __lll_mutex_cond_trylock (&(futex))
+#define lll_cond_trylock(futex) __lll_cond_trylock (&(futex))
 
 
-extern void __lll_lock_wait (int *futex) attribute_hidden;
-extern int __lll_robust_lock_wait (int *futex) attribute_hidden;
+extern void __lll_lock_wait_private (int *futex) attribute_hidden;
+extern void __lll_lock_wait (int *futex, int private) attribute_hidden;
+extern int __lll_robust_lock_wait (int *futex, int private) attribute_hidden;
 
 
-#define __lll_mutex_lock(futex)						\
-  ((void) ({								\
-    int *__futex = (futex);						\
-    if (atomic_compare_and_exchange_bool_acq (__futex, 1, 0) != 0)	\
-      __lll_lock_wait (__futex);					\
+#define __lll_lock(futex, private)					      \
+  ((void) ({								      \
+    int *__futex = (futex);						      \
+    if (__builtin_expect (atomic_compare_and_exchange_bool_acq (__futex,      \
+								1, 0), 0))    \
+      {									      \
+	if (__builtin_constant_p (private) && (private) == LLL_PRIVATE)	      \
+	  __lll_lock_wait_private (__futex);				      \
+	else								      \
+	  __lll_lock_wait (__futex, private);				      \
   }))
-#define lll_mutex_lock(futex) __lll_mutex_lock (&(futex))
-
-
-#define __lll_robust_mutex_lock(futex, id)				\
-  ({									\
-    int *__futex = (futex);						\
-    int __val = 0;							\
-									\
-    if (atomic_compare_and_exchange_bool_acq (__futex, id, 0) != 0)	\
-      __val = __lll_robust_lock_wait (__futex);				\
-    __val;								\
+#define lll_lock(futex, private) __lll_lock (&(futex), private)
+
+
+#define __lll_robust_lock(futex, id, private)				      \
+  ({									      \
+    int *__futex = (futex);						      \
+    int __val = 0;							      \
+									      \
+    if (__builtin_expect (atomic_compare_and_exchange_bool_acq (__futex, id,  \
+								0), 0))	      \
+      __val = __lll_robust_lock_wait (__futex, private);		      \
+    __val;								      \
   })
-#define lll_robust_mutex_lock(futex, id) __lll_robust_mutex_lock (&(futex), id)
+#define lll_robust_lock(futex, id, private) \
+  __lll_robust_lock (&(futex), id, private)
 
 
-#define __lll_mutex_cond_lock(futex)					\
-  ((void) ({								\
-    int *__futex = (futex);						\
-    if (atomic_compare_and_exchange_bool_acq (__futex, 2, 0) != 0)	\
-      __lll_lock_wait (__futex);					\
+#define __lll_cond_lock(futex, private)					      \
+  ((void) ({								      \
+    int *__futex = (futex);						      \
+    if (__builtin_expect (atomic_compare_and_exchange_bool_acq (__futex, 2,   \
+								0), 0))	      \
+      __lll_lock_wait (__futex, private);				      \
   }))
-#define lll_mutex_cond_lock(futex) __lll_mutex_cond_lock (&(futex))
-
-
-#define __lll_robust_mutex_cond_lock(futex, id)				\
-  ({									\
-    int *__futex = (futex);						\
-    int __val = 0;							\
-    int __id = (id) | FUTEX_WAITERS;					\
-									\
-    if (atomic_compare_and_exchange_bool_acq (__futex, __id, 0) != 0)	\
-      __val = __lll_robust_lock_wait (__futex);				\
-    __val;								\
+#define lll_cond_lock(futex, private) __lll_cond_lock (&(futex), private)
+
+
+#define __lll_robust_cond_lock(futex, id, private)			      \
+  ({									      \
+    int *__futex = (futex);						      \
+    int __val = 0;							      \
+    int __id = (id) | FUTEX_WAITERS;					      \
+									      \
+    if (__builtin_expect (atomic_compare_and_exchange_bool_acq (__futex,      \
+								__id, 0), 0)) \
+      __val = __lll_robust_lock_wait (__futex, private);		      \
+    __val;								      \
   })
-#define lll_robust_mutex_cond_lock(futex, id) \
-  __lll_robust_mutex_cond_lock (&(futex), id)
+#define lll_robust_cond_lock(futex, id, private) \
+  __lll_robust_cond_lock (&(futex), id, private)
 
 
-extern int __lll_timedlock_wait (int *futex, const struct timespec *)
-     attribute_hidden;
-extern int __lll_robust_timedlock_wait (int *futex, const struct timespec *)
-     attribute_hidden;
+extern int __lll_timedlock_wait (int *futex, const struct timespec *,
+				 int private) attribute_hidden;
+extern int __lll_robust_timedlock_wait (int *futex, const struct timespec *,
+					int private) attribute_hidden;
 
 
-#define __lll_mutex_timedlock(futex, abstime)				\
-  ({									\
-     int *__futex = (futex);						\
-     int __val = 0;							\
-									\
-     if (atomic_compare_and_exchange_bool_acq (__futex, 1, 0) != 0)	\
-       __val = __lll_timedlock_wait (__futex, abstime);			\
-     __val;								\
+#define __lll_timedlock(futex, abstime, private)			      \
+  ({									      \
+     int *__futex = (futex);						      \
+     int __val = 0;							      \
+									      \
+     if (__builtin_expect (atomic_compare_and_exchange_bool_acq (__futex, 1,  \
+								 0), 0))      \
+       __val = __lll_timedlock_wait (__futex, abstime, private);	      \
+     __val;								      \
   })
-#define lll_mutex_timedlock(futex, abstime) \
-  __lll_mutex_timedlock (&(futex), abstime)
-
-
-#define __lll_robust_mutex_timedlock(futex, abstime, id)		\
-  ({									\
-    int *__futex = (futex);						\
-    int __val = 0;							\
-									\
-    if (atomic_compare_and_exchange_bool_acq (__futex, id, 0) != 0)	\
-      __val = __lll_robust_timedlock_wait (__futex, abstime);		\
-    __val;								\
+#define lll_timedlock(futex, abstime, private) \
+  __lll_timedlock (&(futex), abstime, private)
+
+
+#define __lll_robust_timedlock(futex, abstime, id, private)		      \
+  ({									      \
+    int *__futex = (futex);						      \
+    int __val = 0;							      \
+									      \
+    if (__builtin_expect (atomic_compare_and_exchange_bool_acq (__futex, id,  \
+								0), 0))	      \
+      __val = __lll_robust_timedlock_wait (__futex, abstime, private);	      \
+    __val;								      \
   })
-#define lll_robust_mutex_timedlock(futex, abstime, id) \
-  __lll_robust_mutex_timedlock (&(futex), abstime, id)
+#define lll_robust_timedlock(futex, abstime, id, private) \
+  __lll_robust_timedlock (&(futex), abstime, id, private)
 
 
-#define __lll_mutex_unlock(futex)			\
-  ((void) ({						\
-    int *__futex = (futex);				\
-    int __val = atomic_exchange_rel (__futex, 0);	\
-							\
-    if (__builtin_expect (__val > 1, 0))		\
-      lll_futex_wake (__futex, 1, LLL_SHARED);		\
+#define __lll_unlock(futex, private)					      \
+  ((void) ({								      \
+    int *__futex = (futex);						      \
+    int __val = atomic_exchange_rel (__futex, 0);			      \
+									      \
+    if (__builtin_expect (__val > 1, 0))				      \
+      lll_futex_wake (__futex, 1, private);				      \
   }))
-#define lll_mutex_unlock(futex) \
-  __lll_mutex_unlock(&(futex))
+#define lll_unlock(futex, private) __lll_unlock(&(futex), private)
 
 
-#define __lll_robust_mutex_unlock(futex)		\
-  ((void) ({						\
-    int *__futex = (futex);				\
-    int __val = atomic_exchange_rel (__futex, 0);	\
-							\
-    if (__builtin_expect (__val & FUTEX_WAITERS, 0))	\
-      lll_futex_wake (__futex, 1, LLL_SHARED);		\
+#define __lll_robust_unlock(futex, private)				      \
+  ((void) ({								      \
+    int *__futex = (futex);						      \
+    int __val = atomic_exchange_rel (__futex, 0);			      \
+									      \
+    if (__builtin_expect (__val & FUTEX_WAITERS, 0))			      \
+      lll_futex_wake (__futex, 1, private);				      \
   }))
-#define lll_robust_mutex_unlock(futex) \
-  __lll_robust_mutex_unlock(&(futex))
+#define lll_robust_unlock(futex, private) \
+  __lll_robust_unlock(&(futex), private)
 
 
-#define __lll_mutex_unlock_force(futex)		\
-  ((void) ({					\
-    int *__futex = (futex);			\
-    (void) atomic_exchange_rel (__futex, 0);	\
-    lll_futex_wake (__futex, 1, LLL_SHARED);	\
-  }))
-#define lll_mutex_unlock_force(futex) \
-  __lll_mutex_unlock_force(&(futex))
-
-
-#define lll_mutex_islocked(futex) \
+#define lll_islocked(futex) \
   (futex != 0)
 
-
-/* We have a separate internal lock implementation which is not tied
-   to binary compatibility.  We can use the lll_mutex_*.  */
-
-/* Type for lock object.  */
-typedef int lll_lock_t;
-
 /* Initializers for lock.  */
 #define LLL_LOCK_INITIALIZER		(0)
 #define LLL_LOCK_INITIALIZER_LOCKED	(1)
 
-#define lll_trylock(futex)	lll_mutex_trylock (futex)
-#define lll_lock(futex)		lll_mutex_lock (futex)
-#define lll_unlock(futex)	lll_mutex_unlock (futex)
-#define lll_islocked(futex)	lll_mutex_islocked (futex)
-
-
 /* The kernel notifies a process with uses CLONE_CLEARTID via futex
    wakeup when the clone terminates.  The memory location contains the
    thread ID while the clone is running and is reset to zero
@@ -297,26 +283,4 @@ extern int __lll_timedwait_tid (int *, const struct timespec *)
     __res;						\
   })
 
-
-/* Conditional variable handling.  */
-
-extern void __lll_cond_wait (pthread_cond_t *cond)
-     attribute_hidden;
-extern int __lll_cond_timedwait (pthread_cond_t *cond,
-				 const struct timespec *abstime)
-     attribute_hidden;
-extern void __lll_cond_wake (pthread_cond_t *cond)
-     attribute_hidden;
-extern void __lll_cond_broadcast (pthread_cond_t *cond)
-     attribute_hidden;
-
-#define lll_cond_wait(cond) \
-  __lll_cond_wait (cond)
-#define lll_cond_timedwait(cond, abstime) \
-  __lll_cond_timedwait (cond, abstime)
-#define lll_cond_wake(cond) \
-  __lll_cond_wake (cond)
-#define lll_cond_broadcast(cond) \
-  __lll_cond_broadcast (cond)
-
 #endif	/* lowlevellock.h */
diff --git a/nptl/sysdeps/unix/sysv/linux/lowlevellock.c b/nptl/sysdeps/unix/sysv/linux/lowlevellock.c
index ab7f605f0c..1187800148 100644
--- a/nptl/sysdeps/unix/sysv/linux/lowlevellock.c
+++ b/nptl/sysdeps/unix/sysv/linux/lowlevellock.c
@@ -25,22 +25,35 @@
 
 
 void
-__lll_lock_wait (int *futex)
+__lll_lock_wait_private (int *futex)
 {
   do
     {
       int oldval = atomic_compare_and_exchange_val_acq (futex, 2, 1);
       if (oldval != 0)
-	lll_futex_wait (futex, 2,
-			// XYZ check mutex flag
-			LLL_SHARED);
+	lll_futex_wait (futex, 2, LLL_PRIVATE);
+    }
+  while (atomic_compare_and_exchange_bool_acq (futex, 2, 0) != 0);
+}
+
+
+/* These functions doesn't get included in libc.so  */
+#ifdef IS_IN_libpthread
+void
+__lll_lock_wait (int *futex, int private)
+{
+  do
+    {
+      int oldval = atomic_compare_and_exchange_val_acq (futex, 2, 1);
+      if (oldval != 0)
+	lll_futex_wait (futex, 2, private);
     }
   while (atomic_compare_and_exchange_bool_acq (futex, 2, 0) != 0);
 }
 
 
 int
-__lll_timedlock_wait (int *futex, const struct timespec *abstime)
+__lll_timedlock_wait (int *futex, const struct timespec *abstime, int private)
 {
   /* Reject invalid timeouts.  */
   if (abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000)
@@ -70,9 +83,7 @@ __lll_timedlock_wait (int *futex, const struct timespec *abstime)
       /* Wait.  */
       int oldval = atomic_compare_and_exchange_val_acq (futex, 2, 1);
       if (oldval != 0)
-	lll_futex_timed_wait (futex, 2, &rt,
-			      // XYZ check mutex flag
-			      LLL_SHARED);
+	lll_futex_timed_wait (futex, 2, &rt, private);
     }
   while (atomic_compare_and_exchange_bool_acq (futex, 2, 0) != 0);
 
@@ -80,8 +91,6 @@ __lll_timedlock_wait (int *futex, const struct timespec *abstime)
 }
 
 
-/* This function doesn't get included in libc.so  */
-#ifdef IS_IN_libpthread
 int
 __lll_timedwait_tid (int *tidp, const struct timespec *abstime)
 {
diff --git a/nptl/sysdeps/unix/sysv/linux/lowlevelrobustlock.c b/nptl/sysdeps/unix/sysv/linux/lowlevelrobustlock.c
index 54cee0859b..3830f94daa 100644
--- a/nptl/sysdeps/unix/sysv/linux/lowlevelrobustlock.c
+++ b/nptl/sysdeps/unix/sysv/linux/lowlevelrobustlock.c
@@ -25,7 +25,7 @@
 
 
 int
-__lll_robust_lock_wait (int *futex)
+__lll_robust_lock_wait (int *futex, int private)
 {
   int oldval = *futex;
   int tid = THREAD_GETMEM (THREAD_SELF, tid);
@@ -44,9 +44,7 @@ __lll_robust_lock_wait (int *futex)
 	  && atomic_compare_and_exchange_bool_acq (futex, newval, oldval))
 	continue;
 
-      lll_futex_wait (futex, newval,
-		      // XYZ check mutex flag
-		      LLL_SHARED);
+      lll_futex_wait (futex, newval, private);
 
     try:
       ;
@@ -59,7 +57,8 @@ __lll_robust_lock_wait (int *futex)
 
 
 int
-__lll_robust_timedlock_wait (int *futex, const struct timespec *abstime)
+__lll_robust_timedlock_wait (int *futex, const struct timespec *abstime,
+			     int private)
 {
   /* Reject invalid timeouts.  */
   if (abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000)
@@ -102,9 +101,7 @@ __lll_robust_timedlock_wait (int *futex, const struct timespec *abstime)
 	  && atomic_compare_and_exchange_bool_acq (futex, newval, oldval))
 	continue;
 
-      lll_futex_timed_wait (futex, newval, &rt,
-			    // XYZ check mutex flag
-			    LLL_SHARED);
+      lll_futex_timed_wait (futex, newval, &rt, private);
 
     try:
       ;
diff --git a/nptl/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h b/nptl/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h
index cf91f21483..41804d1372 100644
--- a/nptl/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h
+++ b/nptl/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h
@@ -69,9 +69,6 @@
 # endif	      
 #endif
 
-/* Initializer for compatibility lock.	*/
-#define LLL_MUTEX_LOCK_INITIALIZER (0)
-
 #define lll_futex_wait(futexp, val, private) \
   lll_futex_timed_wait (futexp, val, NULL, private)
 
@@ -97,14 +94,15 @@
     INTERNAL_SYSCALL_ERROR_P (__ret, __err) ? -__ret : __ret;		      \
   })
 
-#define lll_robust_mutex_dead(futexv) \
+#define lll_robust_dead(futexv, private) \
   do									      \
     {									      \
       INTERNAL_SYSCALL_DECL (__err);					      \
       int *__futexp = &(futexv);					      \
 									      \
       atomic_or (__futexp, FUTEX_OWNER_DIED);				      \
-      INTERNAL_SYSCALL (futex, __err, 4, __futexp, FUTEX_WAKE, 1, 0);	      \
+      INTERNAL_SYSCALL (futex, __err, 4, __futexp,			      \
+			__lll_private_flag (FUTEX_WAKE, private), 1, 0);      \
     }									      \
   while (0)
 
@@ -171,119 +169,111 @@
      __val;								      \
   })
 
-#define lll_robust_mutex_trylock(lock, id) __lll_robust_trylock (&(lock), id)
+#define lll_robust_trylock(lock, id) __lll_robust_trylock (&(lock), id)
 
 /* Set *futex to 1 if it is 0, atomically.  Returns the old value */
 #define __lll_trylock(futex) __lll_robust_trylock (futex, 1)
 
-#define lll_mutex_trylock(lock)	__lll_trylock (&(lock))
+#define lll_trylock(lock)	__lll_trylock (&(lock))
 
 /* Set *futex to 2 if it is 0, atomically.  Returns the old value */
 #define __lll_cond_trylock(futex) __lll_robust_trylock (futex, 2)
 
-#define lll_mutex_cond_trylock(lock)	__lll_cond_trylock (&(lock))
+#define lll_cond_trylock(lock)	__lll_cond_trylock (&(lock))
 
 
-extern void __lll_lock_wait (int *futex) attribute_hidden;
-extern int __lll_robust_lock_wait (int *futex) attribute_hidden;
+extern void __lll_lock_wait_private (int *futex) attribute_hidden;
+extern void __lll_lock_wait (int *futex, int private) attribute_hidden;
+extern int __lll_robust_lock_wait (int *futex, int private) attribute_hidden;
 
-#define lll_mutex_lock(lock) \
+#define lll_lock(lock, private) \
   (void) ({								      \
     int *__futex = &(lock);						      \
     if (__builtin_expect (atomic_compare_and_exchange_val_acq (__futex, 1, 0),\
 			  0) != 0)					      \
-      __lll_lock_wait (__futex);					      \
+      {									      \
+	if (__builtin_constant_p (private) && (private) == LLL_PRIVATE)	      \
+	  __lll_lock_wait_private (__futex);				      \
+	else								      \
+	  __lll_lock_wait (__futex, private);				      \
+      }									      \
   })
 
-#define lll_robust_mutex_lock(lock, id) \
+#define lll_robust_lock(lock, id, private) \
   ({									      \
     int *__futex = &(lock);						      \
     int __val = 0;							      \
     if (__builtin_expect (atomic_compare_and_exchange_bool_acq (__futex, id,  \
 								0), 0))	      \
-      __val = __lll_robust_lock_wait (__futex);				      \
+      __val = __lll_robust_lock_wait (__futex, private);		      \
     __val;								      \
   })
 
-#define lll_mutex_cond_lock(lock) \
+#define lll_cond_lock(lock, private) \
   (void) ({								      \
     int *__futex = &(lock);						      \
     if (__builtin_expect (atomic_compare_and_exchange_val_acq (__futex, 2, 0),\
 			  0) != 0)					      \
-      __lll_lock_wait (__futex);					      \
+      __lll_lock_wait (__futex, private);				      \
   })
 
-#define lll_robust_mutex_cond_lock(lock, id) \
+#define lll_robust_cond_lock(lock, id, private) \
   ({									      \
     int *__futex = &(lock);						      \
     int __val = 0;							      \
     int __id = id | FUTEX_WAITERS;					      \
     if (__builtin_expect (atomic_compare_and_exchange_bool_acq (__futex, __id,\
 								0), 0))	      \
-      __val = __lll_robust_lock_wait (__futex);				      \
+      __val = __lll_robust_lock_wait (__futex, private);		      \
     __val;								      \
   })
 
 
 extern int __lll_timedlock_wait
-  (int *futex, const struct timespec *) attribute_hidden;
+  (int *futex, const struct timespec *, int private) attribute_hidden;
 extern int __lll_robust_timedlock_wait
-  (int *futex, const struct timespec *) attribute_hidden;
+  (int *futex, const struct timespec *, int private) attribute_hidden;
 
-#define lll_mutex_timedlock(lock, abstime) \
+#define lll_timedlock(lock, abstime, private) \
   ({									      \
     int *__futex = &(lock);						      \
     int __val = 0;							      \
     if (__builtin_expect (atomic_compare_and_exchange_val_acq (__futex, 1, 0),\
 			  0) != 0)					      \
-      __val = __lll_timedlock_wait (__futex, abstime);			      \
+      __val = __lll_timedlock_wait (__futex, abstime, private);		      \
     __val;								      \
   })
 
-#define lll_robust_mutex_timedlock(lock, abstime, id) \
+#define lll_robust_timedlock(lock, abstime, id, private) \
   ({									      \
     int *__futex = &(lock);						      \
     int __val = 0;							      \
     if (__builtin_expect (atomic_compare_and_exchange_bool_acq (__futex, id,  \
 								0), 0))	      \
-      __val = __lll_robust_timedlock_wait (__futex, abstime);		      \
+      __val = __lll_robust_timedlock_wait (__futex, abstime, private);	      \
     __val;								      \
   })
 
-#define lll_mutex_unlock(lock) \
+#define lll_unlock(lock, private) \
   ((void) ({								      \
     int *__futex = &(lock);						      \
     int __val = atomic_exchange_rel (__futex, 0);			      \
     if (__builtin_expect (__val > 1, 0))				      \
-      lll_futex_wake (__futex, 1, LLL_SHARED);				      \
+      lll_futex_wake (__futex, 1, private);				      \
   }))
 
-#define lll_robust_mutex_unlock(lock) \
+#define lll_robust_unlock(lock, private) \
   ((void) ({								      \
     int *__futex = &(lock);						      \
     int __val = atomic_exchange_rel (__futex, 0);			      \
     if (__builtin_expect (__val & FUTEX_WAITERS, 0))			      \
-      lll_futex_wake (__futex, 1, LLL_SHARED);				      \
-  }))
-
-#define lll_mutex_unlock_force(lock) \
-  ((void) ({								      \
-    int *__futex = &(lock);						      \
-    *__futex = 0;							      \
-    __asm __volatile (__lll_rel_instr ::: "memory");			      \
-    lll_futex_wake (__futex, 1, LLL_SHARED);				      \
+      lll_futex_wake (__futex, 1, private);				      \
   }))
 
-#define lll_mutex_islocked(futex) \
+#define lll_islocked(futex) \
   (futex != 0)
 
 
-/* Our internal lock implementation is identical to the binary-compatible
-   mutex implementation. */
-
-/* Type for lock object.  */
-typedef int lll_lock_t;
-
 /* Initializers for lock.  */
 #define LLL_LOCK_INITIALIZER		(0)
 #define LLL_LOCK_INITIALIZER_LOCKED	(1)
@@ -293,11 +283,6 @@ typedef int lll_lock_t;
     1  -  taken by one user
    >1  -  taken by more users */
 
-#define lll_trylock(lock)	lll_mutex_trylock (lock)
-#define lll_lock(lock)		lll_mutex_lock (lock)
-#define lll_unlock(lock)	lll_mutex_unlock (lock)
-#define lll_islocked(lock)	lll_mutex_islocked (lock)
-
 /* The kernel notifies a process which uses CLONE_CLEARTID via futex
    wakeup when the clone terminates.  The memory location contains the
    thread ID while the clone is running and is reset to zero
@@ -320,26 +305,4 @@ extern int __lll_timedwait_tid (int *, const struct timespec *)
     __res;								      \
   })
 
-
-/* Conditional variable handling.  */
-
-extern void __lll_cond_wait (pthread_cond_t *cond)
-     attribute_hidden;
-extern int __lll_cond_timedwait (pthread_cond_t *cond,
-				 const struct timespec *abstime)
-     attribute_hidden;
-extern void __lll_cond_wake (pthread_cond_t *cond)
-     attribute_hidden;
-extern void __lll_cond_broadcast (pthread_cond_t *cond)
-     attribute_hidden;
-
-#define lll_cond_wait(cond) \
-  __lll_cond_wait (cond)
-#define lll_cond_timedwait(cond, abstime) \
-  __lll_cond_timedwait (cond, abstime)
-#define lll_cond_wake(cond) \
-  __lll_cond_wake (cond)
-#define lll_cond_broadcast(cond) \
-  __lll_cond_broadcast (cond)
-
 #endif	/* lowlevellock.h */
diff --git a/nptl/sysdeps/unix/sysv/linux/powerpc/sem_post.c b/nptl/sysdeps/unix/sysv/linux/powerpc/sem_post.c
index 0b18cdbcde..fd92f7b32d 100644
--- a/nptl/sysdeps/unix/sysv/linux/powerpc/sem_post.c
+++ b/nptl/sysdeps/unix/sysv/linux/powerpc/sem_post.c
@@ -29,11 +29,37 @@
 int
 __new_sem_post (sem_t *sem)
 {
+  struct new_sem *isem = (struct new_sem *) sem;
+
+  __asm __volatile (__lll_rel_instr ::: "memory");
+  atomic_increment (&isem->value);
+  __asm __volatile (__lll_acq_instr ::: "memory");
+  if (isem->nwaiters > 0)
+    {
+      int err = lll_futex_wake (&isem->value, 1,
+				isem->private ^ FUTEX_PRIVATE_FLAG);
+      if (__builtin_expect (err, 0) < 0)
+	{
+	  __set_errno (-err);
+	  return -1;
+	}
+    }
+  return 0;
+}
+versioned_symbol (libpthread, __new_sem_post, sem_post, GLIBC_2_1);
+
+#if SHLIB_COMPAT (libpthread, GLIBC_2_0, GLIBC_2_1)
+
+int
+attribute_compat_text_section
+__old_sem_post (sem_t *sem)
+{
   int *futex = (int *) sem;
 
   __asm __volatile (__lll_rel_instr ::: "memory");
   int nr = atomic_increment_val (futex);
-  int err = lll_futex_wake (futex, nr, LLL_SHARED);
+  /* We always have to assume it is a shared semaphore.  */
+  int err = lll_futex_wake (futex, 1, LLL_SHARED);
   if (__builtin_expect (err, 0) < 0)
     {
       __set_errno (-err);
@@ -41,8 +67,6 @@ __new_sem_post (sem_t *sem)
     }
   return 0;
 }
-versioned_symbol (libpthread, __new_sem_post, sem_post, GLIBC_2_1);
-#if SHLIB_COMPAT (libpthread, GLIBC_2_0, GLIBC_2_1)
-strong_alias (__new_sem_post, __old_sem_post)
+
 compat_symbol (libpthread, __old_sem_post, sem_post, GLIBC_2_0);
 #endif
diff --git a/nptl/sysdeps/unix/sysv/linux/pthread_mutex_cond_lock.c b/nptl/sysdeps/unix/sysv/linux/pthread_mutex_cond_lock.c
index a97351f880..81ecd6556b 100644
--- a/nptl/sysdeps/unix/sysv/linux/pthread_mutex_cond_lock.c
+++ b/nptl/sysdeps/unix/sysv/linux/pthread_mutex_cond_lock.c
@@ -1,8 +1,8 @@
 #include <pthreadP.h>
 
-#define LLL_MUTEX_LOCK(mutex) lll_mutex_cond_lock (mutex)
-#define LLL_MUTEX_TRYLOCK(mutex) lll_mutex_cond_trylock (mutex)
-#define LLL_ROBUST_MUTEX_LOCK(mutex, id) lll_robust_mutex_cond_lock (mutex, id)
+#define LLL_MUTEX_LOCK(mutex) lll_cond_lock (mutex, /* XYZ */ LLL_SHARED)
+#define LLL_MUTEX_TRYLOCK(mutex) lll_cond_trylock (mutex)
+#define LLL_ROBUST_MUTEX_LOCK(mutex, id) lll_robust_cond_lock (mutex, id, /* XYZ */ LLL_SHARED)
 #define __pthread_mutex_lock __pthread_mutex_cond_lock
 #define NO_INCR
 
diff --git a/nptl/sysdeps/unix/sysv/linux/register-atfork.c b/nptl/sysdeps/unix/sysv/linux/register-atfork.c
index cb5b2b832f..231fc9b091 100644
--- a/nptl/sysdeps/unix/sysv/linux/register-atfork.c
+++ b/nptl/sysdeps/unix/sysv/linux/register-atfork.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2005, 2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
 
@@ -24,7 +24,7 @@
 
 
 /* Lock to protect allocation and deallocation of fork handlers.  */
-lll_lock_t __fork_lock = LLL_LOCK_INITIALIZER;
+int __fork_lock = LLL_LOCK_INITIALIZER;
 
 
 /* Number of pre-allocated handler entries.  */
@@ -85,7 +85,7 @@ __register_atfork (prepare, parent, child, dso_handle)
      void *dso_handle;
 {
   /* Get the lock to not conflict with other allocations.  */
-  lll_lock (__fork_lock);
+  lll_lock (__fork_lock, LLL_PRIVATE);
 
   struct fork_handler *newp = fork_handler_alloc ();
 
@@ -102,7 +102,7 @@ __register_atfork (prepare, parent, child, dso_handle)
     }
 
   /* Release the lock.  */
-  lll_unlock (__fork_lock);
+  lll_unlock (__fork_lock, LLL_PRIVATE);
 
   return newp == NULL ? ENOMEM : 0;
 }
@@ -112,7 +112,7 @@ libc_hidden_def (__register_atfork)
 libc_freeres_fn (free_mem)
 {
   /* Get the lock to not conflict with running forks.  */
-  lll_lock (__fork_lock);
+  lll_lock (__fork_lock, LLL_PRIVATE);
 
   /* No more fork handlers.  */
   __fork_handlers = NULL;
@@ -123,7 +123,7 @@ libc_freeres_fn (free_mem)
   memset (&fork_handler_pool, '\0', sizeof (fork_handler_pool));
 
   /* Release the lock.  */
-  lll_unlock (__fork_lock);
+  lll_unlock (__fork_lock, LLL_PRIVATE);
 
   /* We can free the memory after releasing the lock.  */
   while (runp != NULL)
diff --git a/nptl/sysdeps/unix/sysv/linux/s390/lowlevellock.h b/nptl/sysdeps/unix/sysv/linux/s390/lowlevellock.h
index 4758b63bd0..ad4d27300f 100644
--- a/nptl/sysdeps/unix/sysv/linux/s390/lowlevellock.h
+++ b/nptl/sysdeps/unix/sysv/linux/s390/lowlevellock.h
@@ -68,9 +68,6 @@
 # endif	      
 #endif
 
-/* Initializer for compatibility lock.	*/
-#define LLL_MUTEX_LOCK_INITIALIZER (0)
-
 #define lll_futex_wait(futex, val, private) \
   lll_futex_timed_wait (futex, val, NULL, private)
 
@@ -108,13 +105,13 @@
   })
 
 
-#define lll_robust_mutex_dead(futexv) \
+#define lll_robust_dead(futexv, private) \
   do									      \
     {									      \
       int *__futexp = &(futexv);					      \
 									      \
       atomic_or (__futexp, FUTEX_OWNER_DIED);				      \
-      lll_futex_wake (__futexp, 1, LLL_SHARED);				      \
+      lll_futex_wake (__futexp, 1, private);				      \
     }									      \
   while (0)
 
@@ -175,7 +172,7 @@
 
 static inline int
 __attribute__ ((always_inline))
-__lll_mutex_trylock (int *futex)
+__lll_trylock (int *futex)
 {
     unsigned int old;
 
@@ -184,12 +181,12 @@ __lll_mutex_trylock (int *futex)
 		       : "0" (0), "d" (1), "m" (*futex) : "cc", "memory" );
     return old != 0;
 }
-#define lll_mutex_trylock(futex) __lll_mutex_trylock (&(futex))
+#define lll_trylock(futex) __lll_trylock (&(futex))
 
 
 static inline int
 __attribute__ ((always_inline))
-__lll_mutex_cond_trylock (int *futex)
+__lll_cond_trylock (int *futex)
 {
     unsigned int old;
 
@@ -198,12 +195,12 @@ __lll_mutex_cond_trylock (int *futex)
 		       : "0" (0), "d" (2), "m" (*futex) : "cc", "memory" );
     return old != 0;
 }
-#define lll_mutex_cond_trylock(futex) __lll_mutex_cond_trylock (&(futex))
+#define lll_cond_trylock(futex) __lll_cond_trylock (&(futex))
 
 
 static inline int
 __attribute__ ((always_inline))
-__lll_robust_mutex_trylock (int *futex, int id)
+__lll_robust_trylock (int *futex, int id)
 {
     unsigned int old;
 
@@ -212,141 +209,121 @@ __lll_robust_mutex_trylock (int *futex, int id)
 		       : "0" (0), "d" (id), "m" (*futex) : "cc", "memory" );
     return old != 0;
 }
-#define lll_robust_mutex_trylock(futex, id) \
-  __lll_robust_mutex_trylock (&(futex), id)
+#define lll_robust_trylock(futex, id) \
+  __lll_robust_trylock (&(futex), id)
 
 
-extern void __lll_lock_wait (int *futex) attribute_hidden;
-extern int __lll_robust_lock_wait (int *futex) attribute_hidden;
+extern void __lll_lock_wait_private (int *futex) attribute_hidden;
+extern void __lll_lock_wait (int *futex, int private) attribute_hidden;
+extern int __lll_robust_lock_wait (int *futex, int private) attribute_hidden;
 
 static inline void
 __attribute__ ((always_inline))
-__lll_mutex_lock (int *futex)
+__lll_lock (int *futex, int private)
 {
-  if (atomic_compare_and_exchange_bool_acq (futex, 1, 0) != 0)
-    __lll_lock_wait (futex);
+  if (__builtin_expect (atomic_compare_and_exchange_bool_acq (futex, 1, 0), 0))
+    {
+      if (__builtin_constant_p (private) && private == LLL_PRIVATE)
+	__lll_lock_wait_private (futex);
+      else
+	__lll_lock_wait (futex, private);
+    }
 }
-#define lll_mutex_lock(futex) __lll_mutex_lock (&(futex))
+#define lll_lock(futex, private) __lll_lock (&(futex), private)
 
 static inline int
 __attribute__ ((always_inline))
-__lll_robust_mutex_lock (int *futex, int id)
+__lll_robust_lock (int *futex, int id, int private)
 {
   int result = 0;
-  if (atomic_compare_and_exchange_bool_acq (futex, id, 0) != 0)
-    result = __lll_robust_lock_wait (futex);
+  if (__builtin_expect (atomic_compare_and_exchange_bool_acq (futex, id, 0),
+			0))
+    result = __lll_robust_lock_wait (futex, private);
   return result;
 }
-#define lll_robust_mutex_lock(futex, id) __lll_robust_mutex_lock (&(futex), id)
+#define lll_robust_lock(futex, id, private) \
+  __lll_robust_lock (&(futex), id, private)
 
 static inline void
 __attribute__ ((always_inline))
-__lll_mutex_cond_lock (int *futex)
+__lll_cond_lock (int *futex, int private)
 {
-  if (atomic_compare_and_exchange_bool_acq (futex, 2, 0) != 0)
-    __lll_lock_wait (futex);
+  if (__builtin_expect (atomic_compare_and_exchange_bool_acq (futex, 2, 0), 0))
+    __lll_lock_wait (futex, private);
 }
-#define lll_mutex_cond_lock(futex) __lll_mutex_cond_lock (&(futex))
+#define lll_cond_lock(futex, private) __lll_cond_lock (&(futex), private)
 
-#define lll_robust_mutex_cond_lock(futex, id) \
-  __lll_robust_mutex_lock (&(futex), (id) | FUTEX_WAITERS)
+#define lll_robust_cond_lock(futex, id, private) \
+  __lll_robust_lock (&(futex), (id) | FUTEX_WAITERS, private)
 
 extern int __lll_timedlock_wait
-  (int *futex, const struct timespec *) attribute_hidden;
+  (int *futex, const struct timespec *, int private) attribute_hidden;
 extern int __lll_robust_timedlock_wait
-  (int *futex, const struct timespec *) attribute_hidden;
+  (int *futex, const struct timespec *, int private) attribute_hidden;
 
 static inline int
 __attribute__ ((always_inline))
-__lll_mutex_timedlock (int *futex, const struct timespec *abstime)
+__lll_timedlock (int *futex, const struct timespec *abstime, int private)
 {
   int result = 0;
-  if (atomic_compare_and_exchange_bool_acq (futex, 1, 0) != 0)
-    result = __lll_timedlock_wait (futex, abstime);
+  if (__builtin_expect (atomic_compare_and_exchange_bool_acq (futex, 1, 0), 0))
+    result = __lll_timedlock_wait (futex, abstime, private);
   return result;
 }
-#define lll_mutex_timedlock(futex, abstime) \
-  __lll_mutex_timedlock (&(futex), abstime)
+#define lll_timedlock(futex, abstime, private) \
+  __lll_timedlock (&(futex), abstime, private)
 
 static inline int
 __attribute__ ((always_inline))
-__lll_robust_mutex_timedlock (int *futex, const struct timespec *abstime,
-			      int id)
+__lll_robust_timedlock (int *futex, const struct timespec *abstime,
+			int id, int private)
 {
   int result = 0;
-  if (atomic_compare_and_exchange_bool_acq (futex, id, 0) != 0)
-    result = __lll_robust_timedlock_wait (futex, abstime);
+  if (__builtin_expect (atomic_compare_and_exchange_bool_acq (futex, id, 0),
+			0))
+    result = __lll_robust_timedlock_wait (futex, abstime, private);
   return result;
 }
-#define lll_robust_mutex_timedlock(futex, abstime, id) \
-  __lll_robust_mutex_timedlock (&(futex), abstime, id)
+#define lll_robust_timedlock(futex, abstime, id, private) \
+  __lll_robust_timedlock (&(futex), abstime, id, private)
 
 
 static inline void
 __attribute__ ((always_inline))
-__lll_mutex_unlock (int *futex)
+__lll_unlock (int *futex, int private)
 {
   int oldval;
   int newval = 0;
 
   lll_compare_and_swap (futex, oldval, newval, "slr %2,%2");
-  if (oldval > 1)
-    lll_futex_wake (futex, 1, LLL_SHARED);
+  if (__builtin_expect (oldval > 1, 0))
+    lll_futex_wake (futex, 1, private);
 }
-#define lll_mutex_unlock(futex) \
-  __lll_mutex_unlock(&(futex))
+#define lll_unlock(futex, private) __lll_unlock(&(futex), private)
 
 
 static inline void
 __attribute__ ((always_inline))
-__lll_robust_mutex_unlock (int *futex, int mask)
+__lll_robust_unlock (int *futex, int private)
 {
   int oldval;
   int newval = 0;
 
   lll_compare_and_swap (futex, oldval, newval, "slr %2,%2");
-  if (oldval & mask)
-    lll_futex_wake (futex, 1, LLL_SHARED);
-}
-#define lll_robust_mutex_unlock(futex) \
-  __lll_robust_mutex_unlock(&(futex), FUTEX_WAITERS)
-
-
-static inline void
-__attribute__ ((always_inline))
-__lll_mutex_unlock_force (int *futex)
-{
-  *futex = 0;
-  lll_futex_wake (futex, 1, LLL_SHARED);
+  if (__builtin_expect (oldval & FUTEX_WAITERS, 0))
+    lll_futex_wake (futex, 1, private);
 }
-#define lll_mutex_unlock_force(futex) \
-  __lll_mutex_unlock_force(&(futex))
+#define lll_robust_unlock(futex, private) \
+  __lll_robust_unlock(&(futex), private)
 
-#define lll_mutex_islocked(futex) \
+#define lll_islocked(futex) \
   (futex != 0)
 
 
-/* We have a separate internal lock implementation which is not tied
-   to binary compatibility.  We can use the lll_mutex_*.  */
-
-/* Type for lock object.  */
-typedef int lll_lock_t;
-
 /* Initializers for lock.  */
 #define LLL_LOCK_INITIALIZER		(0)
 #define LLL_LOCK_INITIALIZER_LOCKED	(1)
 
-#define lll_trylock(futex)      lll_mutex_trylock (futex)
-#define lll_lock(futex)         lll_mutex_lock (futex)
-#define lll_unlock(futex)       lll_mutex_unlock (futex)
-#define lll_islocked(futex)     lll_mutex_islocked (futex)
-
-/* The states of a lock are:
-    1  -  untaken
-    0  -  taken by one user
-   <0  -  taken by more users */
-
-
 /* The kernel notifies a process with uses CLONE_CLEARTID via futex
    wakeup when the clone terminates.  The memory location contains the
    thread ID while the clone is running and is reset to zero
@@ -373,25 +350,4 @@ extern int __lll_timedwait_tid (int *, const struct timespec *)
     __res;								      \
   })
 
-/* Conditional variable handling.  */
-
-extern void __lll_cond_wait (pthread_cond_t *cond)
-     attribute_hidden;
-extern int __lll_cond_timedwait (pthread_cond_t *cond,
-				 const struct timespec *abstime)
-     attribute_hidden;
-extern void __lll_cond_wake (pthread_cond_t *cond)
-     attribute_hidden;
-extern void __lll_cond_broadcast (pthread_cond_t *cond)
-     attribute_hidden;
-
-#define lll_cond_wait(cond) \
-  __lll_cond_wait (cond)
-#define lll_cond_timedwait(cond, abstime) \
-  __lll_cond_timedwait (cond, abstime)
-#define lll_cond_wake(cond) \
-  __lll_cond_wake (cond)
-#define lll_cond_broadcast(cond) \
-  __lll_cond_broadcast (cond)
-
 #endif	/* lowlevellock.h */
diff --git a/nptl/sysdeps/unix/sysv/linux/sem_post.c b/nptl/sysdeps/unix/sysv/linux/sem_post.c
index 7f90325585..25b676fcd2 100644
--- a/nptl/sysdeps/unix/sysv/linux/sem_post.c
+++ b/nptl/sysdeps/unix/sysv/linux/sem_post.c
@@ -36,8 +36,7 @@ __new_sem_post (sem_t *sem)
   if (isem->nwaiters > 0)
     {
       int err = lll_futex_wake (&isem->value, 1,
-				// XYZ check mutex flag
-				LLL_SHARED);
+				isem->private ^ FUTEX_PRIVATE_FLAG);
       if (__builtin_expect (err, 0) < 0)
 	{
 	  __set_errno (-err);
diff --git a/nptl/sysdeps/unix/sysv/linux/sem_timedwait.c b/nptl/sysdeps/unix/sysv/linux/sem_timedwait.c
index 8f92d78abe..fdf0d74011 100644
--- a/nptl/sysdeps/unix/sysv/linux/sem_timedwait.c
+++ b/nptl/sysdeps/unix/sysv/linux/sem_timedwait.c
@@ -85,8 +85,7 @@ sem_timedwait (sem_t *sem, const struct timespec *abstime)
       int oldtype = __pthread_enable_asynccancel ();
 
       err = lll_futex_timed_wait (&isem->value, 0, &rt,
-				  // XYZ check mutex flag
-				  LLL_SHARED);
+				  isem->private ^ FUTEX_PRIVATE_FLAG);
 
       /* Disable asynchronous cancellation.  */
       __pthread_disable_asynccancel (oldtype);
diff --git a/nptl/sysdeps/unix/sysv/linux/sem_wait.c b/nptl/sysdeps/unix/sysv/linux/sem_wait.c
index 12f3f16c2d..20e2b481df 100644
--- a/nptl/sysdeps/unix/sysv/linux/sem_wait.c
+++ b/nptl/sysdeps/unix/sysv/linux/sem_wait.c
@@ -57,8 +57,7 @@ __new_sem_wait (sem_t *sem)
       int oldtype = __pthread_enable_asynccancel ();
 
       err = lll_futex_wait (&isem->value, 0,
-			    // XYZ check mutex flag
-			    LLL_SHARED);
+			    isem->private ^ FUTEX_PRIVATE_FLAG);
 
       /* Disable asynchronous cancellation.  */
       __pthread_disable_asynccancel (oldtype);
diff --git a/nptl/sysdeps/unix/sysv/linux/sparc/internaltypes.h b/nptl/sysdeps/unix/sysv/linux/sparc/internaltypes.h
new file mode 100644
index 0000000000..4d0ea51723
--- /dev/null
+++ b/nptl/sysdeps/unix/sysv/linux/sparc/internaltypes.h
@@ -0,0 +1,18 @@
+#ifndef _INTERNALTYPES_H
+#include "../internaltypes.h"
+
+union sparc_pthread_barrier
+{
+  struct pthread_barrier b;
+  struct sparc_pthread_barrier_s
+    {
+      unsigned int curr_event;
+      int lock;
+      unsigned int left;
+      unsigned int init_count;
+      unsigned char left_lock;
+      unsigned char pshared;
+    } s;
+};
+
+#endif
diff --git a/nptl/sysdeps/unix/sysv/linux/sparc/lowlevellock.h b/nptl/sysdeps/unix/sysv/linux/sparc/lowlevellock.h
index 922e3c21d3..38692bbd2d 100644
--- a/nptl/sysdeps/unix/sysv/linux/sparc/lowlevellock.h
+++ b/nptl/sysdeps/unix/sysv/linux/sparc/lowlevellock.h
@@ -70,9 +70,6 @@
 #endif
 
 
-/* Initializer for compatibility lock.	*/
-#define LLL_MUTEX_LOCK_INITIALIZER (0)
-
 #define lll_futex_wait(futexp, val, private) \
   lll_futex_timed_wait (futexp, val, NULL, private)
 
@@ -110,12 +107,12 @@
     INTERNAL_SYSCALL_ERROR_P (__ret, __err);				      \
   })
 
-#define lll_robust_mutex_dead(futexv) \
+#define lll_robust_dead(futexv, private) \
   do									      \
     {									      \
       int *__futexp = &(futexv);					      \
       atomic_or (__futexp, FUTEX_OWNER_DIED);				      \
-      lll_futex_wake (__futexp, 1, LLL_SHARED);				      \
+      lll_futex_wake (__futexp, 1, private);				      \
     }									      \
   while (0)
 
@@ -139,146 +136,132 @@
 
 static inline int
 __attribute__ ((always_inline))
-__lll_mutex_trylock (int *futex)
+__lll_trylock (int *futex)
 {
   return atomic_compare_and_exchange_val_24_acq (futex, 1, 0) != 0;
 }
-#define lll_mutex_trylock(futex) __lll_mutex_trylock (&(futex))
+#define lll_trylock(futex) __lll_trylock (&(futex))
 
 static inline int
 __attribute__ ((always_inline))
-__lll_mutex_cond_trylock (int *futex)
+__lll_cond_trylock (int *futex)
 {
   return atomic_compare_and_exchange_val_24_acq (futex, 2, 0) != 0;
 }
-#define lll_mutex_cond_trylock(futex) __lll_mutex_cond_trylock (&(futex))
+#define lll_cond_trylock(futex) __lll_cond_trylock (&(futex))
 
 static inline int
 __attribute__ ((always_inline))
-__lll_robust_mutex_trylock (int *futex, int id)
+__lll_robust_trylock (int *futex, int id)
 {
   return atomic_compare_and_exchange_val_acq (futex, id, 0) != 0;
 }
-#define lll_robust_mutex_trylock(futex, id) \
-  __lll_robust_mutex_trylock (&(futex), id)
+#define lll_robust_trylock(futex, id) \
+  __lll_robust_trylock (&(futex), id)
 
 
-extern void __lll_lock_wait (int *futex) attribute_hidden;
-extern int __lll_robust_lock_wait (int *futex) attribute_hidden;
+extern void __lll_lock_wait_private (int *futex) attribute_hidden;
+extern void __lll_lock_wait (int *futex, int private) attribute_hidden;
+extern int __lll_robust_lock_wait (int *futex, int private) attribute_hidden;
 
 static inline void
 __attribute__ ((always_inline))
-__lll_mutex_lock (int *futex)
+__lll_lock (int *futex, int private)
 {
   int val = atomic_compare_and_exchange_val_24_acq (futex, 1, 0);
 
   if (__builtin_expect (val != 0, 0))
-    __lll_lock_wait (futex);
+    {
+      if (__builtin_constant_p (private) && private == LLL_PRIVATE)
+	__lll_lock_wait_private (futex);
+      else
+	__lll_lock_wait (futex, private);
+    }
 }
-#define lll_mutex_lock(futex) __lll_mutex_lock (&(futex))
+#define lll_lock(futex, private) __lll_lock (&(futex), private)
 
 static inline int
 __attribute__ ((always_inline))
-__lll_robust_mutex_lock (int *futex, int id)
+__lll_robust_lock (int *futex, int id, int private)
 {
   int result = 0;
   if (atomic_compare_and_exchange_bool_acq (futex, id, 0) != 0)
-    result = __lll_robust_lock_wait (futex);
+    result = __lll_robust_lock_wait (futex, private);
   return result;
 }
-#define lll_robust_mutex_lock(futex, id) \
-  __lll_robust_mutex_lock (&(futex), id)
+#define lll_robust_lock(futex, id, private) \
+  __lll_robust_lock (&(futex), id, private)
 
 static inline void
 __attribute__ ((always_inline))
-__lll_mutex_cond_lock (int *futex)
+__lll_cond_lock (int *futex, int private)
 {
   int val = atomic_compare_and_exchange_val_24_acq (futex, 2, 0);
 
   if (__builtin_expect (val != 0, 0))
-    __lll_lock_wait (futex);
+    __lll_lock_wait (futex, private);
 }
-#define lll_mutex_cond_lock(futex) __lll_mutex_cond_lock (&(futex))
+#define lll_cond_lock(futex, private) __lll_cond_lock (&(futex), private)
 
-#define lll_robust_mutex_cond_lock(futex, id) \
-  __lll_robust_mutex_lock (&(futex), (id) | FUTEX_WAITERS)
+#define lll_robust_cond_lock(futex, id, private) \
+  __lll_robust_lock (&(futex), (id) | FUTEX_WAITERS, private)
 
 
-extern int __lll_timedlock_wait (int *futex, const struct timespec *)
-     attribute_hidden;
-extern int __lll_robust_timedlock_wait (int *futex, const struct timespec *)
-     attribute_hidden;
+extern int __lll_timedlock_wait (int *futex, const struct timespec *,
+				 int private) attribute_hidden;
+extern int __lll_robust_timedlock_wait (int *futex, const struct timespec *,
+					int private) attribute_hidden;
 
 static inline int
 __attribute__ ((always_inline))
-__lll_mutex_timedlock (int *futex, const struct timespec *abstime)
+__lll_timedlock (int *futex, const struct timespec *abstime, int private)
 {
   int val = atomic_compare_and_exchange_val_24_acq (futex, 1, 0);
   int result = 0;
 
   if (__builtin_expect (val != 0, 0))
-    result = __lll_timedlock_wait (futex, abstime);
+    result = __lll_timedlock_wait (futex, abstime, private);
   return result;
 }
-#define lll_mutex_timedlock(futex, abstime) \
-  __lll_mutex_timedlock (&(futex), abstime)
+#define lll_timedlock(futex, abstime, private) \
+  __lll_timedlock (&(futex), abstime, private)
 
 static inline int
 __attribute__ ((always_inline))
-__lll_robust_mutex_timedlock (int *futex, const struct timespec *abstime,
-			      int id)
+__lll_robust_timedlock (int *futex, const struct timespec *abstime,
+			int id, int private)
 {
   int result = 0;
   if (atomic_compare_and_exchange_bool_acq (futex, id, 0) != 0)
-    result = __lll_robust_timedlock_wait (futex, abstime);
+    result = __lll_robust_timedlock_wait (futex, abstime, private);
   return result;
 }
-#define lll_robust_mutex_timedlock(futex, abstime, id) \
-  __lll_robust_mutex_timedlock (&(futex), abstime, id)
+#define lll_robust_timedlock(futex, abstime, id, private) \
+  __lll_robust_timedlock (&(futex), abstime, id, private)
 
-#define lll_mutex_unlock(lock) \
+#define lll_unlock(lock, private) \
   ((void) ({								      \
     int *__futex = &(lock);						      \
     int __val = atomic_exchange_24_rel (__futex, 0);			      \
     if (__builtin_expect (__val > 1, 0))				      \
-      lll_futex_wake (__futex, 1, LLL_SHARED);				      \
+      lll_futex_wake (__futex, 1, private);				      \
   }))
 
-#define lll_robust_mutex_unlock(lock) \
+#define lll_robust_unlock(lock, private) \
   ((void) ({								      \
     int *__futex = &(lock);						      \
     int __val = atomic_exchange_rel (__futex, 0);			      \
     if (__builtin_expect (__val & FUTEX_WAITERS, 0))			      \
-      lll_futex_wake (__futex, 1, LLL_SHARED);				      \
-  }))
-
-#define lll_mutex_unlock_force(lock) \
-  ((void) ({								      \
-    int *__futex = &(lock);						      \
-    (void) atomic_exchange_24_rel (__futex, 0);				      \
-    lll_futex_wake (__futex, 1, LLL_SHARED);				      \
+      lll_futex_wake (__futex, 1, private);				      \
   }))
 
-#define lll_mutex_islocked(futex) \
+#define lll_islocked(futex) \
   (futex != 0)
 
-
-/* We have a separate internal lock implementation which is not tied
-   to binary compatibility.  We can use the lll_mutex_*.  */
-
-/* Type for lock object.  */
-typedef int lll_lock_t;
-
 /* Initializers for lock.  */
 #define LLL_LOCK_INITIALIZER		(0)
 #define LLL_LOCK_INITIALIZER_LOCKED	(1)
 
-#define lll_trylock(futex)	lll_mutex_trylock (futex)
-#define lll_lock(futex)		lll_mutex_lock (futex)
-#define lll_unlock(futex)	lll_mutex_unlock (futex)
-#define lll_islocked(futex)	lll_mutex_islocked (futex)
-
-
 /* The kernel notifies a process with uses CLONE_CLEARTID via futex
    wakeup when the clone terminates.  The memory location contains the
    thread ID while the clone is running and is reset to zero
@@ -303,26 +286,4 @@ extern int __lll_timedwait_tid (int *, const struct timespec *)
     __res;						\
   })
 
-
-/* Conditional variable handling.  */
-
-extern void __lll_cond_wait (pthread_cond_t *cond)
-     attribute_hidden;
-extern int __lll_cond_timedwait (pthread_cond_t *cond,
-				 const struct timespec *abstime)
-     attribute_hidden;
-extern void __lll_cond_wake (pthread_cond_t *cond)
-     attribute_hidden;
-extern void __lll_cond_broadcast (pthread_cond_t *cond)
-     attribute_hidden;
-
-#define lll_cond_wait(cond) \
-  __lll_cond_wait (cond)
-#define lll_cond_timedwait(cond, abstime) \
-  __lll_cond_timedwait (cond, abstime)
-#define lll_cond_wake(cond) \
-  __lll_cond_wake (cond)
-#define lll_cond_broadcast(cond) \
-  __lll_cond_broadcast (cond)
-
 #endif	/* lowlevellock.h */
diff --git a/nptl/sysdeps/unix/sysv/linux/sparc/pthread_barrier_destroy.c b/nptl/sysdeps/unix/sysv/linux/sparc/pthread_barrier_destroy.c
new file mode 100644
index 0000000000..b677fb6cf1
--- /dev/null
+++ b/nptl/sysdeps/unix/sysv/linux/sparc/pthread_barrier_destroy.c
@@ -0,0 +1,45 @@
+/* Copyright (C) 2002, 2007 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <errno.h>
+#include "pthreadP.h"
+#include <lowlevellock.h>
+
+int
+pthread_barrier_destroy (barrier)
+     pthread_barrier_t *barrier;
+{
+  union sparc_pthread_barrier *ibarrier;
+  int result = EBUSY;
+
+  ibarrier = (union sparc_pthread_barrier *) barrier;
+
+  int private = ibarrier->s.pshared ? LLL_SHARED : LLL_PRIVATE;
+
+  lll_lock (ibarrier->b.lock, private);
+
+  if (__builtin_expect (ibarrier->b.left == ibarrier->b.init_count, 1))
+    /* The barrier is not used anymore.  */
+    result = 0;
+  else
+    /* Still used, return with an error.  */
+    lll_unlock (ibarrier->b.lock, private);
+
+  return result;
+}
diff --git a/nptl/sysdeps/unix/sysv/linux/sparc/pthread_barrier_init.c b/nptl/sysdeps/unix/sysv/linux/sparc/pthread_barrier_init.c
new file mode 100644
index 0000000000..0a1159cf8a
--- /dev/null
+++ b/nptl/sysdeps/unix/sysv/linux/sparc/pthread_barrier_init.c
@@ -0,0 +1,55 @@
+/* Copyright (C) 2002, 2006, 2007 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <errno.h>
+#include "pthreadP.h"
+#include <lowlevellock.h>
+
+int
+pthread_barrier_init (barrier, attr, count)
+     pthread_barrier_t *barrier;
+     const pthread_barrierattr_t *attr;
+     unsigned int count;
+{
+  union sparc_pthread_barrier *ibarrier;
+
+  if (__builtin_expect (count == 0, 0))
+    return EINVAL;
+
+  struct pthread_barrierattr *iattr = (struct pthread_barrierattr *) attr;
+  if (iattr != NULL)
+    {
+      if (iattr->pshared != PTHREAD_PROCESS_PRIVATE
+	  && __builtin_expect (iattr->pshared != PTHREAD_PROCESS_SHARED, 0))
+	/* Invalid attribute.  */
+	return EINVAL;
+    }
+
+  ibarrier = (union sparc_pthread_barrier *) barrier;
+
+  /* Initialize the individual fields.  */
+  ibarrier->b.lock = LLL_LOCK_INITIALIZER;
+  ibarrier->b.left = count;
+  ibarrier->b.init_count = count;
+  ibarrier->b.curr_event = 0;
+  ibarrier->s.left_lock = 0;
+  ibarrier->s.pshared = (iattr && iattr->pshared == PTHREAD_PROCESS_SHARED);
+
+  return 0;
+}
diff --git a/nptl/sysdeps/unix/sysv/linux/sparc/pthread_barrier_wait.c b/nptl/sysdeps/unix/sysv/linux/sparc/pthread_barrier_wait.c
new file mode 100644
index 0000000000..f67785fced
--- /dev/null
+++ b/nptl/sysdeps/unix/sysv/linux/sparc/pthread_barrier_wait.c
@@ -0,0 +1,78 @@
+/* Copyright (C) 2003, 2004, 2006, 2007 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Martin Schwidefsky <schwidefsky@de.ibm.com>, 2003.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <errno.h>
+#include <sysdep.h>
+#include <lowlevellock.h>
+#include <pthreadP.h>
+
+/* Wait on barrier.  */
+int
+pthread_barrier_wait (barrier)
+     pthread_barrier_t *barrier;
+{
+  union sparc_pthread_barrier *ibarrier
+    = (union sparc_pthread_barrier *) barrier;
+  int result = 0;
+  int private = ibarrier->s.pshared ? LLL_SHARED : LLL_PRIVATE;
+
+  /* Make sure we are alone.  */
+  lll_lock (ibarrier->b.lock, private);
+
+  /* One more arrival.  */
+  --ibarrier->b.left;
+
+  /* Are these all?  */
+  if (ibarrier->b.left == 0)
+    {
+      /* Yes. Increment the event counter to avoid invalid wake-ups and
+	 tell the current waiters that it is their turn.  */
+      ++ibarrier->b.curr_event;
+
+      /* Wake up everybody.  */
+      lll_futex_wake (&ibarrier->b.curr_event, INT_MAX, private);
+
+      /* This is the thread which finished the serialization.  */
+      result = PTHREAD_BARRIER_SERIAL_THREAD;
+    }
+  else
+    {
+      /* The number of the event we are waiting for.  The barrier's event
+	 number must be bumped before we continue.  */
+      unsigned int event = ibarrier->b.curr_event;
+
+      /* Before suspending, make the barrier available to others.  */
+      lll_unlock (ibarrier->b.lock, private);
+
+      /* Wait for the event counter of the barrier to change.  */
+      do
+	lll_futex_wait (&ibarrier->b.curr_event, event, private);
+      while (event == ibarrier->b.curr_event);
+    }
+
+  /* Make sure the init_count is stored locally or in a register.  */
+  unsigned int init_count = ibarrier->b.init_count;
+
+  /* If this was the last woken thread, unlock.  */
+  if (atomic_increment_val (&ibarrier->b.left) == init_count)
+    /* We are done.  */
+    lll_unlock (ibarrier->b.lock, private);
+
+  return result;
+}
diff --git a/nptl/sysdeps/unix/sysv/linux/sparc/sparc32/lowlevellock.c b/nptl/sysdeps/unix/sysv/linux/sparc/sparc32/lowlevellock.c
index cb9578b47b..1ee9b4737b 100644
--- a/nptl/sysdeps/unix/sysv/linux/sparc/sparc32/lowlevellock.c
+++ b/nptl/sysdeps/unix/sysv/linux/sparc/sparc32/lowlevellock.c
@@ -25,20 +25,35 @@
 
 
 void
-__lll_lock_wait (int *futex)
+__lll_lock_wait_private (int *futex)
 {
   do
     {
       int oldval = atomic_compare_and_exchange_val_24_acq (futex, 2, 1);
       if (oldval != 0)
-	lll_futex_wait (futex, 2);
+	lll_futex_wait (futex, 2, LLL_PRIVATE);
+    }
+  while (atomic_compare_and_exchange_val_24_acq (futex, 2, 0) != 0);
+}
+
+#ifdef IS_IN_libpthread
+/* These functions don't get included in libc.so  */
+
+void
+__lll_lock_wait (int *futex, int private)
+{
+  do
+    {
+      int oldval = atomic_compare_and_exchange_val_24_acq (futex, 2, 1);
+      if (oldval != 0)
+	lll_futex_wait (futex, 2, private);
     }
   while (atomic_compare_and_exchange_val_24_acq (futex, 2, 0) != 0);
 }
 
 
 int
-__lll_timedlock_wait (int *futex, const struct timespec *abstime)
+__lll_timedlock_wait (int *futex, const struct timespec *abstime, int private)
 {
   /* Reject invalid timeouts.  */
   if (abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000)
@@ -68,7 +83,7 @@ __lll_timedlock_wait (int *futex, const struct timespec *abstime)
       /* Wait.  */
       int oldval = atomic_compare_and_exchange_val_24_acq (futex, 2, 1);
       if (oldval != 0)
-	lll_futex_timed_wait (futex, 2, &rt);
+	lll_futex_timed_wait (futex, 2, &rt, private);
     }
   while (atomic_compare_and_exchange_val_24_acq (futex, 2, 0) != 0);
 
@@ -76,8 +91,6 @@ __lll_timedlock_wait (int *futex, const struct timespec *abstime)
 }
 
 
-/* This function doesn't get included in libc.so  */
-#ifdef IS_IN_libpthread
 int
 __lll_timedwait_tid (int *tidp, const struct timespec *abstime)
 {
diff --git a/nptl/sysdeps/unix/sysv/linux/sparc/sparc32/pthread_barrier_wait.c b/nptl/sysdeps/unix/sysv/linux/sparc/sparc32/pthread_barrier_wait.c
index 868e0d2819..89a23490f0 100644
--- a/nptl/sysdeps/unix/sysv/linux/sparc/sparc32/pthread_barrier_wait.c
+++ b/nptl/sysdeps/unix/sysv/linux/sparc/sparc32/pthread_barrier_wait.c
@@ -22,24 +22,18 @@
 #include <lowlevellock.h>
 #include <pthreadP.h>
 
-struct sparc_pthread_barrier
-{
-  struct pthread_barrier b;
-  unsigned char left_lock;
-  unsigned char pshared;
-};
-
 /* Wait on barrier.  */
 int
 pthread_barrier_wait (barrier)
      pthread_barrier_t *barrier;
 {
-  struct sparc_pthread_barrier *ibarrier
-    = (struct sparc_pthread_barrier *) barrier;
+  union sparc_pthread_barrier *ibarrier
+    = (union sparc_pthread_barrier *) barrier;
   int result = 0;
+  int private = ibarrier->s.pshared ? LLL_SHARED : LLL_PRIVATE;
 
   /* Make sure we are alone.  */
-  lll_lock (ibarrier->b.lock);
+  lll_lock (ibarrier->b.lock, private);
 
   /* One more arrival.  */
   --ibarrier->b.left;
@@ -52,9 +46,7 @@ pthread_barrier_wait (barrier)
       ++ibarrier->b.curr_event;
 
       /* Wake up everybody.  */
-      lll_futex_wake (&ibarrier->b.curr_event, INT_MAX,
-		      // XYZ check mutex flag
-		      LLL_SHARED);
+      lll_futex_wake (&ibarrier->b.curr_event, INT_MAX, private);
 
       /* This is the thread which finished the serialization.  */
       result = PTHREAD_BARRIER_SERIAL_THREAD;
@@ -66,13 +58,11 @@ pthread_barrier_wait (barrier)
       unsigned int event = ibarrier->b.curr_event;
 
       /* Before suspending, make the barrier available to others.  */
-      lll_unlock (ibarrier->b.lock);
+      lll_unlock (ibarrier->b.lock, private);
 
       /* Wait for the event counter of the barrier to change.  */
       do
-	lll_futex_wait (&ibarrier->b.curr_event, event,
-			// XYZ check mutex flag
-			LLL_SHARED);
+	lll_futex_wait (&ibarrier->b.curr_event, event, private);
       while (event == ibarrier->b.curr_event);
     }
 
@@ -80,11 +70,11 @@ pthread_barrier_wait (barrier)
   unsigned int init_count = ibarrier->b.init_count;
 
   /* If this was the last woken thread, unlock.  */
-  if (__atomic_is_v9 || ibarrier->pshared == 0)
+  if (__atomic_is_v9 || ibarrier->s.pshared == 0)
     {
       if (atomic_increment_val (&ibarrier->b.left) == init_count)
 	/* We are done.  */
-	lll_unlock (ibarrier->b.lock);
+	lll_unlock (ibarrier->b.lock, private);
     }
   else
     {
@@ -92,12 +82,12 @@ pthread_barrier_wait (barrier)
       /* Slightly more complicated.  On pre-v9 CPUs, atomic_increment_val
 	 is only atomic for threads within the same process, not for
 	 multiple processes.  */
-      __sparc32_atomic_do_lock24 (&ibarrier->left_lock);
+      __sparc32_atomic_do_lock24 (&ibarrier->s.left_lock);
       left = ++ibarrier->b.left;
-      __sparc32_atomic_do_unlock24 (&ibarrier->left_lock);
+      __sparc32_atomic_do_unlock24 (&ibarrier->s.left_lock);
       if (left == init_count)
         /* We are done.  */
-        lll_unlock (ibarrier->b.lock);
+	lll_unlock (ibarrier->b.lock, private);
     }
 
   return result;
diff --git a/nptl/sysdeps/unix/sysv/linux/sparc/sparc32/sparcv9/pthread_barrier_wait.c b/nptl/sysdeps/unix/sysv/linux/sparc/sparc32/sparcv9/pthread_barrier_wait.c
index 55d20eb8f8..2d32e58240 100644
--- a/nptl/sysdeps/unix/sysv/linux/sparc/sparc32/sparcv9/pthread_barrier_wait.c
+++ b/nptl/sysdeps/unix/sysv/linux/sparc/sparc32/sparcv9/pthread_barrier_wait.c
@@ -1 +1 @@
-#include "../../../../../../../pthread_barrier_wait.c"
+#include "../../pthread_barrier_wait.c"
diff --git a/nptl/sysdeps/unix/sysv/linux/unregister-atfork.c b/nptl/sysdeps/unix/sysv/linux/unregister-atfork.c
index bc5b7537a3..56a4f149e1 100644
--- a/nptl/sysdeps/unix/sysv/linux/unregister-atfork.c
+++ b/nptl/sysdeps/unix/sysv/linux/unregister-atfork.c
@@ -54,7 +54,7 @@ __unregister_atfork (dso_handle)
      that there couldn't have been another thread deleting something.
      The __unregister_atfork function is only called from the
      dlclose() code which itself serializes the operations.  */
-  lll_lock (__fork_lock);
+  lll_lock (__fork_lock, LLL_PRIVATE);
 
   /* We have to create a new list with all the entries we don't remove.  */
   struct deleted_handler
@@ -89,7 +89,7 @@ __unregister_atfork (dso_handle)
   while (runp != NULL);
 
   /* Release the lock.  */
-  lll_unlock (__fork_lock);
+  lll_unlock (__fork_lock, LLL_PRIVATE);
 
   /* Walk the list of all entries which have to be deleted.  */
   while (deleted != NULL)
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/libc-lowlevellock.S b/nptl/sysdeps/unix/sysv/linux/x86_64/libc-lowlevellock.S
index 3265eee0ed..ce8ad27aa7 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/libc-lowlevellock.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/libc-lowlevellock.S
@@ -17,19 +17,4 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
-#include <kernel-features.h>
-
-/* All locks in libc are private.  Use the kernel feature if possible.  */
-#define FUTEX_PRIVATE_FLAG	128
-#ifdef __ASSUME_PRIVATE_FUTEX
-# define FUTEX_WAIT		(0 | FUTEX_PRIVATE_FLAG)
-# define FUTEX_WAKE		(1 | FUTEX_PRIVATE_FLAG)
-#else
-# define LOAD_FUTEX_WAIT(reg) \
-	movl	%fs:PRIVATE_FUTEX, reg
-# define LOAD_FUTEX_WAKE(reg) \
-	movl	%fs:PRIVATE_FUTEX, reg ; \
-	orl	$FUTEX_WAKE, reg
-#endif
-
 #include "lowlevellock.S"
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S b/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S
index 502f1d442f..7065cfac32 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S
@@ -19,33 +19,46 @@
 
 #include <sysdep.h>
 #include <pthread-errnos.h>
+#include <kernel-features.h>
+#include <lowlevellock.h>
 
 	.text
 
-#ifndef LOCK
-# ifdef UP
-#  define LOCK
+#ifdef __ASSUME_PRIVATE_FUTEX
+# define LOAD_PRIVATE_FUTEX_WAIT(reg) \
+	movl	$(FUTEX_WAIT | FUTEX_PRIVATE_FLAG), reg
+# define LOAD_PRIVATE_FUTEX_WAKE(reg) \
+	movl	$(FUTEX_WAKE | FUTEX_PRIVATE_FLAG), reg
+# define LOAD_FUTEX_WAIT(reg) \
+	xorl	$(FUTEX_WAIT | FUTEX_PRIVATE_FLAG), reg
+# define LOAD_FUTEX_WAKE(reg) \
+	xorl	$(FUTEX_WAKE | FUTEX_PRIVATE_FLAG), reg
+#else
+# if FUTEX_WAIT == 0
+#  define LOAD_PRIVATE_FUTEX_WAIT(reg) \
+	movl    %fs:PRIVATE_FUTEX, reg
 # else
-#  define LOCK lock
+#  define LOAD_PRIVATE_FUTEX_WAIT(reg) \
+	movl	%fs:PRIVATE_FUTEX, reg ; \
+	orl	$FUTEX_WAIT, reg
 # endif
-#endif
-
-#define SYS_futex		202
-#ifndef FUTEX_WAIT
-# define FUTEX_WAIT		0
-# define FUTEX_WAKE		1
-#endif
-
-#ifndef LOAD_FUTEX_WAIT
+# define LOAD_PRIVATE_FUTEX_WAKE(reg) \
+	movl    %fs:PRIVATE_FUTEX, reg ; \
+	orl     $FUTEX_WAKE, reg
 # if FUTEX_WAIT == 0
 #  define LOAD_FUTEX_WAIT(reg) \
-	xorl	reg, reg
+	xorl	$FUTEX_PRIVATE_FLAG, reg ; \
+	andl	%fs:PRIVATE_FUTEX, reg
 # else
 #  define LOAD_FUTEX_WAIT(reg) \
-	movl	$FUTEX_WAIT, reg
+	xorl	$FUTEX_PRIVATE_FLAG, reg ; \
+	andl	%fs:PRIVATE_FUTEX, reg ; \
+	orl	$FUTEX_WAIT, reg
 # endif
 # define LOAD_FUTEX_WAKE(reg) \
-	movl	$FUTEX_WAKE, reg
+	xorl	$FUTEX_PRIVATE_FLAG, reg ; \
+	andl	%fs:PRIVATE_FUTEX, reg ; \
+	orl	$FUTEX_WAKE, reg
 #endif
 
 
@@ -53,11 +66,11 @@
 #define VSYSCALL_ADDR_vgettimeofday	0xffffffffff600000
 
 
-	.globl	__lll_mutex_lock_wait
-	.type	__lll_mutex_lock_wait,@function
-	.hidden	__lll_mutex_lock_wait
+	.globl	__lll_lock_wait_private
+	.type	__lll_lock_wait_private,@function
+	.hidden	__lll_lock_wait_private
 	.align	16
-__lll_mutex_lock_wait:
+__lll_lock_wait_private:
 	cfi_startproc
 	pushq	%r10
 	cfi_adjust_cfa_offset(8)
@@ -67,7 +80,7 @@ __lll_mutex_lock_wait:
 	cfi_offset(%rdx, -24)
 	xorq	%r10, %r10	/* No timeout.  */
 	movl	$2, %edx
-	LOAD_FUTEX_WAIT (%esi)
+	LOAD_PRIVATE_FUTEX_WAIT (%esi)
 
 	cmpl	%edx, %eax	/* NB:	 %edx == 2 */
 	jne	2f
@@ -89,15 +102,52 @@ __lll_mutex_lock_wait:
 	cfi_restore(%r10)
 	retq
 	cfi_endproc
-	.size	__lll_mutex_lock_wait,.-__lll_mutex_lock_wait
-
+	.size	__lll_lock_wait_private,.-__lll_lock_wait_private
 
 #ifdef NOT_IN_libc
-	.globl	__lll_mutex_timedlock_wait
-	.type	__lll_mutex_timedlock_wait,@function
-	.hidden	__lll_mutex_timedlock_wait
+	.globl	__lll_lock_wait
+	.type	__lll_lock_wait,@function
+	.hidden	__lll_lock_wait
 	.align	16
-__lll_mutex_timedlock_wait:
+__lll_lock_wait:
+	cfi_startproc
+	pushq	%r10
+	cfi_adjust_cfa_offset(8)
+	pushq	%rdx
+	cfi_adjust_cfa_offset(8)
+	cfi_offset(%r10, -16)
+	cfi_offset(%rdx, -24)
+	xorq	%r10, %r10	/* No timeout.  */
+	movl	$2, %edx
+	LOAD_FUTEX_WAIT (%esi)
+
+	cmpl	%edx, %eax	/* NB:	 %edx == 2 */
+	jne	2f
+
+1:	movl	$SYS_futex, %eax
+	syscall
+
+2:	movl	%edx, %eax
+	xchgl	%eax, (%rdi)	/* NB:	 lock is implied */
+
+	testl	%eax, %eax
+	jnz	1b
+
+	popq	%rdx
+	cfi_adjust_cfa_offset(-8)
+	cfi_restore(%rdx)
+	popq	%r10
+	cfi_adjust_cfa_offset(-8)
+	cfi_restore(%r10)
+	retq
+	cfi_endproc
+	.size	__lll_lock_wait,.-__lll_lock_wait
+
+	.globl	__lll_timedlock_wait
+	.type	__lll_timedlock_wait,@function
+	.hidden	__lll_timedlock_wait
+	.align	16
+__lll_timedlock_wait:
 	cfi_startproc
 	/* Check for a valid timeout value.  */
 	cmpq	$1000000000, 8(%rdx)
@@ -118,10 +168,12 @@ __lll_mutex_timedlock_wait:
 	cfi_offset(%r12, -32)
 	cfi_offset(%r13, -40)
 	cfi_offset(%r14, -48)
+	pushq	%rsi
+	cfi_adjust_cfa_offset(8)
 
 	/* Stack frame for the timespec and timeval structs.  */
-	subq	$16, %rsp
-	cfi_adjust_cfa_offset(16)
+	subq	$24, %rsp
+	cfi_adjust_cfa_offset(24)
 
 	movq	%rdi, %r12
 	movq	%rdx, %r13
@@ -162,6 +214,7 @@ __lll_mutex_timedlock_wait:
 	je	8f
 
 	movq	%rsp, %r10
+	movl	24(%rsp), %esi
 	LOAD_FUTEX_WAIT (%esi)
 	movq	%r12, %rdi
 	movl	$SYS_futex, %eax
@@ -174,8 +227,8 @@ __lll_mutex_timedlock_wait:
 	cmpxchgl %edx, (%r12)
 	jnz	7f
 
-6:	addq	$16, %rsp
-	cfi_adjust_cfa_offset(-16)
+6:	addq	$32, %rsp
+	cfi_adjust_cfa_offset(-32)
 	popq	%r14
 	cfi_adjust_cfa_offset(-8)
 	cfi_restore(%r14)
@@ -196,7 +249,7 @@ __lll_mutex_timedlock_wait:
 3:	movl	$EINVAL, %eax
 	retq
 
-	cfi_adjust_cfa_offset(56)
+	cfi_adjust_cfa_offset(72)
 	cfi_offset(%r8, -16)
 	cfi_offset(%r9, -24)
 	cfi_offset(%r12, -32)
@@ -216,15 +269,15 @@ __lll_mutex_timedlock_wait:
 5:	movl	$ETIMEDOUT, %eax
 	jmp	6b
 	cfi_endproc
-	.size	__lll_mutex_timedlock_wait,.-__lll_mutex_timedlock_wait
+	.size	__lll_timedlock_wait,.-__lll_timedlock_wait
 #endif
 
 
-	.globl	__lll_mutex_unlock_wake
-	.type	__lll_mutex_unlock_wake,@function
-	.hidden	__lll_mutex_unlock_wake
+	.globl	__lll_unlock_wake_private
+	.type	__lll_unlock_wake_private,@function
+	.hidden	__lll_unlock_wake_private
 	.align	16
-__lll_mutex_unlock_wake:
+__lll_unlock_wake_private:
 	cfi_startproc
 	pushq	%rsi
 	cfi_adjust_cfa_offset(8)
@@ -234,7 +287,7 @@ __lll_mutex_unlock_wake:
 	cfi_offset(%rdx, -24)
 
 	movl	$0, (%rdi)
-	LOAD_FUTEX_WAKE (%esi)
+	LOAD_PRIVATE_FUTEX_WAKE (%esi)
 	movl	$1, %edx	/* Wake one thread.  */
 	movl	$SYS_futex, %eax
 	syscall
@@ -247,10 +300,38 @@ __lll_mutex_unlock_wake:
 	cfi_restore(%rsi)
 	retq
 	cfi_endproc
-	.size	__lll_mutex_unlock_wake,.-__lll_mutex_unlock_wake
-
+	.size	__lll_unlock_wake_private,.-__lll_unlock_wake_private
 
 #ifdef NOT_IN_libc
+	.globl	__lll_unlock_wake
+	.type	__lll_unlock_wake,@function
+	.hidden	__lll_unlock_wake
+	.align	16
+__lll_unlock_wake:
+	cfi_startproc
+	pushq	%rsi
+	cfi_adjust_cfa_offset(8)
+	pushq	%rdx
+	cfi_adjust_cfa_offset(8)
+	cfi_offset(%rsi, -16)
+	cfi_offset(%rdx, -24)
+
+	movl	$0, (%rdi)
+	LOAD_FUTEX_WAKE (%esi)
+	movl	$1, %edx	/* Wake one thread.  */
+	movl	$SYS_futex, %eax
+	syscall
+
+	popq	%rdx
+	cfi_adjust_cfa_offset(-8)
+	cfi_restore(%rdx)
+	popq	%rsi
+	cfi_adjust_cfa_offset(-8)
+	cfi_restore(%rsi)
+	retq
+	cfi_endproc
+	.size	__lll_unlock_wake,.-__lll_unlock_wake
+
 	.globl	__lll_timedwait_tid
 	.type	__lll_timedwait_tid,@function
 	.hidden	__lll_timedwait_tid
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h b/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h
index 7ec7deff17..192d203926 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h
@@ -20,17 +20,27 @@
 #ifndef _LOWLEVELLOCK_H
 #define _LOWLEVELLOCK_H	1
 
-#include <time.h>
-#include <sys/param.h>
-#include <bits/pthreadtypes.h>
-#include <kernel-features.h>
-#include <tcb-offsets.h>
-
-#ifndef LOCK_INSTR
-# ifdef UP
-#  define LOCK_INSTR	/* nothing */
-# else
-#  define LOCK_INSTR "lock;"
+#ifndef __ASSEMBLER__
+# include <time.h>
+# include <sys/param.h>
+# include <bits/pthreadtypes.h>
+# include <kernel-features.h>
+# include <tcb-offsets.h>
+
+# ifndef LOCK_INSTR
+#  ifdef UP
+#   define LOCK_INSTR	/* nothing */
+#  else
+#   define LOCK_INSTR "lock;"
+#  endif
+# endif
+#else
+# ifndef LOCK
+#  ifdef UP
+#   define LOCK
+#  else
+#   define LOCK lock
+#  endif
 # endif
 #endif
 
@@ -38,11 +48,13 @@
 #define FUTEX_WAIT		0
 #define FUTEX_WAKE		1
 #define FUTEX_CMP_REQUEUE	4
+#define FUTEX_WAKE_OP	   5
 #define FUTEX_LOCK_PI		6
 #define FUTEX_UNLOCK_PI		7
 #define FUTEX_TRYLOCK_PI	8
 #define FUTEX_PRIVATE_FLAG	128
 
+#define FUTEX_OP_CLEAR_WAKE_IF_GT_ONE	((4 << 24) | 1)
 
 /* Values for 'private' parameter of locking macros.  Yes, the
    definition seems to be backwards.  But it is not.  The bit will be
@@ -50,6 +62,8 @@
 #define LLL_PRIVATE	0
 #define LLL_SHARED	FUTEX_PRIVATE_FLAG
 
+#ifndef __ASSEMBLER__
+
 #if !defined NOT_IN_libc || defined IS_IN_rtld
 /* In libc.so or ld.so all futexes are private.  */
 # ifdef __ASSUME_PRIVATE_FUTEX
@@ -76,13 +90,13 @@
 # endif	      
 #endif
 
-/* Initializer for compatibility lock.  */
-#define LLL_MUTEX_LOCK_INITIALIZER		(0)
-#define LLL_MUTEX_LOCK_INITIALIZER_LOCKED	(1)
-#define LLL_MUTEX_LOCK_INITIALIZER_WAITERS	(2)
+/* Initializer for lock.  */
+#define LLL_LOCK_INITIALIZER		(0)
+#define LLL_LOCK_INITIALIZER_LOCKED	(1)
+#define LLL_LOCK_INITIALIZER_WAITERS	(2)
 
 /* Delay in spinlock loop.  */
-#define BUSY_WAIT_NOP          asm ("rep; nop")
+#define BUSY_WAIT_NOP	  asm ("rep; nop")
 
 
 #define LLL_STUB_UNWIND_INFO_START \
@@ -196,7 +210,7 @@ LLL_STUB_UNWIND_INFO_END
 		      : "=a" (__status)					      \
 		      : "0" (SYS_futex), "D" (futex),			      \
 			"S" (__lll_private_flag (FUTEX_WAIT, private)),	      \
-		        "d" (_val), "r" (__to)				      \
+			"d" (_val), "r" (__to)				      \
 		      : "memory", "cc", "r11", "cx");			      \
     __status;								      \
   })
@@ -215,242 +229,308 @@ LLL_STUB_UNWIND_INFO_END
   } while (0)
 
 
-
-/* Does not preserve %eax and %ecx.  */
-extern int __lll_mutex_lock_wait (int *__futex, int __val) attribute_hidden;
-/* Does not preserver %eax, %ecx, and %edx.  */
-extern int __lll_mutex_timedlock_wait (int *__futex, int __val,
-				       const struct timespec *__abstime)
-     attribute_hidden;
-/* Preserves all registers but %eax.  */
-extern int __lll_mutex_unlock_wait (int *__futex) attribute_hidden;
-
-
-/* NB: in the lll_mutex_trylock macro we simply return the value in %eax
+/* NB: in the lll_trylock macro we simply return the value in %eax
    after the cmpxchg instruction.  In case the operation succeded this
    value is zero.  In case the operation failed, the cmpxchg instruction
    has loaded the current value of the memory work which is guaranteed
    to be nonzero.  */
-#define lll_mutex_trylock(futex) \
+#if defined NOT_IN_libc || defined UP
+# define __lll_trylock_asm LOCK_INSTR "cmpxchgl %2, %1"
+#else
+# define __lll_trylock_asm "cmpl $0, __libc_multiple_threads(%%rip)\n\t"      \
+			   "je 0f\n\t"					      \
+			   "lock; cmpxchgl %2, %1\n\t"			      \
+			   "jmp 1f\n\t"					      \
+			   "0:\tcmpxchgl %2, %1\n\t"			      \
+			   "1:"
+#endif
+
+#define lll_trylock(futex) \
   ({ int ret;								      \
-     __asm __volatile (LOCK_INSTR "cmpxchgl %2, %1"			      \
+     __asm __volatile (__lll_trylock_asm				      \
 		       : "=a" (ret), "=m" (futex)			      \
-		       : "r" (LLL_MUTEX_LOCK_INITIALIZER_LOCKED), "m" (futex),\
-			 "0" (LLL_MUTEX_LOCK_INITIALIZER)		      \
+		       : "r" (LLL_LOCK_INITIALIZER_LOCKED), "m" (futex),      \
+			 "0" (LLL_LOCK_INITIALIZER)			      \
 		       : "memory");					      \
      ret; })
 
-
-#define lll_robust_mutex_trylock(futex, id)				      \
+#define lll_robust_trylock(futex, id) \
   ({ int ret;								      \
      __asm __volatile (LOCK_INSTR "cmpxchgl %2, %1"			      \
 		       : "=a" (ret), "=m" (futex)			      \
-		       : "r" (id), "m" (futex),				      \
-			 "0" (LLL_MUTEX_LOCK_INITIALIZER)		      \
+		       : "r" (id), "m" (futex),	"0" (LLL_LOCK_INITIALIZER)    \
 		       : "memory");					      \
      ret; })
 
-
-#define lll_mutex_cond_trylock(futex) \
+#define lll_cond_trylock(futex) \
   ({ int ret;								      \
      __asm __volatile (LOCK_INSTR "cmpxchgl %2, %1"			      \
 		       : "=a" (ret), "=m" (futex)			      \
-		       : "r" (LLL_MUTEX_LOCK_INITIALIZER_WAITERS),	      \
-			 "m" (futex), "0" (LLL_MUTEX_LOCK_INITIALIZER)	      \
+		       : "r" (LLL_LOCK_INITIALIZER_WAITERS),		      \
+			 "m" (futex), "0" (LLL_LOCK_INITIALIZER)	      \
 		       : "memory");					      \
      ret; })
 
-
-#define lll_mutex_lock(futex) \
-  (void) ({ int ignore1, ignore2, ignore3;				      \
-	    __asm __volatile (LOCK_INSTR "cmpxchgl %0, %2\n\t"		      \
+#if defined NOT_IN_libc || defined UP
+# define __lll_lock_asm_start LOCK_INSTR "cmpxchgl %4, %2\n\t"		      \
+			      "jnz 1f\n\t"
+#else
+# define __lll_lock_asm_start "cmpl $0, __libc_multiple_threads(%%rip)\n\t"   \
+			      "je 0f\n\t"				      \
+			      "lock; cmpxchgl %4, %2\n\t"		      \
 			      "jnz 1f\n\t"				      \
-			      ".subsection 1\n\t"			      \
-			      ".type  _L_mutex_lock_%=, @function\n"	      \
-			      "_L_mutex_lock_%=:\n"			      \
-			      "1:\tleaq %2, %%rdi\n"			      \
-			      "2:\tsubq $128, %%rsp\n"			      \
-			      "3:\tcallq __lll_mutex_lock_wait\n"	      \
-			      "4:\taddq $128, %%rsp\n"			      \
-			      "5:\tjmp 24f\n"				      \
-			      "6:\t.size _L_mutex_lock_%=, 6b-1b\n\t"	      \
-			      ".previous\n"				      \
-			      LLL_STUB_UNWIND_INFO_5			      \
-			      "24:"					      \
-			      : "=S" (ignore1), "=&D" (ignore2), "=m" (futex),\
-				"=a" (ignore3)				      \
-			      : "0" (1), "m" (futex), "3" (0)		      \
-			      : "cx", "r11", "cc", "memory"); })
-
-
-#define lll_robust_mutex_lock(futex, id) \
+		  	      "jmp 24f\n"				      \
+			      "0:\tcmpxchgl %4, %2\n\t"			      \
+			      "jnz 1f\n\t"
+#endif
+
+#define lll_lock(futex, private) \
+  (void)								      \
+    ({ int ignore1, ignore2, ignore3;					      \
+       if (__builtin_constant_p (private) && (private) == LLL_PRIVATE)	      \
+	 __asm __volatile (__lll_lock_asm_start				      \
+			   ".subsection 1\n\t"				      \
+			   ".type _L_lock_%=, @function\n"		      \
+			   "_L_lock_%=:\n"				      \
+			   "1:\tleaq %2, %%rdi\n"			      \
+			   "2:\tsubq $128, %%rsp\n"			      \
+			   "3:\tcallq __lll_lock_wait_private\n"	      \
+			   "4:\taddq $128, %%rsp\n"			      \
+			   "5:\tjmp 24f\n"				      \
+			   "6:\t.size _L_lock_%=, 6b-1b\n\t"		      \
+			   ".previous\n"				      \
+			   LLL_STUB_UNWIND_INFO_5			      \
+			   "24:"					      \
+			   : "=S" (ignore1), "=&D" (ignore2), "=m" (futex),   \
+			     "=a" (ignore3)				      \
+			   : "0" (1), "m" (futex), "3" (0)		      \
+			   : "cx", "r11", "cc", "memory");		      \
+       else								      \
+	 __asm __volatile (__lll_lock_asm_start				      \
+			   ".subsection 1\n\t"				      \
+			   ".type _L_lock_%=, @function\n"		      \
+			   "_L_lock_%=:\n"				      \
+			   "1:\tleaq %2, %%rdi\n"			      \
+			   "2:\tsubq $128, %%rsp\n"			      \
+			   "3:\tcallq __lll_lock_wait\n"		      \
+			   "4:\taddq $128, %%rsp\n"			      \
+			   "5:\tjmp 24f\n"				      \
+			   "6:\t.size _L_lock_%=, 6b-1b\n\t"		      \
+			   ".previous\n"				      \
+			   LLL_STUB_UNWIND_INFO_5			      \
+			   "24:"					      \
+			   : "=S" (ignore1), "=D" (ignore2), "=m" (futex),    \
+			     "=a" (ignore3)				      \
+			   : "1" (1), "m" (futex), "3" (0), "0" (private)     \
+			   : "cx", "r11", "cc", "memory");		      \
+    })									      \
+
+#define lll_robust_lock(futex, id, private) \
   ({ int result, ignore1, ignore2;					      \
-    __asm __volatile (LOCK_INSTR "cmpxchgl %0, %2\n\t"			      \
+    __asm __volatile (LOCK_INSTR "cmpxchgl %4, %2\n\t"			      \
 		      "jnz 1f\n\t"					      \
 		      ".subsection 1\n\t"				      \
-		      ".type  _L_robust_mutex_lock_%=, @function\n"	      \
-		      "_L_robust_mutex_lock_%=:\n"			      \
+		      ".type _L_robust_lock_%=, @function\n"		      \
+		      "_L_robust_lock_%=:\n"				      \
 		      "1:\tleaq %2, %%rdi\n"				      \
 		      "2:\tsubq $128, %%rsp\n"				      \
-		      "3:\tcallq __lll_robust_mutex_lock_wait\n"	      \
+		      "3:\tcallq __lll_robust_lock_wait\n"		      \
 		      "4:\taddq $128, %%rsp\n"				      \
 		      "5:\tjmp 24f\n"					      \
-		      "6:\t.size _L_robust_mutex_lock_%=, 6b-1b\n\t"	      \
+		      "6:\t.size _L_robust_lock_%=, 6b-1b\n\t"		      \
 		      ".previous\n"					      \
 		      LLL_STUB_UNWIND_INFO_5				      \
 		      "24:"						      \
-		      : "=S" (ignore1), "=&D" (ignore2), "=m" (futex),	      \
+		      : "=S" (ignore1), "=D" (ignore2), "=m" (futex),	      \
 			"=a" (result)					      \
-		      : "0" (id), "m" (futex), "3" (0)			      \
+		      : "1" (id), "m" (futex), "3" (0), "0" (private)	      \
 		      : "cx", "r11", "cc", "memory");			      \
     result; })
 
-
-#define lll_mutex_cond_lock(futex) \
-  (void) ({ int ignore1, ignore2, ignore3;				      \
-	    __asm __volatile (LOCK_INSTR "cmpxchgl %0, %2\n\t"		      \
-			      "jnz 1f\n\t"				      \
-			      ".subsection 1\n\t"			      \
-			      ".type  _L_mutex_cond_lock_%=, @function\n"     \
-			      "_L_mutex_cond_lock_%=:\n"		      \
-			      "1:\tleaq %2, %%rdi\n"			      \
-			      "2:\tsubq $128, %%rsp\n"			      \
-			      "3:\tcallq __lll_mutex_lock_wait\n"	      \
-			      "4:\taddq $128, %%rsp\n"			      \
-			      "5:\tjmp 24f\n"				      \
-			      "6:\t.size _L_mutex_cond_lock_%=, 6b-1b\n\t"    \
-			      ".previous\n"				      \
-			      LLL_STUB_UNWIND_INFO_5			      \
-			      "24:"					      \
-			      : "=S" (ignore1), "=&D" (ignore2), "=m" (futex),\
-				"=a" (ignore3)				      \
-			      : "0" (2), "m" (futex), "3" (0)		      \
-			      : "cx", "r11", "cc", "memory"); })
-
-
-#define lll_robust_mutex_cond_lock(futex, id) \
+#define lll_cond_lock(futex, private) \
+  (void)								      \
+    ({ int ignore1, ignore2, ignore3;					      \
+       __asm __volatile (LOCK_INSTR "cmpxchgl %4, %2\n\t"		      \
+			 "jnz 1f\n\t"					      \
+			 ".subsection 1\n\t"				      \
+			 ".type _L_cond_lock_%=, @function\n"		      \
+			 "_L_cond_lock_%=:\n"				      \
+			 "1:\tleaq %2, %%rdi\n"				      \
+			 "2:\tsubq $128, %%rsp\n"			      \
+			 "3:\tcallq __lll_lock_wait\n"			      \
+			 "4:\taddq $128, %%rsp\n"			      \
+			 "5:\tjmp 24f\n"				      \
+			 "6:\t.size _L_cond_lock_%=, 6b-1b\n\t"		      \
+			 ".previous\n"					      \
+			 LLL_STUB_UNWIND_INFO_5				      \
+			 "24:"						      \
+			 : "=S" (ignore1), "=D" (ignore2), "=m" (futex),      \
+			   "=a" (ignore3)				      \
+			 : "1" (2), "m" (futex), "3" (0), "0" (private)	      \
+			 : "cx", "r11", "cc", "memory");		      \
+    })
+
+#define lll_robust_cond_lock(futex, id, private) \
   ({ int result, ignore1, ignore2;					      \
-    __asm __volatile (LOCK_INSTR "cmpxchgl %0, %2\n\t"			      \
+    __asm __volatile (LOCK_INSTR "cmpxchgl %4, %2\n\t"			      \
 		      "jnz 1f\n\t"					      \
 		      ".subsection 1\n\t"				      \
-		      ".type  _L_robust_mutex_cond_lock_%=, @function\n"      \
-		      "_L_robust_mutex_cond_lock_%=:\n"			      \
+		      ".type _L_robust_cond_lock_%=, @function\n"	      \
+		      "_L_robust_cond_lock_%=:\n"			      \
 		      "1:\tleaq %2, %%rdi\n"				      \
 		      "2:\tsubq $128, %%rsp\n"				      \
-		      "3:\tcallq __lll_robust_mutex_lock_wait\n"	      \
+		      "3:\tcallq __lll_robust_lock_wait\n"		      \
 		      "4:\taddq $128, %%rsp\n"				      \
 		      "5:\tjmp 24f\n"					      \
-		      "6:\t.size _L_robust_mutex_cond_lock_%=, 6b-1b\n\t"     \
+		      "6:\t.size _L_robust_cond_lock_%=, 6b-1b\n\t"	      \
 		      ".previous\n"					      \
 		      LLL_STUB_UNWIND_INFO_5				      \
 		      "24:"						      \
-		      : "=S" (ignore1), "=&D" (ignore2), "=m" (futex),	      \
+		      : "=S" (ignore1), "=D" (ignore2), "=m" (futex),	      \
 			"=a" (result)					      \
-		      : "0" (id | FUTEX_WAITERS), "m" (futex), "3" (0)	      \
+		      : "1" (id | FUTEX_WAITERS), "m" (futex), "3" (0),	      \
+			"0" (private)					      \
 		      : "cx", "r11", "cc", "memory");			      \
     result; })
 
-
-#define lll_mutex_timedlock(futex, timeout) \
+#define lll_timedlock(futex, timeout, private) \
   ({ int result, ignore1, ignore2, ignore3;				      \
-     __asm __volatile (LOCK_INSTR "cmpxchgl %2, %4\n\t"			      \
+     __asm __volatile (LOCK_INSTR "cmpxchgl %1, %4\n\t"			      \
 		       "jnz 1f\n\t"					      \
 		       ".subsection 1\n\t"				      \
-		       ".type  _L_mutex_timedlock_%=, @function\n"	      \
-		       "_L_mutex_timedlock_%=:\n"			      \
+		       ".type _L_timedlock_%=, @function\n"		      \
+		       "_L_timedlock_%=:\n"				      \
 		       "1:\tleaq %4, %%rdi\n"				      \
 		       "0:\tmovq %8, %%rdx\n"				      \
 		       "2:\tsubq $128, %%rsp\n"				      \
-		       "3:\tcallq __lll_mutex_timedlock_wait\n"		      \
+		       "3:\tcallq __lll_timedlock_wait\n"		      \
 		       "4:\taddq $128, %%rsp\n"				      \
 		       "5:\tjmp 24f\n"					      \
-		       "6:\t.size _L_mutex_timedlock_%=, 6b-1b\n\t"	      \
+		       "6:\t.size _L_timedlock_%=, 6b-1b\n\t"		      \
 		       ".previous\n"					      \
 		       LLL_STUB_UNWIND_INFO_6				      \
 		       "24:"						      \
-		       : "=a" (result), "=&D" (ignore1), "=S" (ignore2),      \
+		       : "=a" (result), "=D" (ignore1), "=S" (ignore2),	      \
 			 "=&d" (ignore3), "=m" (futex)			      \
-		       : "0" (0), "2" (1), "m" (futex), "m" (timeout)	      \
+		       : "0" (0), "1" (1), "m" (futex), "m" (timeout),	      \
+			 "2" (private)					      \
 		       : "memory", "cx", "cc", "r10", "r11");		      \
      result; })
 
-
-#define lll_robust_mutex_timedlock(futex, timeout, id) \
+#define lll_robust_timedlock(futex, timeout, id, private) \
   ({ int result, ignore1, ignore2, ignore3;				      \
-     __asm __volatile (LOCK_INSTR "cmpxchgl %2, %4\n\t"			      \
+     __asm __volatile (LOCK_INSTR "cmpxchgl %1, %4\n\t"			      \
 		       "jnz 1f\n\t"					      \
 		       ".subsection 1\n\t"				      \
-		       ".type  _L_robust_mutex_timedlock_%=, @function\n"     \
-		       "_L_robust_mutex_timedlock_%=:\n"		      \
+		       ".type _L_robust_timedlock_%=, @function\n"	      \
+		       "_L_robust_timedlock_%=:\n"			      \
 		       "1:\tleaq %4, %%rdi\n"				      \
 		       "0:\tmovq %8, %%rdx\n"				      \
 		       "2:\tsubq $128, %%rsp\n"				      \
-		       "3:\tcallq __lll_robust_mutex_timedlock_wait\n"	      \
+		       "3:\tcallq __lll_robust_timedlock_wait\n"	      \
 		       "4:\taddq $128, %%rsp\n"				      \
 		       "5:\tjmp 24f\n"					      \
-		       "6:\t.size _L_robust_mutex_timedlock_%=, 6b-1b\n\t"    \
+		       "6:\t.size _L_robust_timedlock_%=, 6b-1b\n\t"	      \
 		       ".previous\n"					      \
 		       LLL_STUB_UNWIND_INFO_6				      \
 		       "24:"						      \
-		       : "=a" (result), "=&D" (ignore1), "=S" (ignore2),      \
+		       : "=a" (result), "=D" (ignore1), "=S" (ignore2),       \
 			 "=&d" (ignore3), "=m" (futex)			      \
-		       : "0" (0), "2" (id), "m" (futex), "m" (timeout)	      \
+		       : "0" (0), "1" (id), "m" (futex), "m" (timeout),	      \
+			 "2" (private)					      \
 		       : "memory", "cx", "cc", "r10", "r11");		      \
      result; })
 
+#if defined NOT_IN_libc || defined UP
+# define __lll_unlock_asm_start LOCK_INSTR "decl %0\n\t"		      \
+				"jne 1f\n\t"
+#else
+# define __lll_unlock_asm_start "cmpl $0, __libc_multiple_threads(%%rip)\n\t" \
+				"je 0f\n\t"				      \
+				"lock; decl %0\n\t"			      \
+				"jne 1f\n\t"				      \
+				"jmp 24f\n\t"				      \
+				"0:\tdecl %0\n\t"			      \
+				"jne 1f\n\t"
+#endif
 
-#define lll_mutex_unlock(futex) \
-  (void) ({ int ignore;							      \
-            __asm __volatile (LOCK_INSTR "decl %0\n\t"			      \
-			      "jne 1f\n\t"				      \
-			      ".subsection 1\n\t"			      \
-			      ".type  _L_mutex_unlock_%=, @function\n"	      \
-			      "_L_mutex_unlock_%=:\n"			      \
-			      "1:\tleaq %0, %%rdi\n"			      \
-			      "2:\tsubq $128, %%rsp\n"			      \
-			      "3:\tcallq __lll_mutex_unlock_wake\n"	      \
-			      "4:\taddq $128, %%rsp\n"			      \
-			      "5:\tjmp 24f\n"				      \
-			      "6:\t.size _L_mutex_unlock_%=, 6b-1b\n\t"	      \
-			      ".previous\n"				      \
-			      LLL_STUB_UNWIND_INFO_5			      \
-			      "24:"					      \
-			      : "=m" (futex), "=&D" (ignore)		      \
-			      : "m" (futex)				      \
-			      : "ax", "cx", "r11", "cc", "memory"); })
-
-
-#define lll_robust_mutex_unlock(futex) \
-  (void) ({ int ignore;							      \
-	    __asm __volatile (LOCK_INSTR "andl %2, %0\n\t"		      \
-			      "jne 1f\n\t"				      \
-			      ".subsection 1\n\t"			      \
-			      ".type  _L_robust_mutex_unlock_%=, @function\n" \
-			      "_L_robust_mutex_unlock_%=:\n"		      \
-			      "1:\tleaq %0, %%rdi\n"			      \
-			      "2:\tsubq $128, %%rsp\n"			      \
-			      "3:\tcallq __lll_mutex_unlock_wake\n"	      \
-			      "4:\taddq $128, %%rsp\n"			      \
-			      "5:\tjmp 24f\n"				      \
-			      "6:\t.size _L_robust_mutex_unlock_%=, 6b-1b\n\t"\
-			      ".previous\n"				      \
-			      LLL_STUB_UNWIND_INFO_5			      \
-			      "24:"					      \
-			      : "=m" (futex), "=&D" (ignore)		      \
-			      : "i" (FUTEX_WAITERS), "m" (futex)	      \
-			      : "ax", "cx", "r11", "cc", "memory"); })
-
-
-#define lll_robust_mutex_dead(futex) \
-  (void) ({ int ignore;		     \
-	    __asm __volatile (LOCK_INSTR "orl %3, (%2)\n\t"		      \
-			      "syscall"					      \
-			      : "=m" (futex), "=a" (ignore)		      \
-			      : "D" (&(futex)), "i" (FUTEX_OWNER_DIED),	      \
-				"S" (FUTEX_WAKE), "1" (__NR_futex),	      \
-				"d" (1)					      \
-			      : "cx", "r11", "cc", "memory"); })
-
+#define lll_unlock(futex, private) \
+  (void)								      \
+    ({ int ignore;							      \
+       if (__builtin_constant_p (private) && (private) == LLL_PRIVATE)	      \
+	 __asm __volatile (__lll_unlock_asm_start			      \
+			   ".subsection 1\n\t"				      \
+			   ".type _L_unlock_%=, @function\n"		      \
+			   "_L_unlock_%=:\n"				      \
+			   "1:\tleaq %0, %%rdi\n"			      \
+			   "2:\tsubq $128, %%rsp\n"			      \
+			   "3:\tcallq __lll_unlock_wake_private\n"	      \
+			   "4:\taddq $128, %%rsp\n"			      \
+			   "5:\tjmp 24f\n"				      \
+			   "6:\t.size _L_unlock_%=, 6b-1b\n\t"		      \
+			   ".previous\n"				      \
+			   LLL_STUB_UNWIND_INFO_5			      \
+			   "24:"					      \
+			   : "=m" (futex), "=&D" (ignore)		      \
+			   : "m" (futex)				      \
+			   : "ax", "cx", "r11", "cc", "memory");	      \
+       else								      \
+	 __asm __volatile (__lll_unlock_asm_start			      \
+			   ".subsection 1\n\t"				      \
+			   ".type _L_unlock_%=, @function\n"		      \
+			   "_L_unlock_%=:\n"				      \
+			   "1:\tleaq %0, %%rdi\n"			      \
+			   "2:\tsubq $128, %%rsp\n"			      \
+			   "3:\tcallq __lll_unlock_wake\n"		      \
+			   "4:\taddq $128, %%rsp\n"			      \
+			   "5:\tjmp 24f\n"				      \
+			   "6:\t.size _L_unlock_%=, 6b-1b\n\t"		      \
+			   ".previous\n"				      \
+			   LLL_STUB_UNWIND_INFO_5			      \
+			   "24:"					      \
+			   : "=m" (futex), "=&D" (ignore)		      \
+			   : "m" (futex), "S" (private)			      \
+			   : "ax", "cx", "r11", "cc", "memory");	      \
+    })
+
+#define lll_robust_unlock(futex, private) \
+  do									      \
+    {									      \
+      int ignore;							      \
+      __asm __volatile (LOCK_INSTR "andl %2, %0\n\t"			      \
+			"jne 1f\n\t"					      \
+			".subsection 1\n\t"				      \
+			".type _L_robust_unlock_%=, @function\n"	      \
+			"_L_robust_unlock_%=:\n"			      \
+			"1:\tleaq %0, %%rdi\n"				      \
+			"2:\tsubq $128, %%rsp\n"			      \
+			"3:\tcallq __lll_unlock_wake\n"			      \
+			"4:\taddq $128, %%rsp\n"			      \
+			"5:\tjmp 24f\n"					      \
+			"6:\t.size _L_robust_unlock_%=, 6b-1b\n\t"	      \
+			".previous\n"					      \
+			LLL_STUB_UNWIND_INFO_5				      \
+			"24:"						      \
+			: "=m" (futex), "=&D" (ignore)			      \
+			: "i" (FUTEX_WAITERS), "m" (futex),		      \
+			  "S" (private)					      \
+			: "ax", "cx", "r11", "cc", "memory");		      \
+    }									      \
+  while (0)
+
+#define lll_robust_dead(futex, private) \
+  do									      \
+    {									      \
+      int ignore;							      \
+      __asm __volatile (LOCK_INSTR "orl %3, (%2)\n\t"			      \
+			"syscall"					      \
+			: "=m" (futex), "=a" (ignore)			      \
+			: "D" (&(futex)), "i" (FUTEX_OWNER_DIED),	      \
+			  "S" (__lll_private_flag (FUTEX_WAKE, private)),     \
+			  "1" (__NR_futex), "d" (1)			      \
+			: "cx", "r11", "cc", "memory");			      \
+    }									      \
+  while (0)
 
 /* Returns non-zero if error happened, zero if success.  */
 #define lll_futex_requeue(ftx, nr_wake, nr_move, mutex, val) \
@@ -461,117 +541,13 @@ extern int __lll_mutex_unlock_wait (int *__futex) attribute_hidden;
      __asm __volatile ("syscall"					      \
 		       : "=a" (__res)					      \
 		       : "0" (__NR_futex), "D" ((void *) ftx),		      \
-		         "S" (FUTEX_CMP_REQUEUE), "d" (nr_wake),	      \
-		         "r" (__nr_move), "r" (__mutex), "r" (__val)	      \
+			 "S" (FUTEX_CMP_REQUEUE), "d" (nr_wake),	      \
+			 "r" (__nr_move), "r" (__mutex), "r" (__val)	      \
 		       : "cx", "r11", "cc", "memory");			      \
      __res < 0; })
 
-
-#define lll_mutex_islocked(futex) \
-  (futex != LLL_MUTEX_LOCK_INITIALIZER)
-
-
-/* We have a separate internal lock implementation which is not tied
-   to binary compatibility.  */
-
-/* Type for lock object.  */
-typedef int lll_lock_t;
-
-/* Initializers for lock.  */
-#define LLL_LOCK_INITIALIZER		(0)
-#define LLL_LOCK_INITIALIZER_LOCKED	(1)
-
-
-/* The states of a lock are:
-    0  -  untaken
-    1  -  taken by one user
-    2  -  taken by more users */
-
-
-#if defined NOT_IN_libc || defined UP
-# define lll_trylock(futex) lll_mutex_trylock (futex)
-# define lll_lock(futex) lll_mutex_lock (futex)
-# define lll_unlock(futex) lll_mutex_unlock (futex)
-#else
-/* Special versions of the macros for use in libc itself.  They avoid
-   the lock prefix when the thread library is not used.
-
-   The code sequence to avoid unnecessary lock prefixes is what the AMD
-   guys suggested.  If you do not like it, bring it up with AMD.
-
-   XXX In future we might even want to avoid it on UP machines.  */
-
-# define lll_trylock(futex) \
-  ({ unsigned char ret;							      \
-     __asm __volatile ("cmpl $0, __libc_multiple_threads(%%rip)\n\t"	      \
-		       "je 0f\n\t"					      \
-		       "lock; cmpxchgl %2, %1\n\t"			      \
-		       "jmp 1f\n"					      \
-		       "0:\tcmpxchgl %2, %1\n\t"			      \
-		       "1:setne %0"					      \
-		       : "=a" (ret), "=m" (futex)			      \
-		       : "r" (LLL_MUTEX_LOCK_INITIALIZER_LOCKED), "m" (futex),\
-			 "0" (LLL_MUTEX_LOCK_INITIALIZER)		      \
-		       : "memory");					      \
-     ret; })
-
-
-# define lll_lock(futex) \
-  (void) ({ int ignore1, ignore2, ignore3;				      \
-	    __asm __volatile ("cmpl $0, __libc_multiple_threads(%%rip)\n\t"   \
-			      "je 0f\n\t"				      \
-			      "lock; cmpxchgl %0, %2\n\t"		      \
-			      "jnz 1f\n\t"				      \
-		  	      "jmp 24f\n"				      \
-			      "0:\tcmpxchgl %0, %2\n\t"			      \
-			      "jnz 1f\n\t"				      \
-			      ".subsection 1\n\t"			      \
-			      ".type  _L_lock_%=, @function\n"		      \
-			      "_L_lock_%=:\n"				      \
-			      "1:\tleaq %2, %%rdi\n"			      \
-			      "2:\tsubq $128, %%rsp\n"			      \
-			      "3:\tcallq __lll_mutex_lock_wait\n"	      \
-			      "4:\taddq $128, %%rsp\n"			      \
-			      "5:\tjmp 24f\n"				      \
-			      "6:\t.size _L_lock_%=, 6b-1b\n\t"		      \
-			      ".previous\n"				      \
-			      LLL_STUB_UNWIND_INFO_5			      \
-			      "24:"					      \
-			      : "=S" (ignore1), "=&D" (ignore2), "=m" (futex),\
-				"=a" (ignore3)				      \
-			      : "0" (1), "m" (futex), "3" (0)		      \
-			      : "cx", "r11", "cc", "memory"); })
-
-
-# define lll_unlock(futex) \
-  (void) ({ int ignore;							      \
-            __asm __volatile ("cmpl $0, __libc_multiple_threads(%%rip)\n\t"   \
-			      "je 0f\n\t"				      \
-			      "lock; decl %0\n\t"			      \
-			      "jne 1f\n\t"				      \
-		  	      "jmp 24f\n"				      \
-			      "0:\tdecl %0\n\t"				      \
-			      "jne 1f\n\t"				      \
-			      ".subsection 1\n\t"			      \
-			      ".type  _L_unlock_%=, @function\n"	      \
-			      "_L_unlock_%=:\n"				      \
-			      "1:\tleaq %0, %%rdi\n"			      \
-			      "2:\tsubq $128, %%rsp\n"			      \
-			      "3:\tcallq __lll_mutex_unlock_wake\n"	      \
-			      "4:\taddq $128, %%rsp\n"			      \
-			      "5:\tjmp 24f\n"				      \
-			      "6:\t.size _L_unlock_%=, 6b-1b\n\t"	      \
-			      ".previous\n"				      \
-			      LLL_STUB_UNWIND_INFO_5			      \
-			      "24:"					      \
-			      : "=m" (futex), "=&D" (ignore)		      \
-			      : "m" (futex)				      \
-			      : "ax", "cx", "r11", "cc", "memory"); })
-#endif
-
-
 #define lll_islocked(futex) \
-  (futex != LLL_MUTEX_LOCK_INITIALIZER)
+  (futex != LLL_LOCK_INITIALIZER)
 
 
 /* The kernel notifies a process with uses CLONE_CLEARTID via futex
@@ -610,25 +586,6 @@ extern int __lll_timedwait_tid (int *tid, const struct timespec *abstime)
       }									      \
     __result; })
 
-
-/* Conditional variable handling.  */
-
-extern void __lll_cond_wait (pthread_cond_t *cond) attribute_hidden;
-extern int __lll_cond_timedwait (pthread_cond_t *cond,
-				 const struct timespec *abstime)
-     attribute_hidden;
-extern void __lll_cond_wake (pthread_cond_t *cond) attribute_hidden;
-extern void __lll_cond_broadcast (pthread_cond_t *cond) attribute_hidden;
-
-
-#define lll_cond_wait(cond) \
-  __lll_cond_wait (cond)
-#define lll_cond_timedwait(cond, abstime) \
-  __lll_cond_timedwait (cond, abstime)
-#define lll_cond_wake(cond) \
-  __lll_cond_wake (cond)
-#define lll_cond_broadcast(cond) \
-  __lll_cond_broadcast (cond)
-
+#endif  /* !__ASSEMBLER__ */
 
 #endif	/* lowlevellock.h */
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevelrobustlock.S b/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevelrobustlock.S
index 69243950d7..fa7516ef71 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevelrobustlock.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevelrobustlock.S
@@ -1,4 +1,5 @@
-/* Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007
+   Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
 
@@ -19,33 +20,40 @@
 
 #include <sysdep.h>
 #include <pthread-errnos.h>
+#include <lowlevellock.h>
 #include <lowlevelrobustlock.h>
+#include <kernel-features.h>
 
 	.text
 
-#ifndef LOCK
-# ifdef UP
-#  define LOCK
+#define FUTEX_WAITERS		0x80000000
+#define FUTEX_OWNER_DIED	0x40000000
+
+#ifdef __ASSUME_PRIVATE_FUTEX
+# define LOAD_FUTEX_WAIT(reg) \
+	xorl	$(FUTEX_WAIT | FUTEX_PRIVATE_FLAG), reg
+#else
+# if FUTEX_WAIT == 0
+#  define LOAD_FUTEX_WAIT(reg) \
+	xorl	$FUTEX_PRIVATE_FLAG, reg ; \
+	andl	%fs:PRIVATE_FUTEX, reg
 # else
-#  define LOCK lock
+#  define LOAD_FUTEX_WAIT(reg) \
+	xorl	$FUTEX_PRIVATE_FLAG, reg ; \
+	andl	%fs:PRIVATE_FUTEX, reg ; \
+	orl	$FUTEX_WAIT, reg
 # endif
 #endif
 
-#define SYS_futex		202
-#define FUTEX_WAIT		0
-#define FUTEX_WAKE		1
-#define FUTEX_WAITERS		0x80000000
-#define FUTEX_OWNER_DIED	0x40000000
-
 /* For the calculation see asm/vsyscall.h.  */
 #define VSYSCALL_ADDR_vgettimeofday	0xffffffffff600000
 
 
-	.globl	__lll_robust_mutex_lock_wait
-	.type	__lll_robust_mutex_lock_wait,@function
-	.hidden	__lll_robust_mutex_lock_wait
+	.globl	__lll_robust_lock_wait
+	.type	__lll_robust_lock_wait,@function
+	.hidden	__lll_robust_lock_wait
 	.align	16
-__lll_robust_mutex_lock_wait:
+__lll_robust_lock_wait:
 	cfi_startproc
 	pushq	%r10
 	cfi_adjust_cfa_offset(8)
@@ -55,11 +63,7 @@ __lll_robust_mutex_lock_wait:
 	cfi_offset(%rdx, -24)
 
 	xorq	%r10, %r10	/* No timeout.  */
-#if FUTEX_WAIT == 0
-	xorl	%esi, %esi
-#else
-	movl	$FUTEX_WAIT, %esi
-#endif
+	LOAD_FUTEX_WAIT (%esi)
 
 4:	movl	%eax, %edx
 	orl	$FUTEX_WAITERS, %edx
@@ -97,14 +101,14 @@ __lll_robust_mutex_lock_wait:
 	cfi_restore(%r10)
 	retq
 	cfi_endproc
-	.size	__lll_robust_mutex_lock_wait,.-__lll_robust_mutex_lock_wait
+	.size	__lll_robust_lock_wait,.-__lll_robust_lock_wait
 
 
-	.globl	__lll_robust_mutex_timedlock_wait
-	.type	__lll_robust_mutex_timedlock_wait,@function
-	.hidden	__lll_robust_mutex_timedlock_wait
+	.globl	__lll_robust_timedlock_wait
+	.type	__lll_robust_timedlock_wait,@function
+	.hidden	__lll_robust_timedlock_wait
 	.align	16
-__lll_robust_mutex_timedlock_wait:
+__lll_robust_timedlock_wait:
 	cfi_startproc
 	/* Check for a valid timeout value.  */
 	cmpq	$1000000000, 8(%rdx)
@@ -122,10 +126,12 @@ __lll_robust_mutex_timedlock_wait:
 	cfi_offset(%r9, -24)
 	cfi_offset(%r12, -32)
 	cfi_offset(%r13, -40)
+	pushq	%rsi
+	cfi_adjust_cfa_offset(8)
 
 	/* Stack frame for the timespec and timeval structs.  */
-	subq	$24, %rsp
-	cfi_adjust_cfa_offset(24)
+	subq	$32, %rsp
+	cfi_adjust_cfa_offset(32)
 
 	movq	%rdi, %r12
 	movq	%rdx, %r13
@@ -174,11 +180,8 @@ __lll_robust_mutex_timedlock_wait:
 	jnz	5f
 
 2:	movq	%rsp, %r10
-#if FUTEX_WAIT == 0
-	xorl	%esi, %esi
-#else
-	movl	$FUTEX_WAIT, %esi
-#endif
+	movl	32(%rsp), %esi
+	LOAD_FUTEX_WAIT (%esi)
 	movq	%r12, %rdi
 	movl	$SYS_futex, %eax
 	syscall
@@ -195,8 +198,8 @@ __lll_robust_mutex_timedlock_wait:
 	cmpxchgl %edx, (%r12)
 	jnz	7f
 
-6:	addq	$24, %rsp
-	cfi_adjust_cfa_offset(-24)
+6:	addq	$40, %rsp
+	cfi_adjust_cfa_offset(-40)
 	popq	%r13
 	cfi_adjust_cfa_offset(-8)
 	cfi_restore(%r13)
@@ -214,7 +217,7 @@ __lll_robust_mutex_timedlock_wait:
 3:	movl	$EINVAL, %eax
 	retq
 
-	cfi_adjust_cfa_offset(56)
+	cfi_adjust_cfa_offset(72)
 	cfi_offset(%r8, -16)
 	cfi_offset(%r9, -24)
 	cfi_offset(%r12, -32)
@@ -226,4 +229,4 @@ __lll_robust_mutex_timedlock_wait:
 8:	movl	$ETIMEDOUT, %eax
 	jmp	6b
 	cfi_endproc
-	.size	__lll_robust_mutex_timedlock_wait,.-__lll_robust_mutex_timedlock_wait
+	.size	__lll_robust_timedlock_wait,.-__lll_robust_timedlock_wait
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_barrier_wait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_barrier_wait.S
index 63771b3840..15ad534fa0 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_barrier_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_barrier_wait.S
@@ -18,18 +18,9 @@
    02111-1307 USA.  */
 
 #include <sysdep.h>
+#include <lowlevellock.h>
 #include <lowlevelbarrier.h>
 
-#define SYS_futex	202
-#define FUTEX_WAIT	0
-#define FUTEX_WAKE	1
-
-#ifndef UP
-# define LOCK lock
-#else
-# define LOCK
-#endif
-
 
 	.text
 
@@ -142,21 +133,29 @@ pthread_barrier_wait:
 
 	retq
 
-1:	addq	$MUTEX, %rdi
-	callq	__lll_mutex_lock_wait
+1:	movl	PRIVATE(%rdi), %esi
+	addq	$MUTEX, %rdi
+	xorl	$LLL_SHARED, %esi
+	callq	__lll_lock_wait
 	subq	$MUTEX, %rdi
 	jmp	2b
 
-4:	addq	$MUTEX, %rdi
-	callq	__lll_mutex_unlock_wake
+4:	movl	PRIVATE(%rdi), %esi
+	addq	$MUTEX, %rdi
+	xorl	$LLL_SHARED, %esi
+	callq	__lll_unlock_wake
 	jmp	5b
 
-6:	addq	$MUTEX, %rdi
-	callq	__lll_mutex_unlock_wake
+6:	movl	PRIVATE(%rdi), %esi
+	addq	$MUTEX, %rdi
+	xorl	$LLL_SHARED, %esi
+	callq	__lll_unlock_wake
 	subq	$MUTEX, %rdi
 	jmp	7b
 
-9:	addq	$MUTEX, %rdi
-	callq	__lll_mutex_unlock_wake
+9:	movl	PRIVATE(%rdi), %esi
+	addq	$MUTEX, %rdi
+	xorl	$LLL_SHARED, %esi
+	callq	__lll_unlock_wake
 	jmp	10b
 	.size	pthread_barrier_wait,.-pthread_barrier_wait
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S
index 006de2696e..0c619bf271 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S
@@ -1,4 +1,5 @@
-/* Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007
+   Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
 
@@ -19,23 +20,11 @@
 
 #include <sysdep.h>
 #include <shlib-compat.h>
+#include <lowlevellock.h>
 #include <lowlevelcond.h>
 #include <kernel-features.h>
 #include <pthread-pi-defines.h>
-
-#ifdef UP
-# define LOCK
-#else
-# define LOCK lock
-#endif
-
-#define SYS_futex		202
-#define FUTEX_WAIT		0
-#define FUTEX_WAKE		1
-#define FUTEX_REQUEUE		3
-#define FUTEX_CMP_REQUEUE	4
-
-#define EINVAL			22
+#include <pthread-errnos.h>
 
 
 	.text
@@ -115,7 +104,9 @@ __pthread_cond_broadcast:
 #if cond_lock != 0
 	addq	$cond_lock, %rdi
 #endif
-	callq	__lll_mutex_lock_wait
+	/* XYZ */
+	movl	$LLL_SHARED, %esi
+	callq	__lll_lock_wait
 #if cond_lock != 0
 	subq	$cond_lock, %rdi
 #endif
@@ -123,12 +114,16 @@ __pthread_cond_broadcast:
 
 	/* Unlock in loop requires wakeup.  */
 5:	addq	$cond_lock-cond_futex, %rdi
-	callq	__lll_mutex_unlock_wake
+	/* XYZ */
+	movl	$LLL_SHARED, %esi
+	callq	__lll_unlock_wake
 	jmp	6b
 
 	/* Unlock in loop requires wakeup.  */
 7:	addq	$cond_lock-cond_futex, %rdi
-	callq	__lll_mutex_unlock_wake
+	/* XYZ */
+	movl	$LLL_SHARED, %esi
+	callq	__lll_unlock_wake
 	subq	$cond_lock-cond_futex, %rdi
 	jmp	8b
 
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S
index 3dbb9e81e3..2fc9d1fad7 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
 
@@ -19,23 +19,10 @@
 
 #include <sysdep.h>
 #include <shlib-compat.h>
+#include <lowlevellock.h>
 #include <lowlevelcond.h>
 #include <kernel-features.h>
-
-#ifdef UP
-# define LOCK
-#else
-# define LOCK lock
-#endif
-
-#define SYS_futex		202
-#define FUTEX_WAIT		0
-#define FUTEX_WAKE		1
-#define FUTEX_WAKE_OP		5
-
-#define FUTEX_OP_CLEAR_WAKE_IF_GT_ONE	((4 << 24) | 1)
-
-#define EINVAL			22
+#include <pthread-errnos.h>
 
 
 	.text
@@ -111,7 +98,9 @@ __pthread_cond_signal:
 #if cond_lock != 0
 	addq	$cond_lock, %rdi
 #endif
-	callq	__lll_mutex_lock_wait
+	/* XYZ */
+	movl	$LLL_SHARED, %esi
+	callq	__lll_lock_wait
 #if cond_lock != 0
 	subq	$cond_lock, %rdi
 #endif
@@ -120,7 +109,9 @@ __pthread_cond_signal:
 	/* Unlock in loop requires wakeup.  */
 5:
 	movq	%r8, %rdi
-	callq	__lll_mutex_unlock_wake
+	/* XYZ */
+	movl	$LLL_SHARED, %esi
+	callq	__lll_unlock_wake
 	jmp	6b
 	.size	__pthread_cond_signal, .-__pthread_cond_signal
 versioned_symbol (libpthread, __pthread_cond_signal, pthread_cond_signal,
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
index 2afd601b8c..003069fb6b 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
@@ -19,19 +19,10 @@
 
 #include <sysdep.h>
 #include <shlib-compat.h>
+#include <lowlevellock.h>
 #include <lowlevelcond.h>
 #include <pthread-errnos.h>
 
-#ifdef UP
-# define LOCK
-#else
-# define LOCK lock
-#endif
-
-#define SYS_futex		202
-#define FUTEX_WAIT		0
-#define FUTEX_WAKE		1
-
 /* For the calculation see asm/vsyscall.h.  */
 #define VSYSCALL_ADDR_vgettimeofday	0xffffffffff600000
 
@@ -301,7 +292,9 @@ __pthread_cond_timedwait:
 #if cond_lock != 0
 	addq	$cond_lock, %rdi
 #endif
-	callq	__lll_mutex_lock_wait
+	/* XYZ */
+	movl	$LLL_SHARED, %esi
+	callq	__lll_lock_wait
 	jmp	2b
 
 	/* Unlock in loop requires wakeup.  */
@@ -309,7 +302,9 @@ __pthread_cond_timedwait:
 #if cond_lock != 0
 	addq	$cond_lock, %rdi
 #endif
-	callq	__lll_mutex_unlock_wake
+	/* XYZ */
+	movl	$LLL_SHARED, %esi
+	callq	__lll_unlock_wake
 	jmp	4b
 
 	/* Locking in loop failed.  */
@@ -317,7 +312,9 @@ __pthread_cond_timedwait:
 #if cond_lock != 0
 	addq	$cond_lock, %rdi
 #endif
-	callq	__lll_mutex_lock_wait
+	/* XYZ */
+	movl	$LLL_SHARED, %esi
+	callq	__lll_lock_wait
 #if cond_lock != 0
 	subq	$cond_lock, %rdi
 #endif
@@ -328,7 +325,9 @@ __pthread_cond_timedwait:
 #if cond_lock != 0
 	addq	$cond_lock, %rdi
 #endif
-	callq	__lll_mutex_unlock_wake
+	/* XYZ */
+	movl	$LLL_SHARED, %esi
+	callq	__lll_unlock_wake
 	jmp	11b
 
 	/* The initial unlocking of the mutex failed.  */
@@ -345,7 +344,9 @@ __pthread_cond_timedwait:
 #if cond_lock != 0
 	addq	$cond_lock, %rdi
 #endif
-	callq	__lll_mutex_unlock_wake
+	/* XYZ */
+	movl	$LLL_SHARED, %esi
+	callq	__lll_unlock_wake
 
 17:	movq	(%rsp), %rax
 	jmp	18b
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
index aaad22e020..34ef2c7b77 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
@@ -19,19 +19,10 @@
 
 #include <sysdep.h>
 #include <shlib-compat.h>
+#include <lowlevellock.h>
 #include <lowlevelcond.h>
 #include <tcb-offsets.h>
 
-#ifdef UP
-# define LOCK
-#else
-# define LOCK lock
-#endif
-
-#define SYS_futex		202
-#define FUTEX_WAIT		0
-#define FUTEX_WAKE		1
-
 
 	.text
 
@@ -58,7 +49,9 @@ __condvar_cleanup:
 #if cond_lock != 0
 	addq	$cond_lock, %rdi
 #endif
-	callq	__lll_mutex_lock_wait
+	/* XYZ */
+	movl	$LLL_SHARED, %esi
+	callq	__lll_lock_wait
 #if cond_lock != 0
 	subq	$cond_lock, %rdi
 #endif
@@ -105,7 +98,9 @@ __condvar_cleanup:
 #if cond_lock != 0
 	addq	$cond_lock, %rdi
 #endif
-	callq	__lll_mutex_unlock_wake
+	/* XYZ */
+	movl	$LLL_SHARED, %esi
+	callq	__lll_unlock_wake
 
 	/* Wake up all waiters to make sure no signal gets lost.  */
 2:	testq	%r12, %r12
@@ -307,7 +302,9 @@ __pthread_cond_wait:
 #if cond_lock != 0
 	addq	$cond_lock, %rdi
 #endif
-	callq	__lll_mutex_lock_wait
+	/* XYZ */
+	movl	$LLL_SHARED, %esi
+	callq	__lll_lock_wait
 	jmp	2b
 
 	/* Unlock in loop requires wakeup.  */
@@ -315,7 +312,9 @@ __pthread_cond_wait:
 #if cond_lock != 0
 	addq	$cond_lock, %rdi
 #endif
-	callq	__lll_mutex_unlock_wake
+	/* XYZ */
+	movl	$LLL_SHARED, %esi
+	callq	__lll_unlock_wake
 	jmp	4b
 
 	/* Locking in loop failed.  */
@@ -323,7 +322,9 @@ __pthread_cond_wait:
 #if cond_lock != 0
 	addq	$cond_lock, %rdi
 #endif
-	callq	__lll_mutex_lock_wait
+	/* XYZ */
+	movl	$LLL_SHARED, %esi
+	callq	__lll_lock_wait
 #if cond_lock != 0
 	subq	$cond_lock, %rdi
 #endif
@@ -334,7 +335,9 @@ __pthread_cond_wait:
 #if cond_lock != 0
 	addq	$cond_lock, %rdi
 #endif
-	callq	__lll_mutex_unlock_wake
+	/* XYZ */
+	movl	$LLL_SHARED, %esi
+	callq	__lll_unlock_wake
 	jmp	11b
 
 	/* The initial unlocking of the mutex failed.  */
@@ -351,7 +354,9 @@ __pthread_cond_wait:
 #if cond_lock != 0
 	addq	$cond_lock, %rdi
 #endif
-	callq	__lll_mutex_unlock_wake
+	/* XYZ */
+	movl	$LLL_SHARED, %esi
+	callq	__lll_unlock_wake
 
 13:	movq	%r10, %rax
 	jmp	14b
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_once.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_once.S
index 7740c599d1..c3b2b51bdb 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_once.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_once.S
@@ -19,17 +19,8 @@
 
 #include <kernel-features.h>
 #include <tcb-offsets.h>
+#include <lowlevellock.h>
 
-#ifndef UP
-# define LOCK lock
-#else
-# define LOCK
-#endif
-
-#define SYS_futex		202
-#define FUTEX_WAIT		0
-#define FUTEX_WAKE		1
-#define FUTEX_PRIVATE_FLAG	128
 
 	.comm	__fork_generation, 4, 4
 
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_rdlock.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_rdlock.S
index e8d257502f..80fedd4ab1 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_rdlock.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_rdlock.S
@@ -18,23 +18,12 @@
    02111-1307 USA.  */
 
 #include <sysdep.h>
+#include <lowlevellock.h>
 #include <lowlevelrwlock.h>
 #include <pthread-errnos.h>
 #include <kernel-features.h>
 
 
-#define SYS_futex		202
-#define FUTEX_WAIT		0
-#define FUTEX_WAKE		1
-#define FUTEX_PRIVATE_FLAG	128
-
-#ifndef UP
-# define LOCK lock
-#else
-# define LOCK
-#endif
-
-
 	.text
 
 	.globl	__pthread_rwlock_rdlock
@@ -123,11 +112,11 @@ __pthread_rwlock_rdlock:
 	movq	%rdx, %rax
 	retq
 
-1:
+1:	movl	PSHARED(%rdi), %esi
 #if MUTEX != 0
 	addq	$MUTEX, %rdi
 #endif
-	callq	__lll_mutex_lock_wait
+	callq	__lll_lock_wait
 #if MUTEX != 0
 	subq	$MUTEX, %rdi
 #endif
@@ -139,11 +128,11 @@ __pthread_rwlock_rdlock:
 	movl	$EDEADLK, %edx
 	jmp	9b
 
-6:
+6:	movl	PSHARED(%rdi), %esi
 #if MUTEX != 0
 	addq	$MUTEX, %rdi
 #endif
-	callq	__lll_mutex_unlock_wake
+	callq	__lll_unlock_wake
 #if MUTEX != 0
 	subq	$MUTEX, %rdi
 #endif
@@ -159,21 +148,21 @@ __pthread_rwlock_rdlock:
 	movl	$EAGAIN, %edx
 	jmp	9b
 
-10:
+10:	movl	PSHARED(%rdi), %esi
 #if MUTEX != 0
 	addq	$MUTEX, %rdi
 #endif
-	callq	__lll_mutex_unlock_wake
+	callq	__lll_unlock_wake
 #if MUTEX != 0
 	subq	$MUTEX, %rdi
 #endif
 	jmp	11b
 
-12:
+12:	movl	PSHARED(%rdi), %esi
 #if MUTEX == 0
 	addq	$MUTEX, %rdi
 #endif
-	callq	__lll_mutex_lock_wait
+	callq	__lll_lock_wait
 #if MUTEX != 0
 	subq	$MUTEX, %rdi
 #endif
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_timedrdlock.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_timedrdlock.S
index f703eeb29f..0fa2714426 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_timedrdlock.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_timedrdlock.S
@@ -18,27 +18,15 @@
    02111-1307 USA.  */
 
 #include <sysdep.h>
+#include <lowlevellock.h>
 #include <lowlevelrwlock.h>
 #include <pthread-errnos.h>
 #include <kernel-features.h>
 
 
-#define SYS_futex		202
-#define FUTEX_WAIT		0
-#define FUTEX_WAKE		1
-#define FUTEX_PRIVATE_FLAG	128
-
 /* For the calculation see asm/vsyscall.h.  */
 #define VSYSCALL_ADDR_vgettimeofday	0xffffffffff600000
 
-
-#ifndef UP
-# define LOCK lock
-#else
-# define LOCK
-#endif
-
-
 	.text
 
 	.globl	pthread_rwlock_timedrdlock
@@ -172,11 +160,11 @@ pthread_rwlock_timedrdlock:
 	popq	%r12
 	retq
 
-1:
+1:	movl	PSHARED(%rdi), %esi
 #if MUTEX != 0
 	addq	$MUTEX, %rdi
 #endif
-	callq	__lll_mutex_lock_wait
+	callq	__lll_lock_wait
 	jmp	2b
 
 14:	cmpl	%fs:TID, %eax
@@ -184,13 +172,13 @@ pthread_rwlock_timedrdlock:
 	movl	$EDEADLK, %edx
 	jmp	9b
 
-6:
+6:	movl	PSHARED(%r12), %esi
 #if MUTEX == 0
 	movq	%r12, %rdi
 #else
 	leal	MUTEX(%r12), %rdi
 #endif
-	callq	__lll_mutex_unlock_wake
+	callq	__lll_unlock_wake
 	jmp	7b
 
 	/* Overflow.  */
@@ -203,22 +191,22 @@ pthread_rwlock_timedrdlock:
 	movl	$EAGAIN, %edx
 	jmp	9b
 
-10:
+10:	movl	PSHARED(%r12), %esi
 #if MUTEX == 0
 	movq	%r12, %rdi
 #else
 	leaq	MUTEX(%r12), %rdi
 #endif
-	callq	__lll_mutex_unlock_wake
+	callq	__lll_unlock_wake
 	jmp	11b
 
-12:
+12:	movl	PSHARED(%r12), %esi
 #if MUTEX == 0
 	movq	%r12, %rdi
 #else
 	leaq	MUTEX(%r12), %rdi
 #endif
-	callq	__lll_mutex_lock_wait
+	callq	__lll_lock_wait
 	jmp	13b
 
 16:	movq	$-ETIMEDOUT, %rdx
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_timedwrlock.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_timedwrlock.S
index fc3bf3d69e..1e43933ca9 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_timedwrlock.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_timedwrlock.S
@@ -18,26 +18,15 @@
    02111-1307 USA.  */
 
 #include <sysdep.h>
+#include <lowlevellock.h>
 #include <lowlevelrwlock.h>
 #include <pthread-errnos.h>
 #include <kernel-features.h>
 
 
-#define SYS_futex		202
-#define FUTEX_WAIT		0
-#define FUTEX_WAKE		1
-#define FUTEX_PRIVATE_FLAG	128
-
 /* For the calculation see asm/vsyscall.h.  */
 #define VSYSCALL_ADDR_vgettimeofday	0xffffffffff600000
 
-#ifndef UP
-# define LOCK lock
-#else
-# define LOCK
-#endif
-
-
 	.text
 
 	.globl	pthread_rwlock_timedwrlock
@@ -168,11 +157,11 @@ pthread_rwlock_timedwrlock:
 	popq	%r12
 	retq
 
-1:
+1:	movl	PSHARED(%rdi), %esi
 #if MUTEX != 0
 	addq	$MUTEX, %rdi
 #endif
-	callq	__lll_mutex_lock_wait
+	callq	__lll_lock_wait
 	jmp	2b
 
 14:	cmpl	%fs:TID, %eax
@@ -180,13 +169,13 @@ pthread_rwlock_timedwrlock:
 20:	movl	$EDEADLK, %edx
 	jmp	9b
 
-6:
+6:	movl	PSHARED(%r12), %esi
 #if MUTEX == 0
 	movq	%r12, %rdi
 #else
 	leal	MUTEX(%r12), %rdi
 #endif
-	callq	__lll_mutex_unlock_wake
+	callq	__lll_unlock_wake
 	jmp	7b
 
 	/* Overflow.  */
@@ -194,22 +183,22 @@ pthread_rwlock_timedwrlock:
 	movl	$EAGAIN, %edx
 	jmp	9b
 
-10:
+10:	movl	PSHARED(%r12), %esi
 #if MUTEX == 0
 	movq	%r12, %rdi
 #else
 	leaq	MUTEX(%r12), %rdi
 #endif
-	callq	__lll_mutex_unlock_wake
+	callq	__lll_unlock_wake
 	jmp	11b
 
-12:
+12:	movl	PSHARED(%r12), %esi
 #if MUTEX == 0
 	movq	%r12, %rdi
 #else
 	leaq	MUTEX(%r12), %rdi
 #endif
-	callq	__lll_mutex_lock_wait
+	callq	__lll_lock_wait
 	jmp	13b
 
 16:	movq	$-ETIMEDOUT, %rdx
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_unlock.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_unlock.S
index c4597f60f7..cf7f607d9c 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_unlock.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_unlock.S
@@ -18,22 +18,11 @@
    02111-1307 USA.  */
 
 #include <sysdep.h>
+#include <lowlevellock.h>
 #include <lowlevelrwlock.h>
 #include <kernel-features.h>
 
 
-#define SYS_futex		202
-#define FUTEX_WAIT		0
-#define FUTEX_WAKE		1
-#define FUTEX_PRIVATE_FLAG	128
-
-#ifndef UP
-# define LOCK lock
-#else
-# define LOCK
-#endif
-
-
 	.text
 
 	.globl	__pthread_rwlock_unlock
@@ -107,28 +96,28 @@ __pthread_rwlock_unlock:
 4:	xorl	%eax, %eax
 	retq
 
-1:
+1:	movl	PSHARED(%rdi), %esi
 #if MUTEX != 0
 	addq	$MUTEX, %rdi
 #endif
-	callq	__lll_mutex_lock_wait
+	callq	__lll_lock_wait
 #if MUTEX != 0
 	subq	$MUTEX, %rdi
 #endif
 	jmp	2b
 
-3:
+3:	movl	PSHARED(%rdi), %esi
 #if MUTEX != 0
 	addq	$MUTEX, %rdi
 #endif
-	callq	__lll_mutex_unlock_wake
+	callq	__lll_unlock_wake
 	jmp	4b
 
-7:
+7:	movl	PSHARED(%rdi), %esi
 #if MUTEX != 0
 	addq	$MUTEX, %rdi
 #endif
-	callq	__lll_mutex_unlock_wake
+	callq	__lll_unlock_wake
 	jmp	8b
 
 	.size	__pthread_rwlock_unlock,.-__pthread_rwlock_unlock
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_wrlock.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_wrlock.S
index 7f65726849..209c0e9a94 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_wrlock.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_wrlock.S
@@ -18,23 +18,12 @@
    02111-1307 USA.  */
 
 #include <sysdep.h>
+#include <lowlevellock.h>
 #include <lowlevelrwlock.h>
 #include <pthread-errnos.h>
 #include <kernel-features.h>
 
 
-#define SYS_futex		202
-#define FUTEX_WAIT		0
-#define FUTEX_WAKE		1
-#define FUTEX_PRIVATE_FLAG	128
-
-#ifndef UP
-# define LOCK lock
-#else
-# define LOCK
-#endif
-
-
 	.text
 
 	.globl	__pthread_rwlock_wrlock
@@ -121,11 +110,11 @@ __pthread_rwlock_wrlock:
 	movq	%rdx, %rax
 	retq
 
-1:
+1:	movl	PSHARED(%rdi), %esi
 #if MUTEX != 0
 	addq	$MUTEX, %rdi
 #endif
-	callq	__lll_mutex_lock_wait
+	callq	__lll_lock_wait
 #if MUTEX != 0
 	subq	$MUTEX, %rdi
 #endif
@@ -136,32 +125,32 @@ __pthread_rwlock_wrlock:
 	movl	$EDEADLK, %edx
 	jmp	9b
 
-6:
+6:	movl	PSHARED(%rdi), %esi
 #if MUTEX != 0
 	addq	$MUTEX, %rdi
 #endif
-	callq	__lll_mutex_unlock_wake
+	callq	__lll_unlock_wake
 	jmp	7b
 
 4:	decl	WRITERS_QUEUED(%rdi)
 	movl	$EAGAIN, %edx
 	jmp	9b
 
-10:
+10:	movl	PSHARED(%rdi), %esi
 #if MUTEX != 0
 	addq	$MUTEX, %rdi
 #endif
-	callq	__lll_mutex_unlock_wake
+	callq	__lll_unlock_wake
 #if MUTEX != 0
 	subq	$MUTEX, %rdi
 #endif
 	jmp	11b
 
-12:
+12:	movl	PSHARED(%rdi), %esi
 #if MUTEX != 0
 	addq	$MUTEX, %rdi
 #endif
-	callq	__lll_mutex_lock_wait
+	callq	__lll_lock_wait
 #if MUTEX != 0
 	subq	$MUTEX, %rdi
 #endif
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_post.S b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_post.S
index 4919c11fd2..adbbcdfa71 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_post.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_post.S
@@ -18,19 +18,11 @@
    02111-1307 USA.  */
 
 #include <sysdep.h>
+#include <lowlevellock.h>
 #include <shlib-compat.h>
 #include <pthread-errnos.h>
 #include <structsem.h>
 
-#ifndef UP
-# define LOCK lock
-#else
-# define
-#endif
-
-#define SYS_futex		202
-#define FUTEX_WAKE		1
-
 
 	.text
 
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S
index 4068a1b6b8..88e99cf6a1 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S
@@ -18,23 +18,15 @@
    02111-1307 USA.  */
 
 #include <sysdep.h>
+#include <lowlevellock.h>
 #include <shlib-compat.h>
 #include <pthread-errnos.h>
 #include <structsem.h>
 
-#ifndef UP
-# define LOCK lock
-#else
-# define
-#endif
-
-#define SYS_futex		202
-#define FUTEX_WAIT		0
 
 /* For the calculation see asm/vsyscall.h.  */
 #define VSYSCALL_ADDR_vgettimeofday	0xffffffffff600000
 
-
 	.text
 
 	.globl	sem_timedwait
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_trywait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_trywait.S
index 643090f065..a5719cadda 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_trywait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_trywait.S
@@ -18,15 +18,10 @@
    02111-1307 USA.  */
 
 #include <sysdep.h>
+#include <lowlevellock.h>
 #include <shlib-compat.h>
 #include <pthread-errnos.h>
 
-#ifndef UP
-# define LOCK lock
-#else
-# define
-#endif
-
 	.text
 
 	.globl	sem_trywait
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S
index e099ede029..5320a91e19 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S
@@ -18,19 +18,11 @@
    02111-1307 USA.  */
 
 #include <sysdep.h>
+#include <lowlevellock.h>
 #include <shlib-compat.h>
 #include <pthread-errnos.h>
 #include <structsem.h>
 
-#ifndef UP
-# define LOCK lock
-#else
-# define
-#endif
-
-#define SYS_futex		202
-#define FUTEX_WAIT		0
-
 
 	.text