about summary refs log tree commit diff
path: root/nptl/sysdeps/unix
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 2007-06-17 16:42:55 +0000
committer Ulrich Drepper <drepper@redhat.com> 2007-06-17 16:42:55 +0000
commit 339dbf0e392567a5f14acc81cf94f997b8262b98 (patch)
tree 1edba81b692119c53a53c5963f40eb0781681ecc /nptl/sysdeps/unix
parent e0d4a4e53c831995fbb5ab4a17ea07ddc9b5ab52 (diff)
download glibc-339dbf0e392567a5f14acc81cf94f997b8262b98.tar.gz
glibc-339dbf0e392567a5f14acc81cf94f997b8262b98.tar.xz
glibc-339dbf0e392567a5f14acc81cf94f997b8262b98.zip
* sysdeps/unix/sysv/linux/sh/pthread_rwlock_rdlock.S:
	(__pthread_rwlock_rdlock): Don't use instructions that are not
	available on SH-3/4.
	* sysdeps/unix/sysv/linux/sh/pthread_rwlock_wrlock.S:
	(__pthread_rwlock_wrlock): Likewise.
	* sysdeps/unix/sysv/linux/sh/pthread_rwlock_timedrdlock.S:
	(pthread_rwlock_timedrdlock): Likewise.
	* sysdeps/unix/sysv/linux/sh/pthread_rwlock_timedwrlock.S:
	(pthread_rwlock_timedwrlock): Likewise.
	* sysdeps/unix/sysv/linux/sh/pthread_rwlock_unlock.S:
	(__pthread_rwlock_unlock): Likewise.

2007-06-10  Kaz Kojima  <kkojima@rr.iij4u.or.jp>

	* sysdeps/sh/tcb-offsets.sym: Add PRIVATE_FUTEX.
	* sysdeps/unix/sysv/linux/sh/bits/pthreadtypes.h: Include endian.h.
	Split __flags into __flags, __shared, __pad1 and __pad2.
	* sysdeps/unix/sysv/linux/sh/libc-lowlevellock.S: Use private
        futexes if they are available.
	* sysdeps/unix/sysv/linux/sh/lowlevellock.S: Adjust so that the
	changes in libc-lowlevellock.S allow using private futexes.
	* sysdeps/unix/sysv/linux/sh/lowlevellock.h: Define
	FUTEX_PRIVATE_FLAG.  Add additional parameter to lll_futex_wait,
	lll_futex_timed_wait and lll_futex_wake.  Change lll_futex_wait
	to call lll_futex_timed_wait.  Add lll_private_futex_wait,
	lll_private_futex_timed_wait and lll_private_futex_wake.
	(lll_robust_mutex_unlock): Fix typo.
	* sysdeps/unix/sysv/linux/sh/pthread_barrier_wait.S: Use private
        field in futex command setup.
	* sysdeps/unix/sysv/linux/sh/pthread_cond_timedwait.S: Use
	COND_NWAITERS_SHIFT instead of COND_CLOCK_BITS.
	* sysdeps/unix/sysv/linux/sh/pthread_cond_wait.S: Likewise.
	* sysdeps/unix/sysv/linux/sh/pthread_once.S: Use private futexes
        if they are available.  Remove clear_once_control.
	* sysdeps/unix/sysv/linux/sh/pthread_rwlock_rdlock.S: Use private
	futexes if they are available.
	* sysdeps/unix/sysv/linux/sh/pthread_rwlock_timedrdlock.S: Likewise.
	* sysdeps/unix/sysv/linux/sh/pthread_rwlock_timedwrlock.S: Likewise.
	* sysdeps/unix/sysv/linux/sh/pthread_rwlock_unlock.S: Likewise.
	* sysdeps/unix/sysv/linux/sh/pthread_rwlock_wrlock.S: Likewise.
	* sysdeps/unix/sysv/linux/sh/sem_post.S: Add private futex support.
	Wake only when there are waiters.
	* sysdeps/unix/sysv/linux/sh/sem_wait.S: Add private futex
	support.  Indicate that there are waiters.  Remove unnecessary
        extra cancellation test.
	* sysdeps/unix/sysv/linux/sh/sem_timedwait.S: Likewise.  Remove
	left-over duplication of __sem_wait_cleanup.
Diffstat (limited to 'nptl/sysdeps/unix')
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/sh/bits/pthreadtypes.h 19
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/sh/libc-lowlevellock.S 30
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/sh/lowlevellock.S 31
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/sh/lowlevellock.h 85
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/sh/pthread_barrier_wait.S 10
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/sh/pthread_cond_timedwait.S 14
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/sh/pthread_cond_wait.S 12
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/sh/pthread_once.S 72
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/sh/pthread_rwlock_rdlock.S 33
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/sh/pthread_rwlock_timedrdlock.S 32
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/sh/pthread_rwlock_timedwrlock.S 29
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/sh/pthread_rwlock_unlock.S 27
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/sh/pthread_rwlock_wrlock.S 29
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/sh/sem_post.S 15
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/sh/sem_timedwait.S 254
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/sh/sem_wait.S 256
16 files changed, 748 insertions, 200 deletions
diff --git a/nptl/sysdeps/unix/sysv/linux/sh/bits/pthreadtypes.h b/nptl/sysdeps/unix/sysv/linux/sh/bits/pthreadtypes.h
index 969686dd5a..badcda5701 100644
--- a/nptl/sysdeps/unix/sysv/linux/sh/bits/pthreadtypes.h
+++ b/nptl/sysdeps/unix/sysv/linux/sh/bits/pthreadtypes.h
@@ -1,4 +1,5 @@
-/* Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007
+   Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
 
@@ -20,6 +21,8 @@
 #ifndef _BITS_PTHREADTYPES_H
 #define _BITS_PTHREADTYPES_H	1
 
+#include <endian.h>
+
 #define __SIZEOF_PTHREAD_ATTR_T 36
 #define __SIZEOF_PTHREAD_MUTEX_T 24
 #define __SIZEOF_PTHREAD_MUTEXATTR_T 4
@@ -127,9 +130,21 @@ typedef union
     unsigned int __writer_wakeup;
     unsigned int __nr_readers_queued;
     unsigned int __nr_writers_queued;
+#if __BYTE_ORDER == __BIG_ENDIAN
+    unsigned char __pad1;
+    unsigned char __pad2;
+    unsigned char __shared;
+    /* FLAGS must stay at this position in the structure to maintain
+       binary compatibility.  */
+    unsigned char __flags;
+#else
     /* FLAGS must stay at this position in the structure to maintain
        binary compatibility.  */
-    unsigned int __flags;
+    unsigned char __flags;
+    unsigned char __shared;
+    unsigned char __pad1;
+    unsigned char __pad2;
+#endif
     pthread_t __writer;
   } __data;
   char __size[__SIZEOF_PTHREAD_RWLOCK_T];
diff --git a/nptl/sysdeps/unix/sysv/linux/sh/libc-lowlevellock.S b/nptl/sysdeps/unix/sysv/linux/sh/libc-lowlevellock.S
index 94a24b46ee..be85ab7414 100644
--- a/nptl/sysdeps/unix/sysv/linux/sh/libc-lowlevellock.S
+++ b/nptl/sysdeps/unix/sysv/linux/sh/libc-lowlevellock.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003 Free Software Foundation, Inc.
+/* Copyright (C) 2003, 2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,4 +16,32 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
+#include <kernel-features.h>
+
+/* All locks in libc are private.  Use the kernel feature if possible.  */
+#define FUTEX_PRIVATE_FLAG	128
+#ifdef __ASSUME_PRIVATE_FUTEX
+# define FUTEX_WAIT		(0 | FUTEX_PRIVATE_FLAG)
+# define FUTEX_WAKE		(1 | FUTEX_PRIVATE_FLAG)
+#else
+# define LOAD_FUTEX_WAIT(reg,tmp) \
+	stc	gbr, tmp	; \
+	mov.w	99f, reg	; \
+	add	reg, tmp 	; \
+	bra	98f		; \
+	 mov.l	@tmp, reg	; \
+99:	.word	PRIVATE_FUTEX - TLS_PRE_TCB_SIZE ; \
+98:
+	
+# define LOAD_FUTEX_WAKE(reg,tmp) \
+	stc	gbr, tmp	; \
+	mov.w	99f, reg	; \
+	add	reg, tmp 	; \
+	mov.l	@tmp, reg	; \
+	bra	98f		; \
+	 mov	#FUTEX_WAKE, tmp ; \
+99:	.word	PRIVATE_FUTEX - TLS_PRE_TCB_SIZE ; \
+98:	or	tmp, reg
+#endif
+
 #include "lowlevellock.S"
diff --git a/nptl/sysdeps/unix/sysv/linux/sh/lowlevellock.S b/nptl/sysdeps/unix/sysv/linux/sh/lowlevellock.S
index 3b0b66d436..b10903bcd7 100644
--- a/nptl/sysdeps/unix/sysv/linux/sh/lowlevellock.S
+++ b/nptl/sysdeps/unix/sysv/linux/sh/lowlevellock.S
@@ -24,8 +24,24 @@
 
 #define SYS_gettimeofday	__NR_gettimeofday
 #define SYS_futex		240
-#define FUTEX_WAIT		0
-#define FUTEX_WAKE		1
+#ifndef FUTEX_WAIT
+# define FUTEX_WAIT		0
+# define FUTEX_WAKE		1
+#endif
+
+#ifndef LOAD_FUTEX_WAIT
+# if FUTEX_WAIT == 0
+#  define LOAD_FUTEX_WAIT(reg,tmp) \
+	xor	reg, reg
+# else
+#  define LOAD_FUTEX_WAIT(reg,tmp) \
+	mov	#FUTEX_WAIT, reg; \
+	extu.b	reg, reg
+# endif
+# define LOAD_FUTEX_WAKE(reg,tmp) \
+	mov	#FUTEX_WAKE, reg; \
+	extu.b	reg, reg
+#endif
 
 
 	.globl	__lll_mutex_lock_wait
@@ -40,7 +56,7 @@ __lll_mutex_lock_wait:
 	mov	r4, r6
 	mov	r5, r8
 	mov	#0, r7		/* No timeout.  */
-	mov	#FUTEX_WAIT, r5
+	LOAD_FUTEX_WAIT (r5, r0)
 
 	mov	#2, r4
 	cmp/eq	r4, r6
@@ -133,7 +149,7 @@ __lll_mutex_timedlock_wait:
 	bt	8f
 
 	mov	r8, r4
-	mov	#FUTEX_WAIT, r5
+	LOAD_FUTEX_WAIT (r5, r0)
 	mov	r10, r6
 	mov	r15, r7
 	mov	#SYS_futex, r3
@@ -192,7 +208,7 @@ __lll_mutex_timedlock_wait:
 	.align	5
 	cfi_startproc
 __lll_mutex_unlock_wake:
-	mov	#FUTEX_WAKE, r5
+	LOAD_FUTEX_WAKE (r5, r0)
 	mov	#1, r6		/* Wake one thread.  */
 	mov	#0, r7
 	mov.l	r7, @r4		/* Stores 0.  */
@@ -261,7 +277,10 @@ __lll_timedwait_tid:
 	bt	4f
 
 	mov	r8, r4
-	mov	#FUTEX_WAIT, r5
+	/* XXX The kernel so far uses global futex for the wakeup at
+	   all times.  */
+	mov	#0, r5
+	extu.b	r5, r5
 	mov	r2, r6
 	mov	r15, r7
 	mov	#SYS_futex, r3
diff --git a/nptl/sysdeps/unix/sysv/linux/sh/lowlevellock.h b/nptl/sysdeps/unix/sysv/linux/sh/lowlevellock.h
index be8d403e26..3092b27642 100644
--- a/nptl/sysdeps/unix/sysv/linux/sh/lowlevellock.h
+++ b/nptl/sysdeps/unix/sysv/linux/sh/lowlevellock.h
@@ -22,6 +22,7 @@
 #include <time.h>
 #include <sys/param.h>
 #include <bits/pthreadtypes.h>
+#include <kernel-features.h>
 
 #define SYS_futex		240
 #define FUTEX_WAIT		0
@@ -29,6 +30,14 @@
 #define FUTEX_LOCK_PI		6
 #define FUTEX_UNLOCK_PI		7
 #define FUTEX_TRYLOCK_PI	8
+#define FUTEX_PRIVATE_FLAG	128
+
+
+/* Values for 'private' parameter of locking macros.  Yes, the
+   definition seems to be backwards.  But it is not.  The bit will be
+   reversed before passing to the system call.  */
+#define LLL_PRIVATE    0
+#define LLL_SHARED     FUTEX_PRIVATE_FLAG
 
 
 /* Initializer for compatibility lock.  */
@@ -251,7 +260,7 @@ extern int __lll_mutex_unlock_wake (int *__futex) attribute_hidden;
 		and %2,%0\n\
 		mov.l %0,@%1\n\
 	     1: mov r1,r15"\
-		: "=&r" (__result) : "r" (__futex), "r" (FUTEX_TID_MASK) \
+		: "=&r" (__result) : "r" (__futex), "r" (FUTEX_WAITERS) \
 		: "r0", "r1", "memory");	\
 	    if (__result) \
 	      __lll_mutex_unlock_wake (__futex); })
@@ -269,7 +278,7 @@ extern int __lll_mutex_unlock_wake (int *__futex) attribute_hidden;
 	     1: mov r1,r15"\
 		: "=&r" (__ignore) : "r" (__futex), "r" (FUTEX_OWNER_DIED) \
 		: "r0", "r1", "memory");	\
-	    lll_futex_wake (__futex, 1); })
+	    lll_futex_wake (__futex, 1, 0); })
 
 #define lll_mutex_islocked(futex) \
   (futex != 0)
@@ -294,15 +303,57 @@ typedef int lll_lock_t;
 	trapa #0x14"
 # endif
 
-#define lll_futex_wait(futex, val) \
+#define lll_futex_wait(futex, val, private) \
+  lll_futex_timed_wait (futex, val, NULL, private)
+
+
+#define lll_futex_timed_wait(futex, val, timeout, private) \
   ({									      \
     int __status;							      \
     register unsigned long __r3 asm ("r3") = SYS_futex;			      \
     register unsigned long __r4 asm ("r4") = (unsigned long) (futex);	      \
     register unsigned long __r5 asm ("r5") = FUTEX_WAIT;		      \
     register unsigned long __r6 asm ("r6") = (unsigned long) (val);	      \
+    register unsigned long __r7 asm ("r7") = (timeout);			      \
+    __asm __volatile (SYSCALL_WITH_INST_PAD				      \
+		      : "=z" (__status)					      \
+		      : "r" (__r3), "r" (__r4), "r" (__r5),		      \
+			"r" (__r6), "r" (__r7)				      \
+		      : "memory", "t");					      \
+    __status;								      \
+  })
+
+
+#define lll_futex_wake(futex, nr, private) \
+  do {									      \
+    int __ignore;							      \
+    register unsigned long __r3 asm ("r3") = SYS_futex;			      \
+    register unsigned long __r4 asm ("r4") = (unsigned long) (futex);	      \
+    register unsigned long __r5 asm ("r5") = FUTEX_WAKE;		      \
+    register unsigned long __r6 asm ("r6") = (unsigned long) (nr);	      \
     register unsigned long __r7 asm ("r7") = 0;				      \
     __asm __volatile (SYSCALL_WITH_INST_PAD				      \
+		      : "=z" (__ignore)					      \
+		      : "r" (__r3), "r" (__r4), "r" (__r5),		      \
+			"r" (__r6), "r" (__r7)				      \
+		      : "memory", "t");					      \
+  } while (0)
+
+
+#define lll_private_futex_wait(futex, val) \
+  lll_private_futex_timed_wait (futex, val, NULL)
+
+
+#ifdef __ASSUME_PRIVATE_FUTEX
+# define lll_private_futex_timed_wait(futex, val, timeout) \
+  ({									      \
+    int __status;							      \
+    register unsigned long __r3 asm ("r3") = SYS_futex;			      \
+    register unsigned long __r4 asm ("r4") = (unsigned long) (futex);	      \
+    register unsigned long __r5 asm ("r5") = FUTEX_WAIT | FUTEX_PRIVATE_FLAG; \
+    register unsigned long __r6 asm ("r6") = (unsigned long) (val);	      \
+    register unsigned long __r7 asm ("r7") = (timeout);			      \
+    __asm __volatile (SYSCALL_WITH_INST_PAD				      \
 		      : "=z" (__status)					      \
 		      : "r" (__r3), "r" (__r4), "r" (__r5),		      \
 			"r" (__r6), "r" (__r7)				      \
@@ -311,14 +362,32 @@ typedef int lll_lock_t;
   })
 
 
-#define lll_futex_timed_wait(futex, val, timeout) \
+# define lll_private_futex_wake(futex, nr) \
+  do {									      \
+    int __ignore;							      \
+    register unsigned long __r3 asm ("r3") = SYS_futex;			      \
+    register unsigned long __r4 asm ("r4") = (unsigned long) (futex);	      \
+    register unsigned long __r5 asm ("r5") = FUTEX_WAKE | FUTEX_PRIVATE_FLAG; \
+    register unsigned long __r6 asm ("r6") = (unsigned long) (nr);	      \
+    register unsigned long __r7 asm ("r7") = 0;				      \
+    __asm __volatile (SYSCALL_WITH_INST_PAD				      \
+		      : "=z" (__ignore)					      \
+		      : "r" (__r3), "r" (__r4), "r" (__r5),		      \
+			"r" (__r6), "r" (__r7)				      \
+		      : "memory", "t");					      \
+  } while (0)
+
+
+#else
+# define lll_private_futex_timed_wait(futex, val, timeout) \
   ({									      \
     int __status;							      \
     register unsigned long __r3 asm ("r3") = SYS_futex;			      \
     register unsigned long __r4 asm ("r4") = (unsigned long) (futex);	      \
-    register unsigned long __r5 asm ("r5") = FUTEX_WAIT;		      \
+    register unsigned long __r5 asm ("r5");				      \
     register unsigned long __r6 asm ("r6") = (unsigned long) (val);	      \
     register unsigned long __r7 asm ("r7") = (timeout);			      \
+    __r5 = THREAD_GETMEM (THREAD_SELF, header.private_futex);		      \
     __asm __volatile (SYSCALL_WITH_INST_PAD				      \
 		      : "=z" (__status)					      \
 		      : "r" (__r3), "r" (__r4), "r" (__r5),		      \
@@ -328,7 +397,7 @@ typedef int lll_lock_t;
   })
 
 
-#define lll_futex_wake(futex, nr) \
+# define lll_private_futex_wake(futex, nr) \
   do {									      \
     int __ignore;							      \
     register unsigned long __r3 asm ("r3") = SYS_futex;			      \
@@ -336,12 +405,14 @@ typedef int lll_lock_t;
     register unsigned long __r5 asm ("r5") = FUTEX_WAKE;		      \
     register unsigned long __r6 asm ("r6") = (unsigned long) (nr);	      \
     register unsigned long __r7 asm ("r7") = 0;				      \
+    __r5 |= THREAD_GETMEM (THREAD_SELF,	header.private_futex);		      \
     __asm __volatile (SYSCALL_WITH_INST_PAD				      \
 		      : "=z" (__ignore)					      \
 		      : "r" (__r3), "r" (__r4), "r" (__r5),		      \
 			"r" (__r6), "r" (__r7)				      \
 		      : "memory", "t");					      \
   } while (0)
+#endif
 
 
 /* The states of a lock are:
@@ -367,7 +438,7 @@ extern int __lll_wait_tid (int *tid) attribute_hidden;
   do {									      \
     __typeof (tid) *__tid = &(tid);					      \
     while (*__tid != 0)							      \
-      lll_futex_wait (__tid, *__tid);					      \
+      lll_futex_wait (__tid, *__tid, 0);				      \
   } while (0)
 
 extern int __lll_timedwait_tid (int *tid, const struct timespec *abstime)
diff --git a/nptl/sysdeps/unix/sysv/linux/sh/pthread_barrier_wait.S b/nptl/sysdeps/unix/sysv/linux/sh/pthread_barrier_wait.S
index 1fbb23a5a6..35dd607fc1 100644
--- a/nptl/sysdeps/unix/sysv/linux/sh/pthread_barrier_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/sh/pthread_barrier_wait.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003, 2004 Free Software Foundation, Inc.
+/* Copyright (C) 2003, 2004, 2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -65,7 +65,13 @@ pthread_barrier_wait:
 #if CURR_EVENT != 0
 	add	#CURR_EVENT, r4
 #endif
+#if FUTEX_WAIT == 0
+	mov.l	@(PRIVATE,r8), r5
+#else
 	mov	#FUTEX_WAIT, r5
+	mov.l	@(PRIVATE,r8), r0
+	or	r0, r5
+#endif
 	mov	#0, r7
 8:
 	mov	#SYS_futex, r3
@@ -118,6 +124,8 @@ pthread_barrier_wait:
 #endif
 	mov	#0, r7
 	mov	#FUTEX_WAKE, r5
+	mov.l	@(PRIVATE,r8), r0
+	or	r0, r5
 	mov	#SYS_futex, r3
 	extu.b	r3, r3
 	trapa	#0x14
diff --git a/nptl/sysdeps/unix/sysv/linux/sh/pthread_cond_timedwait.S b/nptl/sysdeps/unix/sysv/linux/sh/pthread_cond_timedwait.S
index 6c782c8a76..0173cfb7d8 100644
--- a/nptl/sysdeps/unix/sysv/linux/sh/pthread_cond_timedwait.S
+++ b/nptl/sysdeps/unix/sysv/linux/sh/pthread_cond_timedwait.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003, 2004, 2006 Free Software Foundation, Inc.
+/* Copyright (C) 2003, 2004, 2006, 2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -120,7 +120,7 @@ __pthread_cond_timedwait:
 	mov.l	@(cond_futex,r8), r0
 	add	r2, r0
 	mov.l	r0, @(cond_futex,r8)
-	mov	#(1 << clock_bits), r2
+	mov	#(1 << nwaiters_shift), r2
 	mov.l	@(cond_nwaiters,r8), r0
 	add	r2, r0
 	mov.l	r0, @(cond_nwaiters,r8)
@@ -136,7 +136,7 @@ __pthread_cond_timedwait:
 #ifdef __NR_clock_gettime
 	/* Get the clock number.	 */
 	mov.l	@(cond_nwaiters,r8), r4
-	mov	#((1 << clock_bits) - 1), r0
+	mov	#((1 << nwaiters_shift) - 1), r0
 	and	r0, r4
 	/* Only clocks 0 and 1 are allowed.  Both are handled in the
 	   kernel.  */
@@ -323,7 +323,7 @@ __pthread_cond_timedwait:
 	mov.l	r1,@(woken_seq+4,r8)
 
 24:
-	mov	#(1 << clock_bits), r2
+	mov	#(1 << nwaiters_shift), r2
 	mov.l	@(cond_nwaiters,r8),r0
 	sub	r2, r0
 	mov.l	r0,@(cond_nwaiters,r8)
@@ -335,7 +335,7 @@ __pthread_cond_timedwait:
 	not	r0, r0
 	cmp/eq	#0, r0
 	bf/s	25f
-	 mov	#((1 << clock_bits) - 1), r1
+	 mov	#((1 << nwaiters_shift) - 1), r1
 	not	r1, r1
 	mov.l	@(cond_nwaiters,r8),r0
 	tst	r1, r0
@@ -557,7 +557,7 @@ __condvar_tw_cleanup:
 	mov.l	r1,@(woken_seq+4,r8)
 
 3:
-	mov	#(1 << clock_bits), r2
+	mov	#(1 << nwaiters_shift), r2
 	mov.l	@(cond_nwaiters,r8),r0
 	sub	r2, r0
 	mov.l	r0,@(cond_nwaiters,r8)
@@ -570,7 +570,7 @@ __condvar_tw_cleanup:
 	not	r0, r0
 	cmp/eq	#0, r0
 	bf/s	4f
-	 mov	#((1 << clock_bits) - 1), r1
+	 mov	#((1 << nwaiters_shift) - 1), r1
 	not	r1, r1
 	mov.l	@(cond_nwaiters,r8),r0
 	tst	r1, r0
diff --git a/nptl/sysdeps/unix/sysv/linux/sh/pthread_cond_wait.S b/nptl/sysdeps/unix/sysv/linux/sh/pthread_cond_wait.S
index 6c59f3e6c0..5eb332e484 100644
--- a/nptl/sysdeps/unix/sysv/linux/sh/pthread_cond_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/sh/pthread_cond_wait.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003, 2004, 2006 Free Software Foundation, Inc.
+/* Copyright (C) 2003, 2004, 2006, 2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -107,7 +107,7 @@ __pthread_cond_wait:
 	mov.l	@(cond_futex,r8),r0
 	add	r2, r0
 	mov.l	r0,@(cond_futex,r8)
-	mov	#(1 << clock_bits), r2
+	mov	#(1 << nwaiters_shift), r2
 	mov.l	@(cond_nwaiters,r8), r0
 	add	r2, r0
 	mov.l	r0, @(cond_nwaiters,r8)
@@ -197,7 +197,7 @@ __pthread_cond_wait:
 	mov.l	r1,@(woken_seq+4,r8)
 
 16:
-	mov	#(1 << clock_bits), r2
+	mov	#(1 << nwaiters_shift), r2
 	mov.l	@(cond_nwaiters,r8),r0
 	sub	r2, r0
 	mov.l	r0,@(cond_nwaiters,r8)
@@ -209,7 +209,7 @@ __pthread_cond_wait:
 	not	r0, r0
 	cmp/eq	#0, r0
 	bf/s	17f
-	 mov	#((1 << clock_bits) - 1), r1
+	 mov	#((1 << nwaiters_shift) - 1), r1
 	not	r1, r1
 	mov.l	@(cond_nwaiters,r8),r0
 	tst	r1, r0
@@ -421,7 +421,7 @@ __condvar_w_cleanup:
 	mov.l	r1,@(woken_seq+4,r8)
 
 3:
-	mov	#(1 << clock_bits), r2
+	mov	#(1 << nwaiters_shift), r2
 	mov.l	@(cond_nwaiters,r8),r0
 	sub	r2, r0
 	mov.l	r0,@(cond_nwaiters,r8)
@@ -434,7 +434,7 @@ __condvar_w_cleanup:
 	not	r0, r0
 	cmp/eq	#0, r0
 	bf/s	4f
-	 mov	#((1 << clock_bits) - 1), r1
+	 mov	#((1 << nwaiters_shift) - 1), r1
 	not	r1, r1
 	mov.l	@(cond_nwaiters,r8),r0
 	tst	r1, r0
diff --git a/nptl/sysdeps/unix/sysv/linux/sh/pthread_once.S b/nptl/sysdeps/unix/sysv/linux/sh/pthread_once.S
index 02af56b4c7..3d694d8376 100644
--- a/nptl/sysdeps/unix/sysv/linux/sh/pthread_once.S
+++ b/nptl/sysdeps/unix/sysv/linux/sh/pthread_once.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003, 2004 Free Software Foundation, Inc.
+/* Copyright (C) 2003, 2004, 2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -18,11 +18,13 @@
 
 #include <unwindbuf.h>
 #include <sysdep.h>
+#include <kernel-features.h>
 #include "lowlevel-atomic.h"
 
-#define SYS_futex	240
-#define FUTEX_WAIT	0
-#define FUTEX_WAKE	1
+#define SYS_futex		240
+#define FUTEX_WAIT		0
+#define FUTEX_WAKE		1
+#define FUTEX_PRIVATE_FLAG	128
 
 	.comm	__fork_generation, 4, 4
 
@@ -95,7 +97,19 @@ __pthread_once:
 	bf	3f	/* Different for generation -> run initializer.  */
 
 	/* Somebody else got here first.  Wait.  */
-	mov	#FUTEX_WAIT, r5
+#if __ASSUME_PRIVATE_FUTEX
+	mov	#(FUTEX_PRIVATE_FLAG|FUTEX_WAIT), r5
+	extu.b	r5, r5
+#else
+	stc	gbr, r1
+	mov.w	.Lpfoff, r2
+	add	r2, r1
+	mov.l	@r1, r5
+# if FUTEX_WAIT != 0
+	mov	#FUTEX_WAIT, r0
+	or	r0, r5
+# endif
+#endif
 	mov	r3, r6
 	mov	#0, r7
 	mov	#SYS_futex, r3
@@ -157,7 +171,17 @@ __pthread_once:
 	INC (@r9, r2)
 	/* Wake up all other threads.  */
 	mov	r9, r4
-	mov	#FUTEX_WAKE, r5
+#if __ASSUME_PRIVATE_FUTEX
+	mov	#(FUTEX_PRIVATE_FLAG|FUTEX_WAKE), r5
+	extu.b	r5, r5
+#else
+	stc	gbr, r1
+	mov.w	.Lpfoff, r2
+	add	r2, r1
+	mov.l	@r1, r5
+	mov	#FUTEX_WAKE, r0
+	or	r0, r5
+#endif
 	mov	#-1, r6
 	shlr	r6		/* r6 = 0x7fffffff */
 	mov	#0, r7
@@ -192,7 +216,17 @@ __pthread_once:
 	mov	#0, r7
 	mov.l	r7, @r9
 	mov	r9, r4
-	mov	#FUTEX_WAKE, r5
+#if __ASSUME_PRIVATE_FUTEX
+	mov	#(FUTEX_PRIVATE_FLAG|FUTEX_WAKE), r5
+#else
+	stc	gbr, r1
+	mov.w	.Lpfoff, r2
+	add	r2, r1
+	mov.l	@r1, r5
+	mov	#FUTEX_WAKE, r0
+	or	r0, r5
+#endif
+	extu.b	r5, r5
 	mov	#-1, r6
 	shlr	r6		/* r6 = 0x7fffffff */
 	mov	#SYS_futex, r3
@@ -208,6 +242,10 @@ __pthread_once:
 	sleep
 	cfi_endproc
 
+#if !__ASSUME_PRIVATE_FUTEX
+.Lpfoff:
+	.word	PRIVATE_FUTEX - TLS_PRE_TCB_SIZE
+#endif
 	.align	2
 .Lsigsetjmp:
 	.long	__sigsetjmp@PLT-(.Lsigsetjmp0-.)
@@ -224,23 +262,3 @@ __pthread_once_internal = __pthread_once
 
 	.globl	pthread_once
 pthread_once = __pthread_once
-
-
-	.type	clear_once_control,@function
-	.align	5
-clear_once_control:
-	mov	#0, r0
-	mov.l	r0, @r4
-
-	mov	#FUTEX_WAKE, r5
-	mov	#-1, r6
-	shlr	r6		/* r6 = 0x7fffffff */
-	mov	#0, r7
-	mov	#SYS_futex, r3
-	extu.b	r3, r3
-	trapa	#0x14
-	SYSCALL_INST_PAD
-
-	rts
-	 nop
-	.size	clear_once_control,.-clear_once_control
diff --git a/nptl/sysdeps/unix/sysv/linux/sh/pthread_rwlock_rdlock.S b/nptl/sysdeps/unix/sysv/linux/sh/pthread_rwlock_rdlock.S
index f64c7217c9..f1795131f8 100644
--- a/nptl/sysdeps/unix/sysv/linux/sh/pthread_rwlock_rdlock.S
+++ b/nptl/sysdeps/unix/sysv/linux/sh/pthread_rwlock_rdlock.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003 Free Software Foundation, Inc.
+/* Copyright (C) 2003, 2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -20,11 +20,13 @@
 #include <lowlevelrwlock.h>
 #include <pthread-errnos.h>
 #include <tcb-offsets.h>
+#include <kernel-features.h>
 #include "lowlevel-atomic.h"
 
 #define SYS_futex		240
 #define FUTEX_WAIT		0
 #define FUTEX_WAKE		1
+#define FUTEX_PRIVATE_FLAG	128
 
 
 	.text
@@ -55,7 +57,8 @@ __pthread_rwlock_rdlock:
 	mov.l	@(WRITERS_QUEUED,r8), r0
 	tst	r0, r0
 	bt	5f
-	mov.l	@(FLAGS,r8), r0
+	mov	#FLAGS, r0
+	mov.b	@(r0,r8), r0
 	tst	r0, r0
 	bt	5f
 3:
@@ -75,9 +78,28 @@ __pthread_rwlock_rdlock:
 	tst	r2, r2
 	bf	10f
 11:
+#if __ASSUME_PRIVATE_FUTEX
+	mov	#PSHARED, r0
+	mov.b	@(r0,r8), r5
+	mov	#(FUTEX_PRIVATE_FLAG|FUTEX_WAIT), r0
+	xor	r0, r5
+	extu.b	r5, r5
+#else
+	mov	#PSHARED, r0
+	mov.b	@(r0,r8), r5
+	extu.b	r5, r5
+# if FUTEX_WAIT != 0
+	mov	#FUTEX_WAIT, r0
+	or	r0, r5
+# endif
+	stc	gbr, r1
+	mov.w	.Lpfoff, r2
+	add	r2, r1
+	mov.l	@r1, r0
+	xor	r0, r5
+#endif
 	mov	r8, r4
 	add	#READERS_WAKEUP, r4
-	mov	#FUTEX_WAIT, r5
 	mov	r9, r6
 	mov	#0, r7
 	mov	#SYS_futex, r3
@@ -124,6 +146,11 @@ __pthread_rwlock_rdlock:
 	rts
 	 mov	r3, r0
 
+#if !__ASSUME_PRIVATE_FUTEX
+.Lpfoff:
+	.word	PRIVATE_FUTEX - TLS_PRE_TCB_SIZE
+#endif
+
 1:
 	mov	r8, r5
 #if MUTEX != 0
diff --git a/nptl/sysdeps/unix/sysv/linux/sh/pthread_rwlock_timedrdlock.S b/nptl/sysdeps/unix/sysv/linux/sh/pthread_rwlock_timedrdlock.S
index 633a14b1aa..e87326e9bd 100644
--- a/nptl/sysdeps/unix/sysv/linux/sh/pthread_rwlock_timedrdlock.S
+++ b/nptl/sysdeps/unix/sysv/linux/sh/pthread_rwlock_timedrdlock.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003 Free Software Foundation, Inc.
+/* Copyright (C) 2003, 2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -20,12 +20,14 @@
 #include <lowlevelrwlock.h>
 #include <pthread-errnos.h>
 #include <tcb-offsets.h>
+#include <kernel-features.h>
 #include "lowlevel-atomic.h"
 
 #define SYS_gettimeofday	__NR_gettimeofday
 #define SYS_futex		240
 #define FUTEX_WAIT		0
 #define FUTEX_WAKE		1
+#define FUTEX_PRIVATE_FLAG	128
 
 
 	.text
@@ -59,7 +61,8 @@ pthread_rwlock_timedrdlock:
 	mov.l	@(WRITERS_QUEUED,r8), r0
 	tst	r0, r0
 	bt	5f
-	mov.l	@(FLAGS,r8), r0
+	mov	#FLAGS, r0
+	mov.b	@(r0,r8), r0
 	tst	r0, r0
 	bt	5f
 3:
@@ -117,7 +120,26 @@ pthread_rwlock_timedrdlock:
 
 	/* Futex call.  */
 	mov	r15, r7
-	mov	#FUTEX_WAIT, r5
+#if __ASSUME_PRIVATE_FUTEX
+	mov	#PSHARED, r0
+	mov.b	@(r0,r8), r5
+	mov	#(FUTEX_PRIVATE_FLAG|FUTEX_WAIT), r0
+	xor	r0, r5
+	extu.b	r5, r5
+#else
+	mov	#PSHARED, r0
+	mov.b	@(r0,r8), r5
+	extu.b	r5, r5
+# if FUTEX_WAIT != 0
+	mov	#FUTEX_WAIT, r0
+	or	r0, r5
+# endif
+	stc	gbr, r1
+	mov.w	.Lpfoff, r2
+	add	r2, r1
+	mov.l	@r1, r0
+	xor	r0, r5
+#endif
 	mov	r10, r6
 	mov	r8, r4
 	add	#READERS_WAKEUP, r4
@@ -176,6 +198,10 @@ pthread_rwlock_timedrdlock:
 	rts
 	 mov	r3, r0
 
+#if !__ASSUME_PRIVATE_FUTEX
+.Lpfoff:
+	.word	PRIVATE_FUTEX - TLS_PRE_TCB_SIZE
+#endif
 	.align	2
 .L1k0:
 	.long	1000
diff --git a/nptl/sysdeps/unix/sysv/linux/sh/pthread_rwlock_timedwrlock.S b/nptl/sysdeps/unix/sysv/linux/sh/pthread_rwlock_timedwrlock.S
index 29e29b6f65..18641fe9df 100644
--- a/nptl/sysdeps/unix/sysv/linux/sh/pthread_rwlock_timedwrlock.S
+++ b/nptl/sysdeps/unix/sysv/linux/sh/pthread_rwlock_timedwrlock.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003 Free Software Foundation, Inc.
+/* Copyright (C) 2003, 2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -20,12 +20,14 @@
 #include <lowlevelrwlock.h>
 #include <pthread-errnos.h>
 #include <tcb-offsets.h>
+#include <kernel-features.h>
 #include "lowlevel-atomic.h"
 
 #define SYS_gettimeofday	__NR_gettimeofday
 #define SYS_futex		240
 #define FUTEX_WAIT		0
 #define FUTEX_WAKE		1
+#define FUTEX_PRIVATE_FLAG    128
 
 
 	.text
@@ -114,7 +116,26 @@ pthread_rwlock_timedwrlock:
 
 	/* Futex call.  */
 	mov	r15, r7
-	mov	#FUTEX_WAIT, r5
+#if __ASSUME_PRIVATE_FUTEX
+	mov	#PSHARED, r0
+	mov.b	@(r0,r8), r5
+	mov	#(FUTEX_PRIVATE_FLAG|FUTEX_WAIT), r0
+	xor	r0, r5
+	extu.b	r5, r5
+#else
+	mov	#PSHARED, r0
+	mov.b	@(r0,r8), r5
+	extu.b	r5, r5
+# if FUTEX_WAIT != 0
+	mov	#FUTEX_WAIT, r0
+	or	r0, r5
+# endif
+	stc	gbr, r1
+	mov.w	.Lpfoff, r2
+	add	r2, r1
+	mov.l	@r1, r0
+	xor	r0, r5
+#endif
 	mov	r10, r6
 	mov	r8, r4
 	add	#WRITERS_WAKEUP, r4
@@ -175,6 +196,10 @@ pthread_rwlock_timedwrlock:
 	rts
 	 mov	r3, r0
 
+#if !__ASSUME_PRIVATE_FUTEX
+.Lpfoff:
+	.word	PRIVATE_FUTEX - TLS_PRE_TCB_SIZE
+#endif
 .L1k1:
 	.word	1000
 	.align	2
diff --git a/nptl/sysdeps/unix/sysv/linux/sh/pthread_rwlock_unlock.S b/nptl/sysdeps/unix/sysv/linux/sh/pthread_rwlock_unlock.S
index 172689bec3..df4df60d7f 100644
--- a/nptl/sysdeps/unix/sysv/linux/sh/pthread_rwlock_unlock.S
+++ b/nptl/sysdeps/unix/sysv/linux/sh/pthread_rwlock_unlock.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003 Free Software Foundation, Inc.
+/* Copyright (C) 2003, 2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -18,11 +18,13 @@
 
 #include <sysdep.h>
 #include <lowlevelrwlock.h>
+#include <kernel-features.h>
 #include "lowlevel-atomic.h"
 
 #define SYS_futex		240
 #define FUTEX_WAIT		0
 #define FUTEX_WAKE		1
+#define FUTEX_PRIVATE_FLAG	128
 
 
 	.text
@@ -87,7 +89,24 @@ __pthread_rwlock_unlock:
 	bf	7f
 
 8:
-	mov	#FUTEX_WAKE, r5
+#if __ASSUME_PRIVATE_FUTEX
+	mov	#PSHARED, r0
+	mov.b	@(r0,r8), r5
+	mov	#(FUTEX_PRIVATE_FLAG|FUTEX_WAKE), r0
+	xor	r0, r5
+	extu.b	r5, r5
+#else
+	mov	#PSHARED, r0
+	mov.b	@(r0,r8), r5
+	extu.b	r5, r5
+	mov	#FUTEX_WAKE, r0
+	or	r0, r5
+	stc	gbr, r1
+	mov.w	.Lpfoff, r2
+	add	r2, r1
+	mov.l	@r1, r0
+	xor	r0, r5
+#endif
 	mov	#SYS_futex, r3
 	mov	#0, r7
 	extu.b	r3, r3
@@ -154,6 +173,10 @@ __pthread_rwlock_unlock:
 	bra	8b
 	 mov.l	@r15+, r4
 
+#if !__ASSUME_PRIVATE_FUTEX
+.Lpfoff:
+	.word	PRIVATE_FUTEX - TLS_PRE_TCB_SIZE
+#endif
 	.align	2
 .Lwait8:	
 	.long	__lll_mutex_lock_wait-.Lwait8b
diff --git a/nptl/sysdeps/unix/sysv/linux/sh/pthread_rwlock_wrlock.S b/nptl/sysdeps/unix/sysv/linux/sh/pthread_rwlock_wrlock.S
index 995d823e80..13a2fda9fd 100644
--- a/nptl/sysdeps/unix/sysv/linux/sh/pthread_rwlock_wrlock.S
+++ b/nptl/sysdeps/unix/sysv/linux/sh/pthread_rwlock_wrlock.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003 Free Software Foundation, Inc.
+/* Copyright (C) 2003, 2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -20,11 +20,13 @@
 #include <lowlevelrwlock.h>
 #include <pthread-errnos.h>
 #include <tcb-offsets.h>
+#include <kernel-features.h>
 #include "lowlevel-atomic.h"
 
 #define SYS_futex		240
 #define FUTEX_WAIT		0
 #define FUTEX_WAKE		1
+#define FUTEX_PRIVATE_FLAG	128
 
 
 	.text
@@ -74,7 +76,26 @@ __pthread_rwlock_wrlock:
 11:
 	mov	r8, r4
 	add	#WRITERS_WAKEUP, r4
-	mov	#FUTEX_WAIT, r5
+#if __ASSUME_PRIVATE_FUTEX
+	mov	#PSHARED, r0
+	mov.b	@(r0,r8), r5
+	mov	#(FUTEX_PRIVATE_FLAG|FUTEX_WAIT), r0
+	xor	r0, r5
+	extu.b	r5, r5
+#else
+	mov	#PSHARED, r0
+	mov.b	@(r0,r8), r5
+	extu.b	r5, r5
+# if FUTEX_WAIT != 0
+	mov	#FUTEX_WAIT, r0
+	or	r0, r5
+# endif
+	stc	gbr, r1
+	mov.w	.Lpfoff, r2
+	add	r2, r1
+	mov.l	@r1, r0
+	xor	r0, r5
+#endif
 	mov	r9, r6
 	mov	#0, r7
 	mov	#SYS_futex, r3
@@ -152,6 +173,10 @@ __pthread_rwlock_wrlock:
 	bra	7b
 	 mov	#0, r3
 
+#if !__ASSUME_PRIVATE_FUTEX
+.Lpfoff:
+	.word	PRIVATE_FUTEX - TLS_PRE_TCB_SIZE
+#endif
 .Ltidoff:
 	.word	TID - TLS_PRE_TCB_SIZE
 
diff --git a/nptl/sysdeps/unix/sysv/linux/sh/sem_post.S b/nptl/sysdeps/unix/sysv/linux/sh/sem_post.S
index 9bc12da7e0..a63233feab 100644
--- a/nptl/sysdeps/unix/sysv/linux/sh/sem_post.S
+++ b/nptl/sysdeps/unix/sysv/linux/sh/sem_post.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003, 2004 Free Software Foundation, Inc.
+/* Copyright (C) 2003, 2004, 2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -19,6 +19,7 @@
 #include <sysdep.h>
 #include <shlib-compat.h>
 #include <pthread-errnos.h>
+#include <structsem.h>
 #include "lowlevel-atomic.h"
 
 
@@ -34,11 +35,14 @@
 	.align	5
 __new_sem_post:
 	mov	#1, r3
-	XADD (r3, @r4, r2)
-
+	XADD (r3, @(VALUE,r4), r2)
+	mov.l	@(NWAITERS,r4), r2
+	tst	r2, r2
+	bt	2f
 	mov	#FUTEX_WAKE, r5
-	mov	r2, r6
-	add	#1, r6
+	mov.l	@(PRIVATE,r4), r1
+	or	r1, r5
+	mov	#1, r6
 	mov	#0, r7
 	mov	#SYS_futex, r3
 	extu.b	r3, r3
@@ -47,6 +51,7 @@ __new_sem_post:
 
 	cmp/pz	r0
 	bf	1f
+2:
 	rts
 	 mov	#0, r0
 
diff --git a/nptl/sysdeps/unix/sysv/linux/sh/sem_timedwait.S b/nptl/sysdeps/unix/sysv/linux/sh/sem_timedwait.S
index acb7d0f78b..e5e064b3a5 100644
--- a/nptl/sysdeps/unix/sysv/linux/sh/sem_timedwait.S
+++ b/nptl/sysdeps/unix/sysv/linux/sh/sem_timedwait.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003, 2004 Free Software Foundation, Inc.
+/* Copyright (C) 2003, 2004, 2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -20,32 +20,25 @@
 #include <shlib-compat.h>
 #include <pthread-errnos.h>
 #include <tcb-offsets.h>
+#include <structsem.h>
 #include "lowlevel-atomic.h"
 
 
 #define SYS_gettimeofday	__NR_gettimeofday
 #define SYS_futex		240
 #define FUTEX_WAIT		0
-#define FUTEX_WAKE		1
+
+#if VALUE != 0
+# error "code needs to be rewritten for VALUE != 0"
+#endif
 
 	.text
 
 	.globl	sem_timedwait
 	.type	sem_timedwait,@function
 	.align	5
-	cfi_startproc
 sem_timedwait:
-	/* First check for cancellation.  */
-	stc	gbr, r0
-	mov.w	.Lchand, r1
-	mov.l	@(r0,r1), r0
-	mov	#0xf9, r1
-	and	r1, r0
-	cmp/eq	#8, r0
-	bf	0f
-	bra	10f
-	 stc	gbr, r0
-0:
+.LSTARTCODE:
 	mov.l	@r4, r0
 2:
 	tst	r0, r0
@@ -62,22 +55,17 @@ sem_timedwait:
 1:
 	/* Check whether the timeout value is valid.  */
 	mov.l	r8, @-r15
-	cfi_adjust_cfa_offset(4)
-	cfi_rel_offset (r8, 0)
+.Lpush_r8:
 	mov.l	r9, @-r15
-	cfi_adjust_cfa_offset(4)
-	cfi_rel_offset (r9, 0)
+.Lpush_r9:
 	mov.l	r10, @-r15
-	cfi_adjust_cfa_offset(4)
-	cfi_rel_offset (r10, 0)
+.Lpush_r10:
 	mov.l	r12, @-r15
-	cfi_adjust_cfa_offset(4)
-	cfi_rel_offset (r12, 0)
+.Lpush_r12:
 	sts.l	pr, @-r15
-	cfi_adjust_cfa_offset(4)
-	cfi_rel_offset (pr, 0)
+.Lpush_pr:
 	add	#-8, r15
-	cfi_adjust_cfa_offset(8)
+.Lalloc:
 	mov	r4, r8
 	mov	r5, r9
 
@@ -87,13 +75,9 @@ sem_timedwait:
 	cmp/hs	r1, r0
 	bt/s	6f
 	 mov	#EINVAL, r0
-7:
-	mov.l	.Lenable0, r1
-	bsrf	r1
-	 nop
-.Lenable0b:
-	mov	r0, r10
+	INC (@(NWAITERS,r8),r2)
 
+7:
 	/* Compute relative timeout.  */
 	mov	r15, r4
 	mov	#0, r5
@@ -124,9 +108,21 @@ sem_timedwait:
 	mov.l	r2, @r15
 	mov.l	r3, @(4,r15)
 
-	/* Futex call.  */
+.LcleanupSTART:
+	mov.l	.Lenable0, r1
+	bsrf	r1
+	 nop
+.Lenable0b:
+	mov	r0, r10
+
 	mov	r8, r4
-	mov	#FUTEX_WAIT, r5
+#if FUTEX_WAIT == 0
+	mov.l	@(PRIVATE,r8), r5
+#else
+	mov.l	@(PRIVATE,r8), r5
+	mov	#FUTEX_WAIT, r0
+	or	r0, r5
+#endif
 	mov	#0, r6
 	mov	r15, r7
 	mov	#SYS_futex, r3
@@ -140,6 +136,7 @@ sem_timedwait:
 	 mov	r0, r10
 .Ldisable0b:	
 	mov	r10, r0
+.LcleanupEND:
 
 	tst	r0, r0
 	bt	9f
@@ -158,6 +155,10 @@ sem_timedwait:
 	bf/s	8b
 	 mov	r2, r0
 
+	DEC (@(NWAITERS,r8), r2)
+	mov	#0, r0
+
+10:
 	add	#8, r15
 	lds.l	@r15+, pr
 	mov.l	@r15+, r12
@@ -165,12 +166,12 @@ sem_timedwait:
 	mov.l	@r15+, r9
 	mov.l	@r15+, r8
 	rts
-	 mov	#0, r0
+	 nop
 
 3:
 	neg	r0, r0
 6:
-	mov	r0, r8
+	mov	r0, r10
 	mova	.Lgot2, r0
 	mov.l	.Lgot2, r12
 	add	r0, r12
@@ -180,46 +181,20 @@ sem_timedwait:
 	stc	gbr, r1
 	mov.l	@(r0, r12), r0
 	add	r1, r0
-	mov.l	r8, @r0
+	mov.l	r10, @r0
 #else
 	mov.l	.Lerrloc2, r1
 	bsrf	r1
 	 nop
 .Lerrloc2b:
-	mov.l	r8, @r0
+	mov.l	r10, @r0
 #endif
-	add	#8, r15
-	lds.l	@r15+, pr
-	mov.l	@r15+, r12
-	mov.l	@r15+, r10
-	mov.l	@r15+, r9
-	mov.l	@r15+, r8
-	rts
+	DEC (@(NWAITERS,r8), r2)	/* NOTE(review): label 6 is also the target of the EINVAL branch (bt/s 6f), taken before the INC of NWAITERS -- confirm the count is not dropped without a matching increment.  */
+	bra	10b			/* Leave through the shared epilogue at 10; the delay slot loads the -1 return value.  */
 	 mov	#-1, r0
 
-10:
-	/* Canceled.  */
-	mov.w	.Lresult, r1
-	mov	#-1, r2
-	mov.l	r2, @(r0,r1)
-	mov.w	.Lchand, r0
-	or.b	#0x10, @(r0,gbr)
-	stc	gbr, r0
-	mov.w	.Lclbuf, r1
-	mov.l	.Lunwind, r2
-	braf	r2
-	 mov.l	@(r0,r1), r4
-.Lunwindb:
-	cfi_endproc
-
 .L1k:
 	.word	1000
-.Lchand:
-	.word	CANCELHANDLING - TLS_PRE_TCB_SIZE
-.Lresult:
-	.word	RESULT - TLS_PRE_TCB_SIZE
-.Lclbuf:
-	.word	CLEANUP_JMP_BUF - TLS_PRE_TCB_SIZE
 	.align	2
 .L1g:
 	.long	1000000000
@@ -236,6 +211,151 @@ sem_timedwait:
 	.long	__pthread_enable_asynccancel-.Lenable0b
 .Ldisable0:
 	.long	__pthread_disable_asynccancel-.Ldisable0b
-.Lunwind:
-	.long	HIDDEN_JUMPTARGET (__pthread_unwind)-.Lunwindb
 	.size	sem_timedwait,.-sem_timedwait
+
+	.type	sem_wait_cleanup,@function	/* Landing pad listed in the call-site table of .gcc_except_table.  */
+sem_wait_cleanup:
+ 	DEC (@(NWAITERS,r8), r2)	/* Undo the INC of NWAITERS done before the wait; assumes r8 still holds the semaphore pointer here -- TODO confirm.  */
+.LcallUR:
+	mov.l	.Lresume, r1		/* r1 = literal stored at .Lresume.  */
+#ifdef PIC
+	add	r12, r1			/* GOTOFF literal + r12.  NOTE(review): no r12 (GOT base) setup is visible on the path into this landing pad -- confirm.  */
+#endif
+	jsr	@r1			/* Call _Unwind_Resume through r1.  */
+	 nop				/* Branch delay slot.  */
+	sleep				/* Presumably never reached; _Unwind_Resume is not expected to return.  */
+
+	.align	2
+.Lresume:
+#ifdef PIC
+	.long	_Unwind_Resume@GOTOFF	/* GOT-relative offset; r12 is added at run time.  */
+#else
+	.long	_Unwind_Resume		/* Absolute address.  */
+#endif
+.LENDCODE:				/* End of the code range described by the FDE.  */
+	.size	sem_wait_cleanup,.-sem_wait_cleanup
+
+
+	.section .gcc_except_table,"a",@progbits
+.LexceptSTART:
+	.byte	0xff				! @LPStart format (omit)
+	.byte	0xff				! @TType format (omit)
+	.byte	0x01				! call-site format
+						! DW_EH_PE_uleb128
+	.uleb128 .Lcstend-.Lcstbegin	/* Size in bytes of the call-site table that follows.  */
+.Lcstbegin:
+	.uleb128 .LcleanupSTART-.LSTARTCODE	/* Record 1: region start, relative to .LSTARTCODE.  */
+	.uleb128 .LcleanupEND-.LcleanupSTART	/* Record 1: region length.  */
+	.uleb128 sem_wait_cleanup-.LSTARTCODE	/* Record 1: landing pad is sem_wait_cleanup.  */
+	.uleb128  0				/* Record 1: action 0.  */
+	.uleb128 .LcallUR-.LSTARTCODE	/* Record 2: region start at the _Unwind_Resume call sequence.  */
+	.uleb128 .LENDCODE-.LcallUR	/* Record 2: region length, up to .LENDCODE.  */
+	.uleb128 0			/* Record 2: landing pad 0, i.e. none.  */
+	.uleb128  0				/* Record 2: action 0.  */
+.Lcstend:
+
+
+	.section .eh_frame,"a",@progbits
+.LSTARTFRAME:
+	.ualong	.LENDCIE-.LSTARTCIE		! Length of the CIE.
+.LSTARTCIE:
+	.ualong	0				! CIE ID.
+	.byte	1				! Version number.
+#ifdef SHARED
+	.string	"zPLR"				! NUL-terminated augmentation
+						! string.
+#else
+	.string	"zPL"				! NUL-terminated augmentation
+						! string.
+#endif
+	.uleb128 1				! Code alignment factor.
+	.sleb128 -4				! Data alignment factor.
+	.byte	0x11				! Return address register
+						! column.
+#ifdef SHARED
+	.uleb128 7				! Augmentation value length.
+	.byte	0x9b				! Personality: DW_EH_PE_pcrel
+						! + DW_EH_PE_sdata4
+						! + DW_EH_PE_indirect
+	.ualong	DW.ref.__gcc_personality_v0-.
+	.byte	0x1b				! LSDA Encoding: DW_EH_PE_pcrel
+						! + DW_EH_PE_sdata4.
+	.byte	0x1b				! FDE Encoding: DW_EH_PE_pcrel
+						! + DW_EH_PE_sdata4.
+#else
+	.uleb128 6				! Augmentation value length.
+	.byte	0x0				! Personality: absolute
+	.ualong	__gcc_personality_v0
+	.byte	0x0				! LSDA Encoding: absolute
+#endif
+	.byte 0x0c				! DW_CFA_def_cfa
+	.uleb128 0xf
+	.uleb128 0
+	.align 4
+.LENDCIE:
+
+	.ualong	.LENDFDE-.LSTARTFDE		! Length of the FDE.
+.LSTARTFDE:
+	.ualong	.LSTARTFDE-.LSTARTFRAME		! CIE pointer.
+#ifdef SHARED
+	.ualong	.LSTARTCODE-.			! PC-relative start address
+						! of the code.
+#else
+	.ualong	.LSTARTCODE			! Start address of the code.
+#endif
+	.ualong	.LENDCODE-.LSTARTCODE		! Length of the code.
+	.uleb128 4				! Augmentation size
+#ifdef SHARED
+	.ualong	.LexceptSTART-.
+#else
+	.ualong	.LexceptSTART
+#endif
+
+	.byte	4				! DW_CFA_advance_loc4
+	.ualong	.Lpush_r8-.LSTARTCODE
+	.byte	14				! DW_CFA_def_cfa_offset
+	.uleb128 4
+	.byte   0x88				! DW_CFA_offset r8
+        .uleb128 1
+	.byte	4				! DW_CFA_advance_loc4
+	.ualong	.Lpush_r9-.Lpush_r8
+	.byte	14				! DW_CFA_def_cfa_offset
+	.uleb128 8
+	.byte   0x89				! DW_CFA_offset r9
+        .uleb128 2
+	.byte	4				! DW_CFA_advance_loc4
+	.ualong	.Lpush_r10-.Lpush_r9
+	.byte	14				! DW_CFA_def_cfa_offset
+	.uleb128 12
+	.byte   0x8a				! DW_CFA_offset r10
+        .uleb128 3
+	.byte	4				! DW_CFA_advance_loc4
+	.ualong	.Lpush_r12-.Lpush_r10
+	.byte	14				! DW_CFA_def_cfa_offset
+	.uleb128 16
+	.byte   0x8c				! DW_CFA_offset r12
+        .uleb128 4
+	.byte	4				! DW_CFA_advance_loc4
+	.ualong	.Lpush_pr-.Lpush_r12
+	.byte	14				! DW_CFA_def_cfa_offset
+	.uleb128 20
+	.byte	0x91				! DW_CFA_offset pr
+	.uleb128 5
+	.byte	4				! DW_CFA_advance_loc4
+	.ualong	.Lalloc-.Lpush_pr
+	.byte	14				! DW_CFA_def_cfa_offset
+	.uleb128 28
+	.align	4
+.LENDFDE:
+
+
+#ifdef SHARED
+	.hidden	DW.ref.__gcc_personality_v0
+	.weak	DW.ref.__gcc_personality_v0
+	.section .gnu.linkonce.d.DW.ref.__gcc_personality_v0,"aw",@progbits
+	.align	4
+	.type	DW.ref.__gcc_personality_v0, @object
+	.size	DW.ref.__gcc_personality_v0, 4
+DW.ref.__gcc_personality_v0:
+	.long	__gcc_personality_v0
+#endif
diff --git a/nptl/sysdeps/unix/sysv/linux/sh/sem_wait.S b/nptl/sysdeps/unix/sysv/linux/sh/sem_wait.S
index 9ceb8f1c24..85ce909259 100644
--- a/nptl/sysdeps/unix/sysv/linux/sh/sem_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/sh/sem_wait.S
@@ -20,44 +20,35 @@
 #include <shlib-compat.h>
 #include <pthread-errnos.h>
 #include <tcb-offsets.h>
+#include <structsem.h>
 #include "lowlevel-atomic.h"
 
 
 #define SYS_gettimeofday	__NR_gettimeofday
 #define SYS_futex		240
 #define FUTEX_WAIT		0
-#define FUTEX_WAKE		1
+
+#if VALUE != 0
+# error "code needs to be rewritten for VALUE != 0"
+#endif
 
 	.text
 
 	.globl	__new_sem_wait
 	.type	__new_sem_wait,@function
 	.align	5
-	cfi_startproc
 __new_sem_wait:
-	/* First check for cancellation.  */
-	stc	gbr, r0
-	mov.w	.Lchand, r1
-	mov.l	@(r0,r1), r0
-	mov	#0xf9, r1
-	and	r1, r0
-	cmp/eq	#8, r0
-	bt	5f
-
+.LSTARTCODE:
 	mov.l	r8, @-r15
-	cfi_adjust_cfa_offset(4)
-	cfi_rel_offset (r8, 0)
+.Lpush_r8:
 	mov.l	r10, @-r15
-	cfi_adjust_cfa_offset(4)
-	cfi_rel_offset (r10, 0)
+.Lpush_r10:
 	mov.l	r12, @-r15
-	cfi_adjust_cfa_offset(4)
-	cfi_rel_offset (r12, 0)
+.Lpush_r12:
 	sts.l	pr, @-r15
-	cfi_adjust_cfa_offset(4)
-	cfi_rel_offset (pr, 0)
+.Lpush_pr:
 	mov	r4, r8
-3:
+
 	mov.l	@r8, r0
 2:
 	tst	r0, r0
@@ -68,10 +59,21 @@ __new_sem_wait:
 	CMPXCHG (r4, @r8, r3, r2)
 	bf/s	2b
 	 mov	r2, r0
-	bra	9f
-	 mov	#0, r0
+7:
+	mov	#0, r0
+9:
+	lds.l	@r15+, pr
+	mov.l	@r15+, r12
+	mov.l	@r15+, r10
+	rts
+	 mov.l	@r15+, r8
 
+.Lafter_ret:
 1:
+	INC (@(NWAITERS,r8),r2)
+	
+.LcleanupSTART:
+6:
 	mov.l	.Lenable0, r1
 	bsrf	r1
 	 nop
@@ -79,7 +81,13 @@ __new_sem_wait:
 	mov	r0, r10
 
 	mov	r8, r4
-	mov	#FUTEX_WAIT, r5
+#if FUTEX_WAIT == 0
+	mov.l	@(PRIVATE,r8), r5
+#else
+	mov.l	@(PRIVATE,r8), r5
+	mov	#FUTEX_WAIT, r0
+	or	r0, r5
+#endif
 	mov	#0, r6
 	mov	#0, r7
 	mov	#SYS_futex, r3
@@ -93,14 +101,35 @@ __new_sem_wait:
 	 mov	r0, r10
 .Ldisable0b:	
 	mov	r10, r0
+.LcleanupEND:
 
 	tst	r0, r0
-	bt	3b
+	bt	3f
 	cmp/eq	#-EWOULDBLOCK, r0
-	bt	3b
-	neg	r0, r0
+	bf	4f
+
+3:
+	mov.l	@r8, r0
+5:
+	tst	r0, r0
+	bt	6b
+
+	mov	r0, r3
+	mov	r0, r4
+	add	#-1, r3
+	CMPXCHG (r4, @r8, r3, r2)
+	bf/s	5b
+	 mov	r2, r0
+
+	DEC (@(NWAITERS,r8), r2)
+	bra	7b
+	 nop
 
-	mov	r0, r8
+4:
+	neg	r0, r0
+	mov	r0, r4
+	DEC (@(NWAITERS,r8), r2)
+	mov	r4, r8
 	mova	.Lgot0, r0
 	mov.l	.Lgot0, r12
 	add	r0, r12
@@ -118,36 +147,9 @@ __new_sem_wait:
 .Lerrloc0b:
 	mov.l	r8, @r0
 #endif
-	mov	#-1, r0
-9:
-	lds.l	@r15+, pr
-	mov.l	@r15+, r12
-	mov.l	@r15+, r10
-	rts
-	 mov.l	@r15+, r8
-5:
-	/* Canceled.  */
-	stc	gbr, r0
-	mov.w	.Lresult, r1
-	mov	#-1, r2
-	mov.l	r2, @(r0,r1)
-	mov.w	.Lchand, r0
-	or.b	#0x10, @(r0,gbr)
-	stc	gbr, r0
-	mov.w	.Lclbuf, r1
-	mov.l	.Lunwind, r2
-	braf	r2
-	 mov.l	@(r0,r1), r4
-.Lunwindb:
-	cfi_endproc
-
-.Lchand:
-	.word	CANCELHANDLING - TLS_PRE_TCB_SIZE
-.Lresult:
-	.word	RESULT - TLS_PRE_TCB_SIZE
-.Lclbuf:
-	.word	CLEANUP_JMP_BUF - TLS_PRE_TCB_SIZE
-	.align	2
+	bra	9b
+	 mov	#-1, r0
+
 .Lgot0:
 	.long	_GLOBAL_OFFSET_TABLE_
 #if USE___THREAD
@@ -161,7 +163,143 @@ __new_sem_wait:
 	.long	__pthread_enable_asynccancel-.Lenable0b
 .Ldisable0:
 	.long	__pthread_disable_asynccancel-.Ldisable0b
-.Lunwind:
-	.long	HIDDEN_JUMPTARGET (__pthread_unwind)-.Lunwindb
 	.size	__new_sem_wait,.-__new_sem_wait
 	versioned_symbol(libpthread, __new_sem_wait, sem_wait, GLIBC_2_1)
+
+
+	.type	sem_wait_cleanup,@function	/* Landing pad listed in the call-site table of .gcc_except_table.  */
+sem_wait_cleanup:
+ 	DEC (@(NWAITERS,r8), r2)	/* Undo the INC of NWAITERS done at label 1; assumes r8 still holds the semaphore pointer here -- TODO confirm.  */
+.LcallUR:
+	mov.l	.Lresume, r1		/* r1 = literal stored at .Lresume.  */
+#ifdef PIC
+	add	r12, r1			/* GOTOFF literal + r12.  NOTE(review): no r12 (GOT base) setup is visible on the path into this landing pad -- confirm.  */
+#endif
+	jsr	@r1			/* Call _Unwind_Resume through r1.  */
+	 nop				/* Branch delay slot.  */
+	sleep				/* Presumably never reached; _Unwind_Resume is not expected to return.  */
+
+	.align	2
+.Lresume:
+#ifdef PIC
+	.long	_Unwind_Resume@GOTOFF	/* GOT-relative offset; r12 is added at run time.  */
+#else
+	.long	_Unwind_Resume		/* Absolute address.  */
+#endif
+.LENDCODE:				/* End of the code range described by the FDE.  */
+	.size	sem_wait_cleanup,.-sem_wait_cleanup
+
+
+	.section .gcc_except_table,"a",@progbits
+.LexceptSTART:
+	.byte	0xff				! @LPStart format (omit)
+	.byte	0xff				! @TType format (omit)
+	.byte	0x01				! call-site format
+						! DW_EH_PE_uleb128
+	.uleb128 .Lcstend-.Lcstbegin	/* Size in bytes of the call-site table that follows.  */
+.Lcstbegin:
+	.uleb128 .LcleanupSTART-.LSTARTCODE	/* Record 1: region start, relative to .LSTARTCODE.  */
+	.uleb128 .LcleanupEND-.LcleanupSTART	/* Record 1: region length.  */
+	.uleb128 sem_wait_cleanup-.LSTARTCODE	/* Record 1: landing pad is sem_wait_cleanup.  */
+	.uleb128  0				/* Record 1: action 0.  */
+	.uleb128 .LcallUR-.LSTARTCODE	/* Record 2: region start at the _Unwind_Resume call sequence.  */
+	.uleb128 .LENDCODE-.LcallUR	/* Record 2: region length, up to .LENDCODE.  */
+	.uleb128 0			/* Record 2: landing pad 0, i.e. none.  */
+	.uleb128  0				/* Record 2: action 0.  */
+.Lcstend:
+
+
+	.section .eh_frame,"a",@progbits
+.LSTARTFRAME:
+	.ualong	.LENDCIE-.LSTARTCIE		! Length of the CIE.
+.LSTARTCIE:
+	.ualong	0				! CIE ID.
+	.byte	1				! Version number.
+#ifdef SHARED
+	.string	"zPLR"				! NUL-terminated augmentation
+						! string.
+#else
+	.string	"zPL"				! NUL-terminated augmentation
+						! string.
+#endif
+	.uleb128 1				! Code alignment factor.
+	.sleb128 -4				! Data alignment factor.
+	.byte	0x11				! Return address register
+						! column.
+#ifdef SHARED
+	.uleb128 7				! Augmentation value length.
+	.byte	0x9b				! Personality: DW_EH_PE_pcrel
+						! + DW_EH_PE_sdata4
+						! + DW_EH_PE_indirect
+	.ualong	DW.ref.__gcc_personality_v0-.
+	.byte	0x1b				! LSDA Encoding: DW_EH_PE_pcrel
+						! + DW_EH_PE_sdata4.
+	.byte	0x1b				! FDE Encoding: DW_EH_PE_pcrel
+						! + DW_EH_PE_sdata4.
+#else
+	.uleb128 6				! Augmentation value length.
+	.byte	0x0				! Personality: absolute
+	.ualong	__gcc_personality_v0
+	.byte	0x0				! LSDA Encoding: absolute
+#endif
+	.byte 0x0c				! DW_CFA_def_cfa
+	.uleb128 0xf
+	.uleb128 0
+	.align 4
+.LENDCIE:
+
+	.ualong	.LENDFDE-.LSTARTFDE		! Length of the FDE.
+.LSTARTFDE:
+	.ualong	.LSTARTFDE-.LSTARTFRAME		! CIE pointer.
+#ifdef SHARED
+	.ualong	.LSTARTCODE-.			! PC-relative start address
+						! of the code.
+#else
+	.ualong	.LSTARTCODE			! Start address of the code.
+#endif
+	.ualong	.LENDCODE-.LSTARTCODE		! Length of the code.
+	.uleb128 4				! Augmentation size
+#ifdef SHARED
+	.ualong	.LexceptSTART-.
+#else
+	.ualong	.LexceptSTART
+#endif
+
+	.byte	4				! DW_CFA_advance_loc4
+	.ualong	.Lpush_r8-.LSTARTCODE
+	.byte	14				! DW_CFA_def_cfa_offset
+	.uleb128 4
+	.byte   0x88				! DW_CFA_offset r8
+        .uleb128 1
+	.byte	4				! DW_CFA_advance_loc4
+	.ualong	.Lpush_r10-.Lpush_r8
+	.byte	14				! DW_CFA_def_cfa_offset
+	.uleb128 8
+	.byte   0x8a				! DW_CFA_offset r10
+        .uleb128 2
+	.byte	4				! DW_CFA_advance_loc4
+	.ualong	.Lpush_r12-.Lpush_r10
+	.byte	14				! DW_CFA_def_cfa_offset
+	.uleb128 12
+	.byte   0x8c				! DW_CFA_offset r12
+        .uleb128 3
+	.byte	4				! DW_CFA_advance_loc4
+	.ualong	.Lpush_pr-.Lpush_r12
+	.byte	14				! DW_CFA_def_cfa_offset
+	.uleb128 16
+	.byte   0x91				! DW_CFA_offset pr
+        .uleb128 4
+	.align	4
+.LENDFDE:
+
+
+#ifdef SHARED
+	.hidden	DW.ref.__gcc_personality_v0
+	.weak	DW.ref.__gcc_personality_v0
+	.section .gnu.linkonce.d.DW.ref.__gcc_personality_v0,"aw",@progbits
+	.align	4
+	.type	DW.ref.__gcc_personality_v0, @object
+	.size	DW.ref.__gcc_personality_v0, 4
+DW.ref.__gcc_personality_v0:
+	.long	__gcc_personality_v0
+#endif