1 files changed, 57 insertions, 10 deletions
diff --git a/nptl/sem_post.c b/nptl/sem_post.c
index d1c39ffe05..9162e4c8a6 100644
--- a/nptl/sem_post.c
+++ b/nptl/sem_post.c
@@ -26,34 +26,78 @@
 
 #include <shlib-compat.h>
 
+/* Wrapper for lll_futex_wake, with error checking.
+   TODO Remove when cleaning up the futex API throughout glibc.  */
+static __always_inline void
+futex_wake (unsigned int* futex, int processes_to_wake, int private)
+{
+  int res = lll_futex_wake (futex, processes_to_wake, private);
+  /* No error.  Ignore the number of woken processes.  */
+  if (res >= 0)
+    return;
+  switch (res)
+    {
+    case -EFAULT: /* Could have happened due to memory reuse.  */
+    case -EINVAL: /* Could be either due to incorrect alignment (a bug in
+		     glibc or in the application) or due to memory being
+		     reused for a PI futex.  We cannot distinguish between the
+		     two causes, and one of them is correct use, so we do not
+		     act in this case.  */
+      return;
+    case -ENOSYS: /* Must have been caused by a glibc bug.  */
+    /* No other errors are documented at this time.  */
+    default:
+      abort ();
+    }
+}
+
+
+/* See sem_wait for an explanation of the algorithm.  */
 int
 __new_sem_post (sem_t *sem)
 {
   struct new_sem *isem = (struct new_sem *) sem;
+  int private = isem->private;
 
-  __typeof (isem->value) cur;
+#if __HAVE_64B_ATOMICS
+  /* Add a token to the semaphore.  We use release MO to make sure that a
+     thread acquiring this token synchronizes with us and other threads that
+     added tokens before (the release sequence includes atomic RMW operations
+     by other threads).  */
+  /* TODO Use atomic_fetch_add to make it scale better than a CAS loop?  */
+  unsigned long int d = atomic_load_relaxed (&isem->data);
   do
     {
-      cur = isem->value;
-      if (isem->value == SEM_VALUE_MAX)
+      if ((d & SEM_VALUE_MASK) == SEM_VALUE_MAX)
 	{
 	  __set_errno (EOVERFLOW);
 	  return -1;
 	}
     }
-  while (atomic_compare_and_exchange_bool_rel (&isem->value, cur + 1, cur));
+  while (!atomic_compare_exchange_weak_release (&isem->data, &d, d + 1));
 
-  atomic_full_barrier ();
-  if (isem->nwaiters > 0)
+  /* If there is any potentially blocked waiter, wake one of them.  */
+  if ((d >> SEM_NWAITERS_SHIFT) > 0)
+    futex_wake (((unsigned int *) &isem->data) + SEM_VALUE_OFFSET, 1, private);
+#else
+  /* Add a token to the semaphore.  Similar to 64b version.  */
+  unsigned int v = atomic_load_relaxed (&isem->value);
+  do
     {
-      int err = lll_futex_wake (&isem->value, 1,
-				isem->private ^ FUTEX_PRIVATE_FLAG);
-      if (__builtin_expect (err, 0) < 0)
+      if ((v << SEM_VALUE_SHIFT) == SEM_VALUE_MAX)
 	{
-	  __set_errno (-err);
+	  __set_errno (EOVERFLOW);
 	  return -1;
 	}
     }
+  while (!atomic_compare_exchange_weak_release (&isem->value,
+      &v, v + (1 << SEM_VALUE_SHIFT)));
+
+  /* If there is any potentially blocked waiter, wake one of them.  */
+  if ((v & SEM_NWAITERS_MASK) != 0)
+    futex_wake (&isem->value, 1, private);
+#endif
+
   return 0;
 }
 versioned_symbol (libpthread, __new_sem_post, sem_post, GLIBC_2_1);
@@ -66,6 +110,9 @@ __old_sem_post (sem_t *sem)
 {
   int *futex = (int *) sem;
 
+  /* We must need to synchronize with consumers of this token, so the atomic
+     increment must have release MO semantics.  */
+  atomic_write_barrier ();
   (void) atomic_increment_val (futex);
   /* We always have to assume it is a shared semaphore.  */
   int err = lll_futex_wake (futex, 1, LLL_SHARED);