-rw-r--r--  ChangeLog                                        12
-rw-r--r--  sysdeps/unix/sysv/linux/s390/elision-lock.c      37
-rw-r--r--  sysdeps/unix/sysv/linux/s390/elision-trylock.c   62
-rw-r--r--  sysdeps/unix/sysv/linux/s390/elision-unlock.c    29
-rw-r--r--  sysdeps/unix/sysv/linux/s390/lowlevellock.h       4
5 files changed, 90 insertions, 54 deletions
diff --git a/ChangeLog b/ChangeLog
index a3742b5518..ee841a0d85 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,17 @@
 2016-12-20  Stefan Liebler  <stli@linux.vnet.ibm.com>
 
+	* sysdeps/unix/sysv/linux/s390/lowlevellock.h
+	(__lll_unlock_elision, lll_unlock_elision): Add adapt_count argument.
+	* sysdeps/unix/sysv/linux/s390/elision-lock.c:
+	(__lll_lock_elision): Decrement adapt_count while unlocking
+	instead of before locking.
+	* sysdeps/unix/sysv/linux/s390/elision-trylock.c
+	(__lll_trylock_elision): Likewise.
+	* sysdeps/unix/sysv/linux/s390/elision-unlock.c:
+	(__lll_unlock_elision): Likewise.
+
+2016-12-20  Stefan Liebler  <stli@linux.vnet.ibm.com>
+
 	* sysdeps/unix/sysv/linux/s390/htm.h(__libc_tbegin_retry): New macro.
 	* sysdeps/unix/sysv/linux/s390/elision-lock.c (__lll_lock_elision):
 	Use __libc_tbegin_retry macro.
diff --git a/sysdeps/unix/sysv/linux/s390/elision-lock.c b/sysdeps/unix/sysv/linux/s390/elision-lock.c
index 3dd7fbcd18..4a7d546253 100644
--- a/sysdeps/unix/sysv/linux/s390/elision-lock.c
+++ b/sysdeps/unix/sysv/linux/s390/elision-lock.c
@@ -50,31 +50,30 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
      critical section uses lock elision) and outside of transactions.  Thus,
      we need to use atomic accesses to avoid data races.  However, the
      value of adapt_count is just a hint, so relaxed MO accesses are
-     sufficient.  */
-  if (atomic_load_relaxed (adapt_count) > 0)
-    {
-      /* Lost updates are possible, but harmless.  Due to races this might lead
-	 to *adapt_count becoming less than zero.  */
-      atomic_store_relaxed (adapt_count,
-			    atomic_load_relaxed (adapt_count) - 1);
-      goto use_lock;
-    }
-
-  if (aconf.try_tbegin > 0)
+     sufficient.
+     Do not begin a transaction if another CPU has locked the
+     futex with normal locking.  If adapt_count is zero, it stays zero and the
+     next pthread_mutex_lock call will try to start a transaction again.  */
+    if (atomic_load_relaxed (futex) == 0
+	&& atomic_load_relaxed (adapt_count) <= 0 && aconf.try_tbegin > 0)
     {
       int status = __libc_tbegin_retry ((void *) 0, aconf.try_tbegin - 1);
       if (__builtin_expect (status == _HTM_TBEGIN_STARTED,
 			    _HTM_TBEGIN_STARTED))
 	{
-	  if (__builtin_expect (*futex == 0, 1))
+	  /* Check the futex to make sure nobody has touched it in the
+	     meantime.  This forces the futex into the cache and makes
+	     sure the transaction aborts if some other CPU uses the
+	     lock (writes the futex).  */
+	  if (__builtin_expect (atomic_load_relaxed (futex) == 0, 1))
 	    /* Lock was free.  Return to user code in a transaction.  */
 	    return 0;
 
 	  /* Lock was busy.  Fall back to normal locking.  */
-	  if (__builtin_expect (__libc_tx_nesting_depth (), 1))
+	  if (__builtin_expect (__libc_tx_nesting_depth () <= 1, 1))
 	    {
 	      /* In a non-nested transaction there is no need to abort,
-		 which is expensive.  */
+		 which is expensive.  Simply end the started transaction.  */
 	      __libc_tend ();
 	      /* Don't try to use transactions for the next couple of times.
 		 See above for why relaxed MO is sufficient.  */
@@ -92,9 +91,9 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
 		 is zero.
 		 The adapt_count of this inner mutex is not changed,
 		 because using the default lock with the inner mutex
-		 would abort the outer transaction.
-	      */
+		 would abort the outer transaction.  */
 	      __libc_tabort (_HTM_FIRST_USER_ABORT_CODE | 1);
+	      __builtin_unreachable ();
 	    }
 	}
       else if (status != _HTM_TBEGIN_TRANSIENT)
@@ -110,15 +109,15 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
 	}
       else
 	{
-	  /* Same logic as above, but for for a number of temporary failures in
-	     a row.  */
+	  /* The transaction failed for some retries with
+	     _HTM_TBEGIN_TRANSIENT.  Use the normal locking now and for the
+	     next couple of calls.  */
 	  if (aconf.skip_lock_out_of_tbegin_retries > 0)
 	    atomic_store_relaxed (adapt_count,
 				  aconf.skip_lock_out_of_tbegin_retries);
 	}
     }
 
-  use_lock:
   /* Use normal locking as fallback path if transaction does not succeed.  */
   return LLL_LOCK ((*futex), private);
 }
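
The reworked fast path above only attempts elision when the futex appears free and the adapt_count hint allows it, and it re-reads the futex inside the transaction so that a concurrent normal lock (a write to the futex) aborts the transaction.  The following standalone sketch shows the same control flow using C11 atomics and stub primitives; try_begin_transaction, end_transaction and acquire_futex are illustrative stand-ins, not glibc's htm.h or lowlevellock interfaces.

/* Illustrative sketch only -- not the glibc implementation.  */
#include <stdatomic.h>

#define TX_STARTED   0   /* stand-in for _HTM_TBEGIN_STARTED */
#define TX_TRANSIENT 2   /* stand-in for _HTM_TBEGIN_TRANSIENT */

static int try_begin_transaction (int retries) { (void) retries; return TX_TRANSIENT; }
static void end_transaction (void) { }
static int acquire_futex (_Atomic int *futex) { atomic_store (futex, 1); return 0; }

int
elided_lock (_Atomic int *futex, _Atomic short *adapt_count,
             int try_tbegin, int skip_lock_busy)
{
  /* Only try to elide when the futex looks free and the adapt_count hint
     allows it; both loads are relaxed because they are only hints.  */
  if (atomic_load_explicit (futex, memory_order_relaxed) == 0
      && atomic_load_explicit (adapt_count, memory_order_relaxed) <= 0
      && try_tbegin > 0)
    {
      if (try_begin_transaction (try_tbegin - 1) == TX_STARTED)
        {
          /* Re-read the futex inside the transaction: this adds it to the
             transaction's read set, so a concurrent normal lock aborts us.  */
          if (atomic_load_explicit (futex, memory_order_relaxed) == 0)
            return 0;           /* Lock elided; still transactional here.  */

          /* Lock was busy: end the transaction and skip elision for the
             next couple of calls.  */
          end_transaction ();
          atomic_store_explicit (adapt_count, (short) skip_lock_busy,
                                 memory_order_relaxed);
        }
    }

  /* Fallback: take the lock normally.  */
  return acquire_futex (futex);
}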
diff --git a/sysdeps/unix/sysv/linux/s390/elision-trylock.c b/sysdeps/unix/sysv/linux/s390/elision-trylock.c
index e21fc26253..dee66d424b 100644
--- a/sysdeps/unix/sysv/linux/s390/elision-trylock.c
+++ b/sysdeps/unix/sysv/linux/s390/elision-trylock.c
@@ -43,23 +43,36 @@ __lll_trylock_elision (int *futex, short *adapt_count)
 	 until their try_tbegin is zero.
       */
       __libc_tabort (_HTM_FIRST_USER_ABORT_CODE | 1);
+      __builtin_unreachable ();
     }
 
-  /* Only try a transaction if it's worth it.  See __lll_lock_elision for
-     why we need atomic accesses.  Relaxed MO is sufficient because this is
-     just a hint.  */
-  if (atomic_load_relaxed (adapt_count) <= 0)
+  /* adapt_count can be accessed concurrently; these accesses can be both
+     inside of transactions (if critical sections are nested and the outer
+     critical section uses lock elision) and outside of transactions.  Thus,
+     we need to use atomic accesses to avoid data races.  However, the
+     value of adapt_count is just a hint, so relaxed MO accesses are
+     sufficient.
+     Do not begin a transaction if another CPU has locked the
+     futex with normal locking.  If adapt_count is zero, it stays zero and the
+     next pthread_mutex_lock call will try to start a transaction again.  */
+    if (atomic_load_relaxed (futex) == 0
+	&& atomic_load_relaxed (adapt_count) <= 0 && aconf.try_tbegin > 0)
     {
-      int status;
-
-      if (__builtin_expect
-	  ((status = __libc_tbegin ((void *) 0)) == _HTM_TBEGIN_STARTED, 1))
+      int status = __libc_tbegin ((void *) 0);
+      if (__builtin_expect (status == _HTM_TBEGIN_STARTED,
+			    _HTM_TBEGIN_STARTED))
 	{
-	  if (*futex == 0)
+	  /* Check the futex to make sure nobody has touched it in the
+	     meantime.  This forces the futex into the cache and makes
+	     sure the transaction aborts if some other CPU uses the
+	     lock (writes the futex).  */
+	  if (__builtin_expect (atomic_load_relaxed (futex) == 0, 1))
+	    /* Lock was free.  Return to user code in a transaction.  */
 	    return 0;
-	  /* Lock was busy.  Fall back to normal locking.  */
-	  /* Since we are in a non-nested transaction there is no need to abort,
-	     which is expensive.  */
+
+	  /* Lock was busy.  Fall back to normal locking.  Since we are in
+	     a non-nested transaction there is no need to abort, which is
+	     expensive.  Simply end the started transaction.  */
 	  __libc_tend ();
 	  /* Note: Changing the adapt_count here might abort a transaction on a
 	     different cpu, but that could happen anyway when the futex is
@@ -68,27 +81,18 @@ __lll_trylock_elision (int *futex, short *adapt_count)
 	  if (aconf.skip_lock_busy > 0)
 	    atomic_store_relaxed (adapt_count, aconf.skip_lock_busy);
 	}
-      else
+      else if (status != _HTM_TBEGIN_TRANSIENT)
 	{
-	  if (status != _HTM_TBEGIN_TRANSIENT)
-	    {
-	      /* A persistent abort (cc 1 or 3) indicates that a retry is
-		 probably futile.  Use the normal locking now and for the
-		 next couple of calls.
-		 Be careful to avoid writing to the lock.  */
-	      if (aconf.skip_trylock_internal_abort > 0)
-		*adapt_count = aconf.skip_trylock_internal_abort;
-	    }
+	  /* A persistent abort (cc 1 or 3) indicates that a retry is
+	     probably futile.  Use the normal locking now and for the
+	     next couple of calls.
+	     Be careful to avoid writing to the lock.  */
+	  if (aconf.skip_trylock_internal_abort > 0)
+	    *adapt_count = aconf.skip_trylock_internal_abort;
 	}
       /* Could do some retries here.  */
     }
-  else
-    {
-      /* Lost updates are possible, but harmless.  Due to races this might lead
-	 to *adapt_count becoming less than zero.  */
-      atomic_store_relaxed (adapt_count,
-			    atomic_load_relaxed (adapt_count) - 1);
-    }
 
+  /* Use normal locking as fallback path if transaction does not succeed.  */
   return lll_trylock (*futex);
 }
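
As in the lock path, the trylock path above no longer decrements adapt_count itself; the decrement is now centralized in the unlock path.  What remains here is the abort handling: a transient abort leaves the hint untouched, while a persistent abort steers the next few calls to normal locking without writing to the futex.  A hedged sketch of just that decision, with an assumed stand-in constant for _HTM_TBEGIN_TRANSIENT and C11 atomics in place of glibc's internal macros:

/* Sketch only; glibc's real status codes live in the s390 htm.h.  */
#include <stdatomic.h>

#define TBEGIN_TRANSIENT 2      /* stand-in for _HTM_TBEGIN_TRANSIENT */

void
note_trylock_abort (int status, _Atomic short *adapt_count,
                    int skip_trylock_internal_abort)
{
  if (status == TBEGIN_TRANSIENT)
    /* Temporary failure (e.g. a conflict): leave the hint alone, a later
       attempt may still succeed.  */
    return;

  /* Persistent abort: retrying is probably futile, so steer the next few
     calls to the normal lock.  Only the hint is written, never the futex.  */
  if (skip_trylock_internal_abort > 0)
    atomic_store_explicit (adapt_count, (short) skip_trylock_internal_abort,
                           memory_order_relaxed);
}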
diff --git a/sysdeps/unix/sysv/linux/s390/elision-unlock.c b/sysdeps/unix/sysv/linux/s390/elision-unlock.c
index 0b1ade9e5f..e68d970799 100644
--- a/sysdeps/unix/sysv/linux/s390/elision-unlock.c
+++ b/sysdeps/unix/sysv/linux/s390/elision-unlock.c
@@ -21,16 +21,37 @@
 #include <htm.h>
 
 int
-__lll_unlock_elision(int *futex, int private)
+__lll_unlock_elision(int *futex, short *adapt_count, int private)
 {
   /* If the lock is free, we elided the lock earlier.  This does not
      necessarily mean that we are in a transaction, because the user code may
-     have closed the transaction, but that is impossible to detect reliably.  */
-  if (*futex == 0)
+     have closed the transaction, but that is impossible to detect reliably.
+     Relaxed MO access to futex is sufficient, as we only need a hint whether
+     we started a transaction or acquired the futex in e.g. elision-lock.c.  */
+  if (atomic_load_relaxed (futex) == 0)
     {
       __libc_tend ();
     }
   else
-    lll_unlock ((*futex), private);
+    {
+      /* Update the adapt_count while unlocking before completing the critical
+	 section.  adapt_count is accessed concurrently outside of a
+	 transaction or an acquired lock, e.g. in elision-lock.c, so we need to
+	 use atomic accesses.  However, the value of adapt_count is just a hint,
+	 so relaxed MO accesses are sufficient.
+	 If adapt_count were decremented while locking, multiple CPUs trying to
+	 lock the locked mutex would decrement adapt_count to zero, and another
+	 CPU would try to start a transaction, which would be immediately
+	 aborted as the mutex is locked.
+	 If adapt_count were decremented while unlocking after completing the
+	 critical section, possible waiters would be woken up before
+	 decrementing the adapt_count.  Those woken-up waiters could have
+	 destroyed and freed this mutex!  */
+      short adapt_count_val = atomic_load_relaxed (adapt_count);
+      if (adapt_count_val > 0)
+	atomic_store_relaxed (adapt_count, adapt_count_val - 1);
+
+      lll_unlock ((*futex), private);
+    }
   return 0;
 }
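
The comment above carries the core ordering argument: the owner must decrement the hint before releasing the futex, because the release may wake a waiter that immediately destroys and frees the mutex.  A minimal illustration of that ordering, not the glibc implementation; futex_wake_release is a hypothetical stand-in for lll_unlock.

/* Minimal sketch, not glibc code.  */
#include <stdatomic.h>

static void
futex_wake_release (_Atomic int *futex)
{
  atomic_store (futex, 0);      /* Release; real code would also wake waiters.  */
}

void
elided_unlock_slow_path (_Atomic int *futex, _Atomic short *adapt_count)
{
  /* Still the owner here, so the mutex cannot be destroyed under us.
     Relaxed MO is enough: adapt_count is only a hint.  */
  short val = atomic_load_explicit (adapt_count, memory_order_relaxed);
  if (val > 0)
    atomic_store_explicit (adapt_count, val - 1, memory_order_relaxed);

  /* Only now release the lock and wake potential waiters.  Touching
     adapt_count after this point could be a use-after-free if a woken
     waiter destroys the mutex.  */
  futex_wake_release (futex);
}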
diff --git a/sysdeps/unix/sysv/linux/s390/lowlevellock.h b/sysdeps/unix/sysv/linux/s390/lowlevellock.h
index ada2e5b484..09a933f421 100644
--- a/sysdeps/unix/sysv/linux/s390/lowlevellock.h
+++ b/sysdeps/unix/sysv/linux/s390/lowlevellock.h
@@ -33,7 +33,7 @@ extern int __lll_timedlock_elision
 extern int __lll_lock_elision (int *futex, short *adapt_count, int private)
   attribute_hidden;
 
-extern int __lll_unlock_elision(int *futex, int private)
+extern int __lll_unlock_elision(int *futex, short *adapt_count, int private)
   attribute_hidden;
 
 extern int __lll_trylock_elision(int *futex, short *adapt_count)
@@ -42,7 +42,7 @@ extern int __lll_trylock_elision(int *futex, short *adapt_count)
 #  define lll_lock_elision(futex, adapt_count, private) \
   __lll_lock_elision (&(futex), &(adapt_count), private)
 #  define lll_unlock_elision(futex, adapt_count, private) \
-  __lll_unlock_elision (&(futex), private)
+  __lll_unlock_elision (&(futex), &(adapt_count), private)
 #  define lll_trylock_elision(futex, adapt_count) \
   __lll_trylock_elision(&(futex), &(adapt_count))
 # endif  /* ENABLE_LOCK_ELISION */
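
With the header change above, lll_unlock_elision forwards the adapt_count field alongside the futex.  The following self-contained toy example shows how a caller-side structure would pass both fields through such a macro; toy_mutex and the stub function are assumptions for illustration, not the real pthread_mutex_t or glibc's __lll_unlock_elision.

/* Hypothetical, self-contained illustration of the new macro shape.  */
#include <stdio.h>

static int
unlock_elision_stub (int *futex, short *adapt_count, int private)
{
  (void) private;
  if (*adapt_count > 0)
    --*adapt_count;             /* Decrement the hint while unlocking.  */
  *futex = 0;                   /* Release the lock.  */
  return 0;
}

#define lll_unlock_elision(futex, adapt_count, private) \
  unlock_elision_stub (&(futex), &(adapt_count), private)

struct toy_mutex { int futex; short adapt_count; };

int
main (void)
{
  struct toy_mutex m = { .futex = 1, .adapt_count = 3 };
  lll_unlock_elision (m.futex, m.adapt_count, 0);
  printf ("futex=%d adapt_count=%d\n", m.futex, m.adapt_count);
  return 0;
}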