-rw-r--r--  ChangeLog                                      | 12
-rw-r--r--  sysdeps/unix/sysv/linux/s390/elision-lock.c    | 37
-rw-r--r--  sysdeps/unix/sysv/linux/s390/elision-trylock.c | 62
-rw-r--r--  sysdeps/unix/sysv/linux/s390/elision-unlock.c  | 29
-rw-r--r--  sysdeps/unix/sysv/linux/s390/lowlevellock.h    |  4
5 files changed, 90 insertions, 54 deletions
diff --git a/ChangeLog b/ChangeLog
index a3742b5518..ee841a0d85 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,17 @@
 2016-12-20  Stefan Liebler  <stli@linux.vnet.ibm.com>
 
+	* sysdeps/unix/sysv/linux/s390/lowlevellock.h
+	(__lll_unlock_elision, lll_unlock_elision): Add adapt_count argument.
+	* sysdeps/unix/sysv/linux/s390/elision-lock.c:
+	(__lll_lock_elision): Decrement adapt_count while unlocking
+	instead of before locking.
+	* sysdeps/unix/sysv/linux/s390/elision-trylock.c
+	(__lll_trylock_elision): Likewise.
+	* sysdeps/unix/sysv/linux/s390/elision-unlock.c:
+	(__lll_unlock_elision): Likewise.
+
+2016-12-20  Stefan Liebler  <stli@linux.vnet.ibm.com>
+
 	* sysdeps/unix/sysv/linux/s390/htm.h(__libc_tbegin_retry): New macro.
 	* sysdeps/unix/sysv/linux/s390/elision-lock.c (__lll_lock_elision):
 	Use __libc_tbegin_retry macro.
diff --git a/sysdeps/unix/sysv/linux/s390/elision-lock.c b/sysdeps/unix/sysv/linux/s390/elision-lock.c
index 3dd7fbcd18..4a7d546253 100644
--- a/sysdeps/unix/sysv/linux/s390/elision-lock.c
+++ b/sysdeps/unix/sysv/linux/s390/elision-lock.c
@@ -50,31 +50,30 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
      critical section uses lock elision) and outside of transactions.  Thus,
      we need to use atomic accesses to avoid data races.  However, the
      value of adapt_count is just a hint, so relaxed MO accesses are
-     sufficient.  */
-  if (atomic_load_relaxed (adapt_count) > 0)
-    {
-      /* Lost updates are possible, but harmless.  Due to races this might lead
-	 to *adapt_count becoming less than zero.  */
-      atomic_store_relaxed (adapt_count,
-			    atomic_load_relaxed (adapt_count) - 1);
-      goto use_lock;
-    }
-
-  if (aconf.try_tbegin > 0)
+     sufficient.
+     Do not begin a transaction if another cpu has locked the
+     futex with normal locking.  If adapt_count is zero, it remains and the
+     next pthread_mutex_lock call will try to start a transaction again.  */
+  if (atomic_load_relaxed (futex) == 0
+      && atomic_load_relaxed (adapt_count) <= 0 && aconf.try_tbegin > 0)
     {
       int status = __libc_tbegin_retry ((void *) 0, aconf.try_tbegin - 1);
       if (__builtin_expect (status == _HTM_TBEGIN_STARTED,
			     _HTM_TBEGIN_STARTED))
	{
-	  if (__builtin_expect (*futex == 0, 1))
+	  /* Check the futex to make sure nobody has touched it in the
+	     mean time.  This forces the futex into the cache and makes
+	     sure the transaction aborts if some other cpu uses the
+	     lock (writes the futex).  */
+	  if (__builtin_expect (atomic_load_relaxed (futex) == 0, 1))
	    /* Lock was free.  Return to user code in a transaction.  */
	    return 0;
 
	  /* Lock was busy.  Fall back to normal locking.  */
-	  if (__builtin_expect (__libc_tx_nesting_depth (), 1))
+	  if (__builtin_expect (__libc_tx_nesting_depth () <= 1, 1))
	    {
	      /* In a non-nested transaction there is no need to abort,
-		 which is expensive.  */
+		 which is expensive.  Simply end the started transaction.  */
	      __libc_tend ();
	      /* Don't try to use transactions for the next couple of times.
		 See above for why relaxed MO is sufficient.  */
@@ -92,9 +91,9 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
	     is zero.
	     The adapt_count of this inner mutex is not changed,
	     because using the default lock with the inner mutex
-	     would abort the outer transaction.
-	  */
+	     would abort the outer transaction.  */
	  __libc_tabort (_HTM_FIRST_USER_ABORT_CODE | 1);
+	  __builtin_unreachable ();
	}
     }
   else if (status != _HTM_TBEGIN_TRANSIENT)
@@ -110,15 +109,15 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
	}
       else
	{
-	  /* Same logic as above, but for for a number of temporary failures in
-	     a row.  */
+	  /* The transaction failed for some retries with
+	     _HTM_TBEGIN_TRANSIENT.  Use the normal locking now and for the
+	     next couple of calls.  */
	  if (aconf.skip_lock_out_of_tbegin_retries > 0)
	    atomic_store_relaxed (adapt_count,
				  aconf.skip_lock_out_of_tbegin_retries);
	}
     }
 
-  use_lock:
   /* Use normal locking as fallback path if transaction does not succeed.  */
   return LLL_LOCK ((*futex), private);
 }
diff --git a/sysdeps/unix/sysv/linux/s390/elision-trylock.c b/sysdeps/unix/sysv/linux/s390/elision-trylock.c
index e21fc26253..dee66d424b 100644
--- a/sysdeps/unix/sysv/linux/s390/elision-trylock.c
+++ b/sysdeps/unix/sysv/linux/s390/elision-trylock.c
@@ -43,23 +43,36 @@ __lll_trylock_elision (int *futex, short *adapt_count)
	 until their try_tbegin is zero.  */
       __libc_tabort (_HTM_FIRST_USER_ABORT_CODE | 1);
+      __builtin_unreachable ();
     }
 
-  /* Only try a transaction if it's worth it.  See __lll_lock_elision for
-     why we need atomic accesses.  Relaxed MO is sufficient because this is
-     just a hint.  */
-  if (atomic_load_relaxed (adapt_count) <= 0)
+  /* adapt_count can be accessed concurrently; these accesses can be both
+     inside of transactions (if critical sections are nested and the outer
+     critical section uses lock elision) and outside of transactions.  Thus,
+     we need to use atomic accesses to avoid data races.  However, the
+     value of adapt_count is just a hint, so relaxed MO accesses are
+     sufficient.
+     Do not begin a transaction if another cpu has locked the
+     futex with normal locking.  If adapt_count is zero, it remains and the
+     next pthread_mutex_lock call will try to start a transaction again.  */
+  if (atomic_load_relaxed (futex) == 0
+      && atomic_load_relaxed (adapt_count) <= 0 && aconf.try_tbegin > 0)
     {
-      int status;
-
-      if (__builtin_expect
-	  ((status = __libc_tbegin ((void *) 0)) == _HTM_TBEGIN_STARTED, 1))
+      int status = __libc_tbegin ((void *) 0);
+      if (__builtin_expect (status == _HTM_TBEGIN_STARTED,
+			    _HTM_TBEGIN_STARTED))
	{
-	  if (*futex == 0)
+	  /* Check the futex to make sure nobody has touched it in the
+	     mean time.  This forces the futex into the cache and makes
+	     sure the transaction aborts if some other cpu uses the
+	     lock (writes the futex).  */
+	  if (__builtin_expect (atomic_load_relaxed (futex) == 0, 1))
+	    /* Lock was free.  Return to user code in a transaction.  */
	    return 0;
-	  /* Lock was busy.  Fall back to normal locking.  */
-	  /* Since we are in a non-nested transaction there is no need to abort,
-	     which is expensive.  */
+
+	  /* Lock was busy.  Fall back to normal locking.  Since we are in
+	     a non-nested transaction there is no need to abort, which is
+	     expensive.  Simply end the started transaction.  */
	  __libc_tend ();
	  /* Note: Changing the adapt_count here might abort a transaction on a
	     different cpu, but that could happen anyway when the futex is
@@ -68,27 +81,18 @@ __lll_trylock_elision (int *futex, short *adapt_count)
	  if (aconf.skip_lock_busy > 0)
	    atomic_store_relaxed (adapt_count, aconf.skip_lock_busy);
	}
-      else
+      else if (status != _HTM_TBEGIN_TRANSIENT)
	{
-	  if (status != _HTM_TBEGIN_TRANSIENT)
-	    {
-	      /* A persistent abort (cc 1 or 3) indicates that a retry is
-		 probably futile.  Use the normal locking now and for the
-		 next couple of calls.
-		 Be careful to avoid writing to the lock.  */
-	      if (aconf.skip_trylock_internal_abort > 0)
-		*adapt_count = aconf.skip_trylock_internal_abort;
-	    }
+	  /* A persistent abort (cc 1 or 3) indicates that a retry is
+	     probably futile.  Use the normal locking now and for the
+	     next couple of calls.
+	     Be careful to avoid writing to the lock.  */
+	  if (aconf.skip_trylock_internal_abort > 0)
+	    *adapt_count = aconf.skip_trylock_internal_abort;
	}
       /* Could do some retries here.  */
     }
-  else
-    {
-      /* Lost updates are possible, but harmless.  Due to races this might lead
-	 to *adapt_count becoming less than zero.  */
-      atomic_store_relaxed (adapt_count,
-			    atomic_load_relaxed (adapt_count) - 1);
-    }
+
+  /* Use normal locking as fallback path if transaction does not succeed.  */
   return lll_trylock (*futex);
 }
diff --git a/sysdeps/unix/sysv/linux/s390/elision-unlock.c b/sysdeps/unix/sysv/linux/s390/elision-unlock.c
index 0b1ade9e5f..e68d970799 100644
--- a/sysdeps/unix/sysv/linux/s390/elision-unlock.c
+++ b/sysdeps/unix/sysv/linux/s390/elision-unlock.c
@@ -21,16 +21,37 @@
 #include <htm.h>
 
 int
-__lll_unlock_elision(int *futex, int private)
+__lll_unlock_elision(int *futex, short *adapt_count, int private)
 {
   /* If the lock is free, we elided the lock earlier.  This does not
      necessarily mean that we are in a transaction, because the user code may
-     have closed the transaction, but that is impossible to detect reliably.  */
-  if (*futex == 0)
+     have closed the transaction, but that is impossible to detect reliably.
+     Relaxed MO access to futex is sufficient as we only need a hint, if we
+     started a transaction or acquired the futex in e.g. elision-lock.c.  */
+  if (atomic_load_relaxed (futex) == 0)
     {
       __libc_tend ();
     }
   else
-    lll_unlock ((*futex), private);
+    {
+      /* Update the adapt_count while unlocking before completing the critical
+	 section.  adapt_count is accessed concurrently outside of a
+	 transaction or an aquired lock e.g. in elision-lock.c so we need to use
+	 atomic accesses.  However, the value of adapt_count is just a hint, so
+	 relaxed MO accesses are sufficient.
+	 If adapt_count would be decremented while locking, multiple
+	 CPUs trying to lock the locked mutex will decrement adapt_count to
+	 zero and another CPU will try to start a transaction, which will be
+	 immediately aborted as the mutex is locked.
+	 If adapt_count would be decremented while unlocking after completing
+	 the critical section, possible waiters will be waked up before
+	 decrementing the adapt_count.  Those waked up waiters could have
+	 destroyed and freed this mutex!  */
+      short adapt_count_val = atomic_load_relaxed (adapt_count);
+      if (adapt_count_val > 0)
+	atomic_store_relaxed (adapt_count, adapt_count_val - 1);
+
+      lll_unlock ((*futex), private);
+    }
   return 0;
 }
diff --git a/sysdeps/unix/sysv/linux/s390/lowlevellock.h b/sysdeps/unix/sysv/linux/s390/lowlevellock.h
index ada2e5b484..09a933f421 100644
--- a/sysdeps/unix/sysv/linux/s390/lowlevellock.h
+++ b/sysdeps/unix/sysv/linux/s390/lowlevellock.h
@@ -33,7 +33,7 @@ extern int __lll_timedlock_elision
 extern int __lll_lock_elision (int *futex, short *adapt_count, int private)
   attribute_hidden;
 
-extern int __lll_unlock_elision(int *futex, int private)
+extern int __lll_unlock_elision(int *futex, short *adapt_count, int private)
   attribute_hidden;
 
 extern int __lll_trylock_elision(int *futex, short *adapt_count)
@@ -42,7 +42,7 @@ extern int __lll_trylock_elision(int *futex, short *adapt_count)
 # define lll_lock_elision(futex, adapt_count, private) \
   __lll_lock_elision (&(futex), &(adapt_count), private)
 # define lll_unlock_elision(futex, adapt_count, private) \
-  __lll_unlock_elision (&(futex), private)
+  __lll_unlock_elision (&(futex), &(adapt_count), private)
 # define lll_trylock_elision(futex, adapt_count) \
   __lll_trylock_elision(&(futex), &(adapt_count))
 # endif /* ENABLE_LOCK_ELISION */
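
For reference, the locking fast path that this patch establishes boils down to the shape sketched below in standard C11.  This is a minimal sketch under assumed names, not the glibc implementation: tbegin_stub and tend_stub stand in for the s390 __libc_tbegin_retry and __libc_tend intrinsics, cfg for the aconf tunables, and fallback_lock for LLL_LOCK.

#include <stdatomic.h>
#include <stdio.h>

/* Hypothetical stand-ins (assumptions, not glibc API).  Here HTM always
   fails persistently, so the sketch always takes the fallback path.  */
enum { TBEGIN_STARTED = 0, TBEGIN_PERSISTENT = 1 };
static int  tbegin_stub (int retries) { (void) retries; return TBEGIN_PERSISTENT; }
static void tend_stub (void) { }
static struct { int try_tbegin; short skip_lock_busy; } cfg = { 3, 3 };

/* Plain CAS loop standing in for the normal futex-based locking.  */
static void
fallback_lock (atomic_int *futex)
{
  int expected = 0;
  while (!atomic_compare_exchange_weak (futex, &expected, 1))
    expected = 0;
}

/* Shape of the patched fast path: start a transaction only if the futex
   looks free AND the adapt_count hint does not veto elision.  adapt_count
   is no longer decremented here; the patch moves that to the unlock path.  */
static int
elided_lock_sketch (atomic_int *futex, atomic_short *adapt_count)
{
  if (atomic_load_explicit (futex, memory_order_relaxed) == 0
      && atomic_load_explicit (adapt_count, memory_order_relaxed) <= 0
      && cfg.try_tbegin > 0)
    {
      if (tbegin_stub (cfg.try_tbegin - 1) == TBEGIN_STARTED)
        {
          /* Re-check the futex inside the transaction; this puts it into
             the transaction's read set, so a concurrent writer aborts us.  */
          if (atomic_load_explicit (futex, memory_order_relaxed) == 0)
            return 0;  /* Elided: run the critical section transactionally.  */

          /* Lock was busy: end the transaction and skip elision for the
             next couple of calls.  Relaxed MO; the count is only a hint.  */
          tend_stub ();
          atomic_store_explicit (adapt_count, cfg.skip_lock_busy,
                                 memory_order_relaxed);
        }
    }
  fallback_lock (futex);
  return 0;
}

int
main (void)
{
  atomic_int futex = 0;
  atomic_short adapt_count = 0;
  elided_lock_sketch (&futex, &adapt_count);
  printf ("locked, futex = %d\n", atomic_load (&futex));
  return 0;
}

With the removed code, every call on a busy lock also decremented adapt_count, so several CPUs spinning on a held mutex could drive the counter to zero and start transactions that were doomed to abort immediately; gating elision on the futex value avoids that.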
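The companion unlock path, and the reason the decrement sits before the futex release, can be sketched the same way.  Again a minimal sketch under assumed names: futex_wake_stub stands in for the wake-up performed by lll_unlock, tend_stub for __libc_tend.

#include <stdatomic.h>

static void tend_stub (void) { }                          /* models __libc_tend */
static void futex_wake_stub (atomic_int *f) { (void) f; } /* models lll_unlock's wake-up */

/* Shape of the patched __lll_unlock_elision.  The decrement happens while
   the lock is still held: once the futex is released and waiters are woken,
   another thread may lock, unlock, destroy and free the mutex, so neither
   the futex nor adapt_count may be touched after that point.  */
static int
elided_unlock_sketch (atomic_int *futex, atomic_short *adapt_count)
{
  if (atomic_load_explicit (futex, memory_order_relaxed) == 0)
    /* Futex still free: we elided the lock, so just end the transaction.  */
    tend_stub ();
  else
    {
      /* Relaxed MO is sufficient; adapt_count is only a hint.  */
      short val = atomic_load_explicit (adapt_count, memory_order_relaxed);
      if (val > 0)
        atomic_store_explicit (adapt_count, val - 1, memory_order_relaxed);

      atomic_store_explicit (futex, 0, memory_order_release);
      futex_wake_stub (futex);  /* After this, the mutex may already be gone.  */
    }
  return 0;
}

int
main (void)
{
  atomic_int futex = 1;            /* pretend the fallback lock is held */
  atomic_short adapt_count = 3;
  elided_unlock_sketch (&futex, &adapt_count);  /* adapt_count becomes 2 */
  return 0;
}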