Diffstat (limited to 'nptl/pthread_cond_wait.c')
-rw-r--r-- | nptl/pthread_cond_wait.c | 754
1 files changed, 595 insertions, 159 deletions
diff --git a/nptl/pthread_cond_wait.c b/nptl/pthread_cond_wait.c index 3f62acc6bd..2b434026c6 100644 --- a/nptl/pthread_cond_wait.c +++ b/nptl/pthread_cond_wait.c @@ -19,219 +19,655 @@ #include <endian.h> #include <errno.h> #include <sysdep.h> -#include <lowlevellock.h> +#include <futex-internal.h> #include <pthread.h> #include <pthreadP.h> -#include <kernel-features.h> +#include <sys/time.h> +#include <atomic.h> +#include <stdint.h> +#include <stdbool.h> #include <shlib-compat.h> #include <stap-probe.h> +#include <time.h> + +#include "pthread_cond_common.c" + struct _condvar_cleanup_buffer { - int oldtype; + uint64_t wseq; pthread_cond_t *cond; pthread_mutex_t *mutex; - unsigned int bc_seq; + int private; }; -void -__attribute__ ((visibility ("hidden"))) -__condvar_cleanup (void *arg) +/* Decrease the waiter reference count. */ +static void +__condvar_confirm_wakeup (pthread_cond_t *cond, int private) { - struct _condvar_cleanup_buffer *cbuffer = - (struct _condvar_cleanup_buffer *) arg; - unsigned int destroying; - int pshared = (cbuffer->cond->__data.__mutex == (void *) ~0l) - ? LLL_SHARED : LLL_PRIVATE; + /* If destruction is pending (i.e., the wake-request flag is nonzero) and we + are the last waiter (prior value of __wrefs was 1 << 3), then wake any + threads waiting in pthread_cond_destroy. Release MO to synchronize with + these threads. Don't bother clearing the wake-up request flag. */ + if ((atomic_fetch_add_release (&cond->__data.__wrefs, -8) >> 2) == 3) + futex_wake (&cond->__data.__wrefs, INT_MAX, private); +} + - /* We are going to modify shared data. */ - lll_lock (cbuffer->cond->__data.__lock, pshared); +/* Cancel waiting after having registered as a waiter previously. SEQ is our + position and G is our group index. + The goal of cancellation is to make our group smaller if that is still + possible. If we are in a closed group, this is not possible anymore; in + this case, we need to send a replacement signal for the one we effectively + consumed because the signal should have gotten consumed by another waiter + instead; we must not both cancel waiting and consume a signal. + + Must not be called while still holding a reference on the group. + + Returns true iff we consumed a signal. + + On some kind of timeouts, we may be able to pretend that a signal we + effectively consumed happened before the timeout (i.e., similarly to first + spinning on signals before actually checking whether the timeout has + passed already). Doing this would allow us to skip sending a replacement + signal, but this case might happen rarely because the end of the timeout + must race with someone else sending a signal. Therefore, we don't bother + trying to optimize this. */ +static void +__condvar_cancel_waiting (pthread_cond_t *cond, uint64_t seq, unsigned int g, + int private) +{ + bool consumed_signal = false; - if (cbuffer->bc_seq == cbuffer->cond->__data.__broadcast_seq) + /* No deadlock with group switching is possible here because we have do + not hold a reference on the group. */ + __condvar_acquire_lock (cond, private); + + uint64_t g1_start = __condvar_load_g1_start_relaxed (cond) >> 1; + if (g1_start > seq) + { + /* Our group is closed, so someone provided enough signals for it. + Thus, we effectively consumed a signal. */ + consumed_signal = true; + } + else { - /* This thread is not waiting anymore. Adjust the sequence counters - appropriately. We do not increment WAKEUP_SEQ if this would - bump it over the value of TOTAL_SEQ. 
This can happen if a thread - was woken and then canceled. */ - if (cbuffer->cond->__data.__wakeup_seq - < cbuffer->cond->__data.__total_seq) + if (g1_start + __condvar_get_orig_size (cond) <= seq) + { + /* We are in the current G2 and thus cannot have consumed a signal. + Reduce its effective size or handle overflow. Remember that in + G2, unsigned int size is zero or a negative value. */ + if (cond->__data.__g_size[g] + __PTHREAD_COND_MAX_GROUP_SIZE > 0) + { + cond->__data.__g_size[g]--; + } + else + { + /* Cancellations would overflow the maximum group size. Just + wake up everyone spuriously to create a clean state. This + also means we do not consume a signal someone else sent. */ + __condvar_release_lock (cond, private); + __pthread_cond_broadcast (cond); + return; + } + } + else { - ++cbuffer->cond->__data.__wakeup_seq; - ++cbuffer->cond->__data.__futex; + /* We are in current G1. If the group's size is zero, someone put + a signal in the group that nobody else but us can consume. */ + if (cond->__data.__g_size[g] == 0) + consumed_signal = true; + else + { + /* Otherwise, we decrease the size of the group. This is + equivalent to atomically putting in a signal just for us and + consuming it right away. We do not consume a signal sent + by someone else. We also cannot have consumed a futex + wake-up because if we were cancelled or timed out in a futex + call, the futex will wake another waiter. */ + cond->__data.__g_size[g]--; + } } - ++cbuffer->cond->__data.__woken_seq; } - cbuffer->cond->__data.__nwaiters -= 1 << COND_NWAITERS_SHIFT; + __condvar_release_lock (cond, private); - /* If pthread_cond_destroy was called on this variable already, - notify the pthread_cond_destroy caller all waiters have left - and it can be successfully destroyed. */ - destroying = 0; - if (cbuffer->cond->__data.__total_seq == -1ULL - && cbuffer->cond->__data.__nwaiters < (1 << COND_NWAITERS_SHIFT)) + if (consumed_signal) { - lll_futex_wake (&cbuffer->cond->__data.__nwaiters, 1, pshared); - destroying = 1; + /* We effectively consumed a signal even though we didn't want to. + Therefore, we need to send a replacement signal. + If we would want to optimize this, we could do what + pthread_cond_signal does right in the critical section above. */ + __pthread_cond_signal (cond); } +} - /* We are done. */ - lll_unlock (cbuffer->cond->__data.__lock, pshared); - - /* Wake everybody to make sure no condvar signal gets lost. */ - if (! destroying) - lll_futex_wake (&cbuffer->cond->__data.__futex, INT_MAX, pshared); - - /* Get the mutex before returning unless asynchronous cancellation - is in effect. We don't try to get the mutex if we already own it. */ - if (!(USE_REQUEUE_PI (cbuffer->mutex)) - || ((cbuffer->mutex->__data.__lock & FUTEX_TID_MASK) - != THREAD_GETMEM (THREAD_SELF, tid))) - { - __pthread_mutex_cond_lock (cbuffer->mutex); - } - else - __pthread_mutex_cond_lock_adjust (cbuffer->mutex); +/* Wake up any signalers that might be waiting. */ +static void +__condvar_dec_grefs (pthread_cond_t *cond, unsigned int g, int private) +{ + /* Release MO to synchronize-with the acquire load in + __condvar_quiesce_and_switch_g1. */ + if (atomic_fetch_add_release (cond->__data.__g_refs + g, -2) == 3) + { + /* Clear the wake-up request flag before waking up. We do not need more + than relaxed MO and it doesn't matter if we apply this for an aliased + group because we wake all futex waiters right after clearing the + flag. 
*/ + atomic_fetch_and_relaxed (cond->__data.__g_refs + g, ~(unsigned int) 1); + futex_wake (cond->__data.__g_refs + g, INT_MAX, private); + } } +/* Clean-up for cancellation of waiters waiting for normal signals. We cancel + our registration as a waiter, confirm we have woken up, and re-acquire the + mutex. */ +static void +__condvar_cleanup_waiting (void *arg) +{ + struct _condvar_cleanup_buffer *cbuffer = + (struct _condvar_cleanup_buffer *) arg; + pthread_cond_t *cond = cbuffer->cond; + unsigned g = cbuffer->wseq & 1; -int -__pthread_cond_wait (pthread_cond_t *cond, pthread_mutex_t *mutex) + __condvar_dec_grefs (cond, g, cbuffer->private); + + __condvar_cancel_waiting (cond, cbuffer->wseq >> 1, g, cbuffer->private); + /* FIXME With the current cancellation implementation, it is possible that + a thread is cancelled after it has returned from a syscall. This could + result in a cancelled waiter consuming a futex wake-up that is then + causing another waiter in the same group to not wake up. To work around + this issue until we have fixed cancellation, just add a futex wake-up + conservatively. */ + futex_wake (cond->__data.__g_signals + g, 1, cbuffer->private); + + __condvar_confirm_wakeup (cond, cbuffer->private); + + /* XXX If locking the mutex fails, should we just stop execution? This + might be better than silently ignoring the error. */ + __pthread_mutex_cond_lock (cbuffer->mutex); +} + +/* This condvar implementation guarantees that all calls to signal and + broadcast and all of the three virtually atomic parts of each call to wait + (i.e., (1) releasing the mutex and blocking, (2) unblocking, and (3) re- + acquiring the mutex) happen in some total order that is consistent with the + happens-before relations in the calling program. However, this order does + not necessarily result in additional happens-before relations being + established (which aligns well with spurious wake-ups being allowed). + + All waiters acquire a certain position in a 64b waiter sequence (__wseq). + This sequence determines which waiters are allowed to consume signals. + A broadcast is equal to sending as many signals as are unblocked waiters. + When a signal arrives, it samples the current value of __wseq with a + relaxed-MO load (i.e., the position the next waiter would get). (This is + sufficient because it is consistent with happens-before; the caller can + enforce stronger ordering constraints by calling signal while holding the + mutex.) Only waiters with a position less than the __wseq value observed + by the signal are eligible to consume this signal. + + This would be straight-forward to implement if waiters would just spin but + we need to let them block using futexes. Futexes give no guarantee of + waking in FIFO order, so we cannot reliably wake eligible waiters if we + just use a single futex. Also, futex words are 32b in size, but we need + to distinguish more than 1<<32 states because we need to represent the + order of wake-up (and thus which waiters are eligible to consume signals); + blocking in a futex is not atomic with a waiter determining its position in + the waiter sequence, so we need the futex word to reliably notify waiters + that they should not attempt to block anymore because they have been + already signaled in the meantime. While an ABA issue on a 32b value will + be rare, ignoring it when we are aware of it is not the right thing to do + either. 
+ + Therefore, we use a 64b counter to represent the waiter sequence (on + architectures which only support 32b atomics, we use a few bits less). + To deal with the blocking using futexes, we maintain two groups of waiters: + * Group G1 consists of waiters that are all eligible to consume signals; + incoming signals will always signal waiters in this group until all + waiters in G1 have been signaled. + * Group G2 consists of waiters that arrive when a G1 is present and still + contains waiters that have not been signaled. When all waiters in G1 + are signaled and a new signal arrives, the new signal will convert G2 + into the new G1 and create a new G2 for future waiters. + + We cannot allocate new memory because of process-shared condvars, so we + have just two slots of groups that change their role between G1 and G2. + Each has a separate futex word, a number of signals available for + consumption, a size (number of waiters in the group that have not been + signaled), and a reference count. + + The group reference count is used to maintain the number of waiters that + are using the group's futex. Before a group can change its role, the + reference count must show that no waiters are using the futex anymore; this + prevents ABA issues on the futex word. + + To represent which intervals in the waiter sequence the groups cover (and + thus also which group slot contains G1 or G2), we use a 64b counter to + designate the start position of G1 (inclusive), and a single bit in the + waiter sequence counter to represent which group slot currently contains + G2. This allows us to switch group roles atomically wrt. waiters obtaining + a position in the waiter sequence. The G1 start position allows waiters to + figure out whether they are in a group that has already been completely + signaled (i.e., if the current G1 starts at a later position than the + waiter's position). Waiters cannot determine whether they are currently + in G2 or G1 -- but they do not have to because all they are interested in + is whether there are available signals, and they always start in G2 (whose + group slot they know because of the bit in the waiter sequence). Signalers + will simply fill the right group until it is completely signaled and can + be closed (they do not switch group roles until they really have to, to + decrease the likelihood of having to wait for waiters still holding a + reference on the now-closed G1). + + Signalers maintain the initial size of G1 to be able to determine where + G2 starts (G2 is always open-ended until it becomes G1). They track the + remaining size of a group; when waiters cancel waiting (due to PThreads + cancellation or timeouts), they will decrease this remaining size as well. + + To implement condvar destruction requirements (i.e., that + pthread_cond_destroy can be called as soon as all waiters have been + signaled), waiters increment a reference count before starting to wait and + decrement it after they stopped waiting but right before they acquire the + mutex associated with the condvar. + + pthread_cond_t thus consists of the following (bits that are used for + flags and are not part of the primary value of each field but necessary + to make some things atomic or because there was no space for them + elsewhere in the data structure): + + __wseq: Waiter sequence counter + * LSB is index of current G2. + * Waiters fetch-add while having acquired the mutex associated with the + condvar. Signalers load it and fetch-xor it concurrently.
+ __g1_start: Starting position of G1 (inclusive) + * LSB is index of current G2. + * Modified by signalers while having acquired the condvar-internal lock + and observed concurrently by waiters. + __g1_orig_size: Initial size of G1 + * The two least-significant bits represent the condvar-internal lock. + * Only accessed while having acquired the condvar-internal lock. + __wrefs: Waiter reference counter. + * Bit 2 is true if waiters should run futex_wake when they remove the + last reference. pthread_cond_destroy uses this as futex word. + * Bit 1 is the clock ID (0 == CLOCK_REALTIME, 1 == CLOCK_MONOTONIC). + * Bit 0 is true iff this is a process-shared condvar. + * Simple reference count used by both waiters and pthread_cond_destroy. + (If the format of __wrefs is changed, update nptl_lock_constants.pysym + and the pretty printers.) + For each of the two groups, we have: + __g_refs: Futex waiter reference count. + * LSB is true if waiters should run futex_wake when they remove the + last reference. + * Reference count used by waiters concurrently with signalers that have + acquired the condvar-internal lock. + __g_signals: The number of signals that can still be consumed. + * Used as a futex word by waiters. Used concurrently by waiters and + signalers. + * LSB is true iff this group has been completely signaled (i.e., it is + closed). + __g_size: Waiters remaining in this group (i.e., which have not been + signaled yet). + * Accessed by signalers and waiters that cancel waiting (both do so only + when having acquired the condvar-internal lock). + * The size of G2 is always zero because it cannot be determined until + the group becomes G1. + * Although this is of unsigned type, we rely on using unsigned overflow + rules to make this hold effectively negative values too (in + particular, when waiters in G2 cancel waiting). + + A PTHREAD_COND_INITIALIZER condvar has all fields set to zero, which yields + a condvar that has G2 starting at position 0 and a G1 that is closed. + + Because waiters do not claim ownership of a group right when obtaining a + position in __wseq but only reference count the group when using futexes + to block, it can happen that a group gets closed before a waiter can + increment the reference count. Therefore, waiters have to check whether + their group is already closed using __g1_start. They also have to perform + this check when spinning while trying to grab a signal from __g_signals. + Note that for these checks, using relaxed MO to load __g1_start is + sufficient because if a waiter can see a sufficiently large value, it could + have also consumed a signal in the waiter's group. + + Waiters try to grab a signal from __g_signals without holding a reference + count, which can lead to stealing a signal from a more recent group after + their own group was already closed. They cannot always detect whether they + in fact did because they do not know when they stole, but they can + conservatively add a signal back to the group they stole from; if they + did so unnecessarily, all that happens is a spurious wake-up. To make this + even less likely, __g1_start contains the index of the current G2 too, + which allows waiters to check if there is aliasing on the group slots; if + there wasn't, they didn't steal from the current G1, which means that the + G1 they stole from must have been already closed and they do not need to + fix anything.
+ + It is essential that the last field in pthread_cond_t is __g_signals[1]: + The previous condvar used a pointer-sized field in pthread_cond_t, so a + PTHREAD_COND_INITIALIZER from that condvar implementation might only + initialize 4 bytes to zero instead of the 8 bytes we need (i.e., 44 bytes + in total instead of the 48 we need). __g_signals[1] is not accessed before + the first group switch (G2 starts at index 0), which will set its value to + zero after a harmless fetch-or whose return value is ignored. This + effectively completes initialization. + + + Limitations: + * This condvar isn't designed to allow for more than + __PTHREAD_COND_MAX_GROUP_SIZE * (1 << 31) calls to __pthread_cond_wait. + * More than __PTHREAD_COND_MAX_GROUP_SIZE concurrent waiters are not + supported. + * Beyond what is allowed as errors by POSIX or documented, we can also + return the following errors: + * EPERM if MUTEX is a recursive mutex and the caller doesn't own it. + * EOWNERDEAD or ENOTRECOVERABLE when using robust mutexes. Unlike + for other errors, this can happen when we re-acquire the mutex; this + isn't allowed by POSIX (which requires all errors to virtually happen + before we release the mutex or change the condvar state), but there's + nothing we can do really. + * When using PTHREAD_MUTEX_PP_* mutexes, we can also return all errors + returned by __pthread_tpp_change_priority. We will already have + released the mutex in such cases, so the caller cannot expect to own + MUTEX. + + Other notes: + * Instead of the normal mutex unlock / lock functions, we use + __pthread_mutex_unlock_usercnt(m, 0) / __pthread_mutex_cond_lock(m) + because those will not change the mutex-internal users count, so that it + can be detected when a condvar is still associated with a particular + mutex because there is a waiter blocked on this condvar using this mutex. +*/ +static __always_inline int +__pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, + const struct timespec *abstime) { - struct _pthread_cleanup_buffer buffer; - struct _condvar_cleanup_buffer cbuffer; + const int maxspin = 0; int err; - int pshared = (cond->__data.__mutex == (void *) ~0l) - ? LLL_SHARED : LLL_PRIVATE; - -#if (defined lll_futex_wait_requeue_pi \ - && defined __ASSUME_REQUEUE_PI) - int pi_flag = 0; -#endif + int result = 0; LIBC_PROBE (cond_wait, 2, cond, mutex); - /* Make sure we are alone. */ - lll_lock (cond->__data.__lock, pshared); - - /* Now we can release the mutex. */ + /* Acquire a position (SEQ) in the waiter sequence (WSEQ). We use an + atomic operation because signals and broadcasts may update the group + switch without acquiring the mutex. We do not need release MO here + because we do not need to establish any happens-before relation with + signalers (see __pthread_cond_signal); modification order alone + establishes a total order of waiters/signals. We do need acquire MO + to synchronize with group reinitialization in + __condvar_quiesce_and_switch_g1. */ + uint64_t wseq = __condvar_fetch_add_wseq_acquire (cond, 2); + /* Find our group's index. We always go into what was G2 when we acquired + our position. */ + unsigned int g = wseq & 1; + uint64_t seq = wseq >> 1; + + /* Increase the waiter reference count. Relaxed MO is sufficient because + we only need to synchronize when decrementing the reference count. 
*/ + unsigned int flags = atomic_fetch_add_relaxed (&cond->__data.__wrefs, 8); + int private = __condvar_get_private (flags); + + /* Now that we are registered as a waiter, we can release the mutex. + Waiting on the condvar must be atomic with releasing the mutex, so if + the mutex is used to establish a happens-before relation with any + signaler, the waiter must be visible to the latter; thus, we release the + mutex after registering as waiter. + If releasing the mutex fails, we just cancel our registration as a + waiter and confirm that we have woken up. */ err = __pthread_mutex_unlock_usercnt (mutex, 0); - if (__glibc_unlikely (err)) + if (__glibc_unlikely (err != 0)) { - lll_unlock (cond->__data.__lock, pshared); + __condvar_cancel_waiting (cond, seq, g, private); + __condvar_confirm_wakeup (cond, private); return err; } - /* We have one new user of the condvar. */ - ++cond->__data.__total_seq; - ++cond->__data.__futex; - cond->__data.__nwaiters += 1 << COND_NWAITERS_SHIFT; - - /* Remember the mutex we are using here. If there is already a - different address store this is a bad user bug. Do not store - anything for pshared condvars. */ - if (cond->__data.__mutex != (void *) ~0l) - cond->__data.__mutex = mutex; - - /* Prepare structure passed to cancellation handler. */ - cbuffer.cond = cond; - cbuffer.mutex = mutex; - - /* Before we block we enable cancellation. Therefore we have to - install a cancellation handler. */ - __pthread_cleanup_push (&buffer, __condvar_cleanup, &cbuffer); - - /* The current values of the wakeup counter. The "woken" counter - must exceed this value. */ - unsigned long long int val; - unsigned long long int seq; - val = seq = cond->__data.__wakeup_seq; - /* Remember the broadcast counter. */ - cbuffer.bc_seq = cond->__data.__broadcast_seq; + /* Now wait until a signal is available in our group or it is closed. + Acquire MO so that if we observe a value of zero written after group + switching in __condvar_quiesce_and_switch_g1, we synchronize with that + store and will see the prior update of __g1_start done while switching + groups too. */ + unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g); do { - unsigned int futex_val = cond->__data.__futex; - /* Prepare to wait. Release the condvar futex. */ - lll_unlock (cond->__data.__lock, pshared); - - /* Enable asynchronous cancellation. Required by the standard. */ - cbuffer.oldtype = __pthread_enable_asynccancel (); - -#if (defined lll_futex_wait_requeue_pi \ - && defined __ASSUME_REQUEUE_PI) - /* If pi_flag remained 1 then it means that we had the lock and the mutex - but a spurious waker raced ahead of us. Give back the mutex before - going into wait again. */ - if (pi_flag) + while (1) { - __pthread_mutex_cond_lock_adjust (mutex); - __pthread_mutex_unlock_usercnt (mutex, 0); + /* Spin-wait first. + Note that spinning first without checking whether a timeout + passed might lead to what looks like a spurious wake-up even + though we should return ETIMEDOUT (e.g., if the caller provides + an absolute timeout that is clearly in the past). However, + (1) spurious wake-ups are allowed, (2) it seems unlikely that a + user will (ab)use pthread_cond_wait as a check for whether a + point in time is in the past, and (3) spinning first without + having to compare against the current time seems to be the right + choice from a performance perspective for most use cases. 
*/ + unsigned int spin = maxspin; + while (signals == 0 && spin > 0) + { + /* Check that we are not spinning on a group that's already + closed. */ + if (seq < (__condvar_load_g1_start_relaxed (cond) >> 1)) + goto done; + + /* TODO Back off. */ + + /* Reload signals. See above for MO. */ + signals = atomic_load_acquire (cond->__data.__g_signals + g); + spin--; + } + + /* If our group will be closed as indicated by the flag on signals, + don't bother grabbing a signal. */ + if (signals & 1) + goto done; + + /* If there is an available signal, don't block. */ + if (signals != 0) + break; + + /* No signals available after spinning, so prepare to block. + We first acquire a group reference and use acquire MO for that so + that we synchronize with the dummy read-modify-write in + __condvar_quiesce_and_switch_g1 if we read from that. In turn, + in this case this will make us see the closed flag on __g_signals + that designates a concurrent attempt to reuse the group's slot. + We use acquire MO for the __g_signals check to make the + __g1_start check work (see spinning above). + Note that the group reference acquisition will not mask the + release MO when decrementing the reference count because we use + an atomic read-modify-write operation and thus extend the release + sequence. */ + atomic_fetch_add_acquire (cond->__data.__g_refs + g, 2); + if (((atomic_load_acquire (cond->__data.__g_signals + g) & 1) != 0) + || (seq < (__condvar_load_g1_start_relaxed (cond) >> 1))) + { + /* Our group is closed. Wake up any signalers that might be + waiting. */ + __condvar_dec_grefs (cond, g, private); + goto done; + } + + // Now block. + struct _pthread_cleanup_buffer buffer; + struct _condvar_cleanup_buffer cbuffer; + cbuffer.wseq = wseq; + cbuffer.cond = cond; + cbuffer.mutex = mutex; + cbuffer.private = private; + __pthread_cleanup_push (&buffer, __condvar_cleanup_waiting, &cbuffer); + + if (abstime == NULL) + { + /* Block without a timeout. */ + err = futex_wait_cancelable ( + cond->__data.__g_signals + g, 0, private); + } + else + { + /* Block, but with a timeout. + Work around the fact that the kernel rejects negative timeout + values despite them being valid. */ + if (__glibc_unlikely (abstime->tv_sec < 0)) + err = ETIMEDOUT; + + else if ((flags & __PTHREAD_COND_CLOCK_MONOTONIC_MASK) != 0) + { + /* CLOCK_MONOTONIC is requested. */ + struct timespec rt; + if (__clock_gettime (CLOCK_MONOTONIC, &rt) != 0) + __libc_fatal ("clock_gettime does not support " + "CLOCK_MONOTONIC"); + /* Convert the absolute timeout value to a relative + timeout. */ + rt.tv_sec = abstime->tv_sec - rt.tv_sec; + rt.tv_nsec = abstime->tv_nsec - rt.tv_nsec; + if (rt.tv_nsec < 0) + { + rt.tv_nsec += 1000000000; + --rt.tv_sec; + } + /* Did we already time out? */ + if (__glibc_unlikely (rt.tv_sec < 0)) + err = ETIMEDOUT; + else + err = futex_reltimed_wait_cancelable + (cond->__data.__g_signals + g, 0, &rt, private); + } + else + { + /* Use CLOCK_REALTIME. */ + err = futex_abstimed_wait_cancelable + (cond->__data.__g_signals + g, 0, abstime, private); + } + } + + __pthread_cleanup_pop (&buffer, 0); + + if (__glibc_unlikely (err == ETIMEDOUT)) + { + __condvar_dec_grefs (cond, g, private); + /* If we timed out, we effectively cancel waiting. Note that + we have decremented __g_refs before cancellation, so that a + deadlock between waiting for quiescence of our group in + __condvar_quiesce_and_switch_g1 and us trying to acquire + the lock during cancellation is not possible. 
*/ + __condvar_cancel_waiting (cond, seq, g, private); + result = ETIMEDOUT; + goto done; + } + else + __condvar_dec_grefs (cond, g, private); + + /* Reload signals. See above for MO. */ + signals = atomic_load_acquire (cond->__data.__g_signals + g); } - pi_flag = USE_REQUEUE_PI (mutex); - if (pi_flag) + } + /* Try to grab a signal. Use acquire MO so that we see an up-to-date value + of __g1_start below (see spinning above for a similar case). In + particular, if we steal from a more recent group, we will also see a + more recent __g1_start below. */ + while (!atomic_compare_exchange_weak_acquire (cond->__data.__g_signals + g, + &signals, signals - 2)); + + /* We consumed a signal but we could have consumed from a more recent group + that aliased with ours due to being in the same group slot. If this + might be the case, our group must be closed as visible through + __g1_start. */ + uint64_t g1_start = __condvar_load_g1_start_relaxed (cond); + if (seq < (g1_start >> 1)) + { + /* We potentially stole a signal from a more recent group but we do not + know which group we really consumed from. + We do not care about groups older than current G1 because they are + closed; we could have stolen from these, but then we just add a + spurious wake-up for the current groups. + We will never steal a signal from current G2 that was really intended + for G2 because G2 never receives signals (until it becomes G1). We + could have stolen a signal from G2 that was conservatively added by a + previous waiter that also thought it stole a signal -- but given that + that signal was added unnecessarily, it's not a problem if we steal + it. + Thus, the remaining case is that we could have stolen from the current + G1, where "current" means the __g1_start value we observed. However, + if the current G1 does not have the same slot index as we do, we did + not steal from it and do not need to undo that. This is the reason + for putting a bit with G2's index into __g1_start as well. */ + if (((g1_start & 1) ^ 1) == g) { + /* We have to conservatively undo our potential mistake of stealing + a signal. We can stop trying to do that when the current G1 + changes because other spinning waiters will notice this too and + __condvar_quiesce_and_switch_g1 has checked that there are no + futex waiters anymore before switching G1. + Relaxed MO is fine for the __g1_start load because we need to + merely be able to observe this fact and not have to observe + something else as well. + ??? Would it help to spin for a little while to see whether the + current G1 gets closed? This might be worthwhile if the group is + small or close to being closed. */ + unsigned int s = atomic_load_relaxed (cond->__data.__g_signals + g); + while (__condvar_load_g1_start_relaxed (cond) == g1_start) + { + /* Try to add a signal. We don't need to acquire the lock + because at worst we can cause a spurious wake-up. If the + group is in the process of being closed (LSB is true), this + has an effect similar to us adding a signal. */ + if (((s & 1) != 0) + || atomic_compare_exchange_weak_relaxed + (cond->__data.__g_signals + g, &s, s + 2)) + { + /* If we added a signal, we also need to add a wake-up on + the futex.
We also need to do that if we skipped adding + a signal because the group is being closed: while + __condvar_quiesce_and_switch_g1 could have closed + the group, it might still be waiting for futex waiters to + leave (and one of those waiters might be the one we stole + the signal from, which would cause it to block using the + futex). */ + futex_wake (cond->__data.__g_signals + g, 1, private); + break; + } + /* TODO Back off. */ + } } - else -#endif - /* Wait until woken by signal or broadcast. */ - lll_futex_wait (&cond->__data.__futex, futex_val, pshared); - - /* Disable asynchronous cancellation. */ - __pthread_disable_asynccancel (cbuffer.oldtype); - - /* We are going to look at shared data again, so get the lock. */ - lll_lock (cond->__data.__lock, pshared); - - /* If a broadcast happened, we are done. */ - if (cbuffer.bc_seq != cond->__data.__broadcast_seq) - goto bc_out; - - /* Check whether we are eligible for wakeup. */ - val = cond->__data.__wakeup_seq; } - while (val == seq || cond->__data.__woken_seq == val); - /* Another thread woken up. */ - ++cond->__data.__woken_seq; + done: - bc_out: + /* Confirm that we have been woken. We do that before acquiring the mutex + to allow for execution of pthread_cond_destroy while having acquired the + mutex. */ + __condvar_confirm_wakeup (cond, private); - cond->__data.__nwaiters -= 1 << COND_NWAITERS_SHIFT; - - /* If pthread_cond_destroy was called on this varaible already, - notify the pthread_cond_destroy caller all waiters have left - and it can be successfully destroyed. */ - if (cond->__data.__total_seq == -1ULL - && cond->__data.__nwaiters < (1 << COND_NWAITERS_SHIFT)) - lll_futex_wake (&cond->__data.__nwaiters, 1, pshared); + /* Woken up; now re-acquire the mutex. If this doesn't fail, return RESULT, + which is set to ETIMEDOUT if a timeout occurred, or zero otherwise. */ + err = __pthread_mutex_cond_lock (mutex); + /* XXX Abort on errors that are disallowed by POSIX? */ + return (err != 0) ? err : result; +} - /* We are done with the condvar. */ - lll_unlock (cond->__data.__lock, pshared); - /* The cancellation handling is back to normal, remove the handler. */ - __pthread_cleanup_pop (&buffer, 0); +/* See __pthread_cond_wait_common. */ +int +__pthread_cond_wait (pthread_cond_t *cond, pthread_mutex_t *mutex) +{ + return __pthread_cond_wait_common (cond, mutex, NULL); +} - /* Get the mutex before returning. Not needed for PI. */ -#if (defined lll_futex_wait_requeue_pi \ - && defined __ASSUME_REQUEUE_PI) - if (pi_flag) - { - __pthread_mutex_cond_lock_adjust (mutex); - return 0; - } - else -#endif - return __pthread_mutex_cond_lock (mutex); +/* See __pthread_cond_wait_common. */ +int +__pthread_cond_timedwait (pthread_cond_t *cond, pthread_mutex_t *mutex, + const struct timespec *abstime) +{ + /* Check parameter validity. This should also tell the compiler that + it can assume that abstime is not NULL. */ + if (abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000) + return EINVAL; + return __pthread_cond_wait_common (cond, mutex, abstime); } versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait, GLIBC_2_3_2); +versioned_symbol (libpthread, __pthread_cond_timedwait, pthread_cond_timedwait, + GLIBC_2_3_2);
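
The design comment in the new code explains how a waiter derives its group and sequence position from the __wseq ticket and how it can tell that its group has already been closed. The following standalone C sketch (not part of the patch; the helper names are invented for illustration) shows just that arithmetic: the LSB of the ticket selects the slot currently acting as G2, the remaining bits are the position, and a group is closed once the shifted __g1_start value has moved past that position.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Slot that currently acts as G2 for this ticket.  */
static unsigned int
waiter_group (uint64_t wseq)
{
  return (unsigned int) (wseq & 1);
}

/* Position in the waiter sequence.  */
static uint64_t
waiter_position (uint64_t wseq)
{
  return wseq >> 1;
}

/* True iff a group is already closed for a waiter at position SEQ.
   G1_START carries the G2 index in its LSB, so it is shifted as well.  */
static bool
group_closed_for (uint64_t seq, uint64_t g1_start)
{
  return seq < (g1_start >> 1);
}

int
main (void)
{
  uint64_t wseq = 7;       /* Example ticket: position 3, slot 1.  */
  uint64_t g1_start = 9;   /* Hypothetical value: G1 starts at position 4.  */
  printf ("group=%u position=%llu closed=%d\n",
          waiter_group (wseq),
          (unsigned long long) waiter_position (wseq),
          group_closed_for (waiter_position (wseq), g1_start));
  return 0;
}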
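
The __wrefs field documented above packs a waiter count and three flag bits into one 32-bit word: the count in bits 3 and up (hence the fetch-add of 8 per waiter), the destruction wake-up request in bit 2, the clock ID in bit 1, and the process-shared flag in bit 0. A minimal sketch of that layout, again with hypothetical names and not taken from the glibc sources:

#include <stdbool.h>
#include <stdio.h>

struct wrefs_view
{
  unsigned int refcount;    /* Waiter count, stored shifted left by 3.  */
  bool destroy_wake_flag;   /* Bit 2: wake pthread_cond_destroy waiters.  */
  bool clock_monotonic;     /* Bit 1: 0 == CLOCK_REALTIME, 1 == CLOCK_MONOTONIC.  */
  bool process_shared;      /* Bit 0: process-shared condvar.  */
};

static struct wrefs_view
decode_wrefs (unsigned int wrefs)
{
  struct wrefs_view v;
  v.refcount = wrefs >> 3;
  v.destroy_wake_flag = (wrefs & 4) != 0;
  v.clock_monotonic = (wrefs & 2) != 0;
  v.process_shared = (wrefs & 1) != 0;
  return v;
}

int
main (void)
{
  /* One waiter, CLOCK_MONOTONIC, process-private: (1 << 3) | 2.  */
  struct wrefs_view v = decode_wrefs ((1 << 3) | 2);
  printf ("refs=%u wake=%d monotonic=%d pshared=%d\n",
          v.refcount, v.destroy_wake_flag, v.clock_monotonic,
          v.process_shared);
  return 0;
}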
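
In the wait path, a signal is consumed by a weak compare-and-exchange on __g_signals that subtracts 2, because the LSB of that word is reserved as the group-closed flag. The loop below is a stand-in using C11 atomics (glibc itself uses its internal atomic_* wrappers and additionally re-checks __g1_start); it only illustrates the count-in-steps-of-2 convention and the closed-flag check.

#include <stdatomic.h>
#include <stdbool.h>

/* Try to consume one signal from a __g_signals-style futex word.  The LSB
   is the "group closed" flag, so the signal count lives in the remaining
   bits and one signal corresponds to a step of 2.  Returns true iff a
   signal was consumed.  */
static bool
try_consume_signal (_Atomic unsigned int *g_signals)
{
  unsigned int signals
    = atomic_load_explicit (g_signals, memory_order_acquire);
  for (;;)
    {
      if ((signals & 1) != 0)
        return false;   /* Group is closed; nothing left to grab.  */
      if (signals == 0)
        return false;   /* No signal available; the caller would block.  */
      if (atomic_compare_exchange_weak_explicit (g_signals, &signals,
                                                 signals - 2,
                                                 memory_order_acquire,
                                                 memory_order_relaxed))
        return true;    /* Successfully took one signal.  */
    }
}

int
main (void)
{
  _Atomic unsigned int g_signals = 4;   /* Two signals available, not closed.  */
  return try_consume_signal (&g_signals) ? 0 : 1;
}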
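
For CLOCK_MONOTONIC timeouts, __pthread_cond_wait_common converts the caller's absolute timeout into a relative one before calling futex_reltimed_wait_cancelable. The helper below mirrors that arithmetic as a self-contained sketch (it is not the glibc-internal code; the function name is invented), including the nanosecond borrow and the already-expired check that maps to ETIMEDOUT.

#include <stdbool.h>
#include <time.h>

/* Convert an absolute CLOCK_MONOTONIC timeout into a relative one, as the
   timed wait does before blocking on the futex.  Returns false if the
   timeout already lies in the past (the caller would report ETIMEDOUT).  */
static bool
abstime_to_reltime (const struct timespec *abstime, struct timespec *rel)
{
  struct timespec now;
  if (clock_gettime (CLOCK_MONOTONIC, &now) != 0)
    return false;

  rel->tv_sec = abstime->tv_sec - now.tv_sec;
  rel->tv_nsec = abstime->tv_nsec - now.tv_nsec;
  if (rel->tv_nsec < 0)
    {
      /* Borrow one second so tv_nsec is back in [0, 1000000000).  */
      rel->tv_nsec += 1000000000;
      --rel->tv_sec;
    }
  return rel->tv_sec >= 0;
}

int
main (void)
{
  struct timespec abstime, rel;
  clock_gettime (CLOCK_MONOTONIC, &abstime);
  abstime.tv_sec += 1;   /* One second from now.  */
  return abstime_to_reltime (&abstime, &rel) ? 0 : 1;
}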