From 7f507ee17aee720fa423fa38502bc3caa0dd03d7 Mon Sep 17 00:00:00 2001
From: Andrew Hunter <ahh@google.com>
Date: Fri, 3 Jan 2014 11:22:26 -0800
Subject: Async-signal safe TLS.

ChangeLog:

2014-01-03  Andrew Hunter  <ahh@google.com>

	* elf/dl-open.c (dl_open_worker): New comment.
	* elf/dl-reloc.c (_dl_try_allocate_static_tls): Use
	atomic_compare_and_exchange_bool_acq.
	(_dl_allocate_static_tls): Block signals.
	* elf/dl-tls.c (allocate_and_init): Return void.
	(_dl_update_slotinfo): Block signals, use atomic update.

nptl/ChangeLog:

2014-01-03  Andrew Hunter  <ahh@google.com>

	* nptl/Makefile (tst-tls7): New test.
	* nptl/tst-tls7.c: New file.
	* nptl/tst-tls7mod.c: New file.
	* nptl/allocatestack.c (init_one_static_tls): Use atomic barrier.
---
 elf/dl-open.c  |   5 ++-
 elf/dl-reloc.c |  48 ++++++++++++++++++++++++---
 elf/dl-tls.c   | 100 ++++++++++++++++++++++++++++++++++++++-------------------
 3 files changed, 114 insertions(+), 39 deletions(-)

diff --git a/elf/dl-open.c b/elf/dl-open.c
index a9ca6b3b44..ea222d03e1 100644
--- a/elf/dl-open.c
+++ b/elf/dl-open.c
@@ -548,7 +548,10 @@ cannot load any more object with static TLS"));
 	     generation of the DSO we are allocating data for.  */
 	  _dl_update_slotinfo (imap->l_tls_modid);
 #endif
-
+	  /* We do this iteration under a signal mask in dl-reloc; why not
+	     here?  Because these symbols are new and dlopen hasn't
+	     returned yet.  So we can't possibly be racing with a TLS
+	     access to them from another thread.  */
 	  GL(dl_init_static_tls) (imap);
 	  assert (imap->l_need_tls_init == 0);
 	}
diff --git a/elf/dl-reloc.c b/elf/dl-reloc.c
index 1f66fccee2..81ee47eb62 100644
--- a/elf/dl-reloc.c
+++ b/elf/dl-reloc.c
@@ -16,8 +16,10 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
+#include <atomic.h>
 #include <errno.h>
 #include <libintl.h>
+#include <signal.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <ldsodefs.h>
@@ -70,8 +72,6 @@ _dl_try_allocate_static_tls (struct link_map *map)
 
       size_t offset = GL(dl_tls_static_used) + (freebytes - n * map->l_tls_align
 						- map->l_tls_firstbyte_offset);
 
-      map->l_tls_offset = GL(dl_tls_static_used) = offset;
-
 #elif TLS_DTV_AT_TP
       /* dl_tls_static_used includes the TCB at the beginning.  */
       size_t offset = (((GL(dl_tls_static_used)
@@ -83,7 +83,36 @@ _dl_try_allocate_static_tls (struct link_map *map)
       if (used > GL(dl_tls_static_size))
 	goto fail;
 
-      map->l_tls_offset = offset;
+#else
+# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
+#endif
+  /* We've computed the new value we want, now try to install it.  */
+  ptrdiff_t val;
+  if ((val = map->l_tls_offset) == NO_TLS_OFFSET)
+    {
+      /* l_tls_offset starts out at NO_TLS_OFFSET, and all attempts to
+	 change it go from NO_TLS_OFFSET to some other value.  We use
+	 compare_and_exchange to ensure only one attempt succeeds.  We
+	 don't actually need any memory ordering here, but _acq is the
+	 weakest available.  */
+      (void) atomic_compare_and_exchange_bool_acq (&map->l_tls_offset,
+						   offset,
+						   NO_TLS_OFFSET);
+      val = map->l_tls_offset;
+      assert (val != NO_TLS_OFFSET);
+    }
+  if (val != offset)
+    {
+      /* We'd like to set a static offset for this section, but another
+	 thread has already used a dynamic TLS block for it.  Since we can
+	 only use static offsets if everyone does (and it's not practical
+	 to move that thread's dynamic block), we have to fail.  */
+      goto fail;
+    }
+  /* We installed the value; now update the globals.  */
+#if TLS_TCB_AT_TP
+  GL(dl_tls_static_used) = offset;
+#elif TLS_DTV_AT_TP
   map->l_tls_firstbyte_offset = GL(dl_tls_static_used);
   GL(dl_tls_static_used) = used;
 #else
@@ -114,8 +143,17 @@ void
 internal_function __attribute_noinline__
 _dl_allocate_static_tls (struct link_map *map)
 {
-  if (map->l_tls_offset == FORCED_DYNAMIC_TLS_OFFSET
-      || _dl_try_allocate_static_tls (map))
+  /* We wrap this in a signal mask because it has to iterate all threads
+     (including this one) and update this map's TLS entry.  A signal handler
+     accessing TLS would try to do the same update and break.  */
+  sigset_t old;
+  _dl_mask_all_signals (&old);
+  int err = -1;
+  if (map->l_tls_offset != FORCED_DYNAMIC_TLS_OFFSET)
+    err = _dl_try_allocate_static_tls (map);
+
+  _dl_unmask_signals (&old);
+  if (err != 0)
     {
       _dl_signal_error (0, map->l_name, NULL, N_("\
 cannot allocate memory in static TLS block"));
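
Note: _dl_try_allocate_static_tls above and tls_get_addr_tail below rely
on the same publish-once idiom: l_tls_offset only ever moves from
NO_TLS_OFFSET to one final value, each contender tries a single
compare-and-exchange, and whoever loses learns the winning value by
re-reading.  A minimal standalone sketch of that idiom, using GCC/Clang
__atomic builtins in place of glibc's internal
atomic_compare_and_exchange_bool_acq (slot, UNSET, and try_claim are
illustrative names, not glibc APIs):

    #include <assert.h>
    #include <stdint.h>

    #define UNSET INT64_MIN	/* plays the role of NO_TLS_OFFSET */

    static int64_t slot = UNSET;

    /* Try to install PROPOSAL; return whichever value actually won,
       ours or a competing thread's.  */
    static int64_t
    try_claim (int64_t proposal)
    {
      int64_t expected = UNSET;
      /* At most one CAS can succeed; losers simply fall through.
         Acquire ordering mirrors the patch's use of _acq as the
         weakest ordering available.  */
      __atomic_compare_exchange_n (&slot, &expected, proposal,
                                   0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE);
      int64_t val = __atomic_load_n (&slot, __ATOMIC_ACQUIRE);
      assert (val != UNSET);
      return val;
    }

A caller compares the returned value with its own proposal; a mismatch is
the "someone else already decided" case, which the patch turns into goto
fail in _dl_try_allocate_static_tls and into the wait loop in
tls_get_addr_tail.
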
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
index c1802e7d4e..50ec876eae 100644
--- a/elf/dl-tls.c
+++ b/elf/dl-tls.c
@@ -17,6 +17,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <assert.h>
+#include <atomic.h>
 #include <errno.h>
 #include <libintl.h>
 #include <signal.h>
@@ -533,19 +534,21 @@ rtld_hidden_def (_dl_deallocate_tls)
 # endif
 
 
-static void *
-allocate_and_init (struct link_map *map)
+static void
+allocate_and_init (dtv_t *dtv, struct link_map *map)
 {
   void *newp;
 
   newp = __signal_safe_memalign (map->l_tls_align, map->l_tls_blocksize);
   if (newp == NULL)
     oom ();
 
-  /* Initialize the memory.  */
+  /* Initialize the memory.  Since this is our thread's space, we are
+     under a signal mask, and no one has touched this section before,
+     we can safely just overwrite whatever's there.  */
   memset (__mempcpy (newp, map->l_tls_initimage, map->l_tls_initimage_size),
 	  '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
 
-  return newp;
+  dtv->pointer.val = newp;
 }
 
@@ -587,7 +590,15 @@ _dl_update_slotinfo (unsigned long int req_modid)
 	 the entry we need.  */
       size_t new_gen = listp->slotinfo[idx].gen;
       size_t total = 0;
-
+      sigset_t old;
+
+      _dl_mask_all_signals (&old);
+      /* We use the signal mask as a lock against reentrancy here.
+	 Check that a signal taken before the lock didn't already
+	 update us.  */
+      dtv = THREAD_DTV ();
+      if (dtv[0].counter >= listp->slotinfo[idx].gen)
+	goto out;
       /* We have to look through the entire dtv slotinfo list.  */
       listp = GL(dl_tls_dtv_slotinfo_list);
       do
@@ -699,6 +710,8 @@ _dl_update_slotinfo (unsigned long int req_modid)
 
       /* This will be the new maximum generation counter.  */
       dtv[0].counter = new_gen;
+    out:
+      _dl_unmask_signals (&old);
     }
 
   return the_map;
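
Note: _dl_mask_all_signals and _dl_unmask_signals are internal helpers
that this elf-only view of the patch does not define.  The pattern they
enable in _dl_update_slotinfo -- block all signals, then re-check the
guard, because a handler that ran before the mask took effect may already
have done the update -- can be sketched with plain POSIX calls
(pthread_sigmask standing in for the internal helpers; generation and
latest_gen are illustrative names):

    #include <pthread.h>
    #include <signal.h>

    static volatile sig_atomic_t generation;

    static void
    update_generation (sig_atomic_t latest_gen)
    {
      sigset_t all, old;
      sigfillset (&all);
      pthread_sigmask (SIG_BLOCK, &all, &old);

      /* Re-check under the mask: a signal delivered before we blocked
         may already have advanced the counter, exactly like the
         dtv[0].counter check above.  */
      if (generation < latest_gen)
        {
          /* ... perform the update; no handler can interrupt us here ...  */
          generation = latest_gen;
        }

      pthread_sigmask (SIG_SETMASK, &old, NULL);
    }

Masking only excludes reentrancy from the current thread's own signal
handlers; races with other threads are still handled by the atomics.
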
@@ -724,39 +737,60 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
 	  the_map = listp->slotinfo[idx].map;
 	}
-
-    again:
-      /* Make sure that, if a dlopen running in parallel forces the
-	 variable into static storage, we'll wait until the address in the
-	 static TLS block is set up, and use that.  If we're undecided
-	 yet, make sure we make the decision holding the lock as well.  */
-      if (__builtin_expect (the_map->l_tls_offset
-			    != FORCED_DYNAMIC_TLS_OFFSET, 0))
+  sigset_t old;
+  _dl_mask_all_signals (&old);
+
+  /* As with update_slotinfo, we use the sigmask as a check against
+     reentrancy.  */
+  if (dtv[GET_ADDR_MODULE].pointer.val != TLS_DTV_UNALLOCATED)
+    goto out;
+
+  /* Synchronize against a parallel dlopen() forcing this variable
+     into static storage.  If that happens, we have to be more careful
+     about initializing the area, as that dlopen() will be iterating
+     the threads to do so itself.  */
+  ptrdiff_t offset;
+  if ((offset = the_map->l_tls_offset) == NO_TLS_OFFSET)
     {
-      __rtld_lock_lock_recursive (GL(dl_load_lock));
-      if (__builtin_expect (the_map->l_tls_offset == NO_TLS_OFFSET, 1))
-	{
-	  the_map->l_tls_offset = FORCED_DYNAMIC_TLS_OFFSET;
-	  __rtld_lock_unlock_recursive (GL(dl_load_lock));
-	}
-      else
+      /* l_tls_offset starts out at NO_TLS_OFFSET, and all attempts to
+	 change it go from NO_TLS_OFFSET to some other value.  We use
+	 compare_and_exchange to ensure only one attempt succeeds.  We
+	 don't actually need any memory ordering here, but _acq is the
+	 weakest available.  */
+      (void) atomic_compare_and_exchange_bool_acq (&the_map->l_tls_offset,
+						   FORCED_DYNAMIC_TLS_OFFSET,
+						   NO_TLS_OFFSET);
+      offset = the_map->l_tls_offset;
+      assert (offset != NO_TLS_OFFSET);
+    }
+  if (offset == FORCED_DYNAMIC_TLS_OFFSET)
+    {
+      allocate_and_init (&dtv[GET_ADDR_MODULE], the_map);
+    }
+  else
+    {
+      void **pp = &dtv[GET_ADDR_MODULE].pointer.val;
+      while (atomic_forced_read (*pp) == TLS_DTV_UNALLOCATED)
 	{
-	  __rtld_lock_unlock_recursive (GL(dl_load_lock));
-	  if (__builtin_expect (the_map->l_tls_offset
-				!= FORCED_DYNAMIC_TLS_OFFSET, 1))
-	    {
-	      void *p = dtv[GET_ADDR_MODULE].pointer.val;
-	      if (__builtin_expect (p == TLS_DTV_UNALLOCATED, 0))
-		goto again;
-
-	      return (char *) p + GET_ADDR_OFFSET;
-	    }
+	  /* For lack of a better (safe) thing to do, just spin.
+	     Someone else (not us; it's done under a signal mask) set
+	     this map to a static TLS offset, and they'll iterate all
+	     threads to initialize it.  They'll eventually write
+	     to pointer.val, at which point we know they've fully
+	     completed initialization.  */
+	  atomic_delay ();
 	}
+      /* Make sure we've picked up their initialization of the actual
+	 block; this pairs against the write barrier in
+	 init_one_static_tls, guaranteeing that we see their write of
+	 the tls_initimage into the static region.  */
+      atomic_read_barrier ();
     }
 
-  void *p = dtv[GET_ADDR_MODULE].pointer.val = allocate_and_init (the_map);
-  dtv[GET_ADDR_MODULE].pointer.is_static = false;
+out:
+  assert (dtv[GET_ADDR_MODULE].pointer.val != TLS_DTV_UNALLOCATED);
+  _dl_unmask_signals (&old);
 
-  return (char *) p + GET_ADDR_OFFSET;
+  return (char *) dtv[GET_ADDR_MODULE].pointer.val + GET_ADDR_OFFSET;
 }
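
Note: the loser of the compare-and-exchange in tls_get_addr_tail spins on
pointer.val and then executes atomic_read_barrier, which pairs with the
write barrier the nptl ChangeLog adds to init_one_static_tls.  A
standalone sketch of that publish/consume pairing, with GCC/Clang
__atomic builtins standing in for glibc's atomic_forced_read and
atomic_read_barrier (published, publish_block, and wait_for_block are
illustrative names):

    #include <stddef.h>

    static void *published;	/* plays the role of pointer.val */

    /* Writer (the dlopen side): initialize the block, then publish the
       pointer with release semantics so the contents travel with it.  */
    static void
    publish_block (void *block)
    {
      __atomic_thread_fence (__ATOMIC_RELEASE);
      __atomic_store_n (&published, block, __ATOMIC_RELAXED);
    }

    /* Reader (the tls_get_addr_tail side): spin until the pointer shows
       up, then acquire so the block's initialization is visible too.  */
    static void *
    wait_for_block (void)
    {
      void *p;
      /* atomic_forced_read in the patch forces a fresh load on each
         iteration (defeating common-subexpression elimination); a
         relaxed atomic load has the same effect.  */
      while ((p = __atomic_load_n (&published, __ATOMIC_RELAXED)) == NULL)
        ;			/* spin */
      __atomic_thread_fence (__ATOMIC_ACQUIRE);
      return p;
    }

The spin terminates in practice because, as the patch's comment notes,
the initializing thread runs under a signal mask and will complete its
iteration and store to pointer.val.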