Diffstat (limited to 'sysdeps/generic/dl-tls.c')
-rw-r--r--	sysdeps/generic/dl-tls.c	494
1 file changed, 392 insertions, 102 deletions
diff --git a/sysdeps/generic/dl-tls.c b/sysdeps/generic/dl-tls.c
index e42911acd3..d13b0f93fa 100644
--- a/sysdeps/generic/dl-tls.c
+++ b/sysdeps/generic/dl-tls.c
@@ -18,8 +18,12 @@
    02111-1307 USA.  */
 
 #include <assert.h>
+#include <signal.h>
 #include <stdlib.h>
+#include <unistd.h>
+#include <sys/param.h>
+#include <abort-instr.h>
 
 #include <tls.h>
 
 /* We don't need any of this if TLS is not supported.  */
@@ -29,7 +33,31 @@
 #include <ldsodefs.h>
 
 /* Value used for dtv entries for which the allocation is delayed.  */
-# define TLS_DTV_UNALLOCATE	((void *) -1l)
+# define TLS_DTV_UNALLOCATED	((void *) -1l)
+
+
+/* Out-of-memory handler.  */
+static void
+__attribute__ ((__noreturn__))
+oom (void)
+{
+  static const char msg[] = "\
+cannot allocate memory for thread-local data: ABORT\n";
+
+  __libc_write (STDERR_FILENO, msg, sizeof (msg) - 1);
+
+  /* Kill ourself.  */
+  __kill (__getpid (), SIGKILL);
+
+  /* Just in case something goes wrong with the kill.  */
+  while (1)
+    {
+# ifdef ABORT_INSTRUCTION
+      ABORT_INSTRUCTION;
+# endif
+    }
+}
+
 
 size_t
@@ -40,38 +68,49 @@ _dl_next_tls_modid (void)
 
   if (__builtin_expect (GL(dl_tls_dtv_gaps), false))
     {
-      /* XXX If this method proves too costly we can optimize
-	 it to use a constant time method.  But I don't think
-	 it's a problem.  */
-      struct link_map *runp = GL(dl_initimage_list);
-      bool used[GL(dl_tls_max_dtv_idx)];
-
-      assert (runp != NULL);
+      size_t disp = 0;
+      struct dtv_slotinfo_list *runp = GL(dl_tls_dtv_slotinfo_list);
+
+      /* Note that this branch will never be executed during program
+	 start since there are no gaps at that time.  Therefore it
+	 does not matter that the dl_tls_dtv_slotinfo is not allocated
+	 yet when the function is called for the first time.  */
+      result = GL(dl_tls_static_nelem);
+      assert (result < GL(dl_tls_max_dtv_idx));
       do
 	{
-	  assert (runp->l_tls_modid > 0
-		  && runp->l_tls_modid <= GL(dl_tls_max_dtv_idx));
-	  used[runp->l_tls_modid - 1] = true;
-	}
-      while ((runp = runp->l_tls_nextimage) != GL(dl_initimage_list));
+	  while (result - disp < runp->len)
+	    {
+	      if (runp->slotinfo[result - disp].map == NULL)
+		break;
 
-      result = 0;
-      do
-	/* The information about the gaps is pessimistic.  It might be
-	   there are actually none.  */
-	if (result >= GL(dl_tls_max_dtv_idx))
-	  {
-	    /* Now we know there is actually no gap.  Bump the maximum
-	       ID number and remember that there are no gaps.  */
-	    result = ++GL(dl_tls_max_dtv_idx);
-	    GL(dl_tls_dtv_gaps) = false;
+	      ++result;
+	      assert (result <= GL(dl_tls_max_dtv_idx) + 1);
+	    }
+
+	  if (result - disp < runp->len)
 	    break;
-	  }
-      while (used[result++]);
+
+	  disp += runp->len;
+	}
+      while ((runp = runp->next) != NULL);
+
+      if (result >= GL(dl_tls_max_dtv_idx) + 1)
+	{
+	  /* The new index must indeed be exactly one higher than the
+	     previous high.  */
+	  assert (result == GL(dl_tls_max_dtv_idx) + 1);
+
+	  /* There is no gap anymore.  */
+	  GL(dl_tls_dtv_gaps) = false;
+
+	  goto nogaps;
+	}
    }
  else
-    /* No gaps, allocate a new entry.  */
-    result = ++GL(dl_tls_max_dtv_idx);
+    {
+      /* No gaps, allocate a new entry.  */
+    nogaps:
+      result = ++GL(dl_tls_max_dtv_idx);
+    }
 
   return result;
 }
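The gap search above walks the new chunked slotinfo list instead of the old per-link-map flag array. A minimal standalone sketch of that walk — the struct names here are simplified stand-ins for glibc's internal dtv_slotinfo_list types, not code from this patch:

    #include <stddef.h>

    struct slot { void *map; };
    struct chunk { size_t len; struct slot *slotinfo; struct chunk *next; };

    /* Return the first module ID >= start whose slot is unused, or
       max + 1 when there is no gap (the caller then bumps the maximum).
       Mirrors the result/disp walk in _dl_next_tls_modid.  */
    size_t
    find_free_modid (struct chunk *list, size_t start, size_t max)
    {
      size_t result = start;
      size_t disp = 0;      /* Module ID of this chunk's first slot.  */

      for (struct chunk *runp = list; runp != NULL; runp = runp->next)
        {
          /* Skip over used slots inside the current chunk.  */
          while (result - disp < runp->len
                 && runp->slotinfo[result - disp].map != NULL)
            ++result;

          /* Stopped before the end of the chunk: found a gap.  */
          if (result - disp < runp->len)
            return result;

          disp += runp->len;
        }

      return max + 1;
    }

Each chunk covers len consecutive module IDs starting at disp, so result - disp indexes into the current chunk.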
@@ -79,41 +118,39 @@ _dl_next_tls_modid (void)
 
 void
 internal_function
-_dl_determine_tlsoffset (struct link_map *lastp)
+_dl_determine_tlsoffset (void)
 {
-  struct link_map *runp;
-  size_t max_align = 0;
+  struct dtv_slotinfo *slotinfo;
+  size_t max_align = __alignof__ (void *);
   size_t offset;
+  size_t cnt;
 
-  if (lastp == NULL)
-    {
-      /* None of the objects used at startup time uses TLS.  We still
-	 have to allocate the TCB and dtv.  */
-      GL(dl_tls_static_size) = TLS_TCB_SIZE;
-      GL(dl_tls_static_align) = TLS_TCB_ALIGN;
-
-      return;
-    }
+  /* The first element of the dtv slot info list is allocated.  */
+  assert (GL(dl_tls_dtv_slotinfo_list) != NULL);
+  /* There is at this point only one element in the
+     dl_tls_dtv_slotinfo_list list.  */
+  assert (GL(dl_tls_dtv_slotinfo_list)->next == NULL);
 
 # if TLS_TCB_AT_TP
   /* We simply start with zero.  */
   offset = 0;
 
-  runp = lastp->l_tls_nextimage;
-  do
+  slotinfo = GL(dl_tls_dtv_slotinfo_list)->slotinfo;
+  for (cnt = 1; slotinfo[cnt].map != NULL; ++cnt)
     {
-      max_align = MAX (max_align, runp->l_tls_align);
+      assert (cnt < GL(dl_tls_dtv_slotinfo_list)->len);
+
+      max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align);
 
       /* Compute the offset of the next TLS block.  */
-      offset = roundup (offset + runp->l_tls_blocksize, runp->l_tls_align);
+      offset = roundup (offset + slotinfo[cnt].map->l_tls_blocksize,
+			slotinfo[cnt].map->l_tls_align);
 
       /* XXX For some architectures we perhaps should store the
	 negative offset.  */
-      runp->l_tls_offset = offset;
+      slotinfo[cnt].map->l_tls_offset = offset;
     }
-  while ((runp = runp->l_tls_nextimage) != lastp->l_tls_nextimage);
 
-#if 0
   /* The thread descriptor (pointed to by the thread pointer) has its
      own alignment requirement.  Adjust the static TLS size
      and TLS offsets appropriately.  */
@@ -121,34 +158,44 @@ _dl_determine_tlsoffset (struct link_map *lastp)
   // XXX after the first (closest to the TCB) TLS block since this
   // XXX would invalidate the offsets the linker creates for the LE
   // XXX model.
-  if (offset % TLS_TCB_ALIGN != 0)
-    abort ();
-#endif
 
   GL(dl_tls_static_size) = offset + TLS_TCB_SIZE;
 # elif TLS_DTV_AT_TP
-  struct link_map *prevp;
-
-  /* The first block starts right after the TCB.  */
+  /* The TLS blocks start right after the TCB.  */
   offset = TLS_TCB_SIZE;
 
-  max_align = runp->l_tls_align;
-  runp = lastp->l_tls_nextimage;
-  runp->l_tls_offset = offset;
-  prevp = runp;
-  while ((runp = runp->l_tls_nextimage) != firstp)
+  /* The first block starts right after the TCB.  */
+  slotinfo = GL(dl_tls_dtv_slotinfo_list)->slotinfo;
+  if (slotinfo[1].map != NULL)
     {
-      max_align = MAX (max_align, runp->l_tls_align);
+      size_t prev_size;
 
-      /* Compute the offset of the next TLS block.  */
-      offset = roundup (offset + prevp->l_tls_blocksize, runp->l_tls_align);
+      offset = roundup (offset, slotinfo[1].map->l_tls_align);
+      slotinfo[1].map->l_tls_offset = offset;
+      max_align = slotinfo[1].map->l_tls_align;
+      prev_size = slotinfo[1].map->l_tls_blocksize;
 
-      runp->l_tls_offset = offset;
+      for (cnt = 2; slotinfo[cnt].map != NULL; ++cnt)
+	{
+	  assert (cnt < GL(dl_tls_dtv_slotinfo_list)->len);
+
+	  max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align);
+
+	  /* Compute the offset of the next TLS block.  */
+	  offset = roundup (offset + prev_size,
+			    slotinfo[cnt].map->l_tls_align);
+
+	  /* XXX For some architectures we perhaps should store the
+	     negative offset.  */
+	  slotinfo[cnt].map->l_tls_offset = offset;
+
+	  prev_size = slotinfo[cnt].map->l_tls_blocksize;
+	}
 
-      prevp = runp;
+      offset += prev_size;
     }
 
-  GL(dl_tls_static_size) = offset + prevp->l_tls_blocksize;
+  GL(dl_tls_static_size) = offset;
 # else
 #  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
 # endif
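The two layouts differ in where the rounding happens: with TLS_TCB_AT_TP each block ends at an aligned distance below the thread pointer, while with TLS_DTV_AT_TP each block begins at an aligned distance above the TCB. A small self-contained example of the arithmetic, using made-up block sizes and alignments (the TLS_TCB_SIZE of 16 is likewise an assumption, not a value from the patch):

    #include <stdio.h>
    #include <stddef.h>

    /* Same semantics as roundup from <sys/param.h>.  */
    #define ROUNDUP(x, y) ((((x) + (y) - 1) / (y)) * (y))

    int
    main (void)
    {
      size_t size[] = { 24, 4, 64 };    /* l_tls_blocksize, hypothetical.  */
      size_t align[] = { 16, 4, 32 };   /* l_tls_align, hypothetical.  */

      /* TLS_TCB_AT_TP: offsets are distances below the thread pointer,
         so the offset is rounded up after adding the block size.  */
      size_t offset = 0;
      for (int i = 0; i < 3; ++i)
        {
          offset = ROUNDUP (offset + size[i], align[i]);
          printf ("TCB_AT_TP  module %d: tp - %zu\n", i + 1, offset);
        }

      /* TLS_DTV_AT_TP: offsets are distances above the TCB, so the
         offset is aligned before the block is placed.  */
      size_t tcb_size = 16;             /* TLS_TCB_SIZE, hypothetical.  */
      size_t prev_size = 0;
      offset = tcb_size;
      for (int i = 0; i < 3; ++i)
        {
          offset = ROUNDUP (offset + prev_size, align[i]);
          printf ("DTV_AT_TP  module %d: tp + %zu\n", i + 1, offset);
          prev_size = size[i];
        }
      return 0;
    }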
@@ -164,59 +211,100 @@ _dl_allocate_tls (void)
 {
   void *result;
   dtv_t *dtv;
+  size_t dtv_length;
 
   /* Allocate a correctly aligned chunk of memory.  */
   /* XXX For now */
   assert (GL(dl_tls_static_align) <= GL(dl_pagesize));
-#ifdef MAP_ANON
-# define _dl_zerofd (-1)
-#else
-# define _dl_zerofd GL(dl_zerofd)
+# ifdef MAP_ANON
+#  define _dl_zerofd (-1)
+# else
+#  define _dl_zerofd GL(dl_zerofd)
   if ((dl_zerofd) == -1)
     GL(dl_zerofd) = _dl_sysdep_open_zero_fill ();
-# define MAP_ANON 0
-#endif
+#  define MAP_ANON 0
+# endif
   result = __mmap (0, GL(dl_tls_static_size), PROT_READ|PROT_WRITE,
		   MAP_ANON|MAP_PRIVATE, _dl_zerofd, 0);
 
-  dtv = (dtv_t *) malloc ((GL(dl_tls_max_dtv_idx) + 1) * sizeof (dtv_t));
+  /* We allocate a few more elements in the dtv than are needed for the
+     initial set of modules.  This should avoid in most cases expansions
+     of the dtv.  */
+  dtv_length = GL(dl_tls_max_dtv_idx) + DTV_SURPLUS;
+  dtv = (dtv_t *) malloc ((dtv_length + 2) * sizeof (dtv_t));
   if (result != MAP_FAILED && dtv != NULL)
     {
-      struct link_map *runp;
+      struct dtv_slotinfo_list *listp;
+      bool first_block = true;
+      size_t total = 0;
 
 # if TLS_TCB_AT_TP
       /* The TCB follows the TLS blocks.  */
       result = (char *) result + GL(dl_tls_static_size) - TLS_TCB_SIZE;
 # endif
 
-      /* XXX Fill in an correct generation number.  */
-      dtv[0].counter = 0;
-
-      /* Initialize the memory from the initialization image list and clear
-	 the BSS parts.  */
-      if (GL(dl_initimage_list) != NULL)
+      /* This is the initial length of the dtv.  */
+      dtv[0].counter = dtv_length;
+      /* Fill in the generation number.  */
+      dtv[1].counter = GL(dl_tls_generation) = 0;
+      /* Initialize all of the rest of the dtv with zero to indicate
+	 nothing there.  */
+      memset (dtv + 2, '\0', dtv_length * sizeof (dtv_t));
+
+      /* We have to prepare the dtv for all currently loaded modules
+	 using TLS.  For those which are dynamically loaded we add the
+	 values indicating deferred allocation.  */
+      listp = GL(dl_tls_dtv_slotinfo_list);
+      while (1)
	{
-	  runp = GL(dl_initimage_list)->l_tls_nextimage;
-	  do
+	  size_t cnt;
+
+	  for (cnt = first_block ? 1 : 0; cnt < listp->len; ++cnt)
	    {
-	      assert (runp->l_tls_modid > 0);
-	      assert (runp->l_tls_modid <= GL(dl_tls_max_dtv_idx));
+	      struct link_map *map;
+	      void *dest;
+
+	      /* Check for the total number of used slots.  */
+	      if (total + cnt > GL(dl_tls_max_dtv_idx))
+		break;
+
+	      map = listp->slotinfo[cnt].map;
+	      if (map == NULL)
+		/* Unused entry.  */
+		continue;
+
+	      if (map->l_type == lt_loaded)
+		{
+		  /* For dynamically loaded modules we simply store
+		     the value indicating deferred allocation.  */
+		  dtv[1 + map->l_tls_modid].pointer = TLS_DTV_UNALLOCATED;
+		  continue;
+		}
+
+	      assert (map->l_tls_modid == cnt);
+	      assert (map->l_tls_blocksize >= map->l_tls_initimage_size);
 # if TLS_TCB_AT_TP
-	      dtv[runp->l_tls_modid].pointer = result - runp->l_tls_offset;
+	      assert (map->l_tls_offset >= map->l_tls_blocksize);
+	      dest = (char *) result - map->l_tls_offset;
 # elif TLS_DTV_AT_TP
-	      dtv[runp->l_tls_modid].pointer = result + runp->l_tls_offset;
+	      dest = (char *) result + map->l_tls_offset;
 # else
 #  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
 # endif
 
-	      memset (__mempcpy (dtv[runp->l_tls_modid].pointer,
-				 runp->l_tls_initimage,
-				 runp->l_tls_initimage_size),
-		      '\0',
-		      runp->l_tls_blocksize - runp->l_tls_initimage_size);
+	      /* We don't have to clear the BSS part of the TLS block
+		 since mmap is used to allocate the memory which
+		 guarantees it is initialized to zero.  */
+	      dtv[1 + cnt].pointer = memcpy (dest, map->l_tls_initimage,
+					     map->l_tls_initimage_size);
	    }
-	  while ((runp = runp->l_tls_nextimage)
-		 != GL(dl_initimage_list)->l_tls_nextimage);
+
+	  total += cnt;
+	  if (total >= GL(dl_tls_max_dtv_idx))
+	    break;
+
+	  listp = listp->next;
+	  assert (listp != NULL);
+	  first_block = false;
	}
 
       /* Add the dtv to the thread data structures.  */
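The dtv now carries a two-entry header in front of the per-module pointers: slot 0 of the allocation holds the capacity and slot 1 the generation, and the installed pointer is advanced past the capacity slot so a thread reads the capacity at dtv[-1] and the generation at dtv[0]. A sketch of that layout with a simplified dtv_t; the DTV_SURPLUS value here is illustrative, not taken from the patch:

    #include <stdlib.h>
    #include <string.h>

    typedef union { size_t counter; void *pointer; } dtv_t;

    #define DTV_SURPLUS 14              /* Illustrative spare capacity.  */

    /* Build a dtv for modules 1..max_modid; return the pointer a thread
       would see, i.e. advanced past the capacity slot.  */
    dtv_t *
    make_dtv (size_t max_modid, size_t generation)
    {
      size_t length = max_modid + DTV_SURPLUS;
      dtv_t *mem = malloc ((length + 2) * sizeof (dtv_t));
      if (mem == NULL)
        return NULL;

      mem[0].counter = length;      /* Read back as dtv[-1]: capacity.  */
      mem[1].counter = generation;  /* Read back as dtv[0]: generation.  */
      /* dtv[1]..dtv[length]: per-module pointers, initially empty.  */
      memset (mem + 2, '\0', length * sizeof (dtv_t));

      return mem + 1;
    }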
@@ -232,6 +320,7 @@ _dl_allocate_tls (void)
 }
 
 
+# ifdef SHARED
 /* The __tls_get_addr function has two basic forms which differ in the
    arguments.  The IA-64 form takes two parameters, the module ID and
    offset.  The form used, among others, on IA-32 takes a reference to
@@ -239,26 +328,227 @@
    form seems to be more often used (in the moment) so we default to
    it.  Users of the IA-64 form have to provide adequate definitions
    of the following macros.  */
-# ifndef GET_ADDR_ARGS
-#  define GET_ADDR_ARGS tls_index *ti
-# endif
-# ifndef GET_ADDR_MODULE
-#  define GET_ADDR_MODULE ti->ti_module
-# endif
-# ifndef GET_ADDR_OFFSET
-#  define GET_ADDR_OFFSET ti->ti_offset
-# endif
+#  ifndef GET_ADDR_ARGS
+#   define GET_ADDR_ARGS tls_index *ti
+#  endif
+#  ifndef GET_ADDR_MODULE
+#   define GET_ADDR_MODULE ti->ti_module
+#  endif
+#  ifndef GET_ADDR_OFFSET
+#   define GET_ADDR_OFFSET ti->ti_offset
+#  endif
+/* Systems which do not have tls_index also probably have to define
+   DONT_USE_TLS_INDEX.  */
+
+#  ifndef __TLS_GET_ADDR
+#   define __TLS_GET_ADDR __tls_get_addr
+#  endif
+
+
+/* Return the symbol address given the map of the module it is in and
+   the symbol record.  This is used in dl-sym.c.  */
+void *
+internal_function
+_dl_tls_symaddr (struct link_map *map, const ElfW(Sym) *ref)
+{
+# ifndef DONT_USE_TLS_INDEX
+  tls_index tmp =
+    {
+      .ti_module = map->l_tls_modid,
+      .ti_offset = ref->st_value
+    };
+
+  return __TLS_GET_ADDR (&tmp);
+# else
+  return __TLS_GET_ADDR (map->l_tls_modid, ref->st_value);
+# endif
+}
+
+
+static void *
+allocate_and_init (struct link_map *map)
+{
+  void *newp;
+
+  newp = __libc_memalign (map->l_tls_align, map->l_tls_blocksize);
+  if (newp == NULL)
+    oom ();
+
+  /* Initialize the memory.  */
+  memset (__mempcpy (newp, map->l_tls_initimage, map->l_tls_initimage_size),
+	  '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
+
+  return newp;
+}
+
+
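_dl_tls_symaddr above simply packages the module ID and offset in whichever form the target's __tls_get_addr expects. A toy model of the two calling conventions, with a fake dtv and hypothetical function names standing in for the real thread data and entry points:

    #include <stddef.h>

    typedef union { size_t counter; void *pointer; } dtv_t;
    typedef struct { size_t ti_module; size_t ti_offset; } tls_index;

    static dtv_t fake_dtv[8];   /* fake_dtv[m].pointer: module m's block.  */

    /* IA-32 style: one pointer to a (module, offset) pair.  */
    void *
    get_addr_index (tls_index *ti)
    {
      return (char *) fake_dtv[ti->ti_module].pointer + ti->ti_offset;
    }

    /* IA-64 style: module ID and offset as two scalar arguments.  */
    void *
    get_addr_pair (size_t module, size_t offset)
    {
      return (char *) fake_dtv[module].pointer + offset;
    }

    /* What _dl_tls_symaddr does for the default (tls_index) form.  */
    void *
    symaddr (size_t modid, size_t value)
    {
      tls_index tmp = { .ti_module = modid, .ti_offset = value };
      return get_addr_index (&tmp);
    }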
+/* The generic dynamic and local dynamic model cannot be used in
+   statically linked applications.  */
 void *
 __tls_get_addr (GET_ADDR_ARGS)
 {
   dtv_t *dtv = THREAD_DTV ();
+  struct link_map *the_map = NULL;
+  void *p;
+
+  if (__builtin_expect (dtv[0].counter != GL(dl_tls_generation), 0))
+    {
+      struct dtv_slotinfo_list *listp;
+      size_t idx;
+
+      /* The global dl_tls_dtv_slotinfo array contains for each module
+	 index the generation counter current when the entry was
+	 created.  This array never shrinks so that all module indices
+	 which were valid at some time can be used to access it.
+	 Before the first use of a new module index in this function
+	 the array was extended appropriately.  Access also does not
+	 have to be guarded against modifications of the array.  It is
+	 assumed that pointer-size values can be read atomically even
+	 in SMP environments.  It is possible that other threads at
+	 the same time dynamically load code and therefore add to the
+	 slotinfo list.  This is a problem since we must not pick up
+	 any information about incomplete work.  The solution to this
+	 is to ignore all dtv slots which were created after the one
+	 we are currently interested in.  We know that dynamic loading
+	 for this module is completed and this is the last load
+	 operation we know finished.  */
+      idx = GET_ADDR_MODULE;
+      listp = GL(dl_tls_dtv_slotinfo_list);
+      while (idx >= listp->len)
+	{
+	  idx -= listp->len;
+	  listp = listp->next;
+	}
 
-  if (dtv[GET_ADDR_MODULE].pointer == TLS_DTV_UNALLOCATE)
-    /* XXX */;
+      if (dtv[0].counter < listp->slotinfo[idx].gen)
+	{
+	  /* The generation counter for the slot is higher than what
+	     the current dtv implements.  We have to update the whole
+	     dtv but only those entries with a generation counter <=
+	     the one for the entry we need.  */
+	  size_t new_gen = listp->slotinfo[idx].gen;
+	  size_t total = 0;
+
+	  /* We have to look through the entire dtv slotinfo list.  */
+	  listp = GL(dl_tls_dtv_slotinfo_list);
+	  do
+	    {
+	      size_t cnt;
+
+	      for (cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
+		{
+		  size_t gen = listp->slotinfo[cnt].gen;
+		  struct link_map *map;
+		  size_t modid;
+
+		  if (gen > new_gen)
+		    /* This is a slot for a generation younger than
+		       the one we are handling now.  It might be
+		       incompletely set up so ignore it.  */
+		    continue;
+
+		  /* If the entry is older than the current dtv layout
+		     we know we don't have to handle it.  */
+		  if (gen <= dtv[0].counter)
+		    continue;
+
+		  /* If there is no map this means the entry is empty.  */
+		  map = listp->slotinfo[cnt].map;
+		  if (map == NULL)
+		    {
+		      /* If this modid was used at some point the memory
+			 might still be allocated.  */
+		      if (dtv[total + cnt].pointer != TLS_DTV_UNALLOCATED)
+			free (dtv[total + cnt].pointer);
+
+		      continue;
+		    }
+
+		  /* Check whether the current dtv array is large enough.  */
+		  modid = map->l_tls_modid;
+		  assert (total + cnt == modid);
+		  if (dtv[-1].counter < modid)
+		    {
+		      /* Reallocate the dtv.  */
+		      dtv_t *newp;
+		      size_t newsize = GL(dl_tls_max_dtv_idx) + DTV_SURPLUS;
+		      size_t oldsize = dtv[-1].counter;
+
+		      assert (map->l_tls_modid <= newsize);
+
+		      newp = (dtv_t *) realloc (&dtv[-1],
+						(2 + newsize)
+						* sizeof (dtv_t));
+		      if (newp == NULL)
+			oom ();
+
+		      newp[0].counter = newsize;
+
+		      /* Clear the newly allocated part.  */
+		      memset (newp + 2 + oldsize, '\0',
+			      (newsize - oldsize) * sizeof (dtv_t));
+
+		      /* Point dtv to the generation counter.  */
+		      dtv = &newp[1];
+
+		      /* Install this new dtv in the thread data
+			 structures.  */
+		      INSTALL_NEW_DTV (dtv);
+		    }
+
+		  /* If there is currently memory allocated for this
+		     dtv entry, free it.  */
+		  /* XXX Ideally we will at some point create a memory
+		     pool.  */
+		  if (dtv[modid].pointer != TLS_DTV_UNALLOCATED)
+		    /* Note that free is called for NULL as well.  We
+		       deallocate even if it is this dtv entry we are
+		       supposed to load.  The reason is that we call
+		       memalign and not malloc.  */
+		    free (dtv[modid].pointer);
+
+		  /* This module is loaded dynamically.  We defer
+		     memory allocation.  */
+		  dtv[modid].pointer = TLS_DTV_UNALLOCATED;
+
+		  if (modid == GET_ADDR_MODULE)
+		    the_map = map;
+		}
+
+	      total += listp->len;
+	    }
+	  while ((listp = listp->next) != NULL);
 
-  return (char *) dtv[GET_ADDR_MODULE].pointer + GET_ADDR_OFFSET;
+	  /* This will be the new maximum generation counter.  */
+	  dtv[0].counter = new_gen;
+	}
+    }
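The slow path just shown reduces to a per-slot generation comparison: anything newer than the target generation may be half-built and is skipped, anything not newer than the dtv's own counter is already reflected, and everything in between is reset for lazy allocation. A single-chunk sketch of that filter, leaving out the reallocation and freeing details; the types and names are simplified stand-ins:

    #include <stddef.h>

    #define TLS_DTV_UNALLOCATED ((void *) -1l)

    struct slot { size_t gen; void *map; };

    /* Bring one thread's dtv (module pointers plus its own generation
       counter) up to date with generation new_gen.  */
    void
    update_dtv (void **pointers, size_t *dtv_gen,
                const struct slot *slots, size_t nslots, size_t new_gen)
    {
      for (size_t modid = 1; modid < nslots; ++modid)
        {
          size_t gen = slots[modid].gen;

          /* Newer than the generation we are replaying up to: possibly
             still being set up by another thread, so ignore it.  */
          if (gen > new_gen)
            continue;

          /* Already reflected in this dtv.  */
          if (gen <= *dtv_gen)
            continue;

          /* Changed since this dtv was last updated: mark the entry
             for lazy (re)allocation on first use.  */
          pointers[modid] = TLS_DTV_UNALLOCATED;
        }

      *dtv_gen = new_gen;
    }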
+
+  p = dtv[GET_ADDR_MODULE].pointer;
+
+  if (__builtin_expect (p == TLS_DTV_UNALLOCATED, 0))
+    {
+      /* The allocation was deferred.  Do it now.  */
+      if (the_map == NULL)
+	{
+	  /* Find the link map for this module.  */
+	  size_t idx = GET_ADDR_MODULE;
+	  struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
+
+	  while (idx >= listp->len)
+	    {
+	      idx -= listp->len;
+	      listp = listp->next;
+	    }
+
+	  the_map = listp->slotinfo[idx].map;
+	}
+
+      p = dtv[GET_ADDR_MODULE].pointer = allocate_and_init (the_map);
+    }
+
+  return (char *) p + GET_ADDR_OFFSET;
 }
+# endif
 
 #endif	/* use TLS */
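Both the update path and the deferred-allocation path locate a module's slotinfo entry the same way: walk the chunk list, subtracting each chunk's length from the index until it falls inside the current chunk. A sketch of that lookup, again with simplified stand-in types rather than glibc's own:

    #include <stddef.h>

    struct slot { size_t gen; void *map; };
    struct chunk { size_t len; struct slot *slotinfo; struct chunk *next; };

    /* Map a module index onto its slot.  The caller guarantees the
       index is valid, so the walk never runs off the list.  */
    struct slot *
    lookup_slot (struct chunk *listp, size_t idx)
    {
      while (idx >= listp->len)
        {
          idx -= listp->len;
          listp = listp->next;
        }
      return &listp->slotinfo[idx];
    }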