about summary refs log tree commit diff
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 2002-02-13 08:03:56 +0000
committer Ulrich Drepper <drepper@redhat.com> 2002-02-13 08:03:56 +0000
commit aed283dd456e7566cbfef88abe7b235ca77cc23c (patch)
tree 39e854a8a182adce9cc8876c27a0a1d8b07d878e
parent 712ac5b11c3f250c4354d92ee5a902c67b6d2045 (diff)
download glibc-aed283dd456e7566cbfef88abe7b235ca77cc23c.tar.gz
glibc-aed283dd456e7566cbfef88abe7b235ca77cc23c.tar.xz
glibc-aed283dd456e7566cbfef88abe7b235ca77cc23c.zip
Update.
2002-02-12  Ulrich Drepper  <drepper@redhat.com>

	* sysdeps/generic/dl-tls.c (TLS_DTV_UNALLOCATED): Renamed from
	TLS_DTV_UNALLOCATE.
	(oom): New function.
	(_dl_next_tls_modid): Rewrite to handle dl_tls_dtv_slotinfo_list.
	(_dl_determine_tlsoffset): Likewise.
	(_dl_allocate_tls): Likewise.
	(__TLS_GET_ADDR): Define if not already defined.
	(_dl_tls_symaddr): New function.
	(allocate_and_init): New function.
	(__tls_get_addr): Actually implement handling of generation counter
	and deferred allocation.
	* sysdeps/generic/ldsodefs.h (_rtld_global): Remove _dl_initimage_list,
	add _dl_tls_dtv_slotinfo_list, _dl_tls_static_nelem, and
	_dl_tls_generation.
	Define TLS_SLOTINFO_SURPLUS and DTV_SURPLUS.
	Declare _dl_tls_symaddr.
	* sysdeps/i386/dl-tls.h: Disable __tls_get_addr handling unless
	SHARED.
	* include/link.h (struct link_map):  Remove l_tls_nextimage and
	l_tls_previmage.
	* elf/dl-sym.c (_dl_sym): After successful lookup call _dl_tls_symaddr
	instead of DL_SYMBOL_ADDRESS for STT_TLS symbols.
	(_dl_vsym): Likewise.
	* elf/rtld.c (_dl_start_final): Adjust initdtv initialization for new
	layout.
	(dl_main): Allow PT_TLS to be present for empty segment.  Remove
	nextimage list handling.  Instead add all modules using TLS to
	dl_tls_dtv_slotinfo_list.
	* elf/dl-open.c (dl_open_worker): After successfully loading all
	objects add those with TLS to the dl_tls_dtv_slotinfo_list list.
	* elf/dl-load.c (_dl_map_object_from_fd): If PT_TLS entry is for an
	empty segment don't do anything.  Remove handling of initimage list.
	* elf/Versions [ld] (GLIBC_2.0): Add __libc_memalign.
	(GLIBC_PRIVATE): Add _dl_tls_symaddr.
	* elf/dl-minimal.c: Define __libc_memalign.
	* elf/dl-support.c: Remove _dl_initimage_list.  Add
	_dl_tls_dtv_slotinfo_list, _dl_tls_static_nelem, and
	_dl_tls_generation.
	* include/stdlib.h: Declare __libc_memalign.

	* elf/Makefile: Add rules to build and run tst-tls4 and tst-tls5.
	* elf/tst-tls4.c: New file.
	* elf/tst-tls5.c: New file.
	* elf/tst-tlsmod2.c: New file.

	* elf/tls-macros.h: asms using ___tls_get_addr destroy %ecx and %edx.

	* elf/tst-tlsmod1.c: Don't define variables unless USE_TLS.

	* elf/tst-tls1.c: Use test-skeleton.c.
	* elf/tst-tls2.c: Likewise.
	* elf/tst-tls3.c: Likewise.

	* elf/dl-conflict.c (RESOLVE_MAP): Return NULL not 0.

	* sysdeps/mips/machine-gmon.h: Update MCOUNT for current GCC behavior.
-rw-r--r-- ChangeLog 60
-rw-r--r-- elf/Makefile 10
-rw-r--r-- elf/Versions 4
-rw-r--r-- elf/dl-load.c 32
-rw-r--r-- elf/dl-minimal.c 12
-rw-r--r-- elf/dl-open.c 84
-rw-r--r-- elf/dl-support.c 12
-rw-r--r-- elf/dl-sym.c 22
-rw-r--r-- elf/rtld.c 131
-rw-r--r-- elf/tls-macros.h 16
-rw-r--r-- elf/tst-tls1.c 10
-rw-r--r-- elf/tst-tls2.c 10
-rw-r--r-- elf/tst-tls3.c 10
-rw-r--r-- elf/tst-tls4.c 56
-rw-r--r-- elf/tst-tls5.c 72
-rw-r--r-- elf/tst-tlsmod1.c 2
-rw-r--r-- elf/tst-tlsmod2.c 32
-rw-r--r-- include/link.h 4
-rw-r--r-- include/stdlib.h 2
-rw-r--r-- linuxthreads/ChangeLog 7
-rw-r--r-- linuxthreads/sysdeps/i386/tls.h 10
-rw-r--r-- sysdeps/generic/dl-tls.c 494
-rw-r--r-- sysdeps/generic/ldsodefs.h 37
-rw-r--r-- sysdeps/i386/dl-tls.h 6
24 files changed, 909 insertions, 226 deletions
diff --git a/ChangeLog b/ChangeLog
index 6d9b645879..8d4e13458a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,7 +1,63 @@
+2002-02-12  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/generic/dl-tls.c (TLS_DTV_UNALLOCATED): Renamed from
+	TLS_DTV_UNALLOCATE.
+	(oom): New function.
+	(_dl_next_tls_modid): Rewrite to handle dl_tls_dtv_slotinfo_list.
+	(_dl_determine_tlsoffset): Likewise.
+	(_dl_allocate_tls): Likewise.
+	(__TLS_GET_ADDR): Define if not already defined.
+	(_dl_tls_symaddr): New function.
+	(allocate_and_init): New function.
+	(__tls_get_addr): Actually implement handling of generation counter
+	and deferred allocation.
+	* sysdeps/generic/ldsodefs.h (_rtld_global): Remove _dl_initimage_list,
+	add _dl_tls_dtv_slotinfo_list, _dl_tls_static_nelem, and
+	_dl_tls_generation.
+	Define TLS_SLOTINFO_SURPLUS and DTV_SURPLUS.
+	Declare _dl_tls_symaddr.
+	* sysdeps/i386/dl-tls.h: Disable __tls_get_addr handling unless
+	SHARED.
+	* include/link.h (struct link_map):  Remove l_tls_nextimage and
+	l_tls_previmage.
+	* elf/dl-sym.c (_dl_sym): After successful lookup call _dl_tls_symaddr
+	instead of DL_SYMBOL_ADDRESS for STT_TLS symbols.
+	(_dl_vsym): Likewise.
+	* elf/rtld.c (_dl_start_final): Adjust initdtv initialization for new
+	layout.
+	(dl_main): Allow PT_TLS to be present for empty segment.  Remove
+	nextimage list handling.  Instead add all modules using TLS to
+	dl_tls_dtv_slotinfo_list.
+	* elf/dl-open.c (dl_open_worker): After successfully loading all
+	objects add those with TLS to the dl_tls_dtv_slotinfo_list list.
+	* elf/dl-load.c (_dl_map_object_from_fd): If PT_TLS entry is for an
+	empty segment don't do anything.  Remove handling of initimage list.
+	* elf/Versions [ld] (GLIBC_2.0): Add __libc_memalign.
+	(GLIBC_PRIVATE): Add _dl_tls_symaddr.
+	* elf/dl-minimal.c: Define __libc_memalign.
+	* elf/dl-support.c: Remove _dl_initimage_list.  Add
+	_dl_tls_dtv_slotinfo_list, _dl_tls_static_nelem, and
+	_dl_tls_generation.
+	* include/stdlib.h: Declare __libc_memalign.
+
+	* elf/Makefile: Add rules to build and run tst-tls4 and tst-tls5.
+	* elf/tst-tls4.c: New file.
+	* elf/tst-tls5.c: New file.
+	* elf/tst-tlsmod2.c: New file.
+
+	* elf/tls-macros.h: asms using ___tls_get_addr destroy %ecx and %edx.
+
+	* elf/tst-tlsmod1.c: Don't define variables unless USE_TLS.
+
+	* elf/tst-tls1.c: Use test-skeleton.c.
+	* elf/tst-tls2.c: Likewise.
+	* elf/tst-tls3.c: Likewise.
+
+	* elf/dl-conflict.c (RESOLVE_MAP): Return NULL not 0.
+
 2002-02-08  Daniel Jacobowitz  <drow@mvista.com>
 
-	* sysdeps/mips/machine-gmon.h: Update MCOUNT for current GCC
-	behavior.
+	* sysdeps/mips/machine-gmon.h: Update MCOUNT for current GCC behavior.
 
 2002-02-10  Ulrich Drepper  <drepper@redhat.com>
 
diff --git a/elf/Makefile b/elf/Makefile
index 9c5d4f6210..4ef1614c06 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -119,7 +119,7 @@ tests = loadtest restest1 preloadtest loadfail multiload origtest resolvfail \
 	$(tests-nodlopen-$(have-z-nodlopen)) neededtest neededtest2 \
 	neededtest3 neededtest4 unload2 lateglobal initfirst global \
 	restest2 next dblload dblunload reldep5 reldep6 tst-tls1 tst-tls2 \
-	tst-tls3
+	tst-tls3 tst-tls4 tst-tls5
 test-srcs = tst-pathopt
 tests-vis-yes = vismain
 tests-nodelete-yes = nodelete
@@ -137,7 +137,7 @@ modules-names = testobj1 testobj2 testobj3 testobj4 testobj5 testobj6 \
 		unload2mod unload2dep ltglobmod1 ltglobmod2 pathoptobj \
 		dblloadmod1 dblloadmod2 dblloadmod3 reldepmod5 reldepmod6 \
 	        reldep6mod0 reldep6mod1 reldep6mod2 reldep6mod3 reldep6mod4 \
-		tst-tlsmod1
+		tst-tlsmod1 tst-tlsmod2
 modules-vis-yes = vismod1 vismod2 vismod3
 modules-nodelete-yes = nodelmod1 nodelmod2 nodelmod3 nodelmod4
 modules-nodlopen-yes = nodlopenmod nodlopenmod2
@@ -442,3 +442,9 @@ $(objpfx)reldep6: $(libdl)
 $(objpfx)reldep6.out: $(objpfx)reldep6mod3.so $(objpfx)reldep6mod4.so
 
 $(objpfx)tst-tls3: $(objpfx)tst-tlsmod1.so
+
+$(objpfx)tst-tls4: $(libdl)
+$(objpfx)tst-tls4.out: $(objpfx)tst-tlsmod2.so
+
+$(objpfx)tst-tls5: $(libdl)
+$(objpfx)tst-tls5.out: $(objpfx)tst-tlsmod2.so
diff --git a/elf/Versions b/elf/Versions
index 8d8a1e8eac..927e26ff46 100644
--- a/elf/Versions
+++ b/elf/Versions
@@ -27,7 +27,7 @@ libc {
 ld {
   GLIBC_2.0 {
     # Function from libc.so which must be shared with libc.
-    calloc; free; malloc; realloc;
+    calloc; free; malloc; realloc; __libc_memalign;
 
     _r_debug;
   }
@@ -49,6 +49,6 @@ ld {
     _dl_map_object; _dl_map_object_deps; _dl_out_of_memory;
     _dl_relocate_object; _dl_signal_error; _dl_start_profile; _dl_starting_up;
     _dl_unload_cache;
-    _rtld_global;
+    _rtld_global; _dl_tls_symaddr;
   }
 }
diff --git a/elf/dl-load.c b/elf/dl-load.c
index 1fffe72108..48e7cba235 100644
--- a/elf/dl-load.c
+++ b/elf/dl-load.c
@@ -941,30 +941,18 @@ _dl_map_object_from_fd (const char *name, int fd, struct filebuf *fbp,
 
 #ifdef USE_TLS
 	case PT_TLS:
-	  l->l_tls_blocksize = ph->p_memsz;
-	  l->l_tls_align = ph->p_align;
-	  l->l_tls_initimage_size = ph->p_filesz;
-	  /* Since we don't know the load address yet only store the
-	     offset.  We will adjust it later.  */
-	  l->l_tls_initimage = (void *) ph->p_offset;
-
-	/* This is the first element of the initialization image list.
-	   It is created as a circular list so that we can easily
-	   append to it.  */
-	  if (GL(dl_initimage_list) == NULL)
-	    GL(dl_initimage_list) = l->l_tls_nextimage = l->l_tls_previmage
-	      = l;
-	  else
+	  if (ph->p_memsz > 0)
 	    {
-	      l->l_tls_nextimage = GL(dl_initimage_list)->l_tls_nextimage;
-	      l->l_tls_nextimage->l_tls_previmage = l;
-	      l->l_tls_previmage = GL(dl_initimage_list);
-	      l->l_tls_previmage->l_tls_nextimage = l;
-	      GL(dl_initimage_list) = l;
+	      l->l_tls_blocksize = ph->p_memsz;
+	      l->l_tls_align = ph->p_align;
+	      l->l_tls_initimage_size = ph->p_filesz;
+	      /* Since we don't know the load address yet only store the
+		 offset.  We will adjust it later.  */
+	      l->l_tls_initimage = (void *) ph->p_offset;
+
+	      /* Assign the next available module ID.  */
+	      l->l_tls_modid = _dl_next_tls_modid ();
 	    }
-
-	  /* Assign the next available module ID.  */
-	  l->l_tls_modid = _dl_next_tls_modid ();
 	  break;
 #endif
 	}
diff --git a/elf/dl-minimal.c b/elf/dl-minimal.c
index c65151cb56..275ad86fe0 100644
--- a/elf/dl-minimal.c
+++ b/elf/dl-minimal.c
@@ -21,8 +21,9 @@
 #include <limits.h>
 #include <string.h>
 #include <unistd.h>
-#include <sys/types.h>
 #include <sys/mman.h>
+#include <sys/param.h>
+#include <sys/types.h>
 #include <ldsodefs.h>
 #include <stdio-common/_itoa.h>
 
@@ -120,6 +121,15 @@ realloc (void *ptr, size_t n)
   assert (new == ptr);
   return new;
 }
+
+/* Return aligned memory block.  */
+void * weak_function
+__libc_memalign (size_t align, size_t n)
+{
+  void *newp = malloc (n + align - 1);
+
+  return (void *) roundup ((uintptr_t) newp, align);
+}
 
 /* Avoid signal frobnication in setjmp/longjmp.  Keeps things smaller.  */
 
diff --git a/elf/dl-open.c b/elf/dl-open.c
index 8f30bc009d..d9ed499054 100644
--- a/elf/dl-open.c
+++ b/elf/dl-open.c
@@ -31,6 +31,7 @@
 #include <bp-sym.h>
 
 #include <dl-dst.h>
+#include <dl-tls.h>
 
 
 extern ElfW(Addr) _dl_sysdep_start (void **start_argptr,
@@ -353,6 +354,73 @@ dl_open_worker (void *a)
 	imap->l_scope[cnt++] = &new->l_searchlist;
 	imap->l_scope[cnt] = NULL;
       }
+#if USE_TLS
+    else if (new->l_searchlist.r_list[i]->l_opencount == 1
+	     /* Only if the module defines thread local data.  */
+	     && __builtin_expect (new->l_searchlist.r_list[i]->l_tls_blocksize
+				  > 0, 0))
+      {
+	/* Now that we know the object is loaded successfully add
+	   modules containing TLS data to the dtv info table.  We
+	   might have to increase its size.  */
+	struct dtv_slotinfo_list *listp;
+	struct dtv_slotinfo_list *prevp;
+	size_t idx = new->l_searchlist.r_list[i]->l_tls_modid;
+
+	assert (new->l_searchlist.r_list[i]->l_type == lt_loaded);
+
+	/* Find the place in the dtv slotinfo list.  */
+	listp = GL(dl_tls_dtv_slotinfo_list);
+	prevp = NULL;		/* Needed to shut up gcc.  */
+	do
+	  {
+	    /* Does it fit in the array of this list element?  */
+	    if (idx <= listp->len)
+	      break;
+	    prevp = listp;
+	  }
+	while ((listp = listp->next) != NULL);
+
+	if (listp == NULL)
+	  {
+	    /* When we come here it means we have to add a new element
+	       to the slotinfo list.  And the new module must be in
+	       the first slot.  */
+	    assert (idx == 0);
+
+	    listp = prevp->next = (struct dtv_slotinfo_list *)
+	      malloc (sizeof (struct dtv_slotinfo_list)
+		      + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
+	    if (listp == NULL)
+	      {
+		/* We ran out of memory.  We will simply fail this
+		   call but don't undo anything we did so far.  The
+		   application will crash or be terminated anyway very
+		   soon.  */
+
+		/* We have to do this since some entries in the dtv
+		   slotinfo array might already point to this
+		   generation.  */
+		++GL(dl_tls_generation);
+
+		_dl_signal_error (ENOMEM, "dlopen", NULL,
+				  N_("cannot create TLS data structures"));
+	      }
+
+	    listp->len = TLS_SLOTINFO_SURPLUS;
+	    listp->next = NULL;
+	    memset (listp->slotinfo, '\0',
+		    TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
+	  }
+
+	/* Add the information into the slotinfo data structure.  */
+	listp->slotinfo[idx].map = new->l_searchlist.r_list[i];
+	listp->slotinfo[idx].gen = GL(dl_tls_generation) + 1;
+      }
+
+  /* Bump the generation number.  */
+  ++GL(dl_tls_generation);
+#endif
 
   /* Run the initializer functions of new objects.  */
   _dl_init (new, __libc_argc, __libc_argv, __environ);
@@ -424,10 +492,18 @@ _dl_open (const char *file, int mode, const void *caller)
 	{
 	  unsigned int i;
 
-	  /* Increment open counters for all objects since this has
-	     not happened yet.  */
-	  for (i = 0; i < args.map->l_searchlist.r_nlist; ++i)
-	    ++args.map->l_searchlist.r_list[i]->l_opencount;
+	  /* Increment open counters for all objects since this
+	     sometimes has not happened yet.  */
+	  if (args.map->l_searchlist.r_list[0]->l_opencount == 0)
+	    for (i = 0; i < args.map->l_searchlist.r_nlist; ++i)
+	      ++args.map->l_searchlist.r_list[i]->l_opencount;
+
+	  /* Maybe some of the modules which were loaded use TLS.
+	     Since they will be removed in the following _dl_close call
+	     we have to mark the dtv array as having gaps to fill
+	     the holes.  This is a pessimistic assumption which won't
+	     hurt if not true.  */
+	  GL(dl_tls_dtv_gaps) = true;
 
 	  _dl_close (args.map);
 	}
diff --git a/elf/dl-support.c b/elf/dl-support.c
index 914b43f057..876776acba 100644
--- a/elf/dl-support.c
+++ b/elf/dl-support.c
@@ -138,20 +138,22 @@ int _dl_correct_cache_id = _DL_CACHE_DEFAULT_ID;
 __libc_lock_define_initialized_recursive (, _dl_load_lock)
 
 #ifdef USE_TLS
-/* Beginning of the list of link maps for objects which contain
-   thread-local storage sections.  This will be traversed to
-   initialize new TLS blocks.  */
-struct link_map *_dl_initimage_list;
 
 /* Highest dtv index currently needed.  */
 size_t _dl_tls_max_dtv_idx;
 /* Flag signalling whether there are gaps in the module ID allocation.  */
 bool _dl_tls_dtv_gaps;
-
+/* Information about the dtv slots.  */
+struct dtv_slotinfo_list *_dl_tls_dtv_slotinfo_list;
+/* Number of modules in the static TLS block.  */
+size_t _dl_tls_static_nelem;
 /* Size of the static TLS block.  */
 size_t _dl_tls_static_size;
 /* Alignment requirement of the static TLS block.  */
 size_t _dl_tls_static_align;
+
+/* Generation counter for the dtv.  */
+size_t _dl_tls_generation;
 #endif
 
 
diff --git a/elf/dl-sym.c b/elf/dl-sym.c
index b0db159aa6..8cb5d2f411 100644
--- a/elf/dl-sym.c
+++ b/elf/dl-sym.c
@@ -83,7 +83,16 @@ RTLD_NEXT used in code not dynamically loaded"));
     }
 
   if (ref != NULL)
-    return DL_SYMBOL_ADDRESS (result, ref);
+    {
+#if defined USE_TLS && defined SHARED
+      if (ELFW(ST_TYPE) (ref->st_info) == STT_TLS)
+	/* The found symbol is a thread-local storage variable.
+	   Return the address for the current thread.  */
+	return _dl_tls_symaddr (result, ref);
+#endif
+
+      return DL_SYMBOL_ADDRESS (result, ref);
+    }
 
   return NULL;
 }
@@ -152,7 +161,16 @@ RTLD_NEXT used in code not dynamically loaded"));
     }
 
   if (ref != NULL)
-    return DL_SYMBOL_ADDRESS (result, ref);
+    {
+#if defined USE_TLS && defined SHARED
+      if (ELFW(ST_TYPE) (ref->st_info) == STT_TLS)
+	/* The found symbol is a thread-local storage variable.
+	   Return the address for the current thread.  */
+	return _dl_tls_symaddr (result, ref);
+#endif
+
+      return DL_SYMBOL_ADDRESS (result, ref);
+    }
 
   return NULL;
 }
diff --git a/elf/rtld.c b/elf/rtld.c
index 6463ed600e..d7653f2394 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -223,7 +223,7 @@ _dl_start_final (void *arg, struct link_map *bootstrap_map_p,
   ElfW(Ehdr) *ehdr;
   ElfW(Phdr) *phdr;
   size_t cnt;
-  dtv_t initdtv[2];
+  dtv_t initdtv[3];
 #endif
 
   if (HP_TIMING_AVAIL)
@@ -291,16 +291,18 @@ _dl_start_final (void *arg, struct link_map *bootstrap_map_p,
 	tlsblock = (void *) (((uintptr_t) tlsblock + max_align - 1)
 			     & ~(max_align - 1));
 
-	/* Initialize the dtv.  */
+	/* Initialize the dtv.  [0] is the length, [1] the generation
+	   counter.  */
 	initdtv[0].counter = 1;
+	initdtv[1].counter = 0;
 
 	/* Initialize the TLS block.  */
 # if TLS_TCB_AT_TP
-	initdtv[1].pointer = tlsblock;
+	initdtv[2].pointer = tlsblock;
 # elif TLS_DTV_AT_TP
 	GL(dl_rtld_map).l_tls_offset = roundup (TLS_INIT_TCB_SIZE,
 						GL(dl_rtld_map).l_tls_align);
-	initdtv[1].pointer = (char *) tlsblock + GL(dl_rtld_map).l_tls_offset);
+	initdtv[2].pointer = (char *) tlsblock + GL(dl_rtld_map).l_tls_offset);
 # else
 #  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
 # endif
@@ -723,22 +725,20 @@ of this helper program; chances are you did not intend to run this program.\n\
 	break;
 #ifdef USE_TLS
       case PT_TLS:
-	/* Note that in the case the dynamic linker we duplicate work
-	   here since we read the PT_TLS entry already in
-	   _dl_start_final.  But the result is repeatable so do not
-	   check for this special but unimportant case.  */
-	GL(dl_loaded)->l_tls_blocksize = ph->p_memsz;
-	GL(dl_loaded)->l_tls_align = ph->p_align;
-	GL(dl_loaded)->l_tls_initimage_size = ph->p_filesz;
-	GL(dl_loaded)->l_tls_initimage = (void *) ph->p_vaddr;
-	/* This is the first element of the initialization image list.
-	   We create the list as circular since we have to append at
-	   the end.  */
-	GL(dl_initimage_list) = GL(dl_loaded)->l_tls_nextimage
-	  = GL(dl_loaded)->l_tls_previmage = GL(dl_loaded);
-
-	/* This image gets the ID one.  */
-	GL(dl_tls_max_dtv_idx) = GL(dl_loaded)->l_tls_modid = 1;
+	if (ph->p_memsz > 0)
+	  {
+	    /* Note that in the case of the dynamic linker we duplicate work
+	       here since we read the PT_TLS entry already in
+	       _dl_start_final.  But the result is repeatable so do not
+	       check for this special but unimportant case.  */
+	    GL(dl_loaded)->l_tls_blocksize = ph->p_memsz;
+	    GL(dl_loaded)->l_tls_align = ph->p_align;
+	    GL(dl_loaded)->l_tls_initimage_size = ph->p_filesz;
+	    GL(dl_loaded)->l_tls_initimage = (void *) ph->p_vaddr;
+
+	    /* This image gets the ID one.  */
+	    GL(dl_tls_max_dtv_idx) = GL(dl_loaded)->l_tls_modid = 1;
+	  }
 	break;
 #endif
       }
@@ -1188,43 +1188,66 @@ of this helper program; chances are you did not intend to run this program.\n\
      use the static model.  First add the dynamic linker to the list
      if it also uses TLS.  */
   if (GL(dl_rtld_map).l_tls_blocksize != 0)
+    /* Assign a module ID.  */
+    GL(dl_rtld_map).l_tls_modid = _dl_next_tls_modid ();
+
+# ifndef SHARED
+  /* If dynamic loading of modules with TLS is impossible we do not
+     have to initialize any of the TLS functionality unless any of the
+     initial modules uses TLS.  */
+  if (GL(dl_tls_max_dtv_idx) > 0)
+# endif
     {
-      /* At to the list.  */
-      if (GL(dl_initimage_list) == NULL)
-	GL(dl_initimage_list) = GL(dl_rtld_map).l_tls_nextimage
-	  = GL(dl_rtld_map).l_tls_previmage = &GL(dl_rtld_map);
-	  else
-	    {
-	      GL(dl_rtld_map).l_tls_nextimage
-		= GL(dl_initimage_list)->l_tls_nextimage;
-	      GL(dl_rtld_map).l_tls_nextimage->l_tls_previmage
-		= &GL(dl_rtld_map);
-	      GL(dl_rtld_map).l_tls_previmage = GL(dl_initimage_list);
-	      GL(dl_rtld_map).l_tls_previmage->l_tls_nextimage
-		= &GL(dl_rtld_map);
-	      GL(dl_initimage_list) = &GL(dl_rtld_map);
-	    }
+      struct link_map *l;
+      size_t nelem;
+      struct dtv_slotinfo *slotinfo;
+
+      /* Number of elements in the static TLS block.  */
+      GL(dl_tls_static_nelem) = GL(dl_tls_max_dtv_idx);
+
+      /* Allocate the array which contains the information about the
+	 dtv slots.  We allocate a few entries more than needed to
+	 avoid the need for reallocation.  */
+      nelem = GL(dl_tls_max_dtv_idx) + 1 + TLS_SLOTINFO_SURPLUS;
+
+      /* Allocate.  */
+      GL(dl_tls_dtv_slotinfo_list) = (struct dtv_slotinfo_list *)
+	malloc (sizeof (struct dtv_slotinfo_list)
+		+ nelem * sizeof (struct dtv_slotinfo));
+      /* No need to check the return value.  If memory allocation failed
+	 the program would have been terminated.  */
+
+      slotinfo = memset (GL(dl_tls_dtv_slotinfo_list)->slotinfo, '\0',
+			 nelem * sizeof (struct dtv_slotinfo));
+      GL(dl_tls_dtv_slotinfo_list)->len = nelem;
+      GL(dl_tls_dtv_slotinfo_list)->next = NULL;
+
+      /* Fill in the information from the loaded modules.  */
+      for (l = GL(dl_loaded), i = 0; l != NULL; l = l->l_next)
+	if (l->l_tls_blocksize != 0)
+	  /* This is a module with TLS data.  Store the map reference.
+	     The generation counter is zero.  */
+	  slotinfo[++i].map = l;
+      assert (i == GL(dl_tls_max_dtv_idx));
+
+      /* Compute the TLS offsets for the various blocks.  We call this
+	 function even if none of the modules available at startup time
+	 uses TLS to initialize some variables.  */
+      _dl_determine_tlsoffset ();
+
+      /* Construct the static TLS block and the dtv for the initial
+	 thread.  For some platforms this will include allocating memory
+	 for the thread descriptor.  The memory for the TLS block will
+	 never be freed.  It should be allocated accordingly.  The dtv
+	 array can be changed if dynamic loading requires it.  */
+      tcbp = _dl_allocate_tls ();
+      if (tcbp == NULL)
+	_dl_fatal_printf ("\
+cannot allocate TLS data structures for inital thread");
 
-      /* Assign a module ID.  */
-      GL(dl_rtld_map).l_tls_modid = _dl_next_tls_modid ();
+      /* And finally install it for the main thread.  */
+      TLS_INIT_TP (tcbp);
     }
-
-  /* Computer the TLS offsets for the various blocks.  We call this
-     function even if none of the modules available at startup time
-     uses TLS to initialize some variables.  */
-    _dl_determine_tlsoffset (GL(dl_initimage_list));
-
-  /* Construct the static TLS block and the dtv for the initial
-     thread.  For some platforms this will include allocating memory
-     for the thread descriptor.  The memory for the TLS block will
-     never be freed.  It should be allocated accordingly.  The dtv
-     array can be changed if dynamic loading requires it.  */
-  tcbp = _dl_allocate_tls ();
-  if (tcbp == NULL)
-    _dl_fatal_printf ("cannot allocate TLS data structures for inital thread");
-
-  /* And finally install it for the main thread.  */
-  TLS_INIT_TP (tcbp);
 #endif
 
   if (GL(dl_loaded)->l_info [ADDRIDX (DT_GNU_LIBLIST)]
diff --git a/elf/tls-macros.h b/elf/tls-macros.h
index 35b57d4f04..26745e9e21 100644
--- a/elf/tls-macros.h
+++ b/elf/tls-macros.h
@@ -49,15 +49,15 @@
 
 # ifdef PIC
 #  define TLS_LD(x) \
-  ({ int *__l;								      \
+  ({ int *__l, __c, __d;						      \
      asm ("leal " #x "@tlsldm(%%ebx),%%eax\n\t"				      \
 	  "call ___tls_get_addr@plt\n\t"				      \
 	  "leal " #x "@dtpoff(%%eax), %%eax"				      \
-	  : "=a" (__l));						      \
+	  : "=a" (__l), "=&c" (__c), "=&d" (__d));			      \
      __l; })
 # else
 #  define TLS_LD(x) \
-  ({ int *__l, __b;							      \
+  ({ int *__l, __b, __c, __d;						      \
      asm ("call 1f\n\t"							      \
 	  ".subsection 1\n"						      \
 	  "1:\tmovl (%%esp), %%ebx\n\t"					      \
@@ -67,21 +67,21 @@
 	  "leal " #x "@tlsldm(%%ebx),%%eax\n\t"				      \
 	  "call ___tls_get_addr@plt\n\t"				      \
 	  "leal " #x "@dtpoff(%%eax), %%eax"				      \
-	  : "=a" (__l), "=&b" (__b));					      \
+	  : "=a" (__l), "=&b" (__b), "=&c" (__c), "=&d" (__d));		      \
      __l; })
 # endif
 
 # ifdef PIC
 #  define TLS_GD(x) \
-  ({ int *__l;								      \
+  ({ int *__l, __c, __d;						      \
      asm ("leal " #x "@tlsgd(%%ebx),%%eax\n\t"				      \
 	  "call ___tls_get_addr@plt\n\t"				      \
 	  "nop"								      \
-	  : "=a" (__l));						      \
+	  : "=a" (__l), "=&c" (__c), "=&d" (__d));			      \
      __l; })
 # else
 #  define TLS_GD(x) \
-  ({ int *__l, __b;							      \
+  ({ int *__l, __b, __c, __d;						      \
      asm ("call 1f\n\t"							      \
 	  ".subsection 1\n"						      \
 	  "1:\tmovl (%%esp), %%ebx\n\t"					      \
@@ -91,7 +91,7 @@
 	  "leal " #x "@tlsgd(%%ebx),%%eax\n\t"				      \
 	  "call ___tls_get_addr@plt\n\t"				      \
 	  "nop"								      \
-	  : "=a" (__l), "=&b" (__b));					      \
+	  : "=a" (__l), "=&b" (__b), "=&c" (__c), "=&d" (__d));		      \
      __l; })
 # endif
 
diff --git a/elf/tst-tls1.c b/elf/tst-tls1.c
index 5e67482ead..74e9a915b3 100644
--- a/elf/tst-tls1.c
+++ b/elf/tst-tls1.c
@@ -5,13 +5,16 @@
 #include "tls-macros.h"
 
 
+#ifdef USE_TLS
 /* Two common 'int' variables in TLS.  */
 COMMON_INT_DEF(foo);
 COMMON_INT_DEF(bar);
+#endif
 
 
-int
-main (void)
+#define TEST_FUNCTION do_test ()
+static int
+do_test (void)
 {
 #ifdef USE_TLS
   int result = 0;
@@ -82,3 +85,6 @@ main (void)
   return 0;
 #endif
 }
+
+
+#include "../test-skeleton.c"
diff --git a/elf/tst-tls2.c b/elf/tst-tls2.c
index 1810ffa1e3..a15c5f5fe8 100644
--- a/elf/tst-tls2.c
+++ b/elf/tst-tls2.c
@@ -5,13 +5,16 @@
 #include "tls-macros.h"
 
 
+#ifdef USE_TLS
 /* Two 'int' variables in TLS.  */
 VAR_INT_DEF(foo);
 VAR_INT_DEF(bar);
+#endif
 
 
-int
-main (void)
+#define TEST_FUNCTION do_test ()
+static int
+do_test (void)
 {
 #ifdef USE_TLS
   int result = 0;
@@ -82,3 +85,6 @@ main (void)
   return 0;
 #endif
 }
+
+
+#include "../test-skeleton.c"
diff --git a/elf/tst-tls3.c b/elf/tst-tls3.c
index c86f1840a5..58bb183c8d 100644
--- a/elf/tst-tls3.c
+++ b/elf/tst-tls3.c
@@ -5,17 +5,20 @@
 #include "tls-macros.h"
 
 
+#ifdef USE_TLS
 /* One define int variable, two externs.  */
 COMMON_INT_DECL(foo);
 VAR_INT_DECL(bar);
 VAR_INT_DEF(baz);
+#endif
 
 
 extern int in_dso (void);
 
 
-int
-main (void)
+#define TEST_FUNCTION do_test ()
+static int
+do_test (void)
 {
 #ifdef USE_TLS
   int result = 0;
@@ -67,3 +70,6 @@ main (void)
   return 0;
 #endif
 }
+
+
+#include "../test-skeleton.c"
diff --git a/elf/tst-tls4.c b/elf/tst-tls4.c
new file mode 100644
index 0000000000..f92ee53ce5
--- /dev/null
+++ b/elf/tst-tls4.c
@@ -0,0 +1,56 @@
+#include <dlfcn.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <tls.h>
+
+
+#define TEST_FUNCTION do_test ()
+static int
+do_test (void)
+{
+#ifdef USE_TLS
+  static const char modname[] = "tst-tlsmod2.so";
+  int result = 0;
+  int *foop;
+  int (*fp) (int, int *);
+  void *h;
+
+  h = dlopen (modname, RTLD_LAZY);
+  if (h == NULL)
+    {
+      printf ("cannot open '%s': %s\n", modname, dlerror ());
+      exit (1);
+    }
+
+  fp = dlsym (h, "in_dso");
+  if (fp == NULL)
+    {
+      printf ("cannot get symbol 'in_dso': %s\n", dlerror ());
+      exit (1);
+    }
+
+  result |= fp (0, NULL);
+
+  foop = dlsym (h, "foo");
+  if (foop == NULL)
+    {
+      printf ("cannot get symbol 'foo' the second time: %s\n", dlerror ());
+      exit (1);
+    }
+  if (*foop != 16)
+    {
+      puts ("foo != 16");
+      result = 1;
+    }
+
+  dlclose (h);
+
+  return result;
+#else
+  return 0;
+#endif
+}
+
+
+#include "../test-skeleton.c"
diff --git a/elf/tst-tls5.c b/elf/tst-tls5.c
new file mode 100644
index 0000000000..a571d2cd3f
--- /dev/null
+++ b/elf/tst-tls5.c
@@ -0,0 +1,72 @@
+#include <dlfcn.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <tls.h>
+
+
+#define TEST_FUNCTION do_test ()
+static int
+do_test (void)
+{
+#ifdef USE_TLS
+  static const char modname[] = "tst-tlsmod2.so";
+  int result = 0;
+  int *foop;
+  int *foop2;
+  int (*fp) (int, int *);
+  void *h;
+
+  h = dlopen (modname, RTLD_LAZY);
+  if (h == NULL)
+    {
+      printf ("cannot open '%s': %s\n", modname, dlerror ());
+      exit (1);
+    }
+
+  foop = dlsym (h, "foo");
+  if (foop == NULL)
+    {
+      printf ("cannot get symbol 'foo': %s\n", dlerror ());
+      exit (1);
+    }
+
+  *foop = 42;
+
+  fp = dlsym (h, "in_dso");
+  if (fp == NULL)
+    {
+      printf ("cannot get symbol 'in_dso': %s\n", dlerror ());
+      exit (1);
+    }
+
+  result |= fp (42, foop);
+
+  foop2 = dlsym (h, "foo");
+  if (foop2 == NULL)
+    {
+      printf ("cannot get symbol 'foo' the second time: %s\n", dlerror ());
+      exit (1);
+    }
+
+  if (foop != foop2)
+    {
+      puts ("address of 'foo' different the second time");
+      result = 1;
+    }
+  else if (*foop != 16)
+    {
+      puts ("foo != 16");
+      result = 1;
+    }
+
+  dlclose (h);
+
+  return result;
+#else
+  return 0;
+#endif
+}
+
+
+#include "../test-skeleton.c"
diff --git a/elf/tst-tlsmod1.c b/elf/tst-tlsmod1.c
index 3a632865f8..cca8df6549 100644
--- a/elf/tst-tlsmod1.c
+++ b/elf/tst-tlsmod1.c
@@ -4,10 +4,12 @@
 #include "tls-macros.h"
 
 
+#ifdef USE_TLS
 /* One define int variable, two externs.  */
 COMMON_INT_DEF(foo);
 VAR_INT_DEF(bar);
 VAR_INT_DECL(baz);
+#endif
 
 
 int
diff --git a/elf/tst-tlsmod2.c b/elf/tst-tlsmod2.c
new file mode 100644
index 0000000000..30ed67163d
--- /dev/null
+++ b/elf/tst-tlsmod2.c
@@ -0,0 +1,32 @@
+#include <stdio.h>
+
+#include <tls.h>
+#include "tls-macros.h"
+
+#ifdef USE_TLS
+
+COMMON_INT_DEF(foo);
+
+
+int
+in_dso (int n, int *caller_foop)
+{
+  int *foop = TLS_GD (foo);
+  int result = 0;
+
+  if (caller_foop != NULL && foop != caller_foop)
+    {
+      printf ("callers address of foo differs: %p vs %p\n", caller_foop, foop);
+      result = 1;
+    }
+  else if (*foop != n)
+    {
+      printf ("foo != %d\n", n);
+      result = 1;
+    }
+
+  *foop = 16;
+
+  return result;
+}
+#endif
diff --git a/include/link.h b/include/link.h
index ca3784bc66..e36aafca00 100644
--- a/include/link.h
+++ b/include/link.h
@@ -259,10 +259,6 @@ struct link_map
 #ifdef USE_TLS
     /* Thread-local storage related info.  */
 
-    /* Next module in list of initialization images.  */
-    struct link_map *l_tls_nextimage;
-    /* Previous module in list of initialization images.  */
-    struct link_map *l_tls_previmage;
     /* Start of the initialization image.  */
     void *l_tls_initimage;
     /* Size of the initialization image.  */
diff --git a/include/stdlib.h b/include/stdlib.h
index 15fc179e2d..c76763ef51 100644
--- a/include/stdlib.h
+++ b/include/stdlib.h
@@ -62,6 +62,8 @@ extern void __cxa_finalize (void *d);
 
 extern int __posix_memalign (void **memptr, size_t alignment, size_t size)
      __attribute_malloc__;
+extern void *__libc_memalign (size_t alignment, size_t size)
+     __attribute_malloc__;
 
 extern int __libc_system (const char *line);
 
diff --git a/linuxthreads/ChangeLog b/linuxthreads/ChangeLog
index 79a3ccec61..fee9a7a67b 100644
--- a/linuxthreads/ChangeLog
+++ b/linuxthreads/ChangeLog
@@ -1,8 +1,13 @@
+2002-02-12  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/i386/tls.h (INSTALL_NEW_DTV): Define.
+	(INSTALL_DTV): Adjust for being passed pointer to element with length.
+
 2002-02-08  Ulrich Drepper  <drepper@redhat.com>
 
 	* sysdeps/i386/tls.h (TLS_INIT_TP): Also initialize %gs.
 
-2002-02-08  Richard Henderson  <rth@twiddle.net>
+2002-02-08  Richard Henderson  <rth@redhat.com>
 
 	* sysdeps/alpha/elf/pt-initfini.c: Use \n\ for multiline string.
 
diff --git a/linuxthreads/sysdeps/i386/tls.h b/linuxthreads/sysdeps/i386/tls.h
index 79e4fde9c7..8e1d4698b0 100644
--- a/linuxthreads/sysdeps/i386/tls.h
+++ b/linuxthreads/sysdeps/i386/tls.h
@@ -67,9 +67,15 @@ typedef struct
 # define TLS_TCB_AT_TP	1
 
 
-/* Install the dtv pointer.  */
+/* Install the dtv pointer.  The pointer passed is to the element with
+   index -1 which contains the length.  */
 # define INSTALL_DTV(descr, dtvp) \
-  ((tcbhead_t *) descr)->dtv = dtvp
+  ((tcbhead_t *) descr)->dtv = dtvp + 1
+
+/* Install new dtv for current thread.  */
+# define INSTALL_NEW_DTV(dtv) \
+  ({ struct _pthread_descr_struct *__descr;				      \
+     THREAD_SETMEM (__descr, p_header.data.dtvp, dtv); })
 
 /* Code to initially initialize the thread pointer.  This might need
    special attention since 'errno' is not yet available and if the
diff --git a/sysdeps/generic/dl-tls.c b/sysdeps/generic/dl-tls.c
index e42911acd3..d13b0f93fa 100644
--- a/sysdeps/generic/dl-tls.c
+++ b/sysdeps/generic/dl-tls.c
@@ -18,8 +18,12 @@
    02111-1307 USA.  */
 
 #include <assert.h>
+#include <signal.h>
 #include <stdlib.h>
+#include <unistd.h>
+#include <sys/param.h>
 
+#include <abort-instr.h>
 #include <tls.h>
 
 /* We don't need any of this if TLS is not supported.  */
@@ -29,7 +33,31 @@
 #include <ldsodefs.h>
 
 /* Value used for dtv entries for which the allocation is delayed.  */
-# define TLS_DTV_UNALLOCATE	((void *) -1l)
+# define TLS_DTV_UNALLOCATED	((void *) -1l)
+
+
+/* Out-of-memory handler: write a message to stderr and kill the process.  */
+static void
+__attribute__ ((__noreturn__))
+oom (void)
+{
+  static const char msg[] = "\
+cannot allocate memory for thread-local data: ABORT\n";
+
+  __libc_write (STDERR_FILENO, msg, sizeof (msg) - 1);
+
+  /* Send SIGKILL to our own process.  */
+  __kill (__getpid (), SIGKILL);
+
+  /* Should the kill fail, loop forever (using ABORT_INSTRUCTION if any).  */
+  while (1)
+    {
+# ifdef ABORT_INSTRUCTION
+      ABORT_INSTRUCTION;
+# endif
+    }
+}
+
 
 
 size_t
@@ -40,38 +68,49 @@ _dl_next_tls_modid (void)
 
   if (__builtin_expect (GL(dl_tls_dtv_gaps), false))
     {
-      /* XXX If this method proves too costly we can optimize
-	 it to use a constant time method.  But I don't think
-	 it's a problem.  */
-      struct link_map *runp = GL(dl_initimage_list);
-      bool used[GL(dl_tls_max_dtv_idx)];
-
-      assert (runp != NULL);
+      size_t disp = 0;
+      struct dtv_slotinfo_list *runp = GL(dl_tls_dtv_slotinfo_list);
+
+      /* Note that this branch will never be executed during program
+	 start since there are no gaps at that time.  Therefore it does
+	 not matter that dl_tls_dtv_slotinfo_list is not allocated yet
+	 when the function is called for the first time.  */
+      result = GL(dl_tls_static_nelem);
+      assert (result < GL(dl_tls_max_dtv_idx));
       do
 	{
-	  assert (runp->l_tls_modid > 0
-		  && runp->l_tls_modid <= GL(dl_tls_max_dtv_idx));
-	  used[runp->l_tls_modid - 1] = true;
-	}
-      while ((runp = runp->l_tls_nextimage) != GL(dl_initimage_list));
+	  while (result - disp < runp->len)
+	    if (runp->slotinfo[result - disp].map == NULL)
+	      break;
 
-      result = 0;
-      do
-	/* The information about the gaps is pessimistic.  It might be
-	   there are actually none.  */
-	if (result >= GL(dl_tls_max_dtv_idx))
-	  {
-	    /* Now we know there is actually no gap.  Bump the maximum
-	       ID number and remember that there are no gaps.  */
-	    result = ++GL(dl_tls_max_dtv_idx);
-	    GL(dl_tls_dtv_gaps) = false;
+	  ++result;
+	  assert (result <= GL(dl_tls_max_dtv_idx) + 1);
+
+	  if (result - disp < runp->len)
 	    break;
-	  }
-      while (used[result++]);
+
+	  disp += runp->len;
+	}
+      while ((runp = runp->next) != NULL);
+
+      if (result >= GL(dl_tls_max_dtv_idx) + 1)
+	{
+	  /* The new index must indeed be exactly one higher than the
+	     previous high.  */
+	  assert (result == GL(dl_tls_max_dtv_idx) + 1);
+
+	  /* There is no gap anymore.  */
+	  GL(dl_tls_dtv_gaps) = false;
+
+	  goto nogaps;
+	}
     }
   else
-    /* No gaps, allocate a new entry.  */
-    result = ++GL(dl_tls_max_dtv_idx);
+    {
+      /* No gaps, allocate a new entry.  */
+    nogaps:
+      result = ++GL(dl_tls_max_dtv_idx);
+    }
 
   return result;
 }
@@ -79,41 +118,39 @@ _dl_next_tls_modid (void)
 
 void
 internal_function
-_dl_determine_tlsoffset (struct link_map *lastp)
+_dl_determine_tlsoffset (void)
 {
-  struct link_map *runp;
-  size_t max_align = 0;
+  struct dtv_slotinfo *slotinfo;
+  size_t max_align = __alignof__ (void *);
   size_t offset;
+  size_t cnt;
 
-  if (lastp == NULL)
-    {
-      /* None of the objects used at startup time uses TLS.  We still
-	 have to allocate the TCB and dtv.  */
-      GL(dl_tls_static_size) = TLS_TCB_SIZE;
-      GL(dl_tls_static_align) = TLS_TCB_ALIGN;
-
-      return;
-    }
+  /* The first element of the dtv slot info list is allocated.  */
+  assert (GL(dl_tls_dtv_slotinfo_list) != NULL);
+  /* There is at this point only one element in the
+     dl_tls_dtv_slotinfo_list list.  */
+  assert (GL(dl_tls_dtv_slotinfo_list)->next == NULL);
 
 # if TLS_TCB_AT_TP
   /* We simply start with zero.  */
   offset = 0;
 
-  runp = lastp->l_tls_nextimage;
-  do
+  slotinfo = GL(dl_tls_dtv_slotinfo_list)->slotinfo;
+  for (cnt = 1; slotinfo[cnt].map != NULL; ++cnt)
     {
-      max_align = MAX (max_align, runp->l_tls_align);
+      assert (cnt < GL(dl_tls_dtv_slotinfo_list)->len);
+
+      max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align);
 
       /* Compute the offset of the next TLS block.  */
-      offset = roundup (offset + runp->l_tls_blocksize, runp->l_tls_align);
+      offset = roundup (offset + slotinfo[cnt].map->l_tls_blocksize,
+			slotinfo[cnt].map->l_tls_align);
 
       /* XXX For some architectures we perhaps should store the
 	 negative offset.  */
-      runp->l_tls_offset = offset;
+      slotinfo[cnt].map->l_tls_offset = offset;
     }
-  while ((runp = runp->l_tls_nextimage) != lastp->l_tls_nextimage);
 
-#if 0
   /* The thread descriptor (pointed to by the thread pointer) has its
      own alignment requirement.  Adjust the static TLS size
      and TLS offsets appropriately.  */
@@ -121,34 +158,44 @@ _dl_determine_tlsoffset (struct link_map *lastp)
   // XXX after the first (closest to the TCB) TLS block since this
   // XXX would invalidate the offsets the linker creates for the LE
   // XXX model.
-  if (offset % TLS_TCB_ALIGN != 0)
-    abort ();
-#endif
 
   GL(dl_tls_static_size) = offset + TLS_TCB_SIZE;
 # elif TLS_DTV_AT_TP
-  struct link_map *prevp;
-
-  /* The first block starts right after the TCB.  */
+  /* The TLS blocks start right after the TCB.  */
   offset = TLS_TCB_SIZE;
-  max_align = runp->l_tls_align;
-  runp = lastp->l_tls_nextimage;
-  runp->l_tls_offset = offset;
-  prevp = runp;
 
-  while ((runp = runp->l_tls_nextimage) != firstp)
+  /* The first block follows the TCB, rounded up to its own alignment.  */
+  slotinfo = GL(dl_tls_dtv_slotinfo_list)->slotinfo;
+  if (slotinfo[1].map != NULL)
     {
-      max_align = MAX (max_align, runp->l_tls_align);
+      size_t prev_size;
 
-      /* Compute the offset of the next TLS block.  */
-      offset = roundup (offset + prevp->l_tls_blocksize, runp->l_tls_align);
+      offset = roundup (offset, slotinfo[1].map->l_tls_align);
+      slotinfo[1].map->l_tls_offset = offset;
+      max_align = MAX (max_align, slotinfo[1].map->l_tls_align);
+      prev_size = slotinfo[1].map->l_tls_blocksize;
 
-      runp->l_tls_offset = offset;
+      for (cnt = 2; slotinfo[cnt].map != NULL; ++cnt)
+	{
+	  assert (cnt < GL(dl_tls_dtv_slotinfo_list)->len);
+
+	  max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align);
+
+	  /* Compute the offset of the next TLS block.  */
+	  offset = roundup (offset + prev_size,
+			    slotinfo[cnt].map->l_tls_align);
+
+	  /* XXX For some architectures we perhaps should store the
+	     negative offset.  */
+	  slotinfo[cnt].map->l_tls_offset = offset;
+
+	  prev_size = slotinfo[cnt].map->l_tls_blocksize;
+	}
 
-      prevp = runp;
+      offset += prev_size;
     }
 
-  GL(dl_tls_static_size) = offset + prevp->l_tls_blocksize;
+  GL(dl_tls_static_size) = offset;
 # else
 #  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
 # endif
@@ -164,59 +211,100 @@ _dl_allocate_tls (void)
 {
   void *result;
   dtv_t *dtv;
+  size_t dtv_length;
 
   /* Allocate a correctly aligned chunk of memory.  */
   /* XXX For now */
   assert (GL(dl_tls_static_align) <= GL(dl_pagesize));
-#ifdef MAP_ANON
-# define _dl_zerofd (-1)
-#else
-# define _dl_zerofd GL(dl_zerofd)
+# ifdef MAP_ANON
+#  define _dl_zerofd (-1)
+# else
+#  define _dl_zerofd GL(dl_zerofd)
   if ((dl_zerofd) == -1)
     GL(dl_zerofd) = _dl_sysdep_open_zero_fill ();
-# define MAP_ANON 0
-#endif
+#  define MAP_ANON 0
+# endif
   result = __mmap (0, GL(dl_tls_static_size), PROT_READ|PROT_WRITE,
 		   MAP_ANON|MAP_PRIVATE, _dl_zerofd, 0);
 
-  dtv = (dtv_t *) malloc ((GL(dl_tls_max_dtv_idx) + 1) * sizeof (dtv_t));
+  /* We allocate a few more elements in the dtv than are needed for the
+     initial set of modules.  This should avoid in most cases expansions
+     of the dtv.  */
+  dtv_length = GL(dl_tls_max_dtv_idx) + DTV_SURPLUS;
+  dtv = (dtv_t *) malloc ((dtv_length + 2) * sizeof (dtv_t));
   if (result != MAP_FAILED && dtv != NULL)
     {
-      struct link_map *runp;
+      struct dtv_slotinfo_list *listp;
+      size_t total = 0;
 
 # if TLS_TCB_AT_TP
       /* The TCB follows the TLS blocks.  */
       result = (char *) result + GL(dl_tls_static_size) - TLS_TCB_SIZE;
 # endif
 
-      /* XXX Fill in an correct generation number.  */
-      dtv[0].counter = 0;
-
-      /* Initialize the memory from the initialization image list and clear
-	 the BSS parts.  */
-      if (GL(dl_initimage_list) != NULL)
+      /* This is the initial length of the dtv.  */
+      dtv[0].counter = dtv_length;
+      /* Fill in the generation number.  */
+      dtv[1].counter = GL(dl_tls_generation) = 0;
+      /* Initialize all of the rest of the dtv with zero to indicate
+	 nothing there.  */
+      memset (dtv + 2, '\0', dtv_length * sizeof (dtv_t));
+
+      /* We have to prepare the dtv for all currently loaded
+	 modules using TLS.  For those which are dynamically loaded we
+	 add the values indicating deferred allocation.  */
+      listp = GL(dl_tls_dtv_slotinfo_list);
+      while (1)
 	{
-	  runp = GL(dl_initimage_list)->l_tls_nextimage;
-	  do
+	  size_t cnt;
+
+	  for (cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
 	    {
-	      assert (runp->l_tls_modid > 0);
-	      assert (runp->l_tls_modid <= GL(dl_tls_max_dtv_idx));
+	      struct link_map *map;
+	      void *dest;
+
+	      /* Slot TOTAL + CNT belongs to the module with that ID; IDs
+		 up to and including dl_tls_max_dtv_idx are in use.  */
+	      if (total + cnt > GL(dl_tls_max_dtv_idx))
+		break;
+
+	      map = listp->slotinfo[cnt].map;
+	      if (map == NULL)
+		/* Unused entry.  */
+		continue;
+
+	      if (map->l_type == lt_loaded)
+		{
+		  /* For dynamically loaded modules we simply store
+		     the value indicating deferred allocation.  */
+		  dtv[1 + map->l_tls_modid].pointer = TLS_DTV_UNALLOCATED;
+		  continue;
+		}
+
+	      assert (map->l_tls_modid == cnt);
+	      assert (map->l_tls_blocksize >= map->l_tls_initimage_size);
 # if TLS_TCB_AT_TP
-	      dtv[runp->l_tls_modid].pointer = result - runp->l_tls_offset;
+	      assert (map->l_tls_offset >= map->l_tls_blocksize);
+	      dest = (char *) result - map->l_tls_offset;
 # elif TLS_DTV_AT_TP
-	      dtv[runp->l_tls_modid].pointer = result + runp->l_tls_offset;
+	      dest = (char *) result + map->l_tls_offset;
 # else
 #  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
 # endif
 
-	      memset (__mempcpy (dtv[runp->l_tls_modid].pointer,
-				 runp->l_tls_initimage,
-				 runp->l_tls_initimage_size),
-		      '\0',
-		      runp->l_tls_blocksize - runp->l_tls_initimage_size);
+	      /* We don't have to clear the BSS part of the TLS block
+		 since mmap is used to allocate the memory which
+		 guarantees it is initialized to zero.  */
+	      dtv[1 + cnt].pointer = memcpy (dest, map->l_tls_initimage,
+					     map->l_tls_initimage_size);
 	    }
-	  while ((runp = runp->l_tls_nextimage)
-		 !=  GL(dl_initimage_list)->l_tls_nextimage);
+
+	  total += cnt;
+	  if (total >= GL(dl_tls_max_dtv_idx))
+	    break;
+
+	  listp = listp->next;
+	  assert (listp != NULL);
 	}
 
       /* Add the dtv to the thread data structures.  */
@@ -232,6 +320,7 @@ _dl_allocate_tls (void)
 }
 
 
+# ifdef SHARED
 /* The __tls_get_addr function has two basic forms which differ in the
    arguments.  The IA-64 form takes two parameters, the module ID and
    offset.  The form used, among others, on IA-32 takes a reference to
@@ -239,26 +328,227 @@ _dl_allocate_tls (void)
    form seems to be more often used (in the moment) so we default to
    it.  Users of the IA-64 form have to provide adequate definitions
    of the following macros.  */
-# ifndef GET_ADDR_ARGS
-#  define GET_ADDR_ARGS tls_index *ti
-# endif
-# ifndef GET_ADDR_MODULE
-#  define GET_ADDR_MODULE ti->ti_module
-# endif
-# ifndef GET_ADDR_OFFSET
-#  define GET_ADDR_OFFSET ti->ti_offset
-# endif
+#  ifndef GET_ADDR_ARGS
+#   define GET_ADDR_ARGS tls_index *ti
+#  endif
+#  ifndef GET_ADDR_MODULE
+#   define GET_ADDR_MODULE ti->ti_module
+#  endif
+#  ifndef GET_ADDR_OFFSET
+#   define GET_ADDR_OFFSET ti->ti_offset
+#  endif
+/* Systems which do not have tls_index also probably have to define
+   DONT_USE_TLS_INDEX.  */
+
+#  ifndef __TLS_GET_ADDR
+#   define __TLS_GET_ADDR __tls_get_addr
+#  endif
+
+
+/* Return the address of symbol REF in module MAP: the module ID and the
+   symbol value are handed to __TLS_GET_ADDR.  This is used in dl-sym.c.  */
+void *
+internal_function
+_dl_tls_symaddr (struct link_map *map, const ElfW(Sym) *ref)
+{
+#  ifndef DONT_USE_TLS_INDEX
+  tls_index tmp =
+    {
+      .ti_module = map->l_tls_modid,
+      .ti_offset = ref->st_value
+    };
+
+  return __TLS_GET_ADDR (&tmp);
+#  else
+  return __TLS_GET_ADDR (map->l_tls_modid, ref->st_value);
+#  endif
+}
+
+
+static void *
+allocate_and_init (struct link_map *map)
+{
+  void *newp;
+
+  newp = __libc_memalign (map->l_tls_align, map->l_tls_blocksize);
+  if (newp == NULL)
+    oom ();
 
+  /* Copy the initialization image and zero the rest of the block.  */
+  memset (__mempcpy (newp, map->l_tls_initimage, map->l_tls_initimage_size),
+	  '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
 
+  return newp;
+}
+
+
+/* The generic dynamic and local dynamic model cannot be used in
+   statically linked applications.  */
 void *
 __tls_get_addr (GET_ADDR_ARGS)
 {
   dtv_t *dtv = THREAD_DTV ();
+  struct link_map *the_map = NULL;
+  void *p;
+
+  if (__builtin_expect (dtv[0].counter != GL(dl_tls_generation), 0))
+    {
+      struct dtv_slotinfo_list *listp;
+      size_t idx;
+
+      /* The global dl_tls_dtv_slotinfo array contains for each module
+	 index the generation counter current when the entry was
+	 created.  This array never shrinks so that all module indices
+	 which were valid at some time can be used to access it.
+	 Before the first use of a new module index in this function
+	 the array was extended appropriately.  Access also does not
+	 have to be guarded against modifications of the array.  It is
+	 assumed that pointer-size values can be read atomically even
+	 in SMP environments.  It is possible that other threads at
+	 the same time dynamically load code and therefore add to the
+	 slotinfo list.  This is a problem since we must not pick up
+	 any information about incomplete work.  The solution to this
+	 is to ignore all dtv slots which were created after the one
+	 we are currently interested in.  We know that dynamic loading
+	 for this module is completed and this is the last load
+	 operation we know finished.  */
+      idx = GET_ADDR_MODULE;
+      listp = GL(dl_tls_dtv_slotinfo_list);
+      while (idx >= listp->len)
+	{
+	  idx -= listp->len;
+	  listp = listp->next;
+	}
 
-  if (dtv[GET_ADDR_MODULE].pointer == TLS_DTV_UNALLOCATE)
-    /* XXX */;
+      if (dtv[0].counter < listp->slotinfo[idx].gen)
+	{
+	  /* The generation counter for the slot is higher than what
+	     the current dtv implements.  We have to update the whole
+	     dtv but only those entries with a generation counter <=
+	     the one for the entry we need.  */
+	  size_t new_gen = listp->slotinfo[idx].gen;
+	  size_t total = 0;
+
+	  /* We have to look through the entire dtv slotinfo list.  */
+	  listp =  GL(dl_tls_dtv_slotinfo_list);
+	  do
+	    {
+	      size_t cnt;
+
+	      for (cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
+		{
+		  size_t gen = listp->slotinfo[cnt].gen;
+		  struct link_map *map;
+		  size_t modid;
+
+		  if (gen > new_gen)
+		    /* This is a slot for a generation younger than
+		       the one we are handling now.  It might be
+		       incompletely set up so ignore it.  */
+		    continue;
+
+		  /* If the entry is older than the current dtv layout
+		     we know we don't have to handle it.  */
+		  if (gen <= dtv[0].counter)
+		    continue;
+
+		  /* If there is no map this means the entry is empty.  */
+		  map = listp->slotinfo[cnt].map;
+		  if (map == NULL)
+		    {
+		      /* If this modid was used at some point the memory
+			 might still be allocated.  */
+		      if (dtv[total + cnt].pointer != TLS_DTV_UNALLOCATED)
+			free (dtv[total + cnt].pointer);
+
+		      continue;
+		    }
+
+		  /* Check whether the current dtv array is large enough.  */
+		  modid = map->l_tls_modid;
+		  assert (total + cnt == modid);
+		  if (dtv[-1].counter < modid)
+		    {
+		      /* Reallocate the dtv.  */
+		      dtv_t *newp;
+		      size_t newsize = GL(dl_tls_max_dtv_idx) + DTV_SURPLUS;
+		      size_t oldsize = dtv[-1].counter;
+
+		      assert (map->l_tls_modid <= newsize);
+
+		      newp = (dtv_t *) realloc (&dtv[-1],
+						(2 + newsize)
+						* sizeof (dtv_t));
+		      if (newp == NULL)
+			oom ();
+
+		      newp[0].counter = newsize;
+
+		      /* Clear the newly allocated part.  */
+		      memset (newp + 2 + oldsize, '\0',
+			      (newsize - oldsize) * sizeof (dtv_t));
+
+		      /* Point dtv to the generation counter.  */
+		      dtv = &newp[1];
+
+		      /* Install this new dtv in the thread data
+			 structures.  */
+		      INSTALL_NEW_DTV (dtv);
+		    }
+
+		  /* If there is currently memory allocated for this
+		     dtv entry free it.  */
+		  /* XXX Ideally we will at some point create a memory
+		     pool.  */
+		  if (dtv[modid].pointer != TLS_DTV_UNALLOCATED)
+		    /* Note that free is called for NULL as well.  We
+		       deallocate even if it is this dtv entry we are
+		       supposed to load.  The reason is that we call
+		       memalign and not malloc.  */
+		    free (dtv[modid].pointer);
+
+		  /* This module is loaded dynamically.  We defer
+		     memory allocation.  */
+		  dtv[modid].pointer = TLS_DTV_UNALLOCATED;
+
+		  if (modid == GET_ADDR_MODULE)
+		    the_map = map;
+		}
+
+	      total += listp->len;
+	    }
+	  while ((listp = listp->next) != NULL);
 
-  return (char *) dtv[GET_ADDR_MODULE].pointer + GET_ADDR_OFFSET;
+	  /* This will be the new maximum generation counter.  */
+	  dtv[0].counter = new_gen;
+	}
+    }
+
+  p = dtv[GET_ADDR_MODULE].pointer;
+
+  if (__builtin_expect (p == TLS_DTV_UNALLOCATED, 0))
+    {
+      /* The allocation was deferred.  Do it now.  */
+      if (the_map == NULL)
+	{
+	  /* Find the link map for this module.  */
+	  size_t idx = GET_ADDR_MODULE;
+	  struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
+
+	  while (idx >= listp->len)
+	    {
+	      idx -= listp->len;
+	      listp = listp->next;
+	    }
+
+	  the_map = listp->slotinfo[idx].map;
+	}
+
+      p = dtv[GET_ADDR_MODULE].pointer = allocate_and_init (the_map);
+    }
+
+  return (char *) p + GET_ADDR_OFFSET;
 }
+# endif
 
 #endif	/* use TLS */
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index bfad7c6b07..d722198328 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -289,23 +289,40 @@ struct rtld_global
 #endif
 
 #ifdef USE_TLS
-  /* Beginning of the list of link maps for objects which contain
-     thread-local storage sections.  This will be traversed to
-     initialize new TLS blocks.  */
-  EXTERN struct link_map *_dl_initimage_list;
-
   /* Highest dtv index currently needed.  */
   EXTERN size_t _dl_tls_max_dtv_idx;
   /* Flag signalling whether there are gaps in the module ID allocation.  */
   EXTERN bool _dl_tls_dtv_gaps;
-
+  /* Information about the dtv slots.  */
+  EXTERN struct dtv_slotinfo_list
+  {
+    size_t len;
+    struct dtv_slotinfo_list *next;
+    struct dtv_slotinfo
+    {
+      size_t gen;
+      struct link_map *map;
+    } slotinfo[0];
+  } *_dl_tls_dtv_slotinfo_list;
+  /* Number of modules in the static TLS block.  */
+  EXTERN size_t _dl_tls_static_nelem;
   /* Size of the static TLS block.  */
   EXTERN size_t _dl_tls_static_size;
   /* Alignment requirement of the static TLS block.  */
   EXTERN size_t _dl_tls_static_align;
 
+/* Number of additional entries in the slotinfo array of each slotinfo
+   list element.  A large number makes it almost certain that we never
+   have to iterate beyond the first element in the slotinfo list.  */
+# define TLS_SLOTINFO_SURPLUS (62)
+
+/* Number of additional slots in the dtv allocated.  */
+# define DTV_SURPLUS	(14)
+
   /* True if the dtv for the initial thread was malloc()ed.  */
   EXTERN bool _dl_initial_dtv_malloced;
+  /* Generation counter for the dtv.  */
+  EXTERN size_t _dl_tls_generation;
 #endif
 
   /* Name of the shared object to be profiled (if any).  */
@@ -667,12 +684,16 @@ extern void _dl_sysdep_start_cleanup (void)
 extern size_t _dl_next_tls_modid (void) internal_function;
 
 /* Calculate offset of the TLS blocks in the static TLS block.  */
-extern void _dl_determine_tlsoffset (struct link_map *firstp)
-     internal_function;
+extern void _dl_determine_tlsoffset (void) internal_function;
 
 /* Allocate memory for static TLS block and dtv.  */
 extern void *_dl_allocate_tls (void) internal_function;
 
+/* Return the symbol address given the map of the module it is in and
+   the symbol record.  */
+extern void *_dl_tls_symaddr (struct link_map *map, const ElfW(Sym) *ref)
+     internal_function;
+
 __END_DECLS
 
 #endif /* ldsodefs.h */
diff --git a/sysdeps/i386/dl-tls.h b/sysdeps/i386/dl-tls.h
index 8e30530542..5066b8dcd6 100644
--- a/sysdeps/i386/dl-tls.h
+++ b/sysdeps/i386/dl-tls.h
@@ -17,6 +17,7 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
+
 /* Type used for the representation of TLS information in the GOT.  */
 typedef struct
 {
@@ -25,6 +26,7 @@ typedef struct
 } tls_index;
 
 
+#ifdef SHARED
 /* This is the prototype for the GNU version.  */
 extern void *___tls_get_addr (tls_index *ti)
      __attribute__ ((__regparm__ (1)));
@@ -46,5 +48,7 @@ __tls_get_addr (tls_index *ti)
 
 /* Prepare using the definition of __tls_get_addr in the generic
    version of this file.  */
-#define __tls_get_addr __attribute__ ((__regparm__ (1))) ___tls_get_addr
+# define __tls_get_addr __attribute__ ((__regparm__ (1))) ___tls_get_addr
 strong_alias (___tls_get_addr, ___tls_get_addr_internal)
+# define __TLS_GET_ADDR ___tls_get_addr
+#endif