about summary refs log tree commit diff
path: root/sysdeps
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps')
-rw-r--r--sysdeps/arm/bits/link.h4
-rw-r--r--sysdeps/arm/bits/linkmap.h4
-rw-r--r--sysdeps/generic/bits/link.h5
-rw-r--r--sysdeps/generic/bits/linkmap.h4
-rw-r--r--sysdeps/generic/dl-lookupcfg.h16
-rw-r--r--sysdeps/generic/dl-tls.c402
-rw-r--r--sysdeps/generic/dl-trampoline.c1
-rw-r--r--sysdeps/generic/ldsodefs.h114
-rw-r--r--sysdeps/generic/libc-start.c24
-rw-r--r--sysdeps/generic/libc-tls.c11
-rw-r--r--sysdeps/generic/unsecvars.h17
-rw-r--r--sysdeps/hppa/bits/link.h6
-rw-r--r--sysdeps/hppa/bits/linkmap.h6
-rw-r--r--sysdeps/hppa/dl-lookupcfg.h6
-rw-r--r--sysdeps/i386/bits/link.h65
-rw-r--r--sysdeps/i386/bits/linkmap.h5
-rw-r--r--sysdeps/i386/dl-machine.h128
-rw-r--r--sysdeps/i386/dl-trampoline.S182
-rw-r--r--sysdeps/ia64/bits/link.h5
-rw-r--r--sysdeps/ia64/bits/linkmap.h5
-rw-r--r--sysdeps/ia64/dl-lookupcfg.h5
-rw-r--r--sysdeps/linkmap.h4
-rw-r--r--sysdeps/powerpc/powerpc64/dl-lookupcfg.h22
-rw-r--r--sysdeps/s390/bits/link.h13
-rw-r--r--sysdeps/s390/bits/linkmap.h13
-rw-r--r--sysdeps/sh/bits/link.h5
-rw-r--r--sysdeps/sh/bits/linkmap.h5
-rw-r--r--sysdeps/x86_64/bits/link.h120
-rw-r--r--sysdeps/x86_64/bits/linkmap.h14
-rw-r--r--sysdeps/x86_64/dl-machine.h136
-rw-r--r--sysdeps/x86_64/dl-trampoline.S188
31 files changed, 991 insertions, 544 deletions
diff --git a/sysdeps/arm/bits/link.h b/sysdeps/arm/bits/link.h
index 648976d7d2..e69de29bb2 100644
--- a/sysdeps/arm/bits/link.h
+++ b/sysdeps/arm/bits/link.h
@@ -1,4 +0,0 @@
-struct link_map_machine
-  {
-    Elf32_Addr plt; /* Address of .plt */
-  };
diff --git a/sysdeps/arm/bits/linkmap.h b/sysdeps/arm/bits/linkmap.h
new file mode 100644
index 0000000000..648976d7d2
--- /dev/null
+++ b/sysdeps/arm/bits/linkmap.h
@@ -0,0 +1,4 @@
+struct link_map_machine
+  {
+    Elf32_Addr plt; /* Address of .plt */
+  };
diff --git a/sysdeps/generic/bits/link.h b/sysdeps/generic/bits/link.h
index 470b4d3e5f..6b4f811c25 100644
--- a/sysdeps/generic/bits/link.h
+++ b/sysdeps/generic/bits/link.h
@@ -1,4 +1 @@
-struct link_map_machine
-  {
-    /* empty by default */
-  };
+#error "Architecture-specific definition needed."
diff --git a/sysdeps/generic/bits/linkmap.h b/sysdeps/generic/bits/linkmap.h
new file mode 100644
index 0000000000..470b4d3e5f
--- /dev/null
+++ b/sysdeps/generic/bits/linkmap.h
@@ -0,0 +1,4 @@
+struct link_map_machine
+  {
+    /* empty by default */
+  };
diff --git a/sysdeps/generic/dl-lookupcfg.h b/sysdeps/generic/dl-lookupcfg.h
index f48cb0a844..39c430b41e 100644
--- a/sysdeps/generic/dl-lookupcfg.h
+++ b/sysdeps/generic/dl-lookupcfg.h
@@ -1,5 +1,5 @@
 /* Configuration of lookup functions.
-   Copyright (C) 2002 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -17,16 +17,4 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
-/* Some platforms need more information from the symbol lookup function
-   than just the address.  But this is not generally the case.
-
-   However, because of how _dl_sym and _dl_tls_symaddr are written, every
-   platform needs it when we support TLS.  */
-
-#include <tls.h>		/* Defines USE_TLS (or doesn't).  */
-
-#ifdef USE_TLS
-# define DL_LOOKUP_RETURNS_MAP
-#else
-# undef DL_LOOKUP_RETURNS_MAP
-#endif
+/* Nothing special.  */
diff --git a/sysdeps/generic/dl-tls.c b/sysdeps/generic/dl-tls.c
index 2282dda9cc..10b7f2c65a 100644
--- a/sysdeps/generic/dl-tls.c
+++ b/sysdeps/generic/dl-tls.c
@@ -1,5 +1,5 @@
 /* Thread-local storage handling in the ELF dynamic linker.  Generic version.
-   Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -18,6 +18,8 @@
    02111-1307 USA.  */
 
 #include <assert.h>
+#include <errno.h>
+#include <libintl.h>
 #include <signal.h>
 #include <stdlib.h>
 #include <unistd.h>
@@ -116,10 +118,9 @@ void
 internal_function
 _dl_determine_tlsoffset (void)
 {
-  struct dtv_slotinfo *slotinfo;
   size_t max_align = TLS_TCB_ALIGN;
-  size_t offset, freetop = 0, freebottom = 0;
-  size_t cnt;
+  size_t freetop = 0;
+  size_t freebottom = 0;
 
   /* The first element of the dtv slot info list is allocated.  */
   assert (GL(dl_tls_dtv_slotinfo_list) != NULL);
@@ -127,7 +128,7 @@ _dl_determine_tlsoffset (void)
      dl_tls_dtv_slotinfo_list list.  */
   assert (GL(dl_tls_dtv_slotinfo_list)->next == NULL);
 
-  slotinfo = GL(dl_tls_dtv_slotinfo_list)->slotinfo;
+  struct dtv_slotinfo *slotinfo = GL(dl_tls_dtv_slotinfo_list)->slotinfo;
 
   /* Determining the offset of the various parts of the static TLS
      block has several dependencies.  In addition we have to work
@@ -159,9 +160,9 @@ _dl_determine_tlsoffset (void)
 
 # if TLS_TCB_AT_TP
   /* We simply start with zero.  */
-  offset = 0;
+  size_t offset = 0;
 
-  for (cnt = 1; slotinfo[cnt].map != NULL; ++cnt)
+  for (size_t cnt = 0; slotinfo[cnt].map != NULL; ++cnt)
     {
       assert (cnt < GL(dl_tls_dtv_slotinfo_list)->len);
 
@@ -206,9 +207,9 @@ _dl_determine_tlsoffset (void)
 			    + TLS_TCB_SIZE);
 # elif TLS_DTV_AT_TP
   /* The TLS blocks start right after the TCB.  */
-  offset = TLS_TCB_SIZE;
+  size_t offset = TLS_TCB_SIZE;
 
-  for (cnt = 1; slotinfo[cnt].map != NULL; ++cnt)
+  for (size_t cnt = 0; slotinfo[cnt].map != NULL; ++cnt)
     {
       assert (cnt < GL(dl_tls_dtv_slotinfo_list)->len);
 
@@ -225,8 +226,8 @@ _dl_determine_tlsoffset (void)
 	  if (off + slotinfo[cnt].map->l_tls_blocksize - firstbyte <= freetop)
 	    {
 	      slotinfo[cnt].map->l_tls_offset = off - firstbyte;
-	      freebottom = off + slotinfo[cnt].map->l_tls_blocksize
-			   - firstbyte;
+	      freebottom = (off + slotinfo[cnt].map->l_tls_blocksize
+			    - firstbyte);
 	      continue;
 	    }
 	}
@@ -357,14 +358,14 @@ _dl_allocate_tls_storage (void)
 
       /* Clear the TCB data structure.  We can't ask the caller (i.e.
 	 libpthread) to do it, because we will initialize the DTV et al.  */
-      memset (result, 0, TLS_TCB_SIZE);
+      memset (result, '\0', TLS_TCB_SIZE);
 # elif TLS_DTV_AT_TP
       result = (char *) result + size - GL(dl_tls_static_size);
 
       /* Clear the TCB data structure and TLS_PRE_TCB_SIZE bytes before it.
 	 We can't ask the caller (i.e. libpthread) to do it, because we will
 	 initialize the DTV et al.  */
-      memset ((char *) result - TLS_PRE_TCB_SIZE, 0,
+      memset ((char *) result - TLS_PRE_TCB_SIZE, '\0',
 	      TLS_PRE_TCB_SIZE + TLS_TCB_SIZE);
 # endif
 
@@ -388,10 +389,11 @@ _dl_allocate_tls_init (void *result)
   dtv_t *dtv = GET_DTV (result);
   struct dtv_slotinfo_list *listp;
   size_t total = 0;
+  size_t maxgen = 0;
 
-  /* We have to look prepare the dtv for all currently loaded
-     modules using TLS.  For those which are dynamically loaded we
-     add the values indicating deferred allocation.  */
+  /* We have to prepare the dtv for all currently loaded modules using
+     TLS.  For those which are dynamically loaded we add the values
+     indicating deferred allocation.  */
   listp = GL(dl_tls_dtv_slotinfo_list);
   while (1)
     {
@@ -411,11 +413,16 @@ _dl_allocate_tls_init (void *result)
 	    /* Unused entry.  */
 	    continue;
 
+	  /* Keep track of the maximum generation number.  This might
+	     not be the generation counter.  */
+	  maxgen = MAX (maxgen, listp->slotinfo[cnt].gen);
+
 	  if (map->l_tls_offset == NO_TLS_OFFSET)
 	    {
 	      /* For dynamically loaded modules we simply store
 		 the value indicating deferred allocation.  */
-	      dtv[map->l_tls_modid].pointer = TLS_DTV_UNALLOCATED;
+	      dtv[map->l_tls_modid].pointer.val = TLS_DTV_UNALLOCATED;
+	      dtv[map->l_tls_modid].pointer.is_static = false;
 	      continue;
 	    }
 
@@ -431,7 +438,8 @@ _dl_allocate_tls_init (void *result)
 # endif
 
 	  /* Copy the initialization image and clear the BSS part.  */
-	  dtv[map->l_tls_modid].pointer = dest;
+	  dtv[map->l_tls_modid].pointer.val = dest;
+	  dtv[map->l_tls_modid].pointer.is_static = true;
 	  memset (__mempcpy (dest, map->l_tls_initimage,
 			     map->l_tls_initimage_size), '\0',
 		  map->l_tls_blocksize - map->l_tls_initimage_size);
@@ -445,6 +453,9 @@ _dl_allocate_tls_init (void *result)
       assert (listp != NULL);
     }
 
+  /* The DTV version is up-to-date now.  */
+  dtv[0].counter = maxgen;
+
   return result;
 }
 rtld_hidden_def (_dl_allocate_tls_init)
@@ -466,6 +477,12 @@ _dl_deallocate_tls (void *tcb, bool dealloc_tcb)
 {
   dtv_t *dtv = GET_DTV (tcb);
 
+  /* We need to free the memory allocated for non-static TLS.  */
+  for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
+    if (! dtv[1 + cnt].pointer.is_static
+	&& dtv[1 + cnt].pointer.val != TLS_DTV_UNALLOCATED)
+      free (dtv[1 + cnt].pointer.val);
+
   /* The array starts with dtv[-1].  */
 #ifdef SHARED
   if (dtv != GL(dl_initial_dtv))
@@ -524,166 +541,172 @@ allocate_and_init (struct link_map *map)
 }
 
 
-/* The generic dynamic and local dynamic model cannot be used in
-   statically linked applications.  */
-void *
-__tls_get_addr (GET_ADDR_ARGS)
+struct link_map *
+_dl_update_slotinfo (unsigned long int req_modid)
 {
-  dtv_t *dtv = THREAD_DTV ();
   struct link_map *the_map = NULL;
-  void *p;
+  dtv_t *dtv = THREAD_DTV ();
 
-  if (__builtin_expect (dtv[0].counter != GL(dl_tls_generation), 0))
+  /* The global dl_tls_dtv_slotinfo array contains for each module
+     index the generation counter current when the entry was created.
+     This array never shrinks so that all module indices which were
+     valid at some time can be used to access it.  Before the first
+     use of a new module index in this function the array was extended
+     appropriately.  Access also does not have to be guarded against
+     modifications of the array.  It is assumed that pointer-size
+     values can be read atomically even in SMP environments.  It is
+     possible that other threads at the same time dynamically load
+     code and therefore add to the slotinfo list.  This is a problem
+     since we must not pick up any information about incomplete work.
+     The solution to this is to ignore all dtv slots which were
+     created after the one we are currently interested.  We know that
+     dynamic loading for this module is completed and this is the last
+     load operation we know finished.  */
+  unsigned long int idx = req_modid;
+  struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
+
+  while (idx >= listp->len)
     {
-      struct dtv_slotinfo_list *listp;
-      size_t idx;
-
-      /* The global dl_tls_dtv_slotinfo array contains for each module
-	 index the generation counter current when the entry was
-	 created.  This array never shrinks so that all module indices
-	 which were valid at some time can be used to access it.
-	 Before the first use of a new module index in this function
-	 the array was extended appropriately.  Access also does not
-	 have to be guarded against modifications of the array.  It is
-	 assumed that pointer-size values can be read atomically even
-	 in SMP environments.  It is possible that other threads at
-	 the same time dynamically load code and therefore add to the
-	 slotinfo list.  This is a problem since we must not pick up
-	 any information about incomplete work.  The solution to this
-	 is to ignore all dtv slots which were created after the one
-	 we are currently interested.  We know that dynamic loading
-	 for this module is completed and this is the last load
-	 operation we know finished.  */
-      idx = GET_ADDR_MODULE;
-      listp = GL(dl_tls_dtv_slotinfo_list);
-      while (idx >= listp->len)
-	{
-	  idx -= listp->len;
-	  listp = listp->next;
-	}
+      idx -= listp->len;
+      listp = listp->next;
+    }
 
-      if (dtv[0].counter < listp->slotinfo[idx].gen)
+  if (dtv[0].counter < listp->slotinfo[idx].gen)
+    {
+      /* The generation counter for the slot is higher than what the
+	 current dtv implements.  We have to update the whole dtv but
+	 only those entries with a generation counter <= the one for
+	 the entry we need.  */
+      size_t new_gen = listp->slotinfo[idx].gen;
+      size_t total = 0;
+
+      /* We have to look through the entire dtv slotinfo list.  */
+      listp =  GL(dl_tls_dtv_slotinfo_list);
+      do
 	{
-	  /* The generation counter for the slot is higher than what
-	     the current dtv implements.  We have to update the whole
-	     dtv but only those entries with a generation counter <=
-	     the one for the entry we need.  */
-	  size_t new_gen = listp->slotinfo[idx].gen;
-	  size_t total = 0;
-
-	  /* We have to look through the entire dtv slotinfo list.  */
-	  listp =  GL(dl_tls_dtv_slotinfo_list);
-	  do
+	  for (size_t cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
 	    {
-	      size_t cnt;
-
-	      for (cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
+	      size_t gen = listp->slotinfo[cnt].gen;
+
+	      if (gen > new_gen)
+		/* This is a slot for a generation younger than the
+		   one we are handling now.  It might be incompletely
+		   set up so ignore it.  */
+		continue;
+
+	      /* If the entry is older than the current dtv layout we
+		 know we don't have to handle it.  */
+	      if (gen <= dtv[0].counter)
+		continue;
+
+	      /* If there is no map this means the entry is empty.  */
+	      struct link_map *map = listp->slotinfo[cnt].map;
+	      if (map == NULL)
 		{
-		  size_t gen = listp->slotinfo[cnt].gen;
-		  struct link_map *map;
-		  size_t modid;
-
-		  if (gen > new_gen)
-		    /* This is a slot for a generation younger than
-		       the one we are handling now.  It might be
-		       incompletely set up so ignore it.  */
-		    continue;
-
-		  /* If the entry is older than the current dtv layout
-		     we know we don't have to handle it.  */
-		  if (gen <= dtv[0].counter)
-		    continue;
-
-		  /* If there is no map this means the entry is empty.  */
-		  map = listp->slotinfo[cnt].map;
-		  if (map == NULL)
+		  /* If this modid was used at some point the memory
+		     might still be allocated.  */
+		  if (! dtv[total + cnt].pointer.is_static
+		      && dtv[total + cnt].pointer.val != TLS_DTV_UNALLOCATED)
 		    {
-		      /* If this modid was used at some point the memory
-			 might still be allocated.  */
-		      if (dtv[total + cnt].pointer != TLS_DTV_UNALLOCATED)
-			{
-			  free (dtv[total + cnt].pointer);
-			  dtv[total + cnt].pointer = TLS_DTV_UNALLOCATED;
-			}
-
-		      continue;
+		      free (dtv[total + cnt].pointer.val);
+		      dtv[total + cnt].pointer.val = TLS_DTV_UNALLOCATED;
 		    }
 
-		  /* Check whether the current dtv array is large enough.  */
-		  modid = map->l_tls_modid;
-		  assert (total + cnt == modid);
-		  if (dtv[-1].counter < modid)
+		  continue;
+		}
+
+	      /* Check whether the current dtv array is large enough.  */
+	      size_t modid = map->l_tls_modid;
+	      assert (total + cnt == modid);
+	      if (dtv[-1].counter < modid)
+		{
+		  /* Reallocate the dtv.  */
+		  dtv_t *newp;
+		  size_t newsize = GL(dl_tls_max_dtv_idx) + DTV_SURPLUS;
+		  size_t oldsize = dtv[-1].counter;
+
+		  assert (map->l_tls_modid <= newsize);
+
+		  if (dtv == GL(dl_initial_dtv))
 		    {
-		      /* Reallocate the dtv.  */
-		      dtv_t *newp;
-		      size_t newsize = GL(dl_tls_max_dtv_idx) + DTV_SURPLUS;
-		      size_t oldsize = dtv[-1].counter;
-
-		      assert (map->l_tls_modid <= newsize);
-
-		      if (dtv == GL(dl_initial_dtv))
-			{
-			  /* This is the initial dtv that was allocated
-			     during rtld startup using the dl-minimal.c
-			     malloc instead of the real malloc.  We can't
-			     free it, we have to abandon the old storage.  */
-
-			  newp = malloc ((2 + newsize) * sizeof (dtv_t));
-			  if (newp == NULL)
-			    oom ();
-			  memcpy (newp, &dtv[-1], oldsize * sizeof (dtv_t));
-			}
-		      else
-			{
-			  newp = realloc (&dtv[-1],
-					  (2 + newsize) * sizeof (dtv_t));
-			  if (newp == NULL)
-			    oom ();
-			}
-
-		      newp[0].counter = newsize;
-
-		      /* Clear the newly allocated part.  */
-		      memset (newp + 2 + oldsize, '\0',
-			      (newsize - oldsize) * sizeof (dtv_t));
-
-		      /* Point dtv to the generation counter.  */
-		      dtv = &newp[1];
-
-		      /* Install this new dtv in the thread data
-			 structures.  */
-		      INSTALL_NEW_DTV (dtv);
+		      /* This is the initial dtv that was allocated
+			 during rtld startup using the dl-minimal.c
+			 malloc instead of the real malloc.  We can't
+			 free it, we have to abandon the old storage.  */
+
+		      newp = malloc ((2 + newsize) * sizeof (dtv_t));
+		      if (newp == NULL)
+			oom ();
+		      memcpy (newp, &dtv[-1], oldsize * sizeof (dtv_t));
 		    }
+		  else
+		    {
+		      newp = realloc (&dtv[-1],
+				      (2 + newsize) * sizeof (dtv_t));
+		      if (newp == NULL)
+			oom ();
+		    }
+
+		  newp[0].counter = newsize;
+
+		  /* Clear the newly allocated part.  */
+		  memset (newp + 2 + oldsize, '\0',
+			  (newsize - oldsize) * sizeof (dtv_t));
+
+		  /* Point dtv to the generation counter.  */
+		  dtv = &newp[1];
 
-		  /* If there is currently memory allocate for this
-		     dtv entry free it.  */
-		  /* XXX Ideally we will at some point create a memory
-		     pool.  */
-		  if (dtv[modid].pointer != TLS_DTV_UNALLOCATED)
-		    /* Note that free is called for NULL is well.  We
-		       deallocate even if it is this dtv entry we are
-		       supposed to load.  The reason is that we call
-		       memalign and not malloc.  */
-		    free (dtv[modid].pointer);
-
-		  /* This module is loaded dynamically- We defer
-		     memory allocation.  */
-		  dtv[modid].pointer = TLS_DTV_UNALLOCATED;
-
-		  if (modid == GET_ADDR_MODULE)
-		    the_map = map;
+		  /* Install this new dtv in the thread data
+		     structures.  */
+		  INSTALL_NEW_DTV (dtv);
 		}
 
-	      total += listp->len;
+	      /* If there is currently memory allocate for this
+		 dtv entry free it.  */
+	      /* XXX Ideally we will at some point create a memory
+		 pool.  */
+	      if (! dtv[modid].pointer.is_static
+		  && dtv[modid].pointer.val != TLS_DTV_UNALLOCATED)
+		/* Note that free is called for NULL is well.  We
+		   deallocate even if it is this dtv entry we are
+		   supposed to load.  The reason is that we call
+		   memalign and not malloc.  */
+		free (dtv[modid].pointer.val);
+
+	      /* This module is loaded dynamically- We defer memory
+		 allocation.  */
+	      dtv[modid].pointer.is_static = false;
+	      dtv[modid].pointer.val = TLS_DTV_UNALLOCATED;
+
+	      if (modid == req_modid)
+		the_map = map;
 	    }
-	  while ((listp = listp->next) != NULL);
 
-	  /* This will be the new maximum generation counter.  */
-	  dtv[0].counter = new_gen;
+	  total += listp->len;
 	}
+      while ((listp = listp->next) != NULL);
+
+      /* This will be the new maximum generation counter.  */
+      dtv[0].counter = new_gen;
     }
 
-  p = dtv[GET_ADDR_MODULE].pointer;
+  return the_map;
+}
+
+
+/* The generic dynamic and local dynamic model cannot be used in
+   statically linked applications.  */
+void *
+__tls_get_addr (GET_ADDR_ARGS)
+{
+  dtv_t *dtv = THREAD_DTV ();
+  struct link_map *the_map = NULL;
+  void *p;
+
+  if (__builtin_expect (dtv[0].counter != GL(dl_tls_generation), 0))
+    the_map = _dl_update_slotinfo (GET_ADDR_MODULE);
+
+  p = dtv[GET_ADDR_MODULE].pointer.val;
 
   if (__builtin_expect (p == TLS_DTV_UNALLOCATED, 0))
     {
@@ -703,11 +726,74 @@ __tls_get_addr (GET_ADDR_ARGS)
 	  the_map = listp->slotinfo[idx].map;
 	}
 
-      p = dtv[GET_ADDR_MODULE].pointer = allocate_and_init (the_map);
+      p = dtv[GET_ADDR_MODULE].pointer.val = allocate_and_init (the_map);
+      dtv[GET_ADDR_MODULE].pointer.is_static = false;
     }
 
   return (char *) p + GET_ADDR_OFFSET;
 }
 # endif
 
+
+
+void
+_dl_add_to_slotinfo (struct link_map  *l)
+{
+  /* Now that we know the object is loaded successfully add
+     modules containing TLS data to the dtv info table.  We
+     might have to increase its size.  */
+  struct dtv_slotinfo_list *listp;
+  struct dtv_slotinfo_list *prevp;
+  size_t idx = l->l_tls_modid;
+
+  /* Find the place in the dtv slotinfo list.  */
+  listp = GL(dl_tls_dtv_slotinfo_list);
+  prevp = NULL;		/* Needed to shut up gcc.  */
+  do
+    {
+      /* Does it fit in the array of this list element?  */
+      if (idx < listp->len)
+	break;
+      idx -= listp->len;
+      prevp = listp;
+      listp = listp->next;
+    }
+  while (listp != NULL);
+
+  if (listp == NULL)
+    {
+      /* When we come here it means we have to add a new element
+	 to the slotinfo list.  And the new module must be in
+	 the first slot.  */
+      assert (idx == 0);
+
+      listp = prevp->next = (struct dtv_slotinfo_list *)
+	malloc (sizeof (struct dtv_slotinfo_list)
+		+ TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
+      if (listp == NULL)
+	{
+	  /* We ran out of memory.  We will simply fail this
+	     call but don't undo anything we did so far.  The
+	     application will crash or be terminated anyway very
+	     soon.  */
+
+	  /* We have to do this since some entries in the dtv
+	     slotinfo array might already point to this
+	     generation.  */
+	  ++GL(dl_tls_generation);
+
+	  _dl_signal_error (ENOMEM, "dlopen", NULL, N_("\
+cannot create TLS data structures"));
+	}
+
+      listp->len = TLS_SLOTINFO_SURPLUS;
+      listp->next = NULL;
+      memset (listp->slotinfo, '\0',
+	      TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
+    }
+
+  /* Add the information into the slotinfo data structure.  */
+  listp->slotinfo[idx].map = l;
+  listp->slotinfo[idx].gen = GL(dl_tls_generation) + 1;
+}
 #endif	/* use TLS */
diff --git a/sysdeps/generic/dl-trampoline.c b/sysdeps/generic/dl-trampoline.c
new file mode 100644
index 0000000000..3ca89f3879
--- /dev/null
+++ b/sysdeps/generic/dl-trampoline.c
@@ -0,0 +1 @@
+#error "Architecture specific PLT trampolines must be defined."
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index ec68e1a565..4a181f4009 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -52,23 +52,15 @@ __BEGIN_DECLS
   most architectures the entry is already relocated - but for some not
   and we need to relocate at access time.  */
 #ifdef DL_RO_DYN_SECTION
-# define D_PTR(map,i) (map->i->d_un.d_ptr + map->l_addr)
+# define D_PTR(map, i) ((map)->i->d_un.d_ptr + (map)->l_addr)
 #else
-# define D_PTR(map,i) map->i->d_un.d_ptr
+# define D_PTR(map, i) (map)->i->d_un.d_ptr
 #endif
 
-/* On some platforms more information than just the address of the symbol
-   is needed from the lookup functions.  In this case we return the whole
-   link map.  */
-#ifdef DL_LOOKUP_RETURNS_MAP
+/* Result of the lookup functions and how to retrieve the base address.  */
 typedef struct link_map *lookup_t;
 # define LOOKUP_VALUE(map) map
-# define LOOKUP_VALUE_ADDRESS(map) (map ? map->l_addr : 0)
-#else
-typedef ElfW(Addr) lookup_t;
-# define LOOKUP_VALUE(map) map->l_addr
-# define LOOKUP_VALUE_ADDRESS(address) address
-#endif
+# define LOOKUP_VALUE_ADDRESS(map) ((map) ? (map)->l_addr : 0)
 
 /* on some architectures a pointer to a function is not just a pointer
    to the actual code of the function but rather an architecture
@@ -182,6 +174,55 @@ enum allowmask
   };
 
 
+/* Type for list of auditing interfaces.  */
+struct La_i86_regs;
+struct La_i86_retval;
+struct La_x86_64_regs;
+struct La_x86_64_retval;
+
+
+struct audit_ifaces
+{
+  void (*activity) (uintptr_t *, unsigned int);
+  char *(*objsearch) (const char *, uintptr_t *, unsigned int);
+  unsigned int (*objopen) (struct link_map *, Lmid_t, uintptr_t *);
+  void (*preinit) (uintptr_t *);
+  union
+  {
+    uintptr_t (*symbind32) (Elf32_Sym *, unsigned int, uintptr_t *,
+			    uintptr_t *, unsigned int *, const char *);
+    uintptr_t (*symbind64) (Elf64_Sym *, unsigned int, uintptr_t *,
+			    uintptr_t *, unsigned int *, const char *);
+  };
+  union
+  {
+    uintptr_t (*i86_gnu_pltenter) (Elf32_Sym *, unsigned int, uintptr_t *,
+				   uintptr_t *, const struct La_i86_regs *,
+				   unsigned int *, const char *name,
+				   long int *framesizep);
+    uintptr_t (*x86_64_gnu_pltenter) (Elf64_Sym *, unsigned int, uintptr_t *,
+				      uintptr_t *,
+				      const struct La_x86_64_regs *,
+				      unsigned int *, const char *name,
+				      long int *framesizep);
+  };
+  union
+  {
+    unsigned int (*i86_gnu_pltexit) (Elf32_Sym *, unsigned int, uintptr_t *,
+				     uintptr_t *, const struct La_i86_regs *,
+				     struct La_i86_retval *, const char *);
+    unsigned int (*x86_64_gnu_pltexit) (Elf64_Sym *, unsigned int, uintptr_t *,
+					uintptr_t *,
+					const struct La_x86_64_regs *,
+					struct La_x86_64_retval *,
+					const char *);
+  };
+  unsigned int (*objclose) (uintptr_t *);
+
+  struct audit_ifaces *next;
+};
+
+
 /* Test whether given NAME matches any of the names of the given object.  */
 extern int _dl_name_match_p (const char *__name, struct link_map *__map)
      internal_function;
@@ -224,7 +265,7 @@ struct rtld_global
 #endif
   EXTERN struct link_namespaces
   {
-    /* And a pointer to the map for the main map.  */
+    /* A pointer to the map for the main map.  */
     struct link_map *_ns_loaded;
     /* Number of object in the _dl_loaded list.  */
     unsigned int _ns_nloaded;
@@ -277,8 +318,12 @@ struct rtld_global
   EXTERN void **(*_dl_error_catch_tsd) (void) __attribute__ ((const));
 #endif
 
-  /* Structure describing the dynamic linker itself.  */
+  /* Structure describing the dynamic linker itself.  We need to
+     reserve memory for the data the audit libraries need.  */
   EXTERN struct link_map _dl_rtld_map;
+#ifdef SHARED
+  struct auditstate audit_data[DL_NNS];
+#endif
 
 #if defined SHARED && defined _LIBC_REENTRANT \
     && defined __rtld_lock_default_lock_recursive
@@ -311,6 +356,7 @@ struct rtld_global
     struct dtv_slotinfo
     {
       size_t gen;
+      bool is_static;
       struct link_map *map;
     } slotinfo[0];
   } *_dl_tls_dtv_slotinfo_list;
@@ -483,32 +529,12 @@ struct rtld_global_ro
      call the function instead of going through the PLT.  The result
      is that we can avoid exporting the functions and we do not jump
      PLT relocations in libc.so.  */
-  const char *(*_dl_get_origin) (void);
-  size_t (*_dl_dst_count) (const char *, int);
-  char *(*_dl_dst_substitute) (struct link_map *, const char *, char *, int);
-  struct link_map *(internal_function *_dl_map_object) (struct link_map *,
-							const char *, int,
-							int, int, int, Lmid_t);
-  void (internal_function *_dl_map_object_deps) (struct link_map *,
-						 struct link_map **,
-						 unsigned int, int, int);
-  void (*_dl_relocate_object) (struct link_map *, struct r_scope_elem *[],
-			       int, int);
-  int (internal_function *_dl_check_map_versions) (struct link_map *, int,
-						   int);
-  void (internal_function *_dl_init) (struct link_map *, int, char **,
-					char **);
-  void (*_dl_debug_state) (void);
-#ifndef MAP_COPY
-  void (*_dl_unload_cache) (void);
-#endif
   void (*_dl_debug_printf) (const char *, ...)
        __attribute__ ((__format__ (__printf__, 1, 2)));
   int (internal_function *_dl_catch_error) (const char **, const char **,
 					    void (*) (void *), void *);
   void (internal_function *_dl_signal_error) (int, const char *, const char *,
 					      const char *);
-  void (internal_function *_dl_start_profile) (void);
   void (*_dl_mcount) (ElfW(Addr) frompc, ElfW(Addr) selfpc);
   lookup_t (internal_function *_dl_lookup_symbol_x) (const char *,
 						     struct link_map *,
@@ -518,7 +544,13 @@ struct rtld_global_ro
 						     int, int,
 						     struct link_map *);
   int (*_dl_check_caller) (const void *, enum allowmask);
+  void *(*_dl_open) (const char *file, int mode, const void *caller_dlopen,
+		     Lmid_t nsid, int argc, char *argv[], char *env[]);
+  void (*_dl_close) (void *map);
 
+  /* List of auditing interfaces.  */
+  struct audit_ifaces *_dl_audit;
+  unsigned int _dl_naudit;
 };
 # define __rtld_global_attribute__
 # ifdef IS_IN_rtld
@@ -911,6 +943,20 @@ extern char *_dl_dst_substitute (struct link_map *l, const char *name,
 extern int _dl_check_caller (const void *caller, enum allowmask mask)
      attribute_hidden;
 
+/* Open the shared object NAME, relocate it, and run its initializer if it
+   hasn't already been run.  MODE is as for `dlopen' (see <dlfcn.h>).  If
+   the object is already opened, returns its existing map.  */
+extern void *_dl_open (const char *name, int mode, const void *caller,
+		       Lmid_t nsid, int argc, char *argv[], char *env[])
+     attribute_hidden;
+
+/* Add module to slot information data.  */
+extern void _dl_add_to_slotinfo (struct link_map  *l) attribute_hidden;
+
+/* Update slot information data for at least the generation of the
+   module with the given index.  */
+extern struct link_map *_dl_update_slotinfo (unsigned long int req_modid);
+
 __END_DECLS
 
 #endif /* ldsodefs.h */
diff --git a/sysdeps/generic/libc-start.c b/sysdeps/generic/libc-start.c
index ad5ebe0911..5bb8a9b352 100644
--- a/sysdeps/generic/libc-start.c
+++ b/sysdeps/generic/libc-start.c
@@ -80,6 +80,10 @@ STATIC int LIBC_START_MAIN (int (*main) (int, char **, char **
 			    void *__unbounded stack_end)
      __attribute__ ((noreturn));
 
+
+/* Note: the fini parameter is ignored here.  It used to be registered
+   with __cxa_atexit.  This had the disadvantage that finalizers were
+   called in more than one place.  */
 STATIC int
 LIBC_START_MAIN (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),
 		 int argc, char *__unbounded *__unbounded ubp_av,
@@ -158,10 +162,6 @@ LIBC_START_MAIN (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),
   __libc_init_first (argc, argv, __environ);
 #endif
 
-  /* Register the destructor of the program, if any.  */
-  if (fini)
-    __cxa_atexit ((void (*) (void *)) fini, NULL, NULL);
-
 #ifndef SHARED
   /* Some security at this point.  Prevent starting a SUID binary where
      the standard file descriptors are not opened.  We have to do this
@@ -184,6 +184,22 @@ LIBC_START_MAIN (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),
 	     );
 
 #ifdef SHARED
+  /* Auditing checkpoint: we have a new object.  */
+  if (__builtin_expect (GLRO(dl_naudit) > 0, 0))
+    {
+      struct audit_ifaces *afct = GLRO(dl_audit);
+      struct link_map *head = GL(dl_ns)[LM_ID_BASE]._ns_loaded;
+      for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
+	{
+	  if (afct->preinit != NULL)
+	    afct->preinit (&head->l_audit[cnt].cookie);
+
+	  afct = afct->next;
+	}
+    }
+#endif
+
+#ifdef SHARED
   if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_IMPCALLS, 0))
     GLRO(dl_debug_printf) ("\ntransferring control: %s\n\n", argv[0]);
 #endif
diff --git a/sysdeps/generic/libc-tls.c b/sysdeps/generic/libc-tls.c
index b5ecc36436..b88ede06a2 100644
--- a/sysdeps/generic/libc-tls.c
+++ b/sysdeps/generic/libc-tls.c
@@ -1,5 +1,5 @@
 /* Initialization code for TLS in statically linked application.
-   Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -178,17 +178,18 @@ __libc_setup_tls (size_t tcbsize, size_t tcbalign)
 
   /* Initialize the TLS block.  */
 # if TLS_TCB_AT_TP
-  static_dtv[2].pointer = ((char *) tlsblock + tcb_offset
-			   - roundup (memsz, align ?: 1));
+  static_dtv[2].pointer.val = ((char *) tlsblock + tcb_offset
+			       - roundup (memsz, align ?: 1));
   static_map.l_tls_offset = roundup (memsz, align ?: 1);
 # elif TLS_DTV_AT_TP
-  static_dtv[2].pointer = (char *) tlsblock + tcb_offset;
+  static_dtv[2].pointer.val = (char *) tlsblock + tcb_offset;
   static_map.l_tls_offset = tcb_offset;
 # else
 #  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
 # endif
+  static_dtv[2].pointer.is_static = true;
   /* sbrk gives us zero'd memory, so we don't need to clear the remainder.  */
-  memcpy (static_dtv[2].pointer, initimage, filesz);
+  memcpy (static_dtv[2].pointer.val, initimage, filesz);
 
   /* Install the pointer to the dtv.  */
 
diff --git a/sysdeps/generic/unsecvars.h b/sysdeps/generic/unsecvars.h
index eb77b260d8..a7378b742f 100644
--- a/sysdeps/generic/unsecvars.h
+++ b/sysdeps/generic/unsecvars.h
@@ -2,18 +2,19 @@
    all stuffed in a single string which means they have to be terminated
    with a '\0' explicitly.  */
 #define UNSECURE_ENVVARS \
-  "LD_PRELOAD\0"							      \
-  "LD_LIBRARY_PATH\0"							      \
-  "LD_ORIGIN_PATH\0"							      \
+  "GCONV_PATH\0"							      \
+  "GETCONF_DIR\0"							      \
+  "HOSTALIASES\0"							      \
+  "LD_AUDIT\0"								      \
   "LD_DEBUG\0"								      \
   "LD_DEBUG_OUTPUT\0"							      \
-  "LD_PROFILE\0"							      \
-  "LD_USE_LOAD_BIAS\0"							      \
   "LD_DYNAMIC_WEAK\0"							      \
+  "LD_LIBRARY_PATH\0"							      \
+  "LD_ORIGIN_PATH\0"							      \
+  "LD_PRELOAD\0"							      \
+  "LD_PROFILE\0"							      \
   "LD_SHOW_AUXV\0"							      \
-  "GCONV_PATH\0"							      \
-  "GETCONF_DIR\0"							      \
-  "HOSTALIASES\0"							      \
+  "LD_USE_LOAD_BIAS\0"							      \
   "LOCALDOMAIN\0"							      \
   "LOCPATH\0"								      \
   "MALLOC_TRACE\0"							      \
diff --git a/sysdeps/hppa/bits/link.h b/sysdeps/hppa/bits/link.h
index 54842b2299..e69de29bb2 100644
--- a/sysdeps/hppa/bits/link.h
+++ b/sysdeps/hppa/bits/link.h
@@ -1,6 +0,0 @@
-/* Used to store the function descriptor table */
-struct link_map_machine
-  {
-    size_t fptr_table_len;
-    ElfW(Addr) *fptr_table;
-  };
diff --git a/sysdeps/hppa/bits/linkmap.h b/sysdeps/hppa/bits/linkmap.h
new file mode 100644
index 0000000000..54842b2299
--- /dev/null
+++ b/sysdeps/hppa/bits/linkmap.h
@@ -0,0 +1,6 @@
+/* Used to store the function descriptor table */
+struct link_map_machine
+  {
+    size_t fptr_table_len;
+    ElfW(Addr) *fptr_table;
+  };
diff --git a/sysdeps/hppa/dl-lookupcfg.h b/sysdeps/hppa/dl-lookupcfg.h
index d393b3e427..84436e7c56 100644
--- a/sysdeps/hppa/dl-lookupcfg.h
+++ b/sysdeps/hppa/dl-lookupcfg.h
@@ -1,5 +1,5 @@
 /* Configuration of lookup functions.
-   Copyright (C) 2000 Free Software Foundation, Inc.
+   Copyright (C) 2000, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -17,9 +17,6 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
-/* Like IA-64, PA-RISC needs more information from the symbol lookup
-   function than just the address. */
-#define DL_LOOKUP_RETURNS_MAP
 #define ELF_FUNCTION_PTR_IS_SPECIAL
 #define DL_UNMAP_IS_SPECIAL
 
@@ -66,4 +63,3 @@ void _dl_unmap (struct link_map *map);
   ((Elf32_Addr)(addr) & 2 ? (addr) : DL_AUTO_FUNCTION_ADDRESS (map, addr))
 #define DL_DT_FINI_ADDRESS(map, addr) \
   ((Elf32_Addr)(addr) & 2 ? (addr) : DL_AUTO_FUNCTION_ADDRESS (map, addr))
-
diff --git a/sysdeps/i386/bits/link.h b/sysdeps/i386/bits/link.h
index 3be9b7eae8..985d040413 100644
--- a/sysdeps/i386/bits/link.h
+++ b/sysdeps/i386/bits/link.h
@@ -1,5 +1,60 @@
-struct link_map_machine
-  {
-    Elf32_Addr plt; /* Address of .plt + 0x16 */
-    Elf32_Addr gotplt; /* Address of .got + 0x0c */
-  };
+/* Copyright (C) 2004, 2005 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef	_LINK_H
+# error "Never include <bits/link.h> directly; use <link.h> instead."
+#endif
+
+
+/* Registers for entry into PLT on IA-32.  */
+typedef struct La_i86_regs
+{
+  uint32_t lr_edx;
+  uint32_t lr_ecx;
+  uint32_t lr_eax;
+  uint32_t lr_ebp;
+  uint32_t lr_esp;
+} La_i86_regs;
+
+/* Return values for calls from PLT on IA-32.  */
+typedef struct La_i86_retval
+{
+  uint32_t lrv_eax;
+  uint32_t lrv_edx;
+  long double lrv_st0;
+  long double lrv_st1;
+} La_i86_retval;
+
+
+__BEGIN_DECLS
+
+extern Elf32_Addr la_i86_gnu_pltenter (Elf32_Sym *__sym, unsigned int __ndx,
+				       uintptr_t *__refcook,
+				       uintptr_t *__defcook,
+				       La_i86_regs *__regs,
+				       unsigned int *__flags,
+				       const char *__symname,
+				       long int *__framesizep);
+extern unsigned int la_i86_gnu_pltexit (Elf32_Sym *__sym, unsigned int __ndx,
+					uintptr_t *__refcook,
+					uintptr_t *__defcook,
+					const La_i86_regs *__inregs,
+					La_i86_retval *__outregs,
+					const char *symname);
+
+__END_DECLS
diff --git a/sysdeps/i386/bits/linkmap.h b/sysdeps/i386/bits/linkmap.h
new file mode 100644
index 0000000000..3be9b7eae8
--- /dev/null
+++ b/sysdeps/i386/bits/linkmap.h
@@ -0,0 +1,5 @@
+struct link_map_machine
+  {
+    Elf32_Addr plt; /* Address of .plt + 0x16 */
+    Elf32_Addr gotplt; /* Address of .got + 0x0c */
+  };
diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h
index c48d9d325e..e1cc10e9cc 100644
--- a/sysdeps/i386/dl-machine.h
+++ b/sysdeps/i386/dl-machine.h
@@ -129,7 +129,8 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
 	{
 	  got[2] = (Elf32_Addr) &_dl_runtime_profile;
 
-	  if (_dl_name_match_p (GLRO(dl_profile), l))
+	  if (GLRO(dl_profile) != NULL
+	      && _dl_name_match_p (GLRO(dl_profile), l))
 	    /* This is the object we are looking for.  Say that we really
 	       want profiling and the timers are started.  */
 	    GL(dl_profile_map) = l;
@@ -154,112 +155,18 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
    destroys the passed register information.  */
 /* GKM FIXME: Fix trampoline to pass bounds so we can do
    without the `__unbounded' qualifier.  */
-#define ARCH_FIXUP_ATTRIBUTE __attribute__ ((regparm (3), unused))
+#define ARCH_FIXUP_ATTRIBUTE __attribute__ ((regparm (3), stdcall, unused))
 
-static ElfW(Addr) fixup (struct link_map *__unbounded l,
-			 ElfW(Word) reloc_offset)
+extern ElfW(Addr) _dl_fixup (struct link_map *__unbounded l,
+			     ElfW(Word) reloc_offset)
      ARCH_FIXUP_ATTRIBUTE;
-static ElfW(Addr) profile_fixup (struct link_map *l, ElfW(Word) reloc_offset,
-				 ElfW(Addr) retaddr)
+extern ElfW(Addr) _dl_profile_fixup (struct link_map *l,
+				     ElfW(Word) reloc_offset,
+				     ElfW(Addr) retaddr, const void *regs,
+				     long int *framesizep)
      ARCH_FIXUP_ATTRIBUTE;
 # endif
 
-/* This code is used in dl-runtime.c to call the `fixup' function
-   and then redirect to the address it returns.  */
-# if !defined PROF && !__BOUNDED_POINTERS__
-#  define ELF_MACHINE_RUNTIME_TRAMPOLINE asm ("\
-	.text\n\
-	.globl _dl_runtime_resolve\n\
-	.type _dl_runtime_resolve, @function\n\
-	" CFI_STARTPROC "\n\
-	.align 16\n\
-_dl_runtime_resolve:\n\
-	" CFI_ADJUST_CFA_OFFSET (8) "\n\
-	pushl %eax		# Preserve registers otherwise clobbered.\n\
-	" CFI_ADJUST_CFA_OFFSET (4) "\n\
-	pushl %ecx\n\
-	" CFI_ADJUST_CFA_OFFSET (4) "\n\
-	pushl %edx\n\
-	" CFI_ADJUST_CFA_OFFSET (4) "\n\
-	movl 16(%esp), %edx	# Copy args pushed by PLT in register.  Note\n\
-	movl 12(%esp), %eax	# that `fixup' takes its parameters in regs.\n\
-	call fixup		# Call resolver.\n\
-	popl %edx		# Get register content back.\n\
-	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
-	popl %ecx\n\
-	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
-	xchgl %eax, (%esp)	# Get %eax contents end store function address.\n\
-	ret $8			# Jump to function address.\n\
-	" CFI_ENDPROC "\n\
-	.size _dl_runtime_resolve, .-_dl_runtime_resolve\n\
-\n\
-	.globl _dl_runtime_profile\n\
-	.type _dl_runtime_profile, @function\n\
-	" CFI_STARTPROC "\n\
-	.align 16\n\
-_dl_runtime_profile:\n\
-	" CFI_ADJUST_CFA_OFFSET (8) "\n\
-	pushl %eax		# Preserve registers otherwise clobbered.\n\
-	" CFI_ADJUST_CFA_OFFSET (4) "\n\
-	pushl %ecx\n\
-	" CFI_ADJUST_CFA_OFFSET (4) "\n\
-	pushl %edx\n\
-	" CFI_ADJUST_CFA_OFFSET (4) "\n\
-	movl 20(%esp), %ecx	# Load return address\n\
-	movl 16(%esp), %edx	# Copy args pushed by PLT in register.  Note\n\
-	movl 12(%esp), %eax	# that `fixup' takes its parameters in regs.\n\
-	call profile_fixup	# Call resolver.\n\
-	popl %edx		# Get register content back.\n\
-	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
-	popl %ecx\n\
-	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
-	xchgl %eax, (%esp)	# Get %eax contents end store function address.\n\
-	ret $8			# Jump to function address.\n\
-	" CFI_ENDPROC "\n\
-	.size _dl_runtime_profile, .-_dl_runtime_profile\n\
-	.previous\n\
-");
-# else
-#  define ELF_MACHINE_RUNTIME_TRAMPOLINE asm ("\n\
-	.text\n\
-	.globl _dl_runtime_resolve\n\
-	.globl _dl_runtime_profile\n\
-	.type _dl_runtime_resolve, @function\n\
-	.type _dl_runtime_profile, @function\n\
-	" CFI_STARTPROC "\n\
-	.align 16\n\
-_dl_runtime_resolve:\n\
-_dl_runtime_profile:\n\
-	" CFI_ADJUST_CFA_OFFSET (8) "\n\
-	pushl %eax		# Preserve registers otherwise clobbered.\n\
-	" CFI_ADJUST_CFA_OFFSET (4) "\n\
-	pushl %ecx\n\
-	" CFI_ADJUST_CFA_OFFSET (4) "\n\
-	pushl %edx\n\
-	" CFI_ADJUST_CFA_OFFSET (4) "\n\
-	movl 16(%esp), %edx	# Push the arguments for `fixup'\n\
-	movl 12(%esp), %eax\n\
-	pushl %edx\n\
-	" CFI_ADJUST_CFA_OFFSET (4) "\n\
-	pushl %eax\n\
-	" CFI_ADJUST_CFA_OFFSET (4) "\n\
-	call fixup		# Call resolver.\n\
-	popl %edx		# Pop the parameters\n\
-	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
-	popl %ecx\n\
-	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
-	popl %edx		# Get register content back.\n\
-	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
-	popl %ecx\n\
-	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
-	xchgl %eax, (%esp)	# Get %eax contents end store function address.\n\
-	ret $8			# Jump to function address.\n\
-	" CFI_ENDPROC "\n\
-	.size _dl_runtime_resolve, .-_dl_runtime_resolve\n\
-	.size _dl_runtime_profile, .-_dl_runtime_profile\n\
-	.previous\n\
-");
-# endif
 #endif
 
 /* Mask identifying addresses reserved for the user program,
@@ -375,9 +282,14 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rel *reloc,
   return value;
 }
 
+
+/* Names of the architecture-specific auditing callback functions.  */
+#define ARCH_LA_PLTENTER i86_gnu_pltenter
+#define ARCH_LA_PLTEXIT i86_gnu_pltexit
+
 #endif /* !dl_machine_h */
 
-#ifdef RESOLVE
+#ifdef RESOLVE_MAP
 
 /* The i386 never uses Elf32_Rela relocations for the dynamic linker.
    Prelinked libraries may use Elf32_Rela though.  */
@@ -422,7 +334,7 @@ elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc,
 #endif	/* !RTLD_BOOTSTRAP and have no -z combreloc */
     {
       const Elf32_Sym *const refsym = sym;
-#if defined USE_TLS && !defined RTLD_BOOTSTRAP
+#ifndef RTLD_BOOTSTRAP
       struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
       Elf32_Addr value = sym == NULL ? 0 : sym_map->l_addr + sym->st_value;
 #else
@@ -549,14 +461,8 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
 # ifndef RESOLVE_CONFLICT_FIND_MAP
       const Elf32_Sym *const refsym = sym;
 # endif
-# ifdef USE_TLS
       struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
       Elf32_Addr value = sym == NULL ? 0 : sym_map->l_addr + sym->st_value;
-# else
-      Elf32_Addr value = RESOLVE (&sym, version, r_type);
-      if (sym != NULL)
-	value += sym->st_value;
-# endif
 
       switch (ELF32_R_TYPE (reloc->r_info))
 	{
@@ -692,4 +598,4 @@ elf_machine_lazy_rela (struct link_map *map,
 
 #endif	/* !RTLD_BOOTSTRAP */
 
-#endif /* RESOLVE */
+#endif /* RESOLVE_MAP */
diff --git a/sysdeps/i386/dl-trampoline.S b/sysdeps/i386/dl-trampoline.S
new file mode 100644
index 0000000000..80dd300e86
--- /dev/null
+++ b/sysdeps/i386/dl-trampoline.S
@@ -0,0 +1,182 @@
+/* PLT trampolines.  i386 version.
+   Copyright (C) 2004, 2005 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+
+	.text
+	.globl _dl_runtime_resolve
+	.type _dl_runtime_resolve, @function
+	cfi_startproc
+	.align 16
+_dl_runtime_resolve:
+	cfi_adjust_cfa_offset (8)
+	pushl %eax		# Preserve registers otherwise clobbered.
+	cfi_adjust_cfa_offset (4)
+	pushl %ecx
+	cfi_adjust_cfa_offset (4)
+	pushl %edx
+	cfi_adjust_cfa_offset (4)
+	movl 16(%esp), %edx	# Copy args pushed by PLT in register.  Note
+	movl 12(%esp), %eax	# that `fixup' takes its parameters in regs.
+	call _dl_fixup		# Call resolver.
+	popl %edx		# Get register content back.
+	cfi_adjust_cfa_offset (-4)
+	popl %ecx
+	cfi_adjust_cfa_offset (-4)
+	xchgl %eax, (%esp)	# Get %eax contents end store function address.
+	ret $8			# Jump to function address.
+	cfi_endproc
+	.size _dl_runtime_resolve, .-_dl_runtime_resolve
+
+
+	.globl _dl_runtime_profile
+	.type _dl_runtime_profile, @function
+	cfi_startproc
+	.align 16
+_dl_runtime_profile:
+	cfi_adjust_cfa_offset (8)
+	pushl %esp
+	cfi_adjust_cfa_offset (4)
+	addl $8, (%esp)		# Account for the pushed PLT data
+	pushl %ebp
+	cfi_adjust_cfa_offset (4)
+	pushl %eax		# Preserve registers otherwise clobbered.
+	cfi_adjust_cfa_offset (4)
+	pushl %ecx
+	cfi_adjust_cfa_offset (4)
+	pushl %edx
+	cfi_adjust_cfa_offset (4)
+	movl %esp, %ecx
+	subl $8, %esp
+	cfi_adjust_cfa_offset (8)
+	movl $-1, 4(%esp)
+	leal 4(%esp), %edx
+	movl %edx, (%esp)
+	pushl %ecx		# Address of the register structure
+	cfi_adjust_cfa_offset (4)
+	movl 40(%esp), %ecx	# Load return address
+	movl 36(%esp), %edx	# Copy args pushed by PLT in register.  Note
+	movl 32(%esp), %eax	# that `fixup' takes its parameters in regs.
+	call _dl_profile_fixup	# Call resolver.
+	cfi_adjust_cfa_offset (-8)
+	movl (%esp), %edx
+	testl %edx, %edx
+	jns 1f
+	popl %edx
+	cfi_adjust_cfa_offset (-4)
+	popl %edx		# Get register content back.
+	cfi_adjust_cfa_offset (-4)
+	popl %ecx
+	cfi_adjust_cfa_offset (-4)
+	xchgl %eax, (%esp)	# Get %eax contents end store function address.
+	ret $16			# Jump to function address.
+
+	/*
+	    +32     return address
+	    +28     PLT1
+	    +24     PLT2
+	    +20     %esp
+	    +16     %ebp
+	    +12     %eax
+	    +8      %ecx
+	    +4      %edx
+	   %esp     free
+	*/
+	cfi_adjust_cfa_offset (12)
+1:	movl %ebx, (%esp)
+	cfi_rel_offset (3, 0)
+	movl %edx, %ebx		# This is the frame buffer size
+	pushl %edi
+	cfi_adjust_cfa_offset (4)
+	cfi_rel_offset (7, 0)
+	pushl %esi
+	cfi_adjust_cfa_offset (4)
+	cfi_rel_offset (6, 0)
+	leal 44(%esp), %esi
+	movl %ebx, %ecx
+	movl %esp, %edi
+	subl %ebx, %edi
+	andl $0xfffffff0, %edi	# Align stack
+	movl %esp, %ebx
+	cfi_def_cfa_register (3)
+	movl %edi, %esp
+	shrl $2, %ecx
+	rep
+	movsl
+	movl (%edi), %esi
+	cfi_restore (6)
+	movl 4(%edi), %edi
+	cfi_restore (7)
+	/*
+	   %ebx+40  return address
+	   %ebx+36  PLT1
+	   %ebx+32  PLT2
+	   %ebx+28  %esp
+	   %ebx+24  %ebp
+	   %ebx+20  %eax
+	   %ebx+16  %ecx
+	   %ebx+12  %edx
+	   %ebx+8   %ebx
+	   %ebx+4   free
+	   %ebx     free
+	   %esp     copied stack frame
+	*/
+	movl %eax, (%ebx)
+	movl 12(%ebx), %edx
+	movl 16(%ebx), %ecx
+	movl 20(%ebx), %eax
+	call *(%ebx)
+	movl %ebx, %esp
+	cfi_def_cfa_register (4)
+	movl 8(%esp), %ebx
+	cfi_restore (3)
+	/*
+	    +40     return address
+	    +36     PLT1
+	    +32     PLT2
+	    +28     %esp
+	    +24     %ebp
+	    +20     %eax
+	    +16     %ecx
+	    +12     %edx
+	    +8      free
+	    +4      free
+	   %esp     free
+	*/
+	subl $20, %esp
+	cfi_adjust_cfa_offset (20)
+	movl %eax, (%esp)
+	movl %edx, 4(%esp)
+	fstpt 8(%esp)
+	fstpt 20(%esp)
+	pushl %esp
+	cfi_adjust_cfa_offset (4)
+	leal 36(%esp), %ecx
+	movl 56(%esp), %eax
+	movl 60(%esp), %edx
+	call _dl_call_pltexit
+	movl (%esp), %eax
+	movl 4(%esp), %edx
+	fldt 20(%esp)
+	fldt 8(%esp)
+	addl $60, %esp
+	cfi_adjust_cfa_offset (-60)
+	ret
+	cfi_endproc
+	.size _dl_runtime_profile, .-_dl_runtime_profile
diff --git a/sysdeps/ia64/bits/link.h b/sysdeps/ia64/bits/link.h
index 7f8b0550d9..e69de29bb2 100644
--- a/sysdeps/ia64/bits/link.h
+++ b/sysdeps/ia64/bits/link.h
@@ -1,5 +0,0 @@
-struct link_map_machine
-  {
-    size_t fptr_table_len;
-    Elf64_Addr *fptr_table;
-  };
diff --git a/sysdeps/ia64/bits/linkmap.h b/sysdeps/ia64/bits/linkmap.h
new file mode 100644
index 0000000000..7f8b0550d9
--- /dev/null
+++ b/sysdeps/ia64/bits/linkmap.h
@@ -0,0 +1,5 @@
+struct link_map_machine
+  {
+    size_t fptr_table_len;
+    Elf64_Addr *fptr_table;
+  };
diff --git a/sysdeps/ia64/dl-lookupcfg.h b/sysdeps/ia64/dl-lookupcfg.h
index 0ae3dd68ba..ddf405b551 100644
--- a/sysdeps/ia64/dl-lookupcfg.h
+++ b/sysdeps/ia64/dl-lookupcfg.h
@@ -1,5 +1,5 @@
 /* Configuration of lookup functions.
-   Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc.
+   Copyright (C) 2000, 2001, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -17,9 +17,6 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
-/* The ia64 need more information from the symbol lookup function
-   than just the address. */
-#define DL_LOOKUP_RETURNS_MAP
 #define ELF_FUNCTION_PTR_IS_SPECIAL
 #define DL_UNMAP_IS_SPECIAL
 
diff --git a/sysdeps/linkmap.h b/sysdeps/linkmap.h
new file mode 100644
index 0000000000..470b4d3e5f
--- /dev/null
+++ b/sysdeps/linkmap.h
@@ -0,0 +1,4 @@
+struct link_map_machine
+  {
+    /* empty by default */
+  };
diff --git a/sysdeps/powerpc/powerpc64/dl-lookupcfg.h b/sysdeps/powerpc/powerpc64/dl-lookupcfg.h
index e502941015..e69de29bb2 100644
--- a/sysdeps/powerpc/powerpc64/dl-lookupcfg.h
+++ b/sysdeps/powerpc/powerpc64/dl-lookupcfg.h
@@ -1,22 +0,0 @@
-/* Configuration of lookup functions.  PowerPC64 version.
-   Copyright (C) 2002 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-/* Return the symbol map from the symbol lookup function.  */
-
-#define DL_LOOKUP_RETURNS_MAP 1
diff --git a/sysdeps/s390/bits/link.h b/sysdeps/s390/bits/link.h
index fc1fba363a..e69de29bb2 100644
--- a/sysdeps/s390/bits/link.h
+++ b/sysdeps/s390/bits/link.h
@@ -1,13 +0,0 @@
-#if __WORDSIZE == 64
-struct link_map_machine
-  {
-    Elf64_Addr plt; /* Address of .plt + 0x2e */
-    Elf64_Addr gotplt; /* Address of .got + 0x18 */
-  };
-#else
-struct link_map_machine
-  {
-    Elf32_Addr plt; /* Address of .plt + 0x2c */
-    Elf32_Addr gotplt; /* Address of .got + 0x0c */
-  };
-#endif
diff --git a/sysdeps/s390/bits/linkmap.h b/sysdeps/s390/bits/linkmap.h
new file mode 100644
index 0000000000..fc1fba363a
--- /dev/null
+++ b/sysdeps/s390/bits/linkmap.h
@@ -0,0 +1,13 @@
+#if __WORDSIZE == 64
+struct link_map_machine
+  {
+    Elf64_Addr plt; /* Address of .plt + 0x2e */
+    Elf64_Addr gotplt; /* Address of .got + 0x18 */
+  };
+#else
+struct link_map_machine
+  {
+    Elf32_Addr plt; /* Address of .plt + 0x2c */
+    Elf32_Addr gotplt; /* Address of .got + 0x0c */
+  };
+#endif
diff --git a/sysdeps/sh/bits/link.h b/sysdeps/sh/bits/link.h
index bb2fbb5f16..e69de29bb2 100644
--- a/sysdeps/sh/bits/link.h
+++ b/sysdeps/sh/bits/link.h
@@ -1,5 +0,0 @@
-struct link_map_machine
-  {
-    Elf32_Addr plt; /* Address of .plt + 36 */
-    Elf32_Addr gotplt; /* Address of .got + 0x0c */
-  };
diff --git a/sysdeps/sh/bits/linkmap.h b/sysdeps/sh/bits/linkmap.h
new file mode 100644
index 0000000000..bb2fbb5f16
--- /dev/null
+++ b/sysdeps/sh/bits/linkmap.h
@@ -0,0 +1,5 @@
+struct link_map_machine
+  {
+    Elf32_Addr plt; /* Address of .plt + 36 */
+    Elf32_Addr gotplt; /* Address of .got + 0x0c */
+  };
diff --git a/sysdeps/x86_64/bits/link.h b/sysdeps/x86_64/bits/link.h
index 8ea7157156..87c17eb406 100644
--- a/sysdeps/x86_64/bits/link.h
+++ b/sysdeps/x86_64/bits/link.h
@@ -1,14 +1,112 @@
-#if __WORDSIZE == 64
-struct link_map_machine
-  {
-    Elf64_Addr plt; /* Address of .plt + 0x16 */
-    Elf64_Addr gotplt; /* Address of .got + 0x18 */
-  };
+/* Copyright (C) 2004, 2005 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef	_LINK_H
+# error "Never include <bits/link.h> directly; use <link.h> instead."
+#endif
+
+
+#if __ELF_NATIVE_CLASS == 32
+/* Registers for entry into PLT on IA-32.  */
+typedef struct La_i86_regs
+{
+  uint32_t lr_edx;
+  uint32_t lr_ecx;
+  uint32_t lr_eax;
+  uint32_t lr_ebp;
+  uint32_t lr_esp;
+} La_i86_regs;
+
+/* Return values for calls from PLT on IA-32.  */
+typedef struct La_i86_retval
+{
+  uint32_t lrv_eax;
+  uint32_t lrv_edx;
+  long double lrv_st0;
+  long double lrv_st1;
+} La_i86_retval;
+
+
+__BEGIN_DECLS
+
+extern Elf32_Addr la_i86_gnu_pltenter (Elf32_Sym *__sym, unsigned int __ndx,
+				       uintptr_t *__refcook,
+				       uintptr_t *__defcook,
+				       La_i86_regs *__regs,
+				       unsigned int *__flags,
+				       const char *__symname,
+				       long int *__framesizep);
+extern unsigned int la_i86_gnu_pltexit (Elf32_Sym *__sym, unsigned int __ndx,
+					uintptr_t *__refcook,
+					uintptr_t *__defcook,
+					const La_i86_regs *__inregs,
+					La_i86_retval *__outregs,
+					const char *symname);
+
+__END_DECLS
 
 #else
-struct link_map_machine
-  {
-    Elf32_Addr plt; /* Address of .plt + 0x16 */
-    Elf32_Addr gotplt; /* Address of .got + 0x0c */
-  };
+
+/* Registers for entry into PLT on x86-64.  */
+typedef float La_x86_64_xmm __attribute__ ((__mode__ (__V4SF__)));
+typedef struct La_x86_64_regs
+{
+  uint64_t lr_rdx;
+  uint64_t lr_r8;
+  uint64_t lr_r9;
+  uint64_t lr_rcx;
+  uint64_t lr_rsi;
+  uint64_t lr_rdi;
+  uint64_t lr_rbp;
+  uint64_t lr_rsp;
+  La_x86_64_xmm lr_xmm[8];
+} La_x86_64_regs;
+
+/* Return values for calls from PLT on x86-64.  */
+typedef struct La_x86_64_retval
+{
+  uint64_t lrv_rax;
+  uint64_t lrv_rdx;
+  La_x86_64_xmm lrv_xmm0;
+  La_x86_64_xmm lrv_xmm1;
+  long double lrv_st0;
+  long double lrv_st1;
+} La_x86_64_retval;
+
+
+__BEGIN_DECLS
+
+extern Elf64_Addr la_x86_64_gnu_pltenter (Elf64_Sym *__sym,
+					  unsigned int __ndx,
+					  uintptr_t *__refcook,
+					  uintptr_t *__defcook,
+					  La_x86_64_regs *__regs,
+					  unsigned int *__flags,
+					  const char *__symname,
+					  long int *__framesizep);
+extern unsigned int la_x86_64_gnu_pltexit (Elf64_Sym *__sym,
+					   unsigned int __ndx,
+					   uintptr_t *__refcook,
+					   uintptr_t *__defcook,
+					   const La_x86_64_regs *__inregs,
+					   La_x86_64_retval *__outregs,
+					   const char *symname);
+
+__END_DECLS
+
 #endif
diff --git a/sysdeps/x86_64/bits/linkmap.h b/sysdeps/x86_64/bits/linkmap.h
new file mode 100644
index 0000000000..8ea7157156
--- /dev/null
+++ b/sysdeps/x86_64/bits/linkmap.h
@@ -0,0 +1,14 @@
+#if __WORDSIZE == 64
+struct link_map_machine
+  {
+    Elf64_Addr plt; /* Address of .plt + 0x16 */
+    Elf64_Addr gotplt; /* Address of .got + 0x18 */
+  };
+
+#else
+struct link_map_machine
+  {
+    Elf32_Addr plt; /* Address of .plt + 0x16 */
+    Elf32_Addr gotplt; /* Address of .got + 0x0c */
+  };
+#endif
diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
index b932f51d15..18bff95dcd 100644
--- a/sysdeps/x86_64/dl-machine.h
+++ b/sysdeps/x86_64/dl-machine.h
@@ -116,7 +116,8 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
 	{
 	  got[2] = (Elf64_Addr) &_dl_runtime_profile;
 
-	  if (_dl_name_match_p (GLRO(dl_profile), l))
+	  if (GLRO(dl_profile) != NULL
+	      && _dl_name_match_p (GLRO(dl_profile), l))
 	    /* This is the object we are looking for.  Say that we really
 	       want profiling and the timers are started.  */
 	    GL(dl_profile_map) = l;
@@ -130,128 +131,6 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
   return lazy;
 }
 
-/* This code is used in dl-runtime.c to call the `fixup' function
-   and then redirect to the address it returns.  */
-#ifndef PROF
-# define ELF_MACHINE_RUNTIME_TRAMPOLINE asm ("\n\
-	.text\n\
-	.globl _dl_runtime_resolve\n\
-	.type _dl_runtime_resolve, @function\n\
-	.align 16\n\
-	" CFI_STARTPROC "\n\
-_dl_runtime_resolve:\n\
-	subq $56,%rsp\n\
-	" CFI_ADJUST_CFA_OFFSET(72)" # Incorporate PLT\n\
-	movq %rax,(%rsp)	# Preserve registers otherwise clobbered.\n\
-	movq %rcx,8(%rsp)\n\
-	movq %rdx,16(%rsp)\n\
-	movq %rsi,24(%rsp)\n\
-	movq %rdi,32(%rsp)\n\
-	movq %r8,40(%rsp)\n\
-	movq %r9,48(%rsp)\n\
-	movq 64(%rsp), %rsi	# Copy args pushed by PLT in register.\n\
-	movq %rsi,%r11		# Multiply by 24\n\
-	addq %r11,%rsi\n\
-	addq %r11,%rsi\n\
-	shlq $3, %rsi\n\
-	movq 56(%rsp), %rdi	# %rdi: link_map, %rsi: reloc_offset\n\
-	call fixup		# Call resolver.\n\
-	movq %rax, %r11		# Save return value\n\
-	movq 48(%rsp),%r9	# Get register content back.\n\
-	movq 40(%rsp),%r8\n\
-	movq 32(%rsp),%rdi\n\
-	movq 24(%rsp),%rsi\n\
-	movq 16(%rsp),%rdx\n\
-	movq 8(%rsp),%rcx\n\
-	movq (%rsp),%rax\n\
-	addq $72,%rsp		# Adjust stack(PLT did 2 pushes)\n\
-	" CFI_ADJUST_CFA_OFFSET(-72)" \n\
-	jmp *%r11		# Jump to function address.\n\
-	" CFI_ENDPROC "\n\
-	.size _dl_runtime_resolve, .-_dl_runtime_resolve\n\
-\n\
-	.globl _dl_runtime_profile\n\
-	.type _dl_runtime_profile, @function\n\
-	.align 16\n\
-	" CFI_STARTPROC "\n\
-_dl_runtime_profile:\n\
-	subq $56,%rsp\n\
-	" CFI_ADJUST_CFA_OFFSET(72)" # Incorporate PLT\n\
-	movq %rax,(%rsp)	# Preserve registers otherwise clobbered.\n\
-	movq %rcx,8(%rsp)\n\
-	movq %rdx,16(%rsp)\n\
-	movq %rsi,24(%rsp)\n\
-	movq %rdi,32(%rsp)\n\
-	movq %r8,40(%rsp)\n\
-	movq %r9,48(%rsp)\n\
-	movq 72(%rsp), %rdx	# Load return address if needed\n\
-	movq 64(%rsp), %rsi	# Copy args pushed by PLT in register.\n\
-	movq %rsi,%r11		# Multiply by 24\n\
-	addq %r11,%rsi\n\
-	addq %r11,%rsi\n\
-	shlq $3, %rsi\n\
-	movq 56(%rsp), %rdi	# %rdi: link_map, %rsi: reloc_offset\n\
-	call profile_fixup	# Call resolver.\n\
-	movq %rax, %r11		# Save return value\n\
-	movq 48(%rsp),%r9	# Get register content back.\n\
-	movq 40(%rsp),%r8\n\
-	movq 32(%rsp),%rdi\n\
-	movq 24(%rsp),%rsi\n\
-	movq 16(%rsp),%rdx\n\
-	movq 8(%rsp),%rcx\n\
-	movq (%rsp),%rax\n\
-	addq $72,%rsp		# Adjust stack\n\
-	" CFI_ADJUST_CFA_OFFSET(-72)"\n\
-	jmp *%r11		# Jump to function address.\n\
-	" CFI_ENDPROC "\n\
-	.size _dl_runtime_profile, .-_dl_runtime_profile\n\
-	.previous\n\
-");
-#else
-# define ELF_MACHINE_RUNTIME_TRAMPOLINE asm ("\n\
-	.text\n\
-	.globl _dl_runtime_resolve\n\
-	.globl _dl_runtime_profile\n\
-	.type _dl_runtime_resolve, @function\n\
-	.type _dl_runtime_profile, @function\n\
-	.align 16\n\
-	" CFI_STARTPROC "\n\
-_dl_runtime_resolve:\n\
-_dl_runtime_profile:\n\
-	subq $56,%rsp\n\
-	" CFI_ADJUST_CFA_OFFSET(72)" # Incorporate PLT\n\
-	movq %rax,(%rsp)	# Preserve registers otherwise clobbered.\n\
-	movq %rcx,8(%rsp)\n\
-	movq %rdx,16(%rsp)\n\
-	movq %rsi,24(%rsp)\n\
-	movq %rdi,32(%rsp)\n\
-	movq %r8,40(%rsp)\n\
-	movq %r9,48(%rsp)\n\
-	movq 64(%rsp), %rsi	# Copy args pushed by PLT in register.\n\
-	movq %rsi,%r11		# Multiply by 24\n\
-	addq %r11,%rsi\n\
-	addq %r11,%rsi\n\
-	shlq $3, %rsi\n\
-	movq 56(%rsp), %rdi	# %rdi: link_map, %rsi: reloc_offset\n\
-	call fixup		# Call resolver.\n\
-	movq %rax, %r11		# Save return value\n\
-	movq 48(%rsp),%r9	# Get register content back.\n\
-	movq 40(%rsp),%r8\n\
-	movq 32(%rsp),%rdi\n\
-	movq 24(%rsp),%rsi\n\
-	movq 16(%rsp),%rdx\n\
-	movq 8(%rsp),%rcx\n\
-	movq (%rsp),%rax\n\
-	addq $72,%rsp		# Adjust stack\n\
-	" CFI_ADJUST_CFA_OFFSET(-72)"\n\
-	jmp *%r11		# Jump to function address.\n\
-	" CFI_ENDPROC "\n\
-	.size _dl_runtime_resolve, .-_dl_runtime_resolve\n\
-	.size _dl_runtime_profile, .-_dl_runtime_profile\n\
-	.previous\n\
-");
-#endif
-
 /* Initial entry point code for the dynamic linker.
    The C function `_dl_start' is the real entry point;
    its return value is the user program's entry point.  */
@@ -348,9 +227,14 @@ elf_machine_plt_value (struct link_map *map, const Elf64_Rela *reloc,
   return value;
 }
 
+
+/* Names of the architecture-specific auditing callback functions.  */
+#define ARCH_LA_PLTENTER x86_64_gnu_pltenter
+#define ARCH_LA_PLTEXIT x86_64_gnu_pltexit
+
 #endif /* !dl_machine_h */
 
-#ifdef RESOLVE
+#ifdef RESOLVE_MAP
 
 /* Perform the relocation specified by RELOC and SYM (which is fully resolved).
    MAP is the object containing the reloc.  */
@@ -390,7 +274,7 @@ elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc,
 #ifndef RTLD_BOOTSTRAP
       const Elf64_Sym *const refsym = sym;
 #endif
-#if defined USE_TLS && !defined RTLD_BOOTSTRAP
+#ifndef RTLD_BOOTSTRAP
       struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
       Elf64_Addr value = (sym == NULL ? 0
 			  : (Elf64_Addr) sym_map->l_addr + sym->st_value);
@@ -553,4 +437,4 @@ elf_machine_lazy_rel (struct link_map *map,
     _dl_reloc_bad_type (map, r_type, 1);
 }
 
-#endif /* RESOLVE */
+#endif /* RESOLVE_MAP */
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
new file mode 100644
index 0000000000..eb46f29cf2
--- /dev/null
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -0,0 +1,188 @@
+/* PLT trampolines.  x86-64 version.
+   Copyright (C) 2004, 2005 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+
+	.text
+	.globl _dl_runtime_resolve
+	.type _dl_runtime_resolve, @function
+	.align 16
+	cfi_startproc
+_dl_runtime_resolve:
+	subq $56,%rsp
+	cfi_adjust_cfa_offset(72) # Incorporate PLT
+	movq %rax,(%rsp)	# Preserve registers otherwise clobbered.
+	movq %rcx, 8(%rsp)
+	movq %rdx, 16(%rsp)
+	movq %rsi, 24(%rsp)
+	movq %rdi, 32(%rsp)
+	movq %r8, 40(%rsp)
+	movq %r9, 48(%rsp)
+	movq 64(%rsp), %rsi	# Copy args pushed by PLT in register.
+	movq %rsi, %r11		# Multiply by 24
+	addq %r11, %rsi
+	addq %r11, %rsi
+	shlq $3, %rsi
+	movq 56(%rsp), %rdi	# %rdi: link_map, %rsi: reloc_offset
+	call _dl_fixup		# Call resolver.
+	movq %rax, %r11		# Save return value
+	movq 48(%rsp), %r9	# Get register content back.
+	movq 40(%rsp), %r8
+	movq 32(%rsp), %rdi
+	movq 24(%rsp), %rsi
+	movq 16(%rsp), %rdx
+	movq 8(%rsp), %rcx
+	movq (%rsp), %rax
+	addq $72, %rsp		# Adjust stack(PLT did 2 pushes)
+	cfi_adjust_cfa_offset(-72)
+	jmp *%r11		# Jump to function address.
+	cfi_endproc
+	.size _dl_runtime_resolve, .-_dl_runtime_resolve
+
+
+
+	.globl _dl_runtime_profile
+	.type _dl_runtime_profile, @function
+	.align 16
+	cfi_startproc
+_dl_runtime_profile:
+	subq $80, %rsp
+	cfi_adjust_cfa_offset(96) # Incorporate PLT
+	movq %rax, (%rsp)	# Preserve registers otherwise clobbered.
+	movq %rdx, 8(%rsp)
+	movq %r8, 16(%rsp)
+	movq %r9, 24(%rsp)
+	movq %rcx, 32(%rsp)
+	movq %rsi, 40(%rsp)
+	movq %rdi, 48(%rsp)
+	movq %rbp, 56(%rsp)	# Information for auditors.
+	leaq 96(%rsp), %rax
+	movq %rax, 64(%rsp)
+	leaq 8(%rsp), %rcx
+	movq 96(%rsp), %rdx	# Load return address if needed
+	movq 88(%rsp), %rsi	# Copy args pushed by PLT in register.
+	movq %rsi,%r11		# Multiply by 24
+	addq %r11,%rsi
+	addq %r11,%rsi
+	shlq $3, %rsi
+	movq 80(%rsp), %rdi	# %rdi: link_map, %rsi: reloc_offset
+	leaq 72(%rsp), %r8
+	call _dl_profile_fixup	# Call resolver.
+	movq %rax, %r11		# Save return value
+	movq 8(%rsp), %rdx	# Get back register content.
+	movq 16(%rsp), %r8
+	movq 24(%rsp), %r9
+	movq (%rsp),%rax
+	movq 72(%rsp), %r10
+	testq %r10, %r10
+	jns 1f
+	movq 32(%rsp), %rcx
+	movq 40(%rsp), %rsi
+	movq 48(%rsp), %rdi
+	addq $96,%rsp		# Adjust stack
+	cfi_adjust_cfa_offset (-96)
+	jmp *%r11		# Jump to function address.
+
+	/*
+	    +96     return address
+	    +88     PLT2
+	    +80     PLT1
+	    +72     free
+	    +64     %rsp
+	    +56     %rbp
+	    +48     %rdi
+	    +40     %rsi
+	    +32     %rcx
+	    +24     %r9
+	    +16     %r8
+	    +8      %rdx
+	   %esp     %rax
+	*/
+	cfi_adjust_cfa_offset (96)
+1:	movq %rbx, 72(%rsp)
+	cfi_rel_offset (1, 72)
+	leaq 104(%rsp), %rsi
+	movq %rsp, %rbx
+	cfi_def_cfa_register (1)
+	subq %r10, %rsp
+	movq %rsp, %rdi
+	movq %r10, %rcx
+	shrq $3, %rcx
+	rep
+	movsq
+	andq $0xfffffffffffffff0, %rsp
+	movq 32(%rbx), %rcx
+	movq 40(%rbx), %rsi
+	movq 48(%rbx), %rdi
+	call *%r11
+	movq %rbx, %rsp
+	cfi_def_cfa_register (7)
+	subq $72, %rsp
+	cfi_adjust_cfa_offset (72)
+	movq %rsp, %rcx
+	movq %rax, (%rcx)
+	movq %rdx, 8(%rcx)
+	/* Even though the stack is correctly aligned to allow using movaps
+	   we use movups.  Some callers might provide an incorrectly aligned
+	   stack and we do not want to have it blow up here.  */
+	movups %xmm0, 16(%rcx)
+	movups %xmm1, 32(%rcx)
+	fstpt 48(%rcx)
+	fstpt 64(%rcx)
+	/*
+	    +168    return address
+	    +160    PLT2
+	    +152    PLT1
+	    +144    free
+	    +136    %rsp
+	    +128    %rbp
+	    +120    %rdi
+	    +112    %rsi
+	    +104    %rcx
+	    +96     %r9
+	    +88     %r8
+	    +80     %rdx
+	    +64     %st1 result
+	    +48     %st result
+	    +32     %xmm1 result
+	    +16     %xmm0 result
+	    +8      %rdx result
+	   %esp     %rax result
+	*/
+	leaq 80(%rsp), %rdx
+	movq 144(%rsp), %rbx
+	cfi_restore (1)
+	movq 160(%rsp), %rsi	# Copy args pushed by PLT in register.
+	movq %rsi,%r11		# Multiply by 24
+	addq %r11,%rsi
+	addq %r11,%rsi
+	shlq $3, %rsi
+	movq 152(%rsp), %rdi	# %rdi: link_map, %rsi: reloc_offset
+	call _dl_call_pltexit
+	movq (%rsp), %rax
+	movq 8(%rsp), %rdx
+	movups 16(%rsp), %xmm0
+	movups 32(%rsp), %xmm1
+	fldt 64(%rsp)
+	fldt 48(%rsp)
+	addq $168, %rsp
+	cfi_adjust_cfa_offset (-168)
+	retq
+	cfi_endproc
+	.size _dl_runtime_profile, .-_dl_runtime_profile