summary refs log tree commit diff
path: root/sysdeps/ia64/dl-machine.h
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/ia64/dl-machine.h')
-rw-r--r-- sysdeps/ia64/dl-machine.h 596
1 files changed, 294 insertions, 302 deletions
diff --git a/sysdeps/ia64/dl-machine.h b/sysdeps/ia64/dl-machine.h
index 9ad777e99f..a2192a1e2e 100644
--- a/sysdeps/ia64/dl-machine.h
+++ b/sysdeps/ia64/dl-machine.h
@@ -1,5 +1,5 @@
 /* Machine-dependent ELF dynamic relocation inline functions.  IA-64 version.
-   Copyright (C) 1995, 1996, 1997, 2000 Free Software Foundation, Inc.
+   Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -32,26 +32,43 @@
    in l_info array.  */
 #define DT_IA_64(x) (DT_IA_64_##x - DT_LOPROC + DT_NUM)
 
+/* There are currently 123 dynamic symbols in ld.so.
+   IA64_BOOT_FPTR_TABLE_LEN needs to be at least that big.  */
+#define IA64_BOOT_FPTR_TABLE_LEN	200
 
-/* An FPTR is a function descriptor.  Properly they consist of just
-   FUNC and GP.  But we want to traverse a binary tree too.  */
+/* An FDESC is a function descriptor: a code entry point and its gp.  */
 
-#define IA64_BOOT_FPTR_SIZE	256
+struct ia64_fdesc
+  {
+    Elf64_Addr ip;	/* code entry point */
+    Elf64_Addr gp;	/* global pointer */
+  };
 
-struct ia64_fptr
+struct ia64_fdesc_table
+  {
+    struct ia64_fdesc_table *next;
+    unsigned int len;			/* # of entries in fdesc table */
+    volatile unsigned int first_unused;	/* index of first available entry */
+    struct ia64_fdesc fdesc[0];
+  };
+
+extern Elf64_Addr __ia64_make_fptr (struct link_map *, const Elf64_Sym *,
+				    Elf64_Addr);
+
+static inline void
+__ia64_init_bootstrap_fdesc_table (struct link_map *map)
 {
-  Elf64_Addr func;
-  Elf64_Addr gp;
-  struct ia64_fptr *next;
-};
+  Elf64_Addr *boot_table;
 
-extern struct ia64_fptr __boot_ldso_fptr[];
-extern struct ia64_fptr *__fptr_next;
-extern struct ia64_fptr *__fptr_root;
-extern int __fptr_count;
+  /* Careful: runs before the GOT is relocated; address it gp-relative.  */
+  asm ("addl %0 = @gprel (__ia64_boot_fptr_table), gp" : "=r"(boot_table));
 
-extern Elf64_Addr __ia64_make_fptr (const struct link_map *, Elf64_Addr,
-				    struct ia64_fptr **, struct ia64_fptr *);
+  map->l_mach.fptr_table_len = IA64_BOOT_FPTR_TABLE_LEN;
+  map->l_mach.fptr_table = boot_table;
+}
+
+#define ELF_MACHINE_BEFORE_RTLD_RELOC(dynamic_info)		\
+	__ia64_init_bootstrap_fdesc_table (&bootstrap_map);
 
 /* Return nonzero iff ELF header is compatible with the running host.  */
 static inline int
@@ -67,7 +84,7 @@ elf_machine_dynamic (void)
 {
   Elf64_Addr *p;
 
-  __asm__(
+  __asm__ (
 	".section .sdata\n"
 	"	.type __dynamic_ltv#, @object\n"
 	"	.size __dynamic_ltv#, 8\n"
@@ -75,7 +92,7 @@ elf_machine_dynamic (void)
 	"	data8	@ltv(_DYNAMIC#)\n"
 	".previous\n"
 	"	addl	%0 = @gprel(__dynamic_ltv#), gp ;;"
-	: "=r"(p));
+	: "=r" (p));
 
   return *p;
 }
@@ -88,19 +105,18 @@ elf_machine_load_address (void)
   Elf64_Addr ip;
   int *p;
 
-  __asm__(
+  __asm__ (
 	"1:	mov %0 = ip\n"
 	".section .sdata\n"
 	"2:	data4	@ltv(1b)\n"
 	"       .align 8\n"
 	".previous\n"
 	"	addl	%1 = @gprel(2b), gp ;;"
-	: "=r"(ip), "=r"(p));
+	: "=r" (ip), "=r" (p));
 
-  return ip - (Elf64_Addr)*p;
+  return ip - (Elf64_Addr) *p;
 }
 
-
 /* Set up the loaded object described by L so its unrelocated PLT
    entries will jump to the on-demand fixup code in dl-runtime.c.  */
 
@@ -112,21 +128,21 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
 
   if (lazy)
     {
-      register Elf64_Addr gp __asm__("gp");
+      register Elf64_Addr gp __asm__ ("gp");
       Elf64_Addr *reserve, doit;
 
       /*
        * Careful with the typecast here or it will try to add l-l_addr
        * pointer elements
        */
-      reserve = (Elf64_Addr *)
-	      (l->l_info[DT_IA_64(PLT_RESERVE)]->d_un.d_ptr + l->l_addr);
+      reserve = ((Elf64_Addr *)
+		 (l->l_info[DT_IA_64 (PLT_RESERVE)]->d_un.d_ptr + l->l_addr));
       /* Identify this shared object.  */
       reserve[0] = (Elf64_Addr) l;
 
       /* This function will be called to perform the relocation.  */
       if (!profile)
-	doit = (Elf64_Addr) ((struct ia64_fptr *)&_dl_runtime_resolve)->func;
+	doit = (Elf64_Addr) ((struct ia64_fdesc *) &_dl_runtime_resolve)->ip;
       else
 	{
 	  if (_dl_name_match_p (_dl_profile, l))
@@ -135,7 +151,7 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
 		 want profiling and the timers are started.  */
 	      _dl_profile_map = l;
 	    }
-	  doit = (Elf64_Addr) ((struct ia64_fptr *)&_dl_runtime_profile)->func;
+	  doit = (Elf64_Addr) ((struct ia64_fdesc *) &_dl_runtime_profile)->ip;
 	}
 
       reserve[1] = doit;
@@ -149,111 +165,111 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
 /*
    This code is used in dl-runtime.c to call the `fixup' function
    and then redirect to the address it returns. `fixup()' takes two
-   arguments, however fixup_profile() takes three.
+   arguments, however profile_fixup() takes three.
 
    The ABI specifies that we will never see more than 8 input
    registers to a function call, thus it is safe to simply allocate
    those, and simpler than playing stack games.
 					                     - 12/09/99 Jes
  */
-#define TRAMPOLINE_TEMPLATE(tramp_name, fixup_name) \
-  extern void tramp_name (void); \
-  asm ( "\
-	.global " #tramp_name "#
-	.proc " #tramp_name "#
-" #tramp_name ":
-	{ .mmi
-	  alloc loc0 = ar.pfs, 8, 6, 3, 0
-	  adds r2 = -144, r12
-	  adds r3 = -128, r12
-	}
-	{ .mii
-	  adds r12 = -160, r12
-	  mov loc1 = b0
-	  mov out2 = b0		/* needed by fixup_profile */
-	  ;;
-	}
-	{ .mfb
-	  mov loc2 = r8		/* preserve struct value register */
-	  nop.f 0
-	  nop.b 0
-	}
-	{ .mii
-	  mov loc3 = r9		/* preserve language specific register */
-	  mov loc4 = r10	/* preserve language specific register */
-	  mov loc5 = r11	/* preserve language specific register */
-	}
-	{ .mmi
-	  stf.spill [r2] = f8, 32
-	  stf.spill [r3] = f9, 32
-	  mov out0 = r16
-	  ;;
-	}
-	{ .mmi
-	  stf.spill [r2] = f10, 32
-	  stf.spill [r3] = f11, 32
-	  shl out1 = r15, 4
-	  ;;
-	}
-	{ .mmi
-	  stf.spill [r2] = f12, 32
-	  stf.spill [r3] = f13, 32
-	  shladd out1 = r15, 3, out1
-	  ;;
-	}
-	{ .mmb
-	  stf.spill [r2] = f14
-	  stf.spill [r3] = f15
-	  br.call.sptk.many b0 = " #fixup_name "#
-	}
-	{ .mii
-	  ld8 r9 = [ret0], 8
-	  adds r2 = 16, r12
-	  adds r3 = 32, r12
-	  ;;
-	}
-	{ .mmi
-	  ldf.fill f8 = [r2], 32
-	  ldf.fill f9 = [r3], 32
-	  mov b0 = loc1
-	  ;;
-	}
-	{ .mmi
-	  ldf.fill f10 = [r2], 32
-	  ldf.fill f11 = [r3], 32
-	  mov b6 = r9
-	  ;;
-	}
-	{ .mmi
-	  ldf.fill f12 = [r2], 32
-	  ldf.fill f13 = [r3], 32
-	  mov ar.pfs = loc0
-	  ;;
-	}
-	{ .mmi
-	  ldf.fill f14 = [r2], 32
-	  ldf.fill f15 = [r3], 32
-	  adds r12 = 160, r12
-	  ;;
-	}
-	{ .mii
-	  mov r9 = loc3		/* restore language specific register */
-	  mov r10 = loc4	/* restore language specific register */
-	  mov r11 = loc5	/* restore language specific register */
-	}
-	{ .mii
-	  ld8 gp = [ret0]
-	  mov r8 = loc2		/* restore struct value register */
-	  ;;
-	}
-	/* An alloc is needed for the break system call to work.
-	   We don't care about the old value of the pfs register.  */
-	{ .mmb
-	  alloc r2 = ar.pfs, 0, 0, 8, 0
-	  br.sptk.many b6
-	  ;;
-	}
-	.endp " #tramp_name "#")
+#define TRAMPOLINE_TEMPLATE(tramp_name, fixup_name)			     \
+  extern void tramp_name (void);					     \
+  asm (									     \
+"	.global " #tramp_name "#\n"					     \
+"	.proc " #tramp_name "#\n"					     \
+#tramp_name ":\n"							     \
+"	{ .mmi\n"							     \
+"	  alloc loc0 = ar.pfs, 8, 6, 3, 0\n"				     \
+"	  adds r2 = -144, r12\n"					     \
+"	  adds r3 = -128, r12\n"					     \
+"	}\n"								     \
+"	{ .mii\n"							     \
+"	  adds r12 = -160, r12\n"					     \
+"	  mov loc1 = b0\n"						     \
+"	  mov out2 = b0		/* needed by fixup_profile */\n"	     \
+"	  ;;\n"								     \
+"	}\n"								     \
+"	{ .mfb\n"							     \
+"	  mov loc2 = r8		/* preserve struct value register */\n"	     \
+"	  nop.f 0\n"							     \
+"	  nop.b 0\n"							     \
+"	}\n"								     \
+"	{ .mii\n"							     \
+"	  mov loc3 = r9		/* preserve language specific register */\n" \
+"	  mov loc4 = r10	/* preserve language specific register */\n" \
+"	  mov loc5 = r11	/* preserve language specific register */\n" \
+"	}\n"								     \
+"	{ .mmi\n"							     \
+"	  stf.spill [r2] = f8, 32\n"					     \
+"	  stf.spill [r3] = f9, 32\n"					     \
+"	  mov out0 = r16\n"						     \
+"	  ;;\n"								     \
+"	}\n"								     \
+"	{ .mmi\n"							     \
+"	  stf.spill [r2] = f10, 32\n"					     \
+"	  stf.spill [r3] = f11, 32\n"					     \
+"	  shl out1 = r15, 4\n"						     \
+"	  ;;\n"								     \
+"	}\n"								     \
+"	{ .mmi\n"							     \
+"	  stf.spill [r2] = f12, 32\n"					     \
+"	  stf.spill [r3] = f13, 32\n"					     \
+"	  shladd out1 = r15, 3, out1\n"					     \
+"	  ;;\n"								     \
+"	}\n"								     \
+"	{ .mmb\n"							     \
+"	  stf.spill [r2] = f14\n"					     \
+"	  stf.spill [r3] = f15\n"					     \
+"	  br.call.sptk.many b0 = " #fixup_name "#\n"			     \
+"	}\n"								     \
+"	{ .mii\n"							     \
+"	  ld8 r9 = [ret0], 8\n"						     \
+"	  adds r2 = 16, r12\n"						     \
+"	  adds r3 = 32, r12\n"						     \
+"	  ;;\n"								     \
+"	}\n"								     \
+"	{ .mmi\n"							     \
+"	  ldf.fill f8 = [r2], 32\n"					     \
+"	  ldf.fill f9 = [r3], 32\n"					     \
+"	  mov b0 = loc1\n"						     \
+"	  ;;\n"								     \
+"	}\n"								     \
+"	{ .mmi\n"							     \
+"	  ldf.fill f10 = [r2], 32\n"					     \
+"	  ldf.fill f11 = [r3], 32\n"					     \
+"	  mov b6 = r9\n"						     \
+"	  ;;\n"								     \
+"	}\n"								     \
+"	{ .mmi\n"							     \
+"	  ldf.fill f12 = [r2], 32\n"					     \
+"	  ldf.fill f13 = [r3], 32\n"					     \
+"	  mov ar.pfs = loc0\n"						     \
+"	  ;;\n"								     \
+"	}\n"								     \
+"	{ .mmi\n"							     \
+"	  ldf.fill f14 = [r2], 32\n"					     \
+"	  ldf.fill f15 = [r3], 32\n"					     \
+"	  adds r12 = 160, r12\n"					     \
+"	  ;;\n"								     \
+"	}\n"								     \
+"	{ .mii\n"							     \
+"	  mov r9 = loc3		/* restore language specific register */\n"  \
+"	  mov r10 = loc4	/* restore language specific register */\n"  \
+"	  mov r11 = loc5	/* restore language specific register */\n"  \
+"	}\n"								     \
+"	{ .mii\n"							     \
+"	  ld8 gp = [ret0]\n"						     \
+"	  mov r8 = loc2		/* restore struct value register */\n"	     \
+"	  ;;\n"								     \
+"	}\n"								     \
+"	/* An alloc is needed for the break system call to work.\n"	     \
+"	   We don't care about the old value of the pfs register.  */\n"     \
+"	{ .mmb\n"							     \
+"	  alloc r2 = ar.pfs, 0, 0, 8, 0\n"				     \
+"	  br.sptk.many b6\n"						     \
+"	  ;;\n"								     \
+"	}\n"								     \
+"	.endp " #tramp_name "#\n");
 
 #ifndef PROF
 #define ELF_MACHINE_RUNTIME_TRAMPOLINE 				\
@@ -270,148 +286,148 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
    The C function `_dl_start' is the real entry point;
    its return value is the user program's entry point.  */
 
-#define RTLD_START asm ("\
-.text
-	.global _start#
-	.proc _start#
-_start:
-0:	{ .mii
-	  alloc loc0 = ar.pfs, 0, 3, 4, 0
-	  mov r2 = ip
-	  addl r3 = @gprel(0b), r0
-	  ;;
-	}
-	{ .mlx
-	  /* Calculate the GP, and save a copy in loc1.  */
-	  sub gp = r2, r3
-	  movl r8 = 0x9804c0270033f
-	  ;;
-	}
-	{ .mii
-	  mov ar.fpsr = r8
-	  sub loc1 = r2, r3
-	  /* _dl_start wants a pointer to the pointer to the arg block
-	     and the arg block starts with an integer, thus the magic 16.  */
-	  adds out0 = 16, sp
-	}
-	{ .bbb
-	  br.call.sptk.many b0 = _dl_start#
-	  ;;
-	}
-	.endp _start#
-	/* FALLTHRU */
-	.global _dl_start_user#
-	.proc _dl_start_user#
-_dl_start_user:
-	{ .mii
-	  /* Save the pointer to the user entry point fptr in loc2.  */
-	  mov loc2 = ret0
-	  /* Store the highest stack address.  */
-	  addl r2 = @ltoff(__libc_stack_end#), gp
-	  addl r3 = @gprel(_dl_skip_args), gp
-	  ;;
-	}
-	{ .mmi
-	  ld8 r2 = [r2]
-	  ld4 r3 = [r3]
-	  adds r11 = 24, sp	/* Load the address of argv. */
-	  ;;
-	}
-	{ .mii
-	  st8 [r2] = sp
-	  adds r10 = 16, sp	/* Load the address of argc. */
-	  mov out2 = r11
-	  ;;
-	  /* See if we were run as a command with the executable file
-	     name as an extra leading argument.  If so, adjust the argv
-	     pointer to skip _dl_skip_args words.
-	     Note that _dl_skip_args is an integer, not a long - Jes
-
-	     The stack pointer has to be 16 byte aligned. We cannot simply
-	     addjust the stack pointer. We have to move the whole argv and
-	     envp and adjust _dl_argv by _dl_skip_args.  H.J.  */
-	}
-	{ .mib
-	  ld8 out1 = [r10]	/* is argc actually stored as a long
-				   or as an int? */
-	  addl r2 = @ltoff(_dl_argv), gp
-	  ;;
-	}
-	{ .mmi
-	  ld8 r2 = [r2]		/* Get the address of _dl_argv. */
-	  sub out1 = out1, r3	/* Get the new argc. */
-	  shladd r3 = r3, 3, r0
-	  ;;
-	}
-	{
-	  .mib
-	  ld8 r17 = [r2]	/* Get _dl_argv. */
-	  add r15 = r11, r3	/* The address of the argv we move */
-	  ;;
-	}
-	/* ??? Could probably merge these two loops into 3 bundles.
-	   using predication to control which set of copies we're on.  */
-1:	/* Copy argv. */
-	{ .mfi
-	  ld8 r16 = [r15], 8	/* Load the value in the old argv. */
-	  ;;
-	}
-	{ .mib
-	  st8 [r11] = r16, 8	/* Store it in the new argv. */
-	  cmp.ne p6, p7 = 0, r16
-(p6)	  br.cond.dptk.few 1b
-	  ;;
-	}
-	{ .mmi
-	  mov out3 = r11
-	  sub r17 = r17, r3	/* Substract _dl_skip_args. */
-	  addl out0 = @ltoff(_dl_loaded), gp
-	}
-1:	/* Copy env. */
-	{ .mfi
-	  ld8 r16 = [r15], 8	/* Load the value in the old env. */
-	  ;;
-	}
-	{ .mib
-	  st8 [r11] = r16, 8	/* Store it in the new env. */
-	  cmp.ne p6, p7 = 0, r16
-(p6)	  br.cond.dptk.few 1b
-	  ;;
-	}
-	{ .mmb
-	  st8 [r10] = out1		/* Record the new argc. */
-	  ld8 out0 = [out0]
-	  ;;
-	}
-	{ .mmb
-	  ld8 out0 = [out0]		/* get the linkmap */
-	  st8 [r2] = r17		/* Load the new _dl_argv. */
-	  br.call.sptk.many b0 = _dl_init#
-	  ;;
-	}
-	/* Pass our finializer function to the user,
-	   and jump to the user's entry point.  */
-	{ .mmi
-	  ld8 r3 = [loc2], 8
-	  mov b0 = r0
-	}
-	{ .mmi
-	  addl ret0 = @ltoff(@fptr(_dl_fini#)), gp
-	  ;;
-	  mov b6 = r3
-	}
-	{ .mmi
-	  ld8 ret0 = [ret0]
-	  ld8 gp = [loc2]
-	  mov ar.pfs = loc0
-	  ;;
-	}
-	{ .mfb
-	  br.sptk.many b6
-	  ;;
-	}
-	.endp _dl_start_user#
-.previous");
+#define RTLD_START asm (						      \
+".text\n"								      \
+"	.global _start#\n"						      \
+"	.proc _start#\n"						      \
+"_start:\n"								      \
+"0:	{ .mii\n"							      \
+"	  alloc loc0 = ar.pfs, 0, 3, 4, 0\n"				      \
+"	  mov r2 = ip\n"						      \
+"	  addl r3 = @gprel(0b), r0\n"					      \
+"	  ;;\n"								      \
+"	}\n"								      \
+"	{ .mlx\n"							      \
+"	  /* Calculate the GP, and save a copy in loc1.  */\n"		      \
+"	  sub gp = r2, r3\n"						      \
+"	  movl r8 = 0x9804c0270033f\n"					      \
+"	  ;;\n"								      \
+"	}\n"								      \
+"	{ .mii\n"							      \
+"	  mov ar.fpsr = r8\n"						      \
+"	  sub loc1 = r2, r3\n"						      \
+"	  /* _dl_start wants a pointer to the pointer to the arg block and\n" \
+"	     the arg block starts with an integer, thus the magic 16. */\n"   \
+"	  adds out0 = 16, sp\n"						      \
+"	}\n"								      \
+"	{ .bbb\n"							      \
+"	  br.call.sptk.many b0 = _dl_start#\n"				      \
+"	  ;;\n"								      \
+"	}\n"								      \
+"	.endp _start#\n"						      \
+"	/* FALLTHRU */\n"						      \
+"	.global _dl_start_user#\n"					      \
+"	.proc _dl_start_user#\n"					      \
+"_dl_start_user:\n"							      \
+"	{ .mii\n"							      \
+"	  /* Save the pointer to the user entry point fptr in loc2.  */\n"    \
+"	  mov loc2 = ret0\n"						      \
+"	  /* Store the highest stack address.  */\n"			      \
+"	  addl r2 = @ltoff(__libc_stack_end#), gp\n"			      \
+"	  addl r3 = @gprel(_dl_skip_args), gp\n"			      \
+"	  ;;\n"								      \
+"	}\n"								      \
+"	{ .mmi\n"							      \
+"	  ld8 r2 = [r2]\n"						      \
+"	  ld4 r3 = [r3]\n"						      \
+"	  adds r11 = 24, sp	/* Load the address of argv. */\n"	      \
+"	  ;;\n"								      \
+"	}\n"								      \
+"	{ .mii\n"							      \
+"	  st8 [r2] = sp\n"						      \
+"	  adds r10 = 16, sp	/* Load the address of argc. */\n"	      \
+"	  mov out2 = r11\n"						      \
+"	  ;;\n"								      \
+"	  /* See if we were run as a command with the executable file\n"      \
+"	     name as an extra leading argument.  If so, adjust the argv\n"    \
+"	     pointer to skip _dl_skip_args words.\n"			      \
+"	     Note that _dl_skip_args is an integer, not a long - Jes\n"	      \
+"\n"									      \
+"	     The stack pointer has to be 16 byte aligned. We cannot simply\n" \
+"	     addjust the stack pointer. We have to move the whole argv and\n" \
+"	     envp and adjust _dl_argv by _dl_skip_args.  H.J.  */\n"	      \
+"	}\n"								      \
+"	{ .mib\n"							      \
+"	  ld8 out1 = [r10]	/* is argc actually stored as a long\n"	      \
+"				   or as an int? */\n"			      \
+"	  addl r2 = @ltoff(_dl_argv), gp\n"				      \
+"	  ;;\n"								      \
+"	}\n"								      \
+"	{ .mmi\n"							      \
+"	  ld8 r2 = [r2]		/* Get the address of _dl_argv. */\n"	      \
+"	  sub out1 = out1, r3	/* Get the new argc. */\n"		      \
+"	  shladd r3 = r3, 3, r0\n"					      \
+"	  ;;\n"								      \
+"	}\n"								      \
+"	{\n"								      \
+"	  .mib\n"							      \
+"	  ld8 r17 = [r2]	/* Get _dl_argv. */\n"			      \
+"	  add r15 = r11, r3	/* The address of the argv we move */\n"      \
+"	  ;;\n"								      \
+"	}\n"								      \
+"	/* ??? Could probably merge these two loops into 3 bundles.\n"	      \
+"	   using predication to control which set of copies we're on.  */\n"  \
+"1:	/* Copy argv. */\n"						      \
+"	{ .mfi\n"							      \
+"	  ld8 r16 = [r15], 8	/* Load the value in the old argv. */\n"      \
+"	  ;;\n"								      \
+"	}\n"								      \
+"	{ .mib\n"							      \
+"	  st8 [r11] = r16, 8	/* Store it in the new argv. */\n"	      \
+"	  cmp.ne p6, p7 = 0, r16\n"					      \
+"(p6)	  br.cond.dptk.few 1b\n"					      \
+"	  ;;\n"								      \
+"	}\n"								      \
+"	{ .mmi\n"							      \
+"	  mov out3 = r11\n"						      \
+"	  sub r17 = r17, r3	/* Substract _dl_skip_args. */\n"	      \
+"	  addl out0 = @ltoff(_dl_loaded), gp\n"				      \
+"	}\n"								      \
+"1:	/* Copy env. */\n"						      \
+"	{ .mfi\n"							      \
+"	  ld8 r16 = [r15], 8	/* Load the value in the old env. */\n"	      \
+"	  ;;\n"								      \
+"	}\n"								      \
+"	{ .mib\n"							      \
+"	  st8 [r11] = r16, 8	/* Store it in the new env. */\n"	      \
+"	  cmp.ne p6, p7 = 0, r16\n"					      \
+"(p6)	  br.cond.dptk.few 1b\n"					      \
+"	  ;;\n"								      \
+"	}\n"								      \
+"	{ .mmb\n"							      \
+"	  st8 [r10] = out1		/* Record the new argc. */\n"	      \
+"	  ld8 out0 = [out0]\n"						      \
+"	  ;;\n"								      \
+"	}\n"								      \
+"	{ .mmb\n"							      \
+"	  ld8 out0 = [out0]		/* get the linkmap */\n"	      \
+"	  st8 [r2] = r17		/* Load the new _dl_argv. */\n"	      \
+"	  br.call.sptk.many b0 = _dl_init#\n"				      \
+"	  ;;\n"								      \
+"	}\n"								      \
+"	/* Pass our finializer function to the user,\n"			      \
+"	   and jump to the user's entry point.  */\n"			      \
+"	{ .mmi\n"							      \
+"	  ld8 r3 = [loc2], 8\n"						      \
+"	  mov b0 = r0\n"						      \
+"	}\n"								      \
+"	{ .mmi\n"							      \
+"	  addl ret0 = @ltoff(@fptr(_dl_fini#)), gp\n"			      \
+"	  ;;\n"								      \
+"	  mov b6 = r3\n"						      \
+"	}\n"								      \
+"	{ .mmi\n"							      \
+"	  ld8 ret0 = [ret0]\n"						      \
+"	  ld8 gp = [loc2]\n"						      \
+"	  mov ar.pfs = loc0\n"						      \
+"	  ;;\n"								      \
+"	}\n"								      \
+"	{ .mfb\n"							      \
+"	  br.sptk.many b6\n"						      \
+"	  ;;\n"								      \
+"	}\n"								      \
+"	.endp _dl_start_user#\n"					      \
+".previous\n");
 
 
 #ifndef RTLD_START_SPECIAL_INIT
@@ -434,8 +450,8 @@ _dl_start_user:
 #define ELF_MACHINE_NO_REL 1
 
 /* Return the address of the entry point. */
-#define ELF_MACHINE_START_ADDRESS(map, start) \
-  DL_FUNCTION_ADDRESS (map, start)
+#define ELF_MACHINE_START_ADDRESS(map, start)	\
+  DL_STATIC_FUNCTION_ADDRESS (map, start)
 
 #define elf_machine_profile_fixup_plt(l, reloc, rel_addr, value) \
   elf_machine_fixup_plt (l, reloc, rel_addr, value)
@@ -452,7 +468,9 @@ elf_machine_fixup_plt (struct link_map *l, lookup_t t,
    * being called */
   /* got has already been relocated in elf_get_dynamic_info() */
   reloc_addr[1] = t->l_info[DT_PLTGOT]->d_un.d_ptr;
-  reloc_addr[0] = value;
+  /* We need a "release" store here so that the gp written above is visible
+     before the code entry point is updated.  */
+  ((volatile Elf64_Addr *) reloc_addr)[0] = value;
   return (Elf64_Addr) reloc_addr;
 }
 
@@ -487,7 +505,7 @@ elf_machine_rela (struct link_map *map,
 		  const struct r_found_version *version,
 		  Elf64_Addr *const reloc_addr)
 {
-  unsigned long const r_type = ELF64_R_TYPE (reloc->r_info);
+  const unsigned long int r_type = ELF64_R_TYPE (reloc->r_info);
   Elf64_Addr value;
 
 #ifndef RTLD_BOOTSTRAP
@@ -533,35 +551,9 @@ elf_machine_rela (struct link_map *map,
 	      return;
 	    }
 	  else if (R_IA64_TYPE (r_type) == R_IA64_TYPE (R_IA64_FPTR64LSB))
-#ifndef RTLD_BOOTSTRAP
-	    value = __ia64_make_fptr (sym_map, value, &__fptr_root, NULL);
-#else
-	  {
-	    struct ia64_fptr *p_boot_ldso_fptr;
-	    struct ia64_fptr **p_fptr_root;
-	    int *p_fptr_count;
-
-	    /* Special care must be taken to address these variables
-	       during bootstrap.  Further, since we don't know exactly
-	       when __fptr_next will be relocated, we index directly
-	       off __boot_ldso_fptr.  */
-	    asm ("addl %0 = @gprel(__boot_ldso_fptr#), gp\n\t"
-		 "addl %1 = @gprel(__fptr_root#), gp\n\t"
-		 "addl %2 = @gprel(__fptr_count#), gp"
-		 : "=r"(p_boot_ldso_fptr),
-	         "=r"(p_fptr_root),
-	         "=r"(p_fptr_count));
-
-	    /*
-	     * Go from the top - __ia64_make_fptr goes from the bottom,
-	     * this way we will never clash.
-	     */
-	    value = __ia64_make_fptr (sym_map, value, p_fptr_root,
-				      &p_boot_ldso_fptr[--*p_fptr_count]);
-	  }
-#endif
+	    value = __ia64_make_fptr (sym_map, sym, value);
 	  else if (R_IA64_TYPE (r_type) == R_IA64_TYPE (R_IA64_PCREL64LSB))
-	    value -= (Elf64_Addr)reloc_addr & -16;
+	    value -= (Elf64_Addr) reloc_addr & -16;
 	  else
 	    assert (! "unexpected dynamic reloc type");
 	}
@@ -573,7 +565,7 @@ elf_machine_rela (struct link_map *map,
   if (R_IA64_FORMAT (r_type) == R_IA64_FORMAT_64LSB)
     *reloc_addr = value;
   else if (R_IA64_FORMAT (r_type) == R_IA64_FORMAT_32LSB)
-    *(int *)reloc_addr = value;
+    *(int *) reloc_addr = value;
   else if (r_type == R_IA64_IPLTLSB)
     {
       reloc_addr[0] = 0;
@@ -589,8 +581,8 @@ static inline void
 elf_machine_lazy_rel (struct link_map *map,
 		      Elf64_Addr l_addr, const Elf64_Rela *reloc)
 {
-  Elf64_Addr * const reloc_addr = (void *)(l_addr + reloc->r_offset);
-  unsigned long const r_type = ELF64_R_TYPE (reloc->r_info);
+  Elf64_Addr *const reloc_addr = (void *) (l_addr + reloc->r_offset);
+  const unsigned long int r_type = ELF64_R_TYPE (reloc->r_info);
 
   if (r_type == R_IA64_IPLTLSB)
     {