summary refs log tree commit diff
diff options
context:
space:
mode:
authorRichard Henderson <rth@redhat.com>2005-05-28 23:40:09 +0000
committerRichard Henderson <rth@redhat.com>2005-05-28 23:40:09 +0000
commit4d87b39422b38536552be9de805675e63a9e15c2 (patch)
tree7c01cfb7fc5d825a1657b08e84c82ad3a634f2ee
parent7427c5defa19d8bc4c8a14690c5b3fa4653a1d2c (diff)
downloadglibc-4d87b39422b38536552be9de805675e63a9e15c2.tar.gz
glibc-4d87b39422b38536552be9de805675e63a9e15c2.tar.xz
glibc-4d87b39422b38536552be9de805675e63a9e15c2.zip
* elf/elf.h (DT_ALPHA_PLTRO, DT_ALPHA_NUM): New. * sysdeps/alpha/dl-dtprocnum.h: New file. * sysdeps/alpha/dl-machine.h (DT_ALPHA): New. (elf_machine_load_address): Simplify to rely on gprel relocations. (elf_machine_runtime_setup): Handle DT_ALPHA_PLTRO plt format. Remove thread safety workaround for binutils 2.6. (elf_machine_fixup_plt): Handle DT_ALPHA_PLTRO plt format. * sysdeps/alpha/dl-trampoline.S (_dl_runtime_resolve_new): New. (_dl_runtime_profile_new): New. (_dl_runtime_resolve_old): Rename from _dl_runtime_resolve. (_dl_runtime_profile_old): Rename from _dl_runtime_profile. Fix typo in _dl_call_pltexit argument loading.
        * sysdeps/alpha/div_libc.h (funcnoplt): New.
        * sysdeps/alpha/divl.S, sysdeps/alpha/divq.S: Use it.
        * sysdeps/alpha/divqu.S, sysdeps/alpha/reml.S: Likewise.
        * sysdeps/alpha/remq.S, sysdeps/alpha/remqu.S: Likewise.
2005-05-28  Richard Henderson  <rth@redhat.com>

	* elf/elf.h (DT_ALPHA_PLTRO, DT_ALPHA_NUM): New.
	* sysdeps/alpha/dl-dtprocnum.h: New file.
	* sysdeps/alpha/dl-machine.h (DT_ALPHA): New.
	(elf_machine_load_address): Simplify to rely on gprel relocations.
	(elf_machine_runtime_setup): Handle DT_ALPHA_PLTRO plt format.
	Remove thread safety workaround for binutils 2.6.
	(elf_machine_fixup_plt): Handle DT_ALPHA_PLTRO plt format.
	* sysdeps/alpha/dl-trampoline.S (_dl_runtime_resolve_new): New.
	(_dl_runtime_profile_new): New.
	(_dl_runtime_resolve_old): Rename from _dl_runtime_resolve.
	(_dl_runtime_profile_old): Rename from _dl_runtime_profile.  Fix
	typo in _dl_call_pltexit argument loading.

	* sysdeps/alpha/div_libc.h (funcnoplt): New.
	* sysdeps/alpha/divl.S, sysdeps/alpha/divq.S: Use it.
	* sysdeps/alpha/divqu.S, sysdeps/alpha/reml.S: Likewise.
	* sysdeps/alpha/remq.S, sysdeps/alpha/remqu.S: Likewise.
-rw-r--r--ChangeLog20
-rw-r--r--elf/elf.h3
-rw-r--r--sysdeps/alpha/div_libc.h6
-rw-r--r--sysdeps/alpha/divl.S2
-rw-r--r--sysdeps/alpha/divq.S2
-rw-r--r--sysdeps/alpha/divqu.S2
-rw-r--r--sysdeps/alpha/dl-dtprocnum.h3
-rw-r--r--sysdeps/alpha/dl-machine.h146
-rw-r--r--sysdeps/alpha/dl-trampoline.S222
-rw-r--r--sysdeps/alpha/reml.S2
-rw-r--r--sysdeps/alpha/remq.S2
-rw-r--r--sysdeps/alpha/remqu.S2
12 files changed, 301 insertions, 111 deletions
diff --git a/ChangeLog b/ChangeLog
index cc6239126d..0e821a89b1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,23 @@
+2005-05-28  Richard Henderson  <rth@redhat.com>
+
+	* elf/elf.h (DT_ALPHA_PLTRO, DT_ALPHA_NUM): New.
+	* sysdeps/alpha/dl-dtprocnum.h: New file.
+	* sysdeps/alpha/dl-machine.h (DT_ALPHA): New.
+	(elf_machine_load_address): Simplify to rely on gprel relocations.
+	(elf_machine_runtime_setup): Handle DT_ALPHA_PLTRO plt format.
+	Remove thread safety workaround for binutils 2.6.
+	(elf_machine_fixup_plt): Handle DT_ALPHA_PLTRO plt format.
+	* sysdeps/alpha/dl-trampoline.S (_dl_runtime_resolve_new): New.
+	(_dl_runtime_profile_new): New.
+	(_dl_runtime_resolve_old): Rename from _dl_runtime_resolve.
+	(_dl_runtime_profile_old): Rename from _dl_runtime_profile.  Fix
+	typo in _dl_call_pltexit argument loading.
+
+	* sysdeps/alpha/div_libc.h (funcnoplt): New.
+	* sysdeps/alpha/divl.S, sysdeps/alpha/divq.S: Use it.
+	* sysdeps/alpha/divqu.S, sysdeps/alpha/reml.S: Likewise.
+	* sysdeps/alpha/remq.S, sysdeps/alpha/remqu.S: Likewise.
+
 2005-05-26  Andreas Schwab  <schwab@suse.de>
 
 	* locale/Makefile (CFLAGS-loadlocale.c): Don't define.
diff --git a/elf/elf.h b/elf/elf.h
index e246519fab..5ae696da6c 100644
--- a/elf/elf.h
+++ b/elf/elf.h
@@ -1866,6 +1866,9 @@ typedef Elf32_Addr Elf32_Conflict;
 #define LITUSE_ALPHA_TLS_GD	4
 #define LITUSE_ALPHA_TLS_LDM	5
 
+/* Legal values for d_tag of Elf64_Dyn.  */
+#define DT_ALPHA_PLTRO		0x70000000
+#define DT_ALPHA_NUM		1
 
 /* PowerPC specific declarations */
 
diff --git a/sysdeps/alpha/div_libc.h b/sysdeps/alpha/div_libc.h
index 62b4470355..b731b02e25 100644
--- a/sysdeps/alpha/div_libc.h
+++ b/sysdeps/alpha/div_libc.h
@@ -34,6 +34,12 @@
 #define RV	t12
 #define RA	t9
 
+/* The secureplt format does not allow the division routines to be called
+   via plt; there aren't enough registers free to be clobbered.  Avoid 
+   setting the symbol type to STT_FUNC, so that the linker won't be tempted
+   to create a plt entry.  */
+#define funcnoplt notype
+
 /* None of these functions should use implicit anything.  */
 	.set	nomacro
 	.set	noat
diff --git a/sysdeps/alpha/divl.S b/sysdeps/alpha/divl.S
index 408d66db00..9bac0450d0 100644
--- a/sysdeps/alpha/divl.S
+++ b/sysdeps/alpha/divl.S
@@ -36,7 +36,7 @@
 	.text
 	.align	4
 	.globl	__divl
-	.type	__divl, @function
+	.type	__divl, @funcnoplt
 	.usepv	__divl, no
 
 	cfi_startproc
diff --git a/sysdeps/alpha/divq.S b/sysdeps/alpha/divq.S
index 7f245ac056..d2ed2c5af6 100644
--- a/sysdeps/alpha/divq.S
+++ b/sysdeps/alpha/divq.S
@@ -43,7 +43,7 @@
 	.text
 	.align	4
 	.globl	__divq
-	.type	__divq, @function
+	.type	__divq, @funcnoplt
 	.usepv	__divq, no
 
 	cfi_startproc
diff --git a/sysdeps/alpha/divqu.S b/sysdeps/alpha/divqu.S
index fc00fa133c..f2a8a4d533 100644
--- a/sysdeps/alpha/divqu.S
+++ b/sysdeps/alpha/divqu.S
@@ -43,7 +43,7 @@
 	.text
 	.align	4
 	.globl	__divqu
-	.type	__divqu, @function
+	.type	__divqu, @funcnoplt
 	.usepv	__divqu, no
 
 	cfi_startproc
diff --git a/sysdeps/alpha/dl-dtprocnum.h b/sysdeps/alpha/dl-dtprocnum.h
new file mode 100644
index 0000000000..67845cdd62
--- /dev/null
+++ b/sysdeps/alpha/dl-dtprocnum.h
@@ -0,0 +1,3 @@
+/* Number of extra dynamic section entries for this architecture.  By
+   default there are none.  */
+#define DT_THISPROCNUM	DT_ALPHA_NUM
diff --git a/sysdeps/alpha/dl-machine.h b/sysdeps/alpha/dl-machine.h
index 173a4111f8..88c357ea07 100644
--- a/sysdeps/alpha/dl-machine.h
+++ b/sysdeps/alpha/dl-machine.h
@@ -33,6 +33,9 @@
    where the dynamic linker should not map anything.  */
 #define ELF_MACHINE_USER_ADDRESS_MASK	0x120000000UL
 
+/* Translate a processor specific dynamic tag to the index in l_info array.  */
+#define DT_ALPHA(x) (DT_ALPHA_##x - DT_LOPROC + DT_NUM)
+
 /* Return nonzero iff ELF header is compatible with the running host.  */
 static inline int
 elf_machine_matches_host (const Elf64_Ehdr *ehdr)
@@ -55,105 +58,74 @@ elf_machine_dynamic (void)
 }
 
 /* Return the run-time load address of the shared object.  */
+
 static inline Elf64_Addr
 elf_machine_load_address (void)
 {
-  /* NOTE: While it is generally unfriendly to put data in the text
-     segment, it is only slightly less so when the "data" is an
-     instruction.  While we don't have to worry about GLD just yet, an
-     optimizing linker might decide that our "data" is an unreachable
-     instruction and throw it away -- with the right switches, DEC's
-     linker will do this.  What ought to happen is we should add
-     something to GAS to allow us access to the new GPREL_HI32/LO32
-     relocation types stolen from OSF/1 3.0.  */
-  /* This code relies on the fact that BRADDR relocations do not
-     appear in dynamic relocation tables.  Not that that would be very
-     useful anyway -- br/bsr has a 4MB range and the shared libraries
-     are usually many many terabytes away.  */
-
-  Elf64_Addr dot;
-  long int zero_disp;
-
-  asm("br %0, 1f\n"
-      "0:\n\t"
-      "br $0, 2f\n"
-      "1:\n\t"
-      ".section\t.data\n"
-      "2:\n\t"
-      ".quad 0b\n\t"
-      ".previous"
-      : "=r"(dot));
-
-  zero_disp = *(int *) dot;
-  zero_disp = (zero_disp << 43) >> 41;
-
-  return dot - *(Elf64_Addr *) (dot + 4 + zero_disp);
+  /* This relies on the compiler using gp-relative addresses for static symbols.  */
+  static void *dot = &dot;
+  return (void *)&dot - dot;
 }
 
 /* Set up the loaded object described by L so its unrelocated PLT
    entries will jump to the on-demand fixup code in dl-runtime.c.  */
 
 static inline int
-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
+elf_machine_runtime_setup (struct link_map *map, int lazy, int profile)
 {
-  Elf64_Addr plt;
-  extern void _dl_runtime_resolve (void);
-  extern void _dl_runtime_profile (void);
+  extern char _dl_runtime_resolve_new[] attribute_hidden;
+  extern char _dl_runtime_profile_new[] attribute_hidden;
+  extern char _dl_runtime_resolve_old[] attribute_hidden;
+  extern char _dl_runtime_profile_old[] attribute_hidden;
+
+  struct pltgot {
+    char *resolve;
+    struct link_map *link;
+  };
+
+  struct pltgot *pg;
+  long secureplt;
+  char *resolve;
+
+  if (map->l_info[DT_JMPREL] == 0 || !lazy)
+    return lazy;
+
+  /* Check to see if we're using the read-only plt form.  */
+  secureplt = map->l_info[DT_ALPHA(PLTRO)] != 0;
+
+  /* If the binary uses the read-only secure plt format, PG points to
+     the .got.plt section, which is the right place for ld.so to place
+     its hooks.  Otherwise, PG is currently pointing at the start of
+     the plt; the hooks go at offset 16.  */
+  pg = (struct pltgot *) D_PTR (map, l_info[DT_PLTGOT]);
+  pg += !secureplt;
+
+  /* This function will be called to perform the relocation.  They're
+     not declared as functions to convince the compiler to use gp
+     relative relocations for them.  */
+  if (secureplt)
+    resolve = _dl_runtime_resolve_new;
+  else
+    resolve = _dl_runtime_resolve_old;
 
-  if (l->l_info[DT_JMPREL] && lazy)
+  if (__builtin_expect (profile, 0))
     {
-      /* The GOT entries for the functions in the PLT have not been
-	 filled in yet.  Their initial contents are directed to the
-	 PLT which arranges for the dynamic linker to be called.  */
-      plt = D_PTR (l, l_info[DT_PLTGOT]);
-
-      /* This function will be called to perform the relocation.  */
-      if (__builtin_expect (profile, 0))
-	{
-	  *(Elf64_Addr *)(plt + 16) = (Elf64_Addr) &_dl_runtime_profile;
-
-	  if (GLRO(dl_profile) != NULL
-	      && _dl_name_match_p (GLRO(dl_profile), l))
-	    {
-	      /* This is the object we are looking for.  Say that we really
-		 want profiling and the timers are started.  */
-	      GL(dl_profile_map) = l;
-	    }
-	}
+      if (secureplt)
+	resolve = _dl_runtime_profile_new;
       else
-        *(Elf64_Addr *)(plt + 16) = (Elf64_Addr) &_dl_runtime_resolve;
-
-      /* Identify this shared object */
-      *(Elf64_Addr *)(plt + 24) = (Elf64_Addr) l;
+	resolve = _dl_runtime_profile_old;
 
-      /* If the first instruction of the plt entry is not
-	 "br $28, plt0", we have to reinitialize .plt for lazy relocation.  */
-      if (*(unsigned int *)(plt + 32) != 0xc39ffff7)
+      if (GLRO(dl_profile) && _dl_name_match_p (GLRO(dl_profile), map))
 	{
-	  unsigned int val = 0xc39ffff7;
-	  unsigned int *slot, *end;
-	  const Elf64_Rela *rela = (const Elf64_Rela *)
-				   D_PTR (l, l_info[DT_JMPREL]);
-	  Elf64_Addr l_addr = l->l_addr;
-
-	  /* br t12,.+4; ldq t12,12(t12); nop; jmp t12,(t12),.+4 */
-	  *(unsigned long *)plt = 0xa77b000cc3600000;
-	  *(unsigned long *)(plt + 8) = 0x6b7b000047ff041f;
-	  slot = (unsigned int *)(plt + 32);
-	  end = (unsigned int *)(plt + 32
-				 + l->l_info[DT_PLTRELSZ]->d_un.d_val / 2);
-	  while (slot < end)
-	    {
-	      /* br at,.plt+0 */
-	      *slot = val;
-	      *(Elf64_Addr *) rela->r_offset = (Elf64_Addr) slot - l_addr;
-	      val -= 3;
-	      slot += 3;
-	      ++rela;
-	    }
+	  /* This is the object we are looking for.  Say that we really
+	     want profiling and the timers are started.  */
+	  GL(dl_profile_map) = map;
 	}
     }
 
+  pg->resolve = resolve;
+  pg->link = map;
+
   return lazy;
 }
 
@@ -280,7 +252,7 @@ $fixup_stack:							\n\
 /* Fix up the instructions of a PLT entry to invoke the function
    rather than the dynamic linker.  */
 static inline Elf64_Addr
-elf_machine_fixup_plt (struct link_map *l, lookup_t t,
+elf_machine_fixup_plt (struct link_map *map, lookup_t t,
 		       const Elf64_Rela *reloc,
 		       Elf64_Addr *got_addr, Elf64_Addr value)
 {
@@ -291,10 +263,16 @@ elf_machine_fixup_plt (struct link_map *l, lookup_t t,
   /* Store the value we are going to load.  */
   *got_addr = value;
 
+  /* If this binary uses the read-only secure plt format, we're done.  */
+  if (map->l_info[DT_ALPHA(PLTRO)])
+    return value;
+
+  /* Otherwise we have to modify the plt entry in place to do the branch.  */
+
   /* Recover the PLT entry address by calculating reloc's index into the
      .rela.plt, and finding that entry in the .plt.  */
-  rela_plt = (void *) D_PTR (l, l_info[DT_JMPREL]);
-  plte = (void *) (D_PTR (l, l_info[DT_PLTGOT]) + 32);
+  rela_plt = (const Elf64_Rela *) D_PTR (map, l_info[DT_JMPREL]);
+  plte = (Elf64_Word *) (D_PTR (map, l_info[DT_PLTGOT]) + 32);
   plte += 3 * (reloc - rela_plt);
 
   /* Find the displacement from the plt entry to the function.  */
diff --git a/sysdeps/alpha/dl-trampoline.S b/sysdeps/alpha/dl-trampoline.S
index 42350836ef..c52efbb3bc 100644
--- a/sysdeps/alpha/dl-trampoline.S
+++ b/sysdeps/alpha/dl-trampoline.S
@@ -21,13 +21,202 @@
 
 	.set noat
 
-	.globl	_dl_runtime_resolve
-	.ent	_dl_runtime_resolve
+.macro savei regno, offset
+	stq	$\regno, \offset($30)
+	cfi_rel_offset(\regno, \offset)
+.endm
+
+.macro savef regno, offset
+	stt	$f\regno, \offset($30)
+	cfi_rel_offset(\regno+32, \offset)
+.endm
+
+	.align	4
+	.globl	_dl_runtime_resolve_new
+	.ent	_dl_runtime_resolve_new
+
+#undef FRAMESIZE
+#define FRAMESIZE	14*8
+
+_dl_runtime_resolve_new:
+	.frame	$30, FRAMESIZE, $26, 0
+	.mask	0x4000000, 0
+
+	ldah	$29, 0($27)		!gpdisp!1
+	lda	$30, -FRAMESIZE($30)
+	stq	$26, 0*8($30)
+	stq	$16, 2*8($30)
+
+	stq	$17, 3*8($30)
+	lda	$29, 0($29)		!gpdisp!1
+	stq	$18, 4*8($30)
+	mov	$28, $16		/* link_map from .got.plt */
+
+	stq	$19, 5*8($30)
+	mov	$25, $17		/* offset of reloc entry */
+	stq	$20, 6*8($30)
+	mov	$26, $18		/* return address */
+
+	stq	$21, 7*8($30)
+	stt	$f16, 8*8($30)
+	stt	$f17, 9*8($30)
+	stt	$f18, 10*8($30)
+
+	stt	$f19, 11*8($30)
+	stt	$f20, 12*8($30)
+	stt	$f21, 13*8($30)
+	.prologue 2
+
+	bsr	$26, _dl_fixup		!samegp
+	mov	$0, $27
+
+	ldq	$26, 0*8($30)
+	ldq	$16, 2*8($30)
+	ldq	$17, 3*8($30)
+	ldq	$18, 4*8($30)
+	ldq	$19, 5*8($30)
+	ldq	$20, 6*8($30)
+	ldq	$21, 7*8($30)
+	ldt	$f16, 8*8($30)
+	ldt	$f17, 9*8($30)
+	ldt	$f18, 10*8($30)
+	ldt	$f19, 11*8($30)
+	ldt	$f20, 12*8($30)
+	ldt	$f21, 13*8($30)
+	lda	$30, FRAMESIZE($30)
+	jmp	$31, ($27), 0
+	.end	_dl_runtime_resolve_new
+
+	.globl	_dl_runtime_profile_new
+	.type	_dl_runtime_profile_new, @function
+
+#undef FRAMESIZE
+#define FRAMESIZE	20*8
+
+	/* We save the registers in a different order than desired by
+	   .mask/.fmask, so we have to use explicit cfi directives.  */
+	cfi_startproc
+
+_dl_runtime_profile_new:
+	ldah	$29, 0($27)		!gpdisp!2
+	lda	$30, -FRAMESIZE($30)
+	savei	26, 0*8
+	stq	$16, 2*8($30)
+
+	stq	$17, 3*8($30)
+	lda	$29, 0($29)		!gpdisp!2
+	stq	$18, 4*8($30)
+	lda	$1, FRAMESIZE($30)	/* incoming sp value */
+
+	stq	$1, 1*8($30)
+	stq	$19, 5*8($30)
+	stq	$20, 6*8($30)
+	mov	$28, $16		/* link_map from .got.plt */
+
+	stq	$21, 7*8($30)
+	mov	$25, $17		/* offset of reloc entry */
+	stt	$f16, 8*8($30)
+	mov	$26, $18		/* return address */
+
+	stt	$f17, 9*8($30)
+	mov	$30, $19		/* La_alpha_regs address */
+	stt	$f18, 10*8($30)
+	lda	$20, 14*8($30)		/* framesize address */
+
+	stt	$f19, 11*8($30)
+	stt	$f20, 12*8($30)
+	stt	$f21, 13*8($30)
+	stq	$28, 16*8($30)
+	stq	$25, 17*8($30)
+
+	bsr	$26, _dl_profile_fixup	!samegp
+	mov	$0, $27
+
+	/* Discover if we're wrapping this call.  */
+	ldq	$18, 14*8($30)
+	bge	$18, 1f
+
+	ldq	$26, 0*8($30)
+	ldq	$16, 2*8($30)
+	ldq	$17, 3*8($30)
+	ldq	$18, 4*8($30)
+	ldq	$19, 5*8($30)
+	ldq	$20, 6*8($30)
+	ldq	$21, 7*8($30)
+	ldt	$f16, 8*8($30)
+	ldt	$f17, 9*8($30)
+	ldt	$f18, 10*8($30)
+	ldt	$f19, 11*8($30)
+	ldt	$f20, 12*8($30)
+	ldt	$f21, 13*8($30)
+	lda	$30, FRAMESIZE($30)
+	jmp	$31, ($27), 0
+
+1:
+	/* Create a frame pointer and allocate a new argument frame.  */
+	savei	15, 15*8
+	mov	$30, $15
+	cfi_def_cfa_register (15)
+	addq	$18, 15, $18
+	bic	$18, 15, $18
+	subq	$30, $18, $30
+
+	/* Save the call destination around memcpy.  */
+	stq	$0, 14*8($30)
+
+	/* Copy the stack arguments into place.  */
+	lda	$16, 0($30)
+	lda	$17, FRAMESIZE($15)
+	jsr	$26, memcpy
+	ldgp	$29, 0($26)
+
+	/* Reload the argument registers.  */
+	ldq	$27, 14*8($30)
+	ldq	$16, 2*8($15)
+	ldq	$17, 3*8($15)
+	ldq	$18, 4*8($15)
+	ldq	$19, 5*8($15)
+	ldq	$20, 6*8($15)
+	ldq	$21, 7*8($15)
+	ldt	$f16, 8*8($15)
+	ldt	$f17, 9*8($15)
+	ldt	$f18, 10*8($15)
+	ldt	$f19, 11*8($15)
+	ldt	$f20, 12*8($15)
+	ldt	$f21, 13*8($15)
+
+	jsr	$26, ($27), 0
+	ldgp	$29, 0($26)
+
+	/* Set up for call to _dl_call_pltexit.  */
+	ldq	$16, 16*8($15)
+	ldq	$17, 17*8($15)
+	stq	$0, 16*8($15)
+	lda	$18, 0($15)
+	stq	$1, 17*8($15)
+	lda	$19, 16*8($15)
+	stt	$f0, 18*8($15)
+	stt	$f1, 19*8($15)
+	bsr	$26, _dl_call_pltexit	!samegp
+
+	mov	$15, $30
+	cfi_def_cfa_register (30)
+	ldq	$26, 0($30)
+	ldq	$15, 15*8($30)
+	lda	$30, FRAMESIZE($30)
+	ret
+
+	cfi_endproc
+	.size	_dl_runtime_profile_new, .-_dl_runtime_profile_new
+
+	.align	4
+	.globl	_dl_runtime_resolve_old
+	.ent	_dl_runtime_resolve_old
 
 #undef FRAMESIZE
 #define FRAMESIZE	44*8
 
-_dl_runtime_resolve:
+_dl_runtime_resolve_old:
 	lda	$30, -FRAMESIZE($30)
 	.frame	$30, FRAMESIZE, $26
 	/* Preserve all registers that C normally doesn't.  */
@@ -146,30 +335,21 @@ _dl_runtime_resolve:
 	lda	$30, FRAMESIZE($30)
 	jmp	$31, ($27)
 
-	.end	_dl_runtime_resolve
+	.end	_dl_runtime_resolve_old
 
-	.globl	_dl_runtime_profile
-	.usepv	_dl_runtime_profile, no
-	.type	_dl_runtime_profile, @function
+	.globl	_dl_runtime_profile_old
+	.usepv	_dl_runtime_profile_old, no
+	.type	_dl_runtime_profile_old, @function
 
 	/* We save the registers in a different order than desired by
 	   .mask/.fmask, so we have to use explicit cfi directives.  */
 	cfi_startproc
 
-.macro savei regno, offset
-	stq	$\regno, \offset($30)
-	cfi_rel_offset(\regno, \offset)
-.endm
-
-.macro savef regno, offset
-	stt	$f\regno, \offset($30)
-	cfi_rel_offset(\regno+32, \offset)
-.endm
-
 #undef FRAMESIZE
 #define FRAMESIZE	50*8
 
-_dl_runtime_profile:
+	.align	4
+_dl_runtime_profile_old:
 	lda	$30, -FRAMESIZE($30)
 	cfi_adjust_cfa_offset (FRAMESIZE)
 
@@ -340,8 +520,8 @@ _dl_runtime_profile:
 	ldgp	$29, 0($26)
 
 	/* Set up for call to _dl_call_pltexit.  */
-	ldq	$16, 48($15)
-	ldq	$17, 49($15)
+	ldq	$16, 48*8($15)
+	ldq	$17, 49*8($15)
 	stq	$0, 46*8($15)
 	lda	$18, 0($15)
 	stq	$1, 47*8($15)
@@ -358,4 +538,4 @@ _dl_runtime_profile:
 	ret
 
 	cfi_endproc
-	.size	_dl_runtime_profile, .-_dl_runtime_profile
+	.size	_dl_runtime_profile_old, .-_dl_runtime_profile_old
diff --git a/sysdeps/alpha/reml.S b/sysdeps/alpha/reml.S
index bfc3be5c3f..ae291b0505 100644
--- a/sysdeps/alpha/reml.S
+++ b/sysdeps/alpha/reml.S
@@ -38,7 +38,7 @@
 	.text
 	.align	4
 	.globl	__reml
-	.type	__reml, @function
+	.type	__reml, @funcnoplt
 	.usepv	__reml, no
 
 	cfi_startproc
diff --git a/sysdeps/alpha/remq.S b/sysdeps/alpha/remq.S
index 645a834453..64e958bb95 100644
--- a/sysdeps/alpha/remq.S
+++ b/sysdeps/alpha/remq.S
@@ -43,7 +43,7 @@
 	.text
 	.align	4
 	.globl	__remq
-	.type	__remq, @function
+	.type	__remq, @funcnoplt
 	.usepv	__remq, no
 
 	cfi_startproc
diff --git a/sysdeps/alpha/remqu.S b/sysdeps/alpha/remqu.S
index bfa78dff57..dcc1c88b3c 100644
--- a/sysdeps/alpha/remqu.S
+++ b/sysdeps/alpha/remqu.S
@@ -43,7 +43,7 @@
 	.text
 	.align	4
 	.globl	__remqu
-	.type	__remqu, @function
+	.type	__remqu, @funcnoplt
 	.usepv	__remqu, no
 
 	cfi_startproc