From 7ec1221ff7a5e3faa4e58cdfeb3722b2958499e2 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 3 Mar 2010 02:10:22 -0800
Subject: sparc: Use ba,a,pt in PLTs and fix bugs in R_SPARC_JMP_IREL handling.

2010-03-03  David S. Miller  <davem@davemloft.net>

	* sysdeps/sparc/sparc32/dl-machine.h (elf_machine_lazy_rel): Must
	pass '1' for 't' argument to sparc_fixup_plt.
	* sysdeps/sparc/sparc64/dl-machine.h (elf_machine_lazy_rel):
	Likewise.
	* sysdeps/sparc/sparc32/dl-plt.h (OPCODE_BA_PT): Define.
	(sparc_fixup_plt): Document 't' argument.  Enable branch
	optimization and use v9 branches when possible.  Explain why we
	cannot unconditionally patch the branch into the first PLT
	instruction.
	* sysdeps/sparc/sparc64/dl-plt.h (sparc64_fixup_plt): Document 't'
	argument.  Use v9 branches when possible.  Explain why we can in
	fact unconditionally use a branch in the first PLT instruction
	here.
---
 sysdeps/sparc/sparc64/dl-machine.h |  2 +-
 sysdeps/sparc/sparc64/dl-plt.h     | 29 ++++++++++++++++++++++++-----
 2 files changed, 25 insertions(+), 6 deletions(-)

(limited to 'sysdeps/sparc/sparc64')

diff --git a/sysdeps/sparc/sparc64/dl-machine.h b/sysdeps/sparc/sparc64/dl-machine.h
index 4c915eb586..fcfbb06ac2 100644
--- a/sysdeps/sparc/sparc64/dl-machine.h
+++ b/sysdeps/sparc/sparc64/dl-machine.h
@@ -661,7 +661,7 @@ elf_machine_lazy_rel (struct link_map *map,
 	{
 	  /* 'high' is always zero, for large PLT entries the linker
 	     emits an R_SPARC_IRELATIVE.  */
-	  sparc64_fixup_plt (map, reloc, reloc_addr, value, 0, 0);
+	  sparc64_fixup_plt (map, reloc, reloc_addr, value, 0, 1);
 	}
       else
 	*reloc_addr = value;
diff --git a/sysdeps/sparc/sparc64/dl-plt.h b/sysdeps/sparc/sparc64/dl-plt.h
index e06be43a0a..ca2fe3bbd8 100644
--- a/sysdeps/sparc/sparc64/dl-plt.h
+++ b/sysdeps/sparc/sparc64/dl-plt.h
@@ -28,7 +28,14 @@ sparc64_fixup_plt (struct link_map *map, const Elf64_Rela *reloc,
   Elf64_Addr plt_vaddr = (Elf64_Addr) reloc_addr;
   Elf64_Sxword disp = value - plt_vaddr;
 
-  /* Now move plt_vaddr up to the call instruction.  */
+  /* 't' is '0' when this PLT entry is being resolved during RTLD
+     bootstrap.  In that case every PLT entry gets resolved, so we can
+     optimize by patching from the first instruction of the entry
+     onward, and we need not be mindful of thread safety.
+
+     Otherwise, 't' is '1'.
+
+     Now move plt_vaddr up to the call instruction.  */
   plt_vaddr += ((t + 1) * 4);
 
   /* PLT entries .PLT32768 and above look always the same.  */
@@ -39,10 +46,22 @@ sparc64_fixup_plt (struct link_map *map, const Elf64_Rela *reloc,
   /* Near destination.  */
   else if (disp >= -0x800000 && disp < 0x800000)
     {
-      /* As this is just one instruction, it is thread safe and so
-	 we can avoid the unnecessary sethi FOO, %g1.
-	 b,a target  */
-      insns[0] = 0x30800000 | ((disp >> 2) & 0x3fffff);
+      unsigned int insn;
+
+      /* ba,a */
+      insn = 0x30800000 | ((disp >> 2) & 0x3fffff);
+
+      if (disp >= -0x100000 && disp < 0x100000)
+	{
+	  /* ba,a,pt %icc */
+	  insn = 0x30480000  | ((disp >> 2) & 0x07ffff);
+	}
+
+      /* As this is just one instruction, it is thread safe and so we
+	 can avoid the unnecessary sethi FOO, %g1.  Each 64-bit PLT
+	 entry is 8 instructions long, so we can't run into the 'jmp'
+	 delay slot problems 32-bit PLTs can.  */
+      insns[0] = insn;
       __asm __volatile ("flush %0" : : "r" (insns));
     }
   /* 32-bit Sparc style, the target is in the lower 32-bits of
-- 
cgit 1.4.1