From 7ec1221ff7a5e3faa4e58cdfeb3722b2958499e2 Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Wed, 3 Mar 2010 02:10:22 -0800
Subject: sparc: Use ba,a,pt in PLTs and fix bugs in R_SPARC_JMP_IREL handling.

2010-03-03 David S. Miller

        * sysdeps/sparc/sparc32/dl-machine.h (elf_machine_lazy_rel): Must
        pass '1' for 't' argument to sparc_fixup_plt.
        * sysdeps/sparc/sparc64/dl-machine.h (elf_machine_lazy_rel):
        Likewise.
        * sysdeps/sparc/sparc32/dl-plt.h (OPCODE_BA_PT): Define.
        (sparc_fixup_plt): Document 't' argument. Enable branch
        optimization and use v9 branches when possible. Explain why we
        cannot unconditionally patch the branch into the first PLT
        instruction.
        * sysdeps/sparc/sparc64/dl-plt.h (sparc64_fixup_plt): Document 't'
        argument. Use v9 branches when possible. Explain why we can in
        fact unconditionally use a branch in the first PLT instruction
        here.
---
 sysdeps/sparc/sparc64/dl-machine.h |  2 +-
 sysdeps/sparc/sparc64/dl-plt.h     | 29 ++++++++++++++++++++++++-----
 2 files changed, 25 insertions(+), 6 deletions(-)

(limited to 'sysdeps/sparc/sparc64')

diff --git a/sysdeps/sparc/sparc64/dl-machine.h b/sysdeps/sparc/sparc64/dl-machine.h
index 4c915eb586..fcfbb06ac2 100644
--- a/sysdeps/sparc/sparc64/dl-machine.h
+++ b/sysdeps/sparc/sparc64/dl-machine.h
@@ -661,7 +661,7 @@ elf_machine_lazy_rel (struct link_map *map,
         {
           /* 'high' is always zero, for large PLT entries the linker
              emits an R_SPARC_IRELATIVE. */
-          sparc64_fixup_plt (map, reloc, reloc_addr, value, 0, 0);
+          sparc64_fixup_plt (map, reloc, reloc_addr, value, 0, 1);
         }
       else
         *reloc_addr = value;
diff --git a/sysdeps/sparc/sparc64/dl-plt.h b/sysdeps/sparc/sparc64/dl-plt.h
index e06be43a0a..ca2fe3bbd8 100644
--- a/sysdeps/sparc/sparc64/dl-plt.h
+++ b/sysdeps/sparc/sparc64/dl-plt.h
@@ -28,7 +28,14 @@ sparc64_fixup_plt (struct link_map *map, const Elf64_Rela *reloc,
   Elf64_Addr plt_vaddr = (Elf64_Addr) reloc_addr;
   Elf64_Sxword disp = value - plt_vaddr;
 
-  /* Now move plt_vaddr up to the call instruction. */
+  /* 't' is '0' if we are resolving this PLT entry for RTLD bootstrap,
+     in which case we'll be resolving all PLT entries and thus can
+     optimize by overwriting instructions starting at the first PLT entry
+     instruction and we need not be mindful of thread safety.
+
+     Otherwise, 't' is '1'.
+
+     Now move plt_vaddr up to the call instruction. */
   plt_vaddr += ((t + 1) * 4);
 
   /* PLT entries .PLT32768 and above look always the same. */
@@ -39,10 +46,22 @@ sparc64_fixup_plt (struct link_map *map, const Elf64_Rela *reloc,
   /* Near destination. */
   else if (disp >= -0x800000 && disp < 0x800000)
     {
-      /* As this is just one instruction, it is thread safe and so
-         we can avoid the unnecessary sethi FOO, %g1.
-         b,a target */
-      insns[0] = 0x30800000 | ((disp >> 2) & 0x3fffff);
+      unsigned int insn;
+
+      /* ba,a */
+      insn = 0x30800000 | ((disp >> 2) & 0x3fffff);
+
+      if (disp >= -0x100000 && disp < 0x100000)
+        {
+          /* ba,a,pt %icc */
+          insn = 0x30480000 | ((disp >> 2) & 0x07ffff);
+        }
+
+      /* As this is just one instruction, it is thread safe and so we
+         can avoid the unnecessary sethi FOO, %g1. Each 64-bit PLT
+         entry is 8 instructions long, so we can't run into the 'jmp'
+         delay slot problems 32-bit PLTs can. */
+      insns[0] = insn;
       __asm __volatile ("flush %0" : : "r" (insns));
     }
   /* 32-bit Sparc style, the target is in the lower 32-bits of
-- 
cgit 1.4.1