diff options
Diffstat (limited to 'sysdeps/powerpc/dl-machine.h')
-rw-r--r-- | sysdeps/powerpc/dl-machine.h | 396 |
1 files changed, 215 insertions, 181 deletions
diff --git a/sysdeps/powerpc/dl-machine.h b/sysdeps/powerpc/dl-machine.h index a60a29723d..541892cdfc 100644 --- a/sysdeps/powerpc/dl-machine.h +++ b/sysdeps/powerpc/dl-machine.h @@ -26,6 +26,14 @@ /* stuff for the PLT */ #define PLT_INITIAL_ENTRY_WORDS 18 #define PLT_LONGBRANCH_ENTRY_WORDS 10 +#define PLT_DOUBLE_SIZE (1<<13) +#define PLT_ENTRY_START_WORDS(entry_number) \ + (PLT_INITIAL_ENTRY_WORDS + (entry_number)*2 + \ + ((entry_number) > PLT_DOUBLE_SIZE ? \ + ((entry_number) - PLT_DOUBLE_SIZE)*2 : \ + 0)) +#define PLT_DATA_START_WORDS(num_entries) PLT_ENTRY_START_WORDS(num_entries) + #define OPCODE_ADDI(rd,ra,simm) \ (0x38000000 | (rd) << 21 | (ra) << 16 | (simm) & 0xffff) #define OPCODE_ADDIS(rd,ra,simm) \ @@ -44,6 +52,19 @@ #define OPCODE_LI(rd,simm) OPCODE_ADDI(rd,0,simm) #define OPCODE_SLWI(ra,rs,sh) OPCODE_RLWINM(ra,rs,sh,0,31-sh) +#define PPC_DCBST(where) asm __volatile__ ("dcbst 0,%0" : : "r"(where)) ++#define PPC_SYNC asm __volatile__ ("sync") ++#define PPC_ISYNC asm __volatile__ ("sync; isync") ++#define PPC_ICBI(where) asm __volatile__ ("icbi 0,%0" : : "r"(where)) + +/* Use this when you've modified some code, but it won't be in the + instruction fetch queue (or when it doesn't matter if it is). */ +#define MODIFIED_CODE_NOQUEUE(where) \ + do { PPC_DCBST(where); PPC_SYNC; PPC_ICBI(where); } while (0) +/* Use this when it might be in the instruction queue. */ +#define MODIFIED_CODE(where) \ + do { PPC_DCBST(where); PPC_SYNC; PPC_ICBI(where); PPC_ISYNC; } while (0) + /* Return nonzero iff E_MACHINE is compatible with the running host. */ static inline int @@ -53,7 +74,8 @@ elf_machine_matches_host (Elf32_Half e_machine) } -/* Return the link-time address of _DYNAMIC, the first value in the GOT. */ +/* Return the link-time address of _DYNAMIC, stored as + the first value in the GOT. */ static inline Elf32_Addr elf_machine_dynamic (void) { @@ -79,11 +101,11 @@ elf_machine_load_address (void) get confused. asm ("bcl 20,31,0f ;" - "0: mflr 0 ;" - "lis %0,0b@ha;" - "addi %0,%0,0b@l;" - "subf %0,%0,0" - : "=b" (addr) : : "r0", "lr"); + "0: mflr 0 ;" + "lis %0,0b@ha;" + "addi %0,%0,0b@l;" + "subf %0,%0,0" + : "=b" (addr) : : "r0", "lr"); doesn't work, because the linker doesn't have to (and in fact doesn't) update the @ha and @l references; the loader (which runs after this @@ -134,7 +156,8 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, if (rinfo == R_PPC_NONE) return; - if (sym && ELF32_ST_TYPE (sym->st_info) == STT_SECTION || + assert (sym != NULL); + if (ELF32_ST_TYPE (sym->st_info) == STT_SECTION || rinfo == R_PPC_RELATIVE) { /* Has already been relocated. */ @@ -143,133 +166,124 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, } else { - assert (sym != NULL); - if (rinfo == R_PPC_JMP_SLOT) - loadbase = (Elf32_Word) (char *) RESOLVE (&sym, - version, DL_LOOKUP_NOPLT); + int flags; + + /* We never want to use a PLT entry as the destination of a + reloc, when what is being relocated is a branch. This is + partly for efficiency, but mostly so we avoid loops. */ + if (rinfo == R_PPC_REL24 || + rinfo == R_PPC_ADDR24 || + rinfo == R_PPC_JMP_SLOT) + flags = DL_LOOKUP_NOPLT; + else if (rinfo == R_PPC_COPY) + flags = DL_LOOKUP_NOEXEC; else - loadbase = (Elf32_Word) (char *) RESOLVE (&sym, version, 0); + flags = 0; + + loadbase = (Elf32_Word) (char *) (RESOLVE (&sym, version, flags)); if (sym == NULL) { /* Weak symbol that wasn't actually defined anywhere. */ - assert (loadbase == 0); + assert(loadbase == 0); finaladdr = reloc->r_addend; } else - finaladdr = (loadbase + (Elf32_Word)(char *)sym->st_value - + reloc->r_addend); + finaladdr = (loadbase + (Elf32_Word) (char *) sym->st_value + + reloc->r_addend); } - switch (rinfo) + /* This is an if/else if chain because GCC 2.7.2.[012] turns case + statements into non-PIC table lookups. When a later version + comes out that fixes this, this should be changed. */ + if (rinfo == R_PPC_ADDR16_LO) { - case R_PPC_UADDR16: - case R_PPC_ADDR16_LO: - case R_PPC_ADDR16: *(Elf32_Half*) reloc_addr = finaladdr; - break; - - case R_PPC_ADDR16_HI: + } + else if (rinfo == R_PPC_ADDR16_HI) + { *(Elf32_Half*) reloc_addr = finaladdr >> 16; - break; - - case R_PPC_ADDR16_HA: + } + else if (rinfo == R_PPC_ADDR16_HA) + { *(Elf32_Half*) reloc_addr = finaladdr + 0x8000 >> 16; - break; - - case R_PPC_REL24: - { - Elf32_Sword delta = finaladdr - (Elf32_Word) (char *) reloc_addr; - assert (delta << 6 >> 6 == delta); - *reloc_addr = *reloc_addr & 0xfc000003 | delta & 0x3fffffc; - } - break; - - case R_PPC_UADDR32: - case R_PPC_GLOB_DAT: - case R_PPC_ADDR32: - case R_PPC_RELATIVE: + } + else if (rinfo == R_PPC_REL24) + { + Elf32_Sword delta = finaladdr - (Elf32_Word) (char *) reloc_addr; + assert (delta << 6 >> 6 == delta); + *reloc_addr = *reloc_addr & 0xfc000003 | delta & 0x3fffffc; + } + else if (rinfo == R_PPC_UADDR32 || + rinfo == R_PPC_GLOB_DAT || + rinfo == R_PPC_ADDR32 || + rinfo == R_PPC_RELATIVE) + { *reloc_addr = finaladdr; - break; - - case R_PPC_ADDR24: + } + else if (rinfo == R_PPC_ADDR24) + { + assert (finaladdr << 6 >> 6 == finaladdr); *reloc_addr = *reloc_addr & 0xfc000003 | finaladdr & 0x3fffffc; - break; - - case R_PPC_REL14_BRTAKEN: - case R_PPC_REL14_BRNTAKEN: - case R_PPC_REL14: - { - Elf32_Sword delta = finaladdr - (Elf32_Word) (char *) reloc_addr; - *reloc_addr = *reloc_addr & 0xffdf0003 | delta & 0xfffc; - if (rinfo == R_PPC_REL14_BRTAKEN && delta >= 0 || - rinfo == R_PPC_REL14_BRNTAKEN && delta < 0) - *reloc_addr |= 0x00200000; - } - break; - - case R_PPC_COPY: - { - /* Can't use memcpy (because we can't call any functions here). */ - int i; - for (i = 0; i < sym->st_size; ++i) - ((unsigned char *) reloc_addr)[i] = - ((unsigned char *)finaladdr)[i]; - } - break; - - case R_PPC_REL32: + } + else if (rinfo == R_PPC_COPY) + { + /* Memcpy is safe to use here, because ld.so doesn't have any + COPY relocs (it's self-contained). */ + memcpy (reloc_addr, (char *) finaladdr, sym->st_size); + } + else if (rinfo == R_PPC_REL32) + { *reloc_addr = finaladdr - (Elf32_Word) (char *) reloc_addr; - break; - - case R_PPC_JMP_SLOT: - if (finaladdr <= 0x01fffffc || finaladdr >= 0xfe000000) - *reloc_addr = OPCODE_BA (finaladdr); + } + else if (rinfo == R_PPC_JMP_SLOT) + { + Elf32_Sword delta = finaladdr - (Elf32_Word) (char *) reloc_addr; + if (delta << 6 >> 6 == delta) + *reloc_addr = OPCODE_B(delta); + else if (finaladdr <= 0x01fffffc || finaladdr >= 0xfe000000) + *reloc_addr = OPCODE_BA(finaladdr); else { - Elf32_Sword delta = finaladdr - (Elf32_Word) (char *) reloc_addr; - if (delta <= 0x01fffffc && delta >= 0xfe000000) - *reloc_addr = OPCODE_B (delta); + Elf32_Word *plt = (Elf32_Word *)((char *)map->l_addr + + map->l_info[DT_PLTGOT]->d_un.d_val); + Elf32_Word index = (reloc_addr - plt - PLT_INITIAL_ENTRY_WORDS)/2; + Elf32_Word offset = index * 2 + PLT_INITIAL_ENTRY_WORDS; + + if (index >= PLT_DOUBLE_SIZE) + { + /* Slots greater than or equal to 2^13 have 4 words + available instead of two. */ + plt[offset ] = OPCODE_LI (11,finaladdr); + plt[offset+1] = OPCODE_ADDIS (11,11,finaladdr + 0x8000 >> 16); + plt[offset+2] = OPCODE_MTCTR (11); + plt[offset+3] = OPCODE_BCTR (); + } else { - Elf32_Word *plt = - (Elf32_Word *) ((char *) map->l_addr - + map->l_info[DT_PLTGOT]->d_un.d_val); - Elf32_Word index =((reloc_addr - plt - PLT_INITIAL_ENTRY_WORDS) - / 2); - int num_plt_entries = (map->l_info[DT_PLTRELSZ]->d_un.d_val - / sizeof (Elf32_Rela)); - int rel_offset_words = (PLT_INITIAL_ENTRY_WORDS - + num_plt_entries * 2); - - if (index >= (1 << 13)) - { - /* Indexes greater than or equal to 2^13 have 4 - words available instead of two. */ - plt[index * 2 + PLT_INITIAL_ENTRY_WORDS] = - OPCODE_LI (11, finaladdr); - plt[index * 2 + 1 + PLT_INITIAL_ENTRY_WORDS] = - OPCODE_ADDIS (11, 11, finaladdr + 0x8000 >> 16); - plt[index * 2 + 2 + PLT_INITIAL_ENTRY_WORDS] = - OPCODE_MTCTR (11); - plt[index * 2 + 2 + PLT_INITIAL_ENTRY_WORDS] = - OPCODE_BCTR (); - } - else - { - plt[index * 2 + PLT_INITIAL_ENTRY_WORDS] = - OPCODE_LI (11, index * 4); - plt[index * 2 + 1 + PLT_INITIAL_ENTRY_WORDS] = - OPCODE_B(-(4 * (index * 2 + 1 + PLT_INITIAL_ENTRY_WORDS - + PLT_LONGBRANCH_ENTRY_WORDS))); - plt[index + rel_offset_words] = finaladdr; - } + Elf32_Word num_plt_entries; + Elf32_Word rel_offset_words; + + num_plt_entries = (map->l_info[DT_PLTRELSZ]->d_un.d_val + / sizeof(Elf32_Rela)); + rel_offset_words = PLT_DATA_START_WORDS (num_plt_entries); + + plt[offset ] = OPCODE_LI (11,index * 4); + plt[offset+1] = OPCODE_B (-(4 * (offset + 1 + - PLT_LONGBRANCH_ENTRY_WORDS))); + plt[index + rel_offset_words] = finaladdr; } } - break; - - default: - assert (! "unexpected dynamic reloc type"); + MODIFIED_CODE(reloc_addr); } + else + assert (! "unexpected dynamic reloc type"); + + if (rinfo == R_PPC_ADDR16_LO || + rinfo == R_PPC_ADDR16_HI || + rinfo == R_PPC_ADDR16_HA || + rinfo == R_PPC_REL24 || + rinfo == R_PPC_ADDR24) + MODIFIED_CODE_NOQUEUE (reloc_addr); } #define ELF_MACHINE_NO_REL 1 @@ -282,64 +296,58 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, /* Set up the loaded object described by L so its unrelocated PLT entries will jump to the on-demand fixup code in dl-runtime.c. */ - -/* This code does not presently work if there are more than 2^13 PLT - entries. */ static inline void elf_machine_runtime_setup (struct link_map *map, int lazy) { - Elf32_Word *plt; - int i; - Elf32_Word num_plt_entries; - Elf32_Word rel_offset_words; - extern void _dl_runtime_resolve (void); - if (map->l_info[DT_JMPREL]) { + int i; /* Fill in the PLT. Its initial contents are directed to a function earlier in the PLT which arranges for the dynamic linker to be called back. */ - plt = (Elf32_Word *) ((char *) map->l_addr + - map->l_info[DT_PLTGOT]->d_un.d_val); - num_plt_entries = (map->l_info[DT_PLTRELSZ]->d_un.d_val - / sizeof (Elf32_Rela)); - rel_offset_words = PLT_INITIAL_ENTRY_WORDS + num_plt_entries * 2; + Elf32_Word *plt = (Elf32_Word *) ((char *) map->l_addr + + map->l_info[DT_PLTGOT]->d_un.d_val); + Elf32_Word num_plt_entries = (map->l_info[DT_PLTRELSZ]->d_un.d_val + / sizeof (Elf32_Rela)); + Elf32_Word rel_offset_words = PLT_DATA_START_WORDS (num_plt_entries); + extern void _dl_runtime_resolve (void); + Elf32_Word size_modified; if (lazy) for (i = 0; i < num_plt_entries; i++) - if (i >= (1 << 13)) - { - plt[i * 2 + (i - (1 << 13)) * 2 + PLT_INITIAL_ENTRY_WORDS] = - OPCODE_LI (11, i * 4); - plt[i * 2 + (i - (1 << 13)) * 2 + 1 + PLT_INITIAL_ENTRY_WORDS] = - OPCODE_ADDIS (11, 11, i * 4 + 0x8000 >> 16); - plt[i * 2 + (i - (1 << 13)) * 2 + 2 + PLT_INITIAL_ENTRY_WORDS] = - OPCODE_B (-(4 * ( i * 2 + 1 + PLT_INITIAL_ENTRY_WORDS))); - } - else - { - plt[i * 2 + PLT_INITIAL_ENTRY_WORDS] = OPCODE_LI (11, i * 4); - plt[i * 2 + 1 + PLT_INITIAL_ENTRY_WORDS] = - OPCODE_B (-(4 * (i * 2 + 1 + PLT_INITIAL_ENTRY_WORDS))); - } - - /* Multiply index of entry, by 0xC. */ - plt[0] = OPCODE_SLWI (12, 11, 1); - plt[1] = OPCODE_ADD (11, 12, 11); - if ((Elf32_Word) (char *) _dl_runtime_resolve <= 0x01fffffc || - (Elf32_Word) (char *) _dl_runtime_resolve >= 0xfe000000) { - plt[2] = OPCODE_LI (12, (Elf32_Word) (char *) map); - plt[3] = OPCODE_ADDIS (12, 12, - (Elf32_Word) (char *) map + 0x8000 >> 16); - plt[4] = OPCODE_BA ((Elf32_Word) (char *) _dl_runtime_resolve); + Elf32_Word offset = PLT_ENTRY_START_WORDS(i); + + if (i >= PLT_DOUBLE_SIZE) + { + plt[offset ] = OPCODE_LI (11, i * 4); + plt[offset+1] = OPCODE_ADDIS (11, 11, i * 4 + 0x8000 >> 16); + plt[offset+2] = OPCODE_B (-(4 * (offset + 2))); + } + else + { + plt[offset ] = OPCODE_LI (11, i * 4); + plt[offset+1] = OPCODE_B(-(4 * (offset + 1))); + } + + /* Multiply index of entry, by 0xC. */ + plt[0] = OPCODE_SLWI (12, 11, 1); + plt[1] = OPCODE_ADD (11, 12, 11); + if ((Elf32_Word) (char *) _dl_runtime_resolve <= 0x01fffffc + || (Elf32_Word) (char *) _dl_runtime_resolve >= 0xfe000000) + { + plt[2] = OPCODE_LI (12, (Elf32_Word) (char *) map); + plt[3] = OPCODE_ADDIS (12, 12, + (Elf32_Word) (char *) map + 0x8000 >> 16); + plt[4] = OPCODE_BA ((Elf32_Word) (char *) _dl_runtime_resolve); + } } else { plt[2] = OPCODE_LI (12, (Elf32_Word) (char *) _dl_runtime_resolve); - plt[3] = OPCODE_ADDIS (12, 12, 0x8000 + - ((Elf32_Word) (char *) _dl_runtime_resolve - >> 16)); + plt[3] = OPCODE_ADDIS(12, 12, 0x8000 + + ((Elf32_Word) (char *) _dl_runtime_resolve + >> 16)); plt[4] = OPCODE_MTCTR (12); plt[5] = OPCODE_LI (12, (Elf32_Word) (char *) map); plt[6] = OPCODE_ADDIS (12, 12, ((Elf32_Word) (char *) map @@ -347,21 +355,28 @@ elf_machine_runtime_setup (struct link_map *map, int lazy) plt[7] = OPCODE_BCTR (); } plt[PLT_LONGBRANCH_ENTRY_WORDS] = - OPCODE_ADDIS (11, 11, ((Elf32_Word) (char*) (plt+rel_offset_words) - + 0x8000 >> 16)); + OPCODE_ADDIS (11, 11, (Elf32_Word) (char*) (plt + rel_offset_words) + + 0x8000 >> 16); plt[PLT_LONGBRANCH_ENTRY_WORDS+1] = - OPCODE_LWZ (11, (Elf32_Word) (char*) (plt + rel_offset_words), 11); + OPCODE_LWZ(11,(Elf32_Word)(char*)(plt+rel_offset_words),11); plt[PLT_LONGBRANCH_ENTRY_WORDS+2] = OPCODE_MTCTR (11); plt[PLT_LONGBRANCH_ENTRY_WORDS+3] = OPCODE_BCTR (); + + size_modified = lazy ? rel_offset_words : PLT_INITIAL_ENTRY_WORDS; + /* Now we need to keep the caches in sync. */ + for (i = 0; i < size_modified; i+=8) + PPC_DCBST (plt + i); + PPC_SYNC; + for (i = 0; i < size_modified; i+=8) + PPC_ICBI (plt + i); + PPC_ISYNC; } } static inline void elf_machine_lazy_rel (struct link_map *map, const Elf32_Rela *reloc) { - if (ELF32_R_TYPE (reloc->r_info) != R_PPC_JMP_SLOT) - assert (! "unexpected PLT reloc type"); - + assert (ELF32_R_TYPE (reloc->r_info) == R_PPC_JMP_SLOT); /* elf_machine_runtime_setup handles this. */ } @@ -369,17 +384,23 @@ elf_machine_lazy_rel (struct link_map *map, const Elf32_Rela *reloc) #define elf_machine_relplt elf_machine_rela /* This code is used in dl-runtime.c to call the `fixup' function - and then redirect to the address it returns. */ + and then redirect to the address it returns. It is called + from code built in the PLT by elf_machine_runtime_setup. */ #define ELF_MACHINE_RUNTIME_TRAMPOLINE asm ("\ .section \".text\" + .align 2 .globl _dl_runtime_resolve + .type _dl_runtime_resolve,@function _dl_runtime_resolve: + # We need to save the registers used to pass parameters. + # We build a stack frame to put them in. stwu 1,-48(1) mflr 0 stw 3,16(1) stw 4,20(1) stw 0,52(1) stw 5,24(1) + # We also need to save some of the condition register fields. mfcr 0 stw 6,28(1) stw 7,32(1) @@ -387,10 +408,13 @@ _dl_runtime_resolve: stw 9,40(1) stw 10,44(1) stw 0,12(1) + # The code that calls this has put parameters for `fixup' in r12 and r11. mr 3,12 mr 4,11 bl fixup + # 'fixup' returns the address we want to branch to. mtctr 3 + # Put the registers back... lwz 0,52(1) lwz 10,44(1) lwz 9,40(1) @@ -403,37 +427,40 @@ _dl_runtime_resolve: lwz 5,24(1) lwz 4,20(1) lwz 3,16(1) + # ...unwind the stack frame, and jump to the PLT entry we updated. addi 1,1,48 bctr +0: + .size _dl_runtime_resolve,0b-_dl_runtime_resolve + # undo '.section text'. + .previous "); /* Initial entry point code for the dynamic linker. The C function `_dl_start' is the real entry point; - its return value is the user program's entry point. */ - -/* FIXME! We don't make provision for calling _dl_fini, - because Linux/PPC is somewhat broken. */ + its return value is the user program's entry point. */ #define RTLD_START \ asm ("\ .text - .align 2 + .align 2 .globl _start - .type _start,@function + .type _start,@function _start: # We start with the following on the stack, from top: # argc (4 bytes) # arguments for program (terminated by NULL) # environment variables (terminated by NULL) # arguments for the program loader + # FIXME: perhaps this should do the same trick as elf/start.c? # Call _dl_start with one parameter pointing at argc - mr 3,1 + mr 3,1 # (we have to frob the stack pointer a bit to allow room for # _dl_start to save the link register) - li 4,0 - addi 1,1,-16 - stw 4,0(1) - bl _dl_start@local + li 4,0 + addi 1,1,-16 + stw 4,0(1) + bl _dl_start@local # Now, we do our main work of calling initialisation procedures. # The ELF ABI doesn't say anything about parameters for these, @@ -442,10 +469,10 @@ _start: # passed by value!). # put our GOT pointer in r31 - bl _GLOBAL_OFFSET_TABLE_-4@local - mflr 31 + bl _GLOBAL_OFFSET_TABLE_-4@local + mflr 31 # the address of _start in r30 - mr 30,3 + mr 30,3 # &_dl_argc in 29, &_dl_argv in 27, and _dl_default_scope in 28 lwz 28,_dl_default_scope@got(31) lwz 29,_dl_argc@got(31) @@ -499,13 +526,19 @@ _start: mtlr 0 # and also clear _dl_starting_up lwz 26,_dl_starting_up@got(31) - stw 0,0(3) + stw 0,0(26) # go do it! bctr +0: + .size _start,0b-_start + # undo '.section text'. + .previous "); -#define ELF_PREFERRED_ADDRESS_DATA static ElfW(Addr) _dl_preferred_address = 0; -#define ELF_PREFERRED_ADDRESS(loader, maplength, mapstartpref) \ +#define ELF_PREFERRED_ADDRESS_DATA \ +static ElfW(Addr) _dl_preferred_address = 0; + +#define ELF_PREFERRED_ADDRESS(loader, maplength, mapstartpref) \ ( { \ ElfW(Addr) prefd; \ if (mapstartpref != 0 && _dl_preferred_address == 0) \ @@ -516,13 +549,14 @@ _start: prefd = 0; \ else \ prefd = _dl_preferred_address = \ - (_dl_preferred_address - maplength - 0x10000) & \ - ~(_dl_pagesize - 1); \ + ((_dl_preferred_address - maplength - 0x10000) \ + & ~(_dl_pagesize - 1)); \ prefd; \ } ) -#define ELF_FIXED_ADDRESS(loader, mapstart) \ + +#define ELF_FIXED_ADDRESS(loader, mapstart) \ ( { \ - if (mapstart != 0 && _dl_preferred_address == 0) \ + if (mapstart != 0 && _dl_preferred_address < mapstart) \ _dl_preferred_address = mapstart; \ } ) |