diff options
author | Igor Zamyatin <igor.zamyatin@intel.com> | 2014-04-01 10:16:04 -0700 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2014-04-09 15:38:09 -0700 |
commit | a4c75cfd56e536c2b18556e8a482d88dffa0fffc (patch) | |
tree | 209bb53676f4cee8fde2a1f9d038829dd78fe28c /sysdeps/x86_64/dl-trampoline.S | |
parent | 27822ce67fbf7f2b204992a410e7da2e8c1e2607 (diff) | |
download | glibc-a4c75cfd56e536c2b18556e8a482d88dffa0fffc.tar.gz glibc-a4c75cfd56e536c2b18556e8a482d88dffa0fffc.tar.xz glibc-a4c75cfd56e536c2b18556e8a482d88dffa0fffc.zip |
Save/restore bound registers in _dl_runtime_resolve
This patch saves and restores bound registers in symbol lookup for x86-64: 1. Branches without BND prefix clear bound registers. 2. x86-64 pass bounds in bound registers as specified in MPX psABI extension on hjl/mpx/master branch at https://github.com/hjl-tools/x86-64-psABI https://groups.google.com/forum/#!topic/x86-64-abi/KFsB0XTgWYc Binutils has been updated to create an alternate PLT to add BND prefix when branching to ld.so. * config.h.in (HAVE_MPX_SUPPORT): New #undef. * sysdeps/x86_64/configure.ac: Set HAVE_MPX_SUPPORT. * sysdeps/x86_64/configure: Regenerated. * sysdeps/x86_64/dl-trampoline.S (REGISTER_SAVE_AREA): New macro. (REGISTER_SAVE_RAX): Likewise. (REGISTER_SAVE_RCX): Likewise. (REGISTER_SAVE_RDX): Likewise. (REGISTER_SAVE_RSI): Likewise. (REGISTER_SAVE_RDI): Likewise. (REGISTER_SAVE_R8): Likewise. (REGISTER_SAVE_R9): Likewise. (REGISTER_SAVE_BND0): Likewise. (REGISTER_SAVE_BND1): Likewise. (REGISTER_SAVE_BND2): Likewise. (_dl_runtime_resolve): Use them. Save and restore Intel MPX bound registers when calling _dl_fixup.
Diffstat (limited to 'sysdeps/x86_64/dl-trampoline.S')
-rw-r--r-- | sysdeps/x86_64/dl-trampoline.S | 99 |
1 files changed, 79 insertions, 20 deletions
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S index 77c4d0f147..3c0f54274a 100644 --- a/sysdeps/x86_64/dl-trampoline.S +++ b/sysdeps/x86_64/dl-trampoline.S @@ -24,6 +24,30 @@ # error RTLD_SAVESPACE_SSE must be aligned to 32 bytes #endif +/* Area on stack to save and restore registers used for parameter + passing when calling _dl_fixup. */ +#ifdef __ILP32__ +/* X32 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX. */ +# define REGISTER_SAVE_AREA (8 * 7) +# define REGISTER_SAVE_RAX 0 +#else +/* X86-64 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as BND0, + BND1, BND2, BND3. */ +# define REGISTER_SAVE_AREA (8 * 7 + 16 * 4) +/* Align bound register save area to 16 bytes. */ +# define REGISTER_SAVE_BND0 0 +# define REGISTER_SAVE_BND1 (REGISTER_SAVE_BND0 + 16) +# define REGISTER_SAVE_BND2 (REGISTER_SAVE_BND1 + 16) +# define REGISTER_SAVE_BND3 (REGISTER_SAVE_BND2 + 16) +# define REGISTER_SAVE_RAX (REGISTER_SAVE_BND3 + 16) +#endif +#define REGISTER_SAVE_RCX (REGISTER_SAVE_RAX + 8) +#define REGISTER_SAVE_RDX (REGISTER_SAVE_RCX + 8) +#define REGISTER_SAVE_RSI (REGISTER_SAVE_RDX + 8) +#define REGISTER_SAVE_RDI (REGISTER_SAVE_RSI + 8) +#define REGISTER_SAVE_R8 (REGISTER_SAVE_RDI + 8) +#define REGISTER_SAVE_R9 (REGISTER_SAVE_R8 + 8) + .text .globl _dl_runtime_resolve .type _dl_runtime_resolve, @function @@ -31,28 +55,63 @@ cfi_startproc _dl_runtime_resolve: cfi_adjust_cfa_offset(16) # Incorporate PLT - subq $56,%rsp - cfi_adjust_cfa_offset(56) - movq %rax,(%rsp) # Preserve registers otherwise clobbered. - movq %rcx, 8(%rsp) - movq %rdx, 16(%rsp) - movq %rsi, 24(%rsp) - movq %rdi, 32(%rsp) - movq %r8, 40(%rsp) - movq %r9, 48(%rsp) - movq 64(%rsp), %rsi # Copy args pushed by PLT in register. - movq 56(%rsp), %rdi # %rdi: link_map, %rsi: reloc_index + subq $REGISTER_SAVE_AREA,%rsp + cfi_adjust_cfa_offset(REGISTER_SAVE_AREA) + # Preserve registers otherwise clobbered. + movq %rax, REGISTER_SAVE_RAX(%rsp) + movq %rcx, REGISTER_SAVE_RCX(%rsp) + movq %rdx, REGISTER_SAVE_RDX(%rsp) + movq %rsi, REGISTER_SAVE_RSI(%rsp) + movq %rdi, REGISTER_SAVE_RDI(%rsp) + movq %r8, REGISTER_SAVE_R8(%rsp) + movq %r9, REGISTER_SAVE_R9(%rsp) +#ifndef __ILP32__ + # We also have to preserve bound registers. These are nops if + # Intel MPX isn't available or disabled. +# ifdef HAVE_MPX_SUPPORT + bndmov %bnd0, REGISTER_SAVE_BND0(%rsp) + bndmov %bnd1, REGISTER_SAVE_BND1(%rsp) + bndmov %bnd2, REGISTER_SAVE_BND2(%rsp) + bndmov %bnd3, REGISTER_SAVE_BND3(%rsp) +# else + .byte 0x66,0x0f,0x1b,0x44,0x24,REGISTER_SAVE_BND0 + .byte 0x66,0x0f,0x1b,0x4c,0x24,REGISTER_SAVE_BND1 + .byte 0x66,0x0f,0x1b,0x54,0x24,REGISTER_SAVE_BND2 + .byte 0x66,0x0f,0x1b,0x5c,0x24,REGISTER_SAVE_BND3 +# endif +#endif + # Copy args pushed by PLT in register. + # %rdi: link_map, %rsi: reloc_index + movq (REGISTER_SAVE_AREA + 8)(%rsp), %rsi + movq REGISTER_SAVE_AREA(%rsp), %rdi call _dl_fixup # Call resolver. movq %rax, %r11 # Save return value - movq 48(%rsp), %r9 # Get register content back. - movq 40(%rsp), %r8 - movq 32(%rsp), %rdi - movq 24(%rsp), %rsi - movq 16(%rsp), %rdx - movq 8(%rsp), %rcx - movq (%rsp), %rax - addq $72, %rsp # Adjust stack(PLT did 2 pushes) - cfi_adjust_cfa_offset(-72) +#ifndef __ILP32__ + # Restore bound registers. These are nops if Intel MPX isn't + # avaiable or disabled. +# ifdef HAVE_MPX_SUPPORT + bndmov REGISTER_SAVE_BND3(%rsp), %bnd3 + bndmov REGISTER_SAVE_BND2(%rsp), %bnd2 + bndmov REGISTER_SAVE_BND1(%rsp), %bnd1 + bndmov REGISTER_SAVE_BND0(%rsp), %bnd0 +# else + .byte 0x66,0x0f,0x1a,0x5c,0x24,REGISTER_SAVE_BND3 + .byte 0x66,0x0f,0x1a,0x54,0x24,REGISTER_SAVE_BND2 + .byte 0x66,0x0f,0x1a,0x4c,0x24,REGISTER_SAVE_BND1 + .byte 0x66,0x0f,0x1a,0x44,0x24,REGISTER_SAVE_BND0 +# endif +#endif + # Get register content back. + movq REGISTER_SAVE_R9(%rsp), %r9 + movq REGISTER_SAVE_R8(%rsp), %r8 + movq REGISTER_SAVE_RDI(%rsp), %rdi + movq REGISTER_SAVE_RSI(%rsp), %rsi + movq REGISTER_SAVE_RDX(%rsp), %rdx + movq REGISTER_SAVE_RCX(%rsp), %rcx + movq REGISTER_SAVE_RAX(%rsp), %rax + # Adjust stack(PLT did 2 pushes) + addq $(REGISTER_SAVE_AREA + 16), %rsp + cfi_adjust_cfa_offset(-(REGISTER_SAVE_AREA + 16)) jmp *%r11 # Jump to function address. cfi_endproc .size _dl_runtime_resolve, .-_dl_runtime_resolve |