diff options
author | Stefan Liebler <stli@linux.vnet.ibm.com> | 2016-03-31 17:37:16 +0200 |
---|---|---|
committer | Stefan Liebler <stli@linux.vnet.ibm.com> | 2016-03-31 17:37:16 +0200 |
commit | 4603c51ef7989d7eb800cdd6f42aab206f891077 (patch) | |
tree | c15dc0e82a1b8b16eac6fdef4fef8c70450921eb /sysdeps/s390/s390-32/dl-trampoline.S | |
parent | e91bd7465816f474617dcb4bbfe72f3594c5783c (diff) | |
download | glibc-4603c51ef7989d7eb800cdd6f42aab206f891077.tar.gz glibc-4603c51ef7989d7eb800cdd6f42aab206f891077.tar.xz glibc-4603c51ef7989d7eb800cdd6f42aab206f891077.zip |
S390: Save and restore fprs/vrs while resolving symbols.
On s390, no fpr/vrs were saved while resolving a symbol via _dl_runtime_resolve/_dl_runtime_profile. According to the abi, the fpr-arguments are defined as call clobbered. In leaf-functions, gcc 4.9 and newer can use fprs for saving/restoring gprs instead of saving them to the stack. If gcc do this in one of the resolver-functions, then the floating point arguments of a library-function are invalid for the first library-function-call. Thus, this patch saves/restores the fprs around the resolving code. The same could occur for vector registers. Furthermore an ifunc-resolver could also clobber the vector/floating point argument registers. Thus this patch provides the further variants _dl_runtime_resolve_vx/ _dl_runtime_profile_vx, which are used if the kernel claims, that we run on a machine with vector registers. Furthermore, if _dl_runtime_profile calls _dl_call_pltexit, the pointers to inregs-/outregs-structs were setup invalid. Now they point to the correct location in the stack-frame. Before branching back to the caller, the return values are now restored instead of containing the return values of the _dl_call_pltexit() call. On s390-32, an endless loop occurs if _dl_call_pltexit() should be called. Now, this code-path branches to this function instead of just after the preceding basr-instruction. ChangeLog: * sysdeps/s390/s390-32/dl-trampoline.S: Include dl-trampoline.h twice to create a non-vector/vector version for _dl_runtime_resolve and _dl_runtime_profile. Move implementation to ... * sysdeps/s390/s390-32/dl-trampoline.h: ... here. (_dl_runtime_resolve) Save and restore fpr/vrs. (_dl_runtime_profile) Save and restore vrs and fix some issues if _dl_call_pltexit is called. * sysdeps/s390/s390-32/dl-machine.h (elf_machine_runtime_setup): Choose the correct resolver function if running on a machine with vx. * sysdeps/s390/s390-64/dl-trampoline.S: Include dl-trampoline.h twice to create a non-vector/vector version for _dl_runtime_resolve and _dl_runtime_profile. Move implementation to ... * sysdeps/s390/s390-64/dl-trampoline.h: ... here. (_dl_runtime_resolve) Save and restore fpr/vrs. (_dl_runtime_profile) Save and restore vrs and fix some issues * sysdeps/s390/s390-64/dl-machine.h: (elf_machine_runtime_setup): Choose the correct resolver function if running on a machine with vx.
Diffstat (limited to 'sysdeps/s390/s390-32/dl-trampoline.S')
-rw-r--r-- | sysdeps/s390/s390-32/dl-trampoline.S | 134 |
1 files changed, 11 insertions, 123 deletions
diff --git a/sysdeps/s390/s390-32/dl-trampoline.S b/sysdeps/s390/s390-32/dl-trampoline.S index 1645610383..859183caf5 100644 --- a/sysdeps/s390/s390-32/dl-trampoline.S +++ b/sysdeps/s390/s390-32/dl-trampoline.S @@ -16,130 +16,18 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -/* This code is used in dl-runtime.c to call the `fixup' function - and then redirect to the address it returns. */ - -/* The PLT stubs will call _dl_runtime_resolve/_dl_runtime_profile - * with the following linkage: - * r2 - r6 : parameter registers - * f0, f2 : floating point parameter registers - * 24(r15), 28(r15) : PLT arguments PLT1, PLT2 - * 96(r15) : additional stack parameters - * The normal clobber rules for function calls apply: - * r0 - r5 : call clobbered - * r6 - r13 : call saved - * r14 : return address (call clobbered) - * r15 : stack pointer (call saved) - * f4, f6 : call saved - * f0 - f3, f5, f7 - f15 : call clobbered - */ - #include <sysdep.h> .text - .globl _dl_runtime_resolve - .type _dl_runtime_resolve, @function - cfi_startproc - .align 16 -_dl_runtime_resolve: - stm %r2,%r5,32(%r15) # save registers - st %r14,8(%r15) - cfi_offset (r14, -88) - lr %r0,%r15 # create stack frame - ahi %r15,-96 - cfi_adjust_cfa_offset (96) - st 0,0(%r15) - lm %r2,%r3,120(%r15) # load args saved by PLT - basr %r1,0 -0: l %r14,1f-0b(%r1) - bas %r14,0(%r14,%r1) # call resolver - lr %r1,%r2 # function addr returned in r2 - ahi %r15,96 # remove stack frame - cfi_adjust_cfa_offset (-96) - l %r14,8(15) # restore registers - lm %r2,%r5,32(%r15) - br %r1 -1: .long _dl_fixup - 0b - cfi_endproc - .size _dl_runtime_resolve, .-_dl_runtime_resolve - - -#ifndef PROF - .globl _dl_runtime_profile - .type _dl_runtime_profile, @function - cfi_startproc - .align 16 -_dl_runtime_profile: - stm %r2,%r6,32(%r15) # save registers - std %f0,56(%r15) - std %f2,64(%r15) - st %r6,8(%r15) - st %r12,12(%r15) - st %r14,16(%r15) - cfi_offset (r6, -64) - cfi_offset (f0, -40) - cfi_offset (f2, -32) - cfi_offset (r12, -84) - cfi_offset (r14, -80) - lr %r12,%r15 # create stack frame - cfi_def_cfa_register (12) - ahi %r15,-96 - st %r12,0(%r15) - lm %r2,%r3,24(%r12) # load arguments saved by PLT - lr %r4,%r14 # return address as third parameter - basr %r1,0 -0: l %r14,6f-0b(%r1) - la %r5,32(%r12) # pointer to struct La_s390_32_regs - la %r6,20(%r12) # long int * framesize - bas %r14,0(%r14,%r1) # call resolver - lr %r1,%r2 # function addr returned in r2 - icm %r0,15,20(%r12) # load & test framesize - jnm 2f - - lm %r2,%r6,32(%r12) - ld %f0,56(%r12) - ld %f2,64(%r12) - lr %r15,%r12 # remove stack frame - cfi_def_cfa_register (15) - l %r14,16(%r15) # restore registers - l %r12,12(%r15) - br %r1 # tail-call to the resolved function - - cfi_def_cfa_register (12) -2: jz 4f # framesize == 0 ? - ahi %r0,7 # align framesize to 8 - lhi %r2,-8 - nr %r0,%r2 - slr %r15,%r0 # make room for framesize bytes - st %r12,0(%r15) - la %r2,96(%r15) - la %r3,96(%r12) - srl %r0,3 -3: mvc 0(8,%r2),0(%r3) # copy additional parameters - la %r2,8(%r2) - la %r3,8(%r3) - brct %r0,3b -4: lm %r2,%r6,32(%r12) # load register parameters - ld %f0,56(%r12) - ld %f2,64(%r12) - basr %r14,%r1 # call resolved function - stm %r2,%r3,72(%r12) - std %f0,80(%r12) - lm %r2,%r3,24(%r12) # load arguments saved by PLT - basr %r1,0 -5: l %r14,7f-5b(%r1) - la %r4,32(%r12) # pointer to struct La_s390_32_regs - la %r5,72(%r12) # pointer to struct La_s390_32_retval - basr %r14,%r1 # call _dl_call_pltexit - - lr %r15,%r12 # remove stack frame - cfi_def_cfa_register (15) - l %r14,16(%r15) # restore registers - l %r12,12(%r15) - br %r14 - -6: .long _dl_profile_fixup - 0b -7: .long _dl_call_pltexit - 5b - cfi_endproc - .size _dl_runtime_profile, .-_dl_runtime_profile +/* Create variant of _dl_runtime_resolve/profile for machines before z13. + No vector registers are saved/restored. */ +#include <dl-trampoline.h> + +#if defined HAVE_S390_VX_ASM_SUPPORT +/* Create variant of _dl_runtime_resolve/profile for z13 and newer. + The vector registers are saved/restored, too.*/ +# define _dl_runtime_resolve _dl_runtime_resolve_vx +# define _dl_runtime_profile _dl_runtime_profile_vx +# define RESTORE_VRS +# include <dl-trampoline.h> #endif |