about summary refs log tree commit diff
path: root/sysdeps/s390/s390-32/dl-trampoline.h
diff options
context:
space:
mode:
authorStefan Liebler <stli@linux.vnet.ibm.com>2016-03-31 17:37:16 +0200
committerStefan Liebler <stli@linux.vnet.ibm.com>2016-03-31 17:37:16 +0200
commit4603c51ef7989d7eb800cdd6f42aab206f891077 (patch)
treec15dc0e82a1b8b16eac6fdef4fef8c70450921eb /sysdeps/s390/s390-32/dl-trampoline.h
parente91bd7465816f474617dcb4bbfe72f3594c5783c (diff)
downloadglibc-4603c51ef7989d7eb800cdd6f42aab206f891077.tar.gz
glibc-4603c51ef7989d7eb800cdd6f42aab206f891077.tar.xz
glibc-4603c51ef7989d7eb800cdd6f42aab206f891077.zip
S390: Save and restore fprs/vrs while resolving symbols.
On s390, no fpr/vrs were saved while resolving a symbol
via _dl_runtime_resolve/_dl_runtime_profile.

According to the abi, the fpr-arguments are defined as call clobbered.
In leaf-functions, gcc 4.9 and newer can use fprs for saving/restoring gprs
instead of saving them to the stack.
If gcc do this in one of the resolver-functions, then the floating point
arguments of a library-function are invalid for the first library-function-call.
Thus, this patch saves/restores the fprs around the resolving code.

The same could occur for vector registers. Furthermore an ifunc-resolver
could also clobber the vector/floating point argument registers.
Thus this patch provides the further variants _dl_runtime_resolve_vx/
_dl_runtime_profile_vx, which are used if the kernel claims, that
we run on a machine with vector registers.

Furthermore, if _dl_runtime_profile calls _dl_call_pltexit,
the pointers to inregs-/outregs-structs were setup invalid.
Now they point to the correct location in the stack-frame.
Before branching back to the caller, the return values are now
restored instead of containing the return values of the
_dl_call_pltexit() call.
On s390-32, an endless loop occurs if _dl_call_pltexit() should be called.
Now, this code-path branches to this function instead of just after the
preceding basr-instruction.

ChangeLog:

	* sysdeps/s390/s390-32/dl-trampoline.S: Include dl-trampoline.h twice
	to create a non-vector/vector version for _dl_runtime_resolve and
	_dl_runtime_profile. Move implementation to ...
	* sysdeps/s390/s390-32/dl-trampoline.h: ... here.
	(_dl_runtime_resolve) Save and restore fpr/vrs.
	(_dl_runtime_profile) Save and restore vrs and fix some issues
	if _dl_call_pltexit is called.
	* sysdeps/s390/s390-32/dl-machine.h (elf_machine_runtime_setup):
	Choose the correct resolver function if running on a machine with vx.
	* sysdeps/s390/s390-64/dl-trampoline.S: Include dl-trampoline.h twice
	to create a non-vector/vector version for _dl_runtime_resolve and
	_dl_runtime_profile. Move implementation to ...
	* sysdeps/s390/s390-64/dl-trampoline.h: ... here.
	(_dl_runtime_resolve) Save and restore fpr/vrs.
	(_dl_runtime_profile) Save and restore vrs and fix some issues
	* sysdeps/s390/s390-64/dl-machine.h: (elf_machine_runtime_setup):
	Choose the correct resolver function if running on a machine with vx.
Diffstat (limited to 'sysdeps/s390/s390-32/dl-trampoline.h')
-rw-r--r--sysdeps/s390/s390-32/dl-trampoline.h215
1 files changed, 215 insertions, 0 deletions
diff --git a/sysdeps/s390/s390-32/dl-trampoline.h b/sysdeps/s390/s390-32/dl-trampoline.h
new file mode 100644
index 0000000000..a152a7be49
--- /dev/null
+++ b/sysdeps/s390/s390-32/dl-trampoline.h
@@ -0,0 +1,215 @@
+/* PLT trampolines.  s390 version.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This code is used in dl-runtime.c to call the `fixup' function
+   and then redirect to the address it returns.  */
+
+/* The PLT stubs will call _dl_runtime_resolve/_dl_runtime_profile
+ * with the following linkage:
+ *   r2 - r6 : parameter registers
+ *   f0, f2 : floating point parameter registers
+ *   v24, v26, v28, v30, v25, v27, v29, v31 : vector parameter registers
+ *   24(r15), 28(r15) : PLT arguments PLT1, PLT2
+ *   96(r15) : additional stack parameters
+ * The normal clobber rules for function calls apply:
+ *   r0 - r5 : call clobbered
+ *   r6 - r13 :	call saved
+ *   r14 : return address (call clobbered)
+ *   r15 : stack pointer (call saved)
+ *   f4, f6 : call saved
+ *   f0 - f3, f5, f7 - f15 : call clobbered
+ *   v0 - v3, v5, v7 - v15 : bytes 0-7 overlap with fprs: call clobbered
+               bytes 8-15: call clobbered
+ *   v4, v6 : bytes 0-7 overlap with f4, f6: call saved
+              bytes 8-15: call clobbered
+ *   v16 - v31 : call clobbered
+ */
+
+
+	.globl _dl_runtime_resolve
+	.type _dl_runtime_resolve, @function
+	cfi_startproc
+	.align 16
+_dl_runtime_resolve:
+	stm    %r2,%r5,32(%r15)		# save registers
+	cfi_offset (r2, -64)
+	cfi_offset (r3, -60)
+	cfi_offset (r4, -56)
+	cfi_offset (r5, -52)
+	std    %f0,56(%r15)
+	cfi_offset (f0, -40)
+	std    %f2,64(%r15)
+	cfi_offset (f2, -32)
+	st     %r14,8(%r15)
+	cfi_offset (r14, -88)
+	lr     %r0,%r15
+	lm     %r2,%r3,24(%r15)		# load args saved by PLT
+#ifdef RESTORE_VRS
+	ahi    %r15,-224		# create stack frame
+	cfi_adjust_cfa_offset (224)
+	.machine push
+	.machine "z13"
+	.machinemode "zarch_nohighgprs"
+	vstm   %v24,%v31,96(%r15)	# store call-clobbered vr arguments
+	cfi_offset (v24, -224)
+	cfi_offset (v25, -208)
+	cfi_offset (v26, -192)
+	cfi_offset (v27, -176)
+	cfi_offset (v28, -160)
+	cfi_offset (v29, -144)
+	cfi_offset (v30, -128)
+	cfi_offset (v31, -112)
+	.machine pop
+#else
+	ahi    %r15,-96			# create stack frame
+	cfi_adjust_cfa_offset (96)
+#endif
+	st     %r0,0(%r15)		# write backchain
+	basr   %r1,0
+0:	l      %r14,1f-0b(%r1)
+	bas    %r14,0(%r14,%r1)		# call _dl_fixup
+	lr     %r1,%r2			# function addr returned in r2
+#ifdef RESTORE_VRS
+	.machine push
+	.machine "z13"
+	.machinemode "zarch_nohighgprs"
+	vlm    %v24,%v31,96(%r15)	# restore vector registers
+	.machine pop
+	aghi   %r15,224			# remove stack frame
+	cfi_adjust_cfa_offset (-224)
+#else
+	ahi    %r15,96			# remove stack frame
+	cfi_adjust_cfa_offset (-96)
+#endif
+	l      %r14,8(15)		# restore registers
+	ld     %f0,56(%r15)
+	ld     %f2,64(%r15)
+	lm     %r2,%r5,32(%r15)
+	br     %r1
+1:	.long  _dl_fixup - 0b
+	cfi_endproc
+	.size _dl_runtime_resolve, .-_dl_runtime_resolve
+
+
+#ifndef PROF
+	.globl _dl_runtime_profile
+	.type _dl_runtime_profile, @function
+	cfi_startproc
+	.align 16
+_dl_runtime_profile:
+	stm    %r2,%r6,32(%r15)		# save registers
+	cfi_offset (r2, -64)		# + r6 needed as arg for
+	cfi_offset (r3, -60)		#  _dl_profile_fixup
+	cfi_offset (r4, -56)
+	cfi_offset (r5, -52)
+	cfi_offset (r6, -48)
+	std    %f0,56(%r15)
+	cfi_offset (f0, -40)
+	std    %f2,64(%r15)
+	cfi_offset (f2, -32)
+	st     %r12,12(%r15)		# r12 is used as backup of r15
+	cfi_offset (r12, -84)
+	st     %r14,16(%r15)
+	cfi_offset (r14, -80)
+	lr     %r12,%r15		# backup stack pointer
+	cfi_def_cfa_register (12)
+#ifdef RESTORE_VRS
+	ahi    %r15,-224		# create stack frame
+	.machine push
+	.machine "z13"
+	.machinemode "zarch_nohighgprs"
+	vstm   %v24,%v31,96(%r15)	# store call-clobbered vr arguments
+	cfi_offset (v24, -224)
+	cfi_offset (v25, -208)
+	cfi_offset (v26, -192)
+	cfi_offset (v27, -176)
+	cfi_offset (v28, -160)
+	cfi_offset (v29, -144)
+	cfi_offset (v30, -128)
+	cfi_offset (v31, -112)
+	.machine pop
+#else
+	ahi    %r15,-96			# create stack frame
+#endif
+	st     %r12,0(%r15)		# save backchain
+	lm     %r2,%r3,24(%r12)		# load arguments saved by PLT
+	lr     %r4,%r14			# return address as third parameter
+	basr   %r1,0
+0:	l      %r14,6f-0b(%r1)
+	la     %r5,32(%r12)		# pointer to struct La_s390_32_regs
+	la     %r6,20(%r12)		# long int * framesize
+	bas    %r14,0(%r14,%r1)		# call resolver
+	lr     %r1,%r2			# function addr returned in r2
+	ld     %f0,56(%r12)		# restore call-clobbered arg fprs
+	ld     %f2,64(%r12)
+#ifdef RESTORE_VRS
+	.machine push
+	.machine "z13"
+	.machinemode "zarch_nohighgprs"
+	vlm    %v24,%v31,96(%r15)	# restore call-clobbered arg vrs
+	.machine pop
+#endif
+	icm    %r0,15,20(%r12)		# load & test framesize
+	jnm    2f
+
+	lm     %r2,%r6,32(%r12)
+	lr     %r15,%r12		# remove stack frame
+	cfi_def_cfa_register (15)
+	l      %r14,16(%r15)		# restore registers
+	l      %r12,12(%r15)
+	br     %r1			# tail-call to the resolved function
+
+	cfi_def_cfa_register (12)
+2:	jz     4f			# framesize == 0 ?
+	ahi    %r0,7			# align framesize to 8
+	lhi    %r2,-8
+	nr     %r0,%r2
+	slr    %r15,%r0			# make room for framesize bytes
+	st     %r12,0(%r15)		# save backchain
+	la     %r2,96(%r15)
+	la     %r3,96(%r12)
+	srl    %r0,3
+3:	mvc    0(8,%r2),0(%r3)		# copy additional parameters
+	la     %r2,8(%r2)
+	la     %r3,8(%r3)
+	brct   %r0,3b
+4:	lm     %r2,%r6,32(%r12)		# load register parameters
+	basr   %r14,%r1			# call resolved function
+	stm    %r2,%r3,72(%r12)		# store return values r2, r3, f0
+	std    %f0,80(%r12)		# to struct La_s390_32_retval
+	lm     %r2,%r3,24(%r12)		# load arguments saved by PLT
+	basr   %r1,0
+5:	l      %r14,7f-5b(%r1)
+	la     %r4,32(%r12)		# pointer to struct La_s390_32_regs
+	la     %r5,72(%r12)		# pointer to struct La_s390_32_retval
+	bas    %r14,0(%r14,%r1)		# call _dl_call_pltexit
+
+	lr     %r15,%r12		# remove stack frame
+	cfi_def_cfa_register (15)
+	l      %r14,16(%r15)		# restore registers
+	l      %r12,12(%r15)
+	l      %r2,72(%r15)		# restore return values
+	l      %r3,76(%r15)
+	ld     %f0,80(%r15)
+	br     %r14
+
+6:	.long  _dl_profile_fixup - 0b
+7:	.long  _dl_call_pltexit - 5b
+	cfi_endproc
+	.size _dl_runtime_profile, .-_dl_runtime_profile
+#endif