Diffstat (limited to 'sysdeps/x86_64/dl-trampoline.S')
 -rw-r--r--  sysdeps/x86_64/dl-trampoline.S | 179
 1 file changed, 55 insertions(+), 124 deletions(-)
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index 33e6115f7b..f605351f30 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -17,7 +17,9 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
+#include <config.h>
 #include <sysdep.h>
+#include <link-defines.h>
 
 	.text
 	.globl _dl_runtime_resolve
@@ -89,135 +91,64 @@ _dl_runtime_profile:
 
 	/* Actively align the La_x86_64_regs structure.  */
 	andq $0xfffffffffffffff0, %rsp
-	subq $192, %rsp		# sizeof(La_x86_64_regs)
+# ifdef HAVE_AVX_SUPPORT
+	/* sizeof(La_x86_64_regs).  Need extra space for 8 SSE registers
+	   to detect whether any of the xmm0-xmm7 registers are changed
+	   by the audit module.  */
+	subq $(LR_SIZE + XMM_SIZE*8), %rsp
+# else
+	subq $LR_SIZE, %rsp		# sizeof(La_x86_64_regs)
+# endif
 	movq %rsp, 24(%rbx)
 
-	movq %rdx,   (%rsp)	# Fill the La_x86_64_regs structure.
-	movq %r8,   8(%rsp)
-	movq %r9,  16(%rsp)
-	movq %rcx, 24(%rsp)
-	movq %rsi, 32(%rsp)
-	movq %rdi, 40(%rsp)
-	movq %rbp, 48(%rsp)
-	leaq 48(%rbx), %rax
-	movq %rax, 56(%rsp)
-	movaps %xmm0,  64(%rsp)
-	movaps %xmm1,  80(%rsp)
-	movaps %xmm2,  96(%rsp)
-	movaps %xmm3, 112(%rsp)
-	movaps %xmm4, 128(%rsp)
-	movaps %xmm5, 144(%rsp)
-	movaps %xmm6, 160(%rsp)
-	movaps %xmm7, 176(%rsp)
-
-	movq %rsp, %rcx		# La_x86_64_regs pointer to %rcx.
-	movq 48(%rbx), %rdx	# Load return address if needed.
-	movq 40(%rbx), %rsi	# Copy args pushed by PLT in register.
-	movq 32(%rbx), %rdi	# %rdi: link_map, %rsi: reloc_index
-	leaq 16(%rbx), %r8
-	call _dl_profile_fixup	# Call resolver.
-
-	movq %rax, %r11		# Save return value.
-
-	movq 8(%rbx), %rax	# Get back register content.
-	movq      (%rsp), %rdx
-	movq     8(%rsp), %r8
-	movq    16(%rsp), %r9
-	movaps  64(%rsp), %xmm0
-	movaps  80(%rsp), %xmm1
-	movaps  96(%rsp), %xmm2
-	movaps 112(%rsp), %xmm3
-	movaps 128(%rsp), %xmm4
-	movaps 144(%rsp), %xmm5
-	movaps 160(%rsp), %xmm6
-	movaps 176(%rsp), %xmm7
-
-	movq 16(%rbx), %r10	# Anything in framesize?
-	testq %r10, %r10
-	jns 1f
-
-	/* There's nothing in the frame size, so there
-	   will be no call to the _dl_call_pltexit. */
-
-	movq 24(%rsp), %rcx	# Get back registers content.
-	movq 32(%rsp), %rsi
-	movq 40(%rsp), %rdi
-
-	movq %rbx, %rsp
-	movq (%rsp), %rbx
-	cfi_restore(rbx)
-	cfi_def_cfa_register(%rsp)
-
-	addq $48, %rsp		# Adjust the stack to the return value
-				# (eats the reloc index and link_map)
-	cfi_adjust_cfa_offset(-48)
-	jmp *%r11		# Jump to function address.
+	/* Fill the La_x86_64_regs structure.  */
+	movq %rdx, LR_RDX_OFFSET(%rsp)
+	movq %r8,  LR_R8_OFFSET(%rsp)
+	movq %r9,  LR_R9_OFFSET(%rsp)
+	movq %rcx, LR_RCX_OFFSET(%rsp)
+	movq %rsi, LR_RSI_OFFSET(%rsp)
+	movq %rdi, LR_RDI_OFFSET(%rsp)
+	movq %rbp, LR_RBP_OFFSET(%rsp)
 
-1:
-	cfi_adjust_cfa_offset(48)
-	cfi_rel_offset(%rbx, 0)
-	cfi_def_cfa_register(%rbx)
+# ifdef HAVE_AVX_SUPPORT
+	jmp *L(save_and_restore_vector)(%rip)
 
-	/* At this point we need to prepare new stack for the function
-	   which has to be called.  We copy the original stack to a
-	   temporary buffer of the size specified by the 'framesize'
-	   returned from _dl_profile_fixup */
-
-	leaq 56(%rbx), %rsi	# stack
-	addq $8, %r10
-	andq $0xfffffffffffffff0, %r10
-	movq %r10, %rcx
-	subq %r10, %rsp
-	movq %rsp, %rdi
-	shrq $3, %rcx
-	rep
-	movsq
-
-	movq 24(%rdi), %rcx	# Get back register content.
-	movq 32(%rdi), %rsi
-	movq 40(%rdi), %rdi
-
-	call *%r11
-
-	mov 24(%rbx), %rsp	# Drop the copied stack content
-
-	/* Now we have to prepare the La_x86_64_retval structure for the
-	   _dl_call_pltexit.  The La_x86_64_regs is being pointed by rsp now,
-	   so we just need to allocate the sizeof(La_x86_64_retval) space on
-	   the stack, since the alignment has already been taken care of. */
-
-	subq $80, %rsp		# sizeof(La_x86_64_retval)
-	movq %rsp, %rcx		# La_x86_64_retval argument to %rcx.
-
-	movq %rax, (%rcx)	# Fill in the La_x86_64_retval structure.
-	movq %rdx, 8(%rcx)
-	movaps %xmm0, 16(%rcx)
-	movaps %xmm1, 32(%rcx)
-	fstpt 48(%rcx)
-	fstpt 64(%rcx)
-
-	movq 24(%rbx), %rdx	# La_x86_64_regs argument to %rdx.
-	movq 40(%rbx), %rsi	# Copy args pushed by PLT in register.
-        movq 32(%rbx), %rdi	# %rdi: link_map, %rsi: reloc_index
-	call _dl_call_pltexit
-
-	movq  (%rsp), %rax	# Restore return registers.
-	movq 8(%rsp), %rdx
-	movaps 16(%rsp), %xmm0
-	movaps 32(%rsp), %xmm1
-	fldt 64(%rsp)
-	fldt 48(%rsp)
-
-	movq %rbx, %rsp
-	movq  (%rsp), %rbx
-	cfi_restore(rbx)
-	cfi_def_cfa_register(%rsp)
-
-	addq $48, %rsp		# Adjust the stack to the return value
-				# (eats the reloc index and link_map)
-	cfi_adjust_cfa_offset(-48)
-	retq
+	.align 16
+L(save_and_restore_vector_sse):
+# endif
+
+# define MOVXMM movaps
+# include "dl-trampoline.h"
+
+# ifdef HAVE_AVX_SUPPORT
+#  undef  MOVXMM
+#  define MOVXMM vmovdqa
+#  define RESTORE_AVX
+	.align 16
+L(save_and_restore_vector_avx):
+#  include "dl-trampoline.h"
+# endif
 
 	cfi_endproc
 	.size _dl_runtime_profile, .-_dl_runtime_profile
+
+# ifdef HAVE_AVX_SUPPORT
+L(check_avx):
+	mov	%rbx,%r11		# Save rbx
+	movl	$1, %eax
+	cpuid
+	mov	%r11,%rbx		# Restore rbx
+	leaq    L(save_and_restore_vector_sse)(%rip), %rax
+	andl	$(1 << 28), %ecx	# Check if AVX is available.
+	jz	L(ret)
+	leaq    L(save_and_restore_vector_avx)(%rip), %rax
+L(ret):
+	movq	%rax,L(save_and_restore_vector)(%rip)
+	jmp	*%rax
+
+	.section .data.rel.local,"aw",@progbits
+	.align	8
+L(save_and_restore_vector):
+	.quad L(check_avx)
+# endif
 #endif
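
For readers following the new L(check_avx) stub above: on the first profiled PLT call the indirect jump through L(save_and_restore_vector) lands in L(check_avx), which runs CPUID, picks either the SSE or the AVX save/restore body, stores that address back into the .data.rel.local slot, and jumps to it, so every later call dispatches straight to the chosen body. The following is a minimal user-space C sketch of that lazy-dispatch idea, assuming GCC's <cpuid.h>; the identifiers save_restore_sse, save_restore_avx, save_restore and check_avx are illustrative stand-ins for the assembly labels, not glibc symbols.

/* A minimal sketch of the lazy dispatch pattern used by the trampoline,
   assuming GCC's <cpuid.h>.  The names below are illustrative stand-ins
   for L(save_and_restore_vector_sse), L(save_and_restore_vector_avx),
   L(save_and_restore_vector) and L(check_avx); they are not part of glibc.  */

#include <cpuid.h>
#include <stdio.h>

static void save_restore_sse (void) { puts ("movaps path (SSE)"); }
static void save_restore_avx (void) { puts ("vmovdqa path (AVX)"); }

static void check_avx (void);

/* Starts out pointing at the check routine; the first call patches it
   to the SSE or AVX implementation, so later calls skip CPUID.  */
static void (*save_restore) (void) = check_avx;

static void
check_avx (void)
{
  unsigned int eax, ebx, ecx, edx;

  /* CPUID leaf 1: ECX bit 28 reports AVX support.  */
  if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) && (ecx & (1 << 28)))
    save_restore = save_restore_avx;
  else
    save_restore = save_restore_sse;
  save_restore ();
}

int
main (void)
{
  save_restore ();	/* First call runs CPUID and caches the choice.  */
  save_restore ();	/* Subsequent calls dispatch directly.  */
  return 0;
}

The two bodies correspond to the two expansions of dl-trampoline.h in the diff: MOVXMM is defined as movaps for the SSE path and as vmovdqa (with RESTORE_AVX) for the AVX path, and the cached pointer simply selects between them.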