about summary refs log tree commit diff
path: root/sysdeps/x86_64
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 2009-07-29 08:33:03 -0700
committer Ulrich Drepper <drepper@redhat.com> 2009-07-29 08:33:03 -0700
commit b48a267b8fbb885191a04cffdb4050a4d4c8a20b (patch)
tree 1a517e3273ee5785b44a9bd5b9aec9ae62b95ac3 /sysdeps/x86_64
parent 9655389317c92e5935c47d90c0ba48ca54bd245e (diff)
download glibc-b48a267b8fbb885191a04cffdb4050a4d4c8a20b.tar.gz
glibc-b48a267b8fbb885191a04cffdb4050a4d4c8a20b.tar.xz
glibc-b48a267b8fbb885191a04cffdb4050a4d4c8a20b.zip
Preserve SSE registers in runtime relocations on x86-64.
SSE registers are used for passing parameters and must be preserved
in runtime relocations.  Inside ld.so this is enforced through the
tests in tst-xmmymm.sh.  But the malloc routines used after startup
come from libc.so and can be arbitrarily complex.  It's overkill
to save the SSE registers all the time because of that.  These calls
are rare.  Instead we save them on demand.  The new infrastructure
put in place in this patch makes this possible and efficient.
Diffstat (limited to 'sysdeps/x86_64')
-rw-r--r-- sysdeps/x86_64/dl-trampoline.S 82
-rwxr-xr-x sysdeps/x86_64/tst-xmmymm.sh 7
2 files changed, 86 insertions, 3 deletions
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index 49d239f075..7ecf1b0c64 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -390,3 +390,85 @@ L(no_avx4):
 	cfi_endproc
 	.size _dl_runtime_profile, .-_dl_runtime_profile
 #endif
+/* Save the eight vector argument registers in the %fs-relative
+   RTLD_SAVESPACE_SSE area: %ymm0-7 if CPUID reports AVX, else %xmm0-7.  */
+#ifdef SHARED
+	.globl _dl_x86_64_save_sse
+	.type _dl_x86_64_save_sse, @function
+	.align 16
+	cfi_startproc
+_dl_x86_64_save_sse:
+# ifdef HAVE_AVX_SUPPORT
+	cmpl	$0, L(have_avx)(%rip)	/* 0: not probed, >0: AVX, <0: no AVX */
+	jne	1f			/* Already probed; SF holds the sign */
+	movq	%rbx, %r11		# Save rbx
+	movl	$1, %eax		/* CPUID leaf 1: feature flags */
+	cpuid				/* Overwrites eax, ebx, ecx, edx */
+	movq	%r11,%rbx		# Restore rbx
+	movl	$1, %eax		/* Tentative result: AVX present */
+	testl	$(1 << 28), %ecx	/* ECX bit 28 is the AVX feature bit */
+	jne	2f
+	negl	%eax			/* Bit clear: cache -1 instead */
+2:	movl	%eax, L(have_avx)(%rip)	/* Remember the answer for later calls */
+	cmpl	$0, %eax		/* Set SF for the js below */
+/* NOTE(review): only the CPUID bit is tested, not OS support for YMM state (OSXSAVE/XGETBV) - confirm.  */
+1:	js	L(no_avx5)
+/* AVX path.  vmovdqa faults on unaligned memory; assumes 32-byte aligned slots - TODO confirm.  */
+#  define YMM_SIZE 32
+	vmovdqa %ymm0, %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE
+	vmovdqa %ymm1, %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE
+	vmovdqa %ymm2, %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE
+	vmovdqa %ymm3, %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE
+	vmovdqa %ymm4, %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE
+	vmovdqa %ymm5, %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE
+	vmovdqa %ymm6, %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE
+	vmovdqa %ymm7, %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE
+	ret
+L(no_avx5):
+# endif
+# define XMM_SIZE 16	/* SSE slot stride; must not redefine YMM_SIZE, which the restore routine still uses */
+	movdqa	%xmm0, %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE
+	movdqa	%xmm1, %fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE
+	movdqa	%xmm2, %fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE
+	movdqa	%xmm3, %fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE
+	movdqa	%xmm4, %fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE
+	movdqa	%xmm5, %fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE
+	movdqa	%xmm6, %fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE
+	movdqa	%xmm7, %fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE
+	ret
+	cfi_endproc
+	.size _dl_x86_64_save_sse, .-_dl_x86_64_save_sse
+/* Reload the vector registers from the %fs-relative RTLD_SAVESPACE_SSE
+   area that _dl_x86_64_save_sse writes: %ymm0-7 or %xmm0-7.  */
+	.globl _dl_x86_64_restore_sse
+	.type _dl_x86_64_restore_sse, @function
+	.align 16
+	cfi_startproc
+_dl_x86_64_restore_sse:
+# ifdef HAVE_AVX_SUPPORT
+	cmpl	$0, L(have_avx)(%rip)	/* Negative value selects the SSE path */
+	js	L(no_avx6)
+/* NOTE(review): 0 (not yet probed) also takes the AVX path; presumably the save routine always runs first - confirm.  */
+	vmovdqa %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE, %ymm0	/* Slots are YMM_SIZE bytes apart */
+	vmovdqa %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE, %ymm1
+	vmovdqa %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE, %ymm2
+	vmovdqa %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE, %ymm3
+	vmovdqa %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE, %ymm4
+	vmovdqa %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE, %ymm5
+	vmovdqa %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE, %ymm6
+	vmovdqa %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE, %ymm7
+	ret
+L(no_avx6):
+# endif
+	movdqa	%fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE, %xmm0	/* Slots are XMM_SIZE bytes apart */
+	movdqa	%fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE, %xmm1
+	movdqa	%fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE, %xmm2
+	movdqa	%fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE, %xmm3
+	movdqa	%fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE, %xmm4
+	movdqa	%fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE, %xmm5
+	movdqa	%fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE, %xmm6
+	movdqa	%fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE, %xmm7
+	ret
+	cfi_endproc
+	.size _dl_x86_64_restore_sse, .-_dl_x86_64_restore_sse
+#endif
diff --git a/sysdeps/x86_64/tst-xmmymm.sh b/sysdeps/x86_64/tst-xmmymm.sh
index a576e7da0d..da8af7e686 100755
--- a/sysdeps/x86_64/tst-xmmymm.sh
+++ b/sysdeps/x86_64/tst-xmmymm.sh
@@ -59,10 +59,11 @@ for f in $tocheck; do
   objdump -d "$objpfx"../*/"$f" |
   awk 'BEGIN { last="" } /^[[:xdigit:]]* <[_[:alnum:]]*>:$/ { fct=substr($2, 2, length($2)-3) } /,%[xy]mm[[:digit:]]*$/ { if (last != fct) { print fct; last=fct} }' |
   while read fct; do
-    if test "$fct" != "_dl_runtime_profile"; then
-      echo "function $fct in $f modifies xmm/ymm" >> "$tmp"
-      result=1
+    if test "$fct" = "_dl_runtime_profile" -o "$fct" = "_dl_x86_64_restore_sse"; then  # these two functions are exempt from the check
+      continue;
     fi
+    echo "function $fct in $f modifies xmm/ymm" >> "$tmp"  # report every other function name the awk filter printed
+    result=1  # NOTE(review): this loop is the tail of a pipeline; if the shell runs it in a subshell the assignment is lost - confirm
   done
 done