about summary refs log tree commit diff
path: root/sysdeps/x86_64/ifuncmod8.c
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2015-08-25 04:33:54 -0700
committerH.J. Lu <hjl.tools@gmail.com>2015-08-25 04:34:13 -0700
commitf3dcae82d54e5097e18e1d6ef4ff55c2ea4e621e (patch)
treee43395ded84bf0aa25ecbe2d395082a182aae8b7 /sysdeps/x86_64/ifuncmod8.c
parent2d02fd07371bcd492c320cec649c6265787d794a (diff)
downloadglibc-f3dcae82d54e5097e18e1d6ef4ff55c2ea4e621e.tar.gz
glibc-f3dcae82d54e5097e18e1d6ef4ff55c2ea4e621e.tar.xz
glibc-f3dcae82d54e5097e18e1d6ef4ff55c2ea4e621e.zip
Save and restore vector registers in x86-64 ld.so
This patch adds SSE, AVX and AVX512 versions of _dl_runtime_resolve
and _dl_runtime_profile, which save and restore the first 8 vector
registers used for parameter passing.  elf_machine_runtime_setup
selects the proper _dl_runtime_resolve or _dl_runtime_profile based
on _dl_x86_cpu_features.  It avoids race condition caused by
FOREIGN_CALL macros, which are only used for x86-64.

Performance impact of saving and restoring 8 vector registers are
negligible on Nehalem, Sandy Bridge, Ivy Bridge and Haswell when
ld.so is optimized with SSE2.

	[BZ #15128]
	* sysdeps/x86_64/Makefile [$(subdir) == elf] (tests): Add
	ifuncmain8.
	(modules-names): Add ifuncmod8.
	($(objpfx)ifuncmain8): New rule.
	* sysdeps/x86_64/dl-machine.h: Include <dl-procinfo.h> and
	<cpuid.h>.
	(elf_machine_runtime_setup): Use _dl_runtime_resolve_sse,
	_dl_runtime_resolve_avx, or _dl_runtime_resolve_avx512,
	_dl_runtime_profile_sse, _dl_runtime_profile_avx, or
	_dl_runtime_profile_avx512, based on HAS_ARCH_FEATURE.
	* sysdeps/x86_64/dl-trampoline.S: Rewrite.
	* sysdeps/x86_64/dl-trampoline.h: Likewise.
	* sysdeps/x86_64/ifuncmain8.c: New file.
	* sysdeps/x86_64/ifuncmod8.c: Likewise.
	* sysdeps/x86_64/nptl/tcb-offsets.sym (RTLD_SAVESPACE_SSE):
	Removed.
	* sysdeps/x86_64/nptl/tls.h (__128bits): Removed.
	(tcbhead_t): Change rtld_must_xmm_save to __glibc_unused1.
	Change rtld_savespace_sse to __glibc_unused2.
	(RTLD_CHECK_FOREIGN_CALL): Removed.
	(RTLD_ENABLE_FOREIGN_CALL): Likewise.
	(RTLD_PREPARE_FOREIGN_CALL): Likewise.
	(RTLD_FINALIZE_FOREIGN_CALL): Likewise.
Diffstat (limited to 'sysdeps/x86_64/ifuncmod8.c')
-rw-r--r--sysdeps/x86_64/ifuncmod8.c36
1 files changed, 36 insertions, 0 deletions
diff --git a/sysdeps/x86_64/ifuncmod8.c b/sysdeps/x86_64/ifuncmod8.c
new file mode 100644
index 0000000000..741aa13b20
--- /dev/null
+++ b/sysdeps/x86_64/ifuncmod8.c
@@ -0,0 +1,36 @@
+/* Test IFUNC selector with floating-point parameters.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <emmintrin.h>
+
+void * foo_ifunc (void) __asm__ ("foo");
+__asm__(".type foo, %gnu_indirect_function");
+
+static float
+foo_impl (float x)
+{
+  return x + 1;
+}
+
+void *
+foo_ifunc (void)
+{
+  __m128i xmm = _mm_set1_epi32 (-1);
+  asm volatile ("movdqa %0, %%xmm0" : : "x" (xmm) : "xmm0" );
+  return foo_impl;
+}