diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2015-08-25 04:33:54 -0700 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2015-08-25 04:34:13 -0700 |
commit | f3dcae82d54e5097e18e1d6ef4ff55c2ea4e621e (patch) | |
tree | e43395ded84bf0aa25ecbe2d395082a182aae8b7 /sysdeps/x86_64/nptl | |
parent | 2d02fd07371bcd492c320cec649c6265787d794a (diff) | |
download | glibc-f3dcae82d54e5097e18e1d6ef4ff55c2ea4e621e.tar.gz glibc-f3dcae82d54e5097e18e1d6ef4ff55c2ea4e621e.tar.xz glibc-f3dcae82d54e5097e18e1d6ef4ff55c2ea4e621e.zip |
Save and restore vector registers in x86-64 ld.so
This patch adds SSE, AVX and AVX512 versions of _dl_runtime_resolve and _dl_runtime_profile, which save and restore the first 8 vector registers used for parameter passing. elf_machine_runtime_setup selects the proper _dl_runtime_resolve or _dl_runtime_profile based on _dl_x86_cpu_features. It avoids the race condition caused by the FOREIGN_CALL macros, which are only used for x86-64. The performance impact of saving and restoring 8 vector registers is negligible on Nehalem, Sandy Bridge, Ivy Bridge and Haswell when ld.so is optimized with SSE2. [BZ #15128] * sysdeps/x86_64/Makefile [$(subdir) == elf] (tests): Add ifuncmain8. (modules-names): Add ifuncmod8. ($(objpfx)ifuncmain8): New rule. * sysdeps/x86_64/dl-machine.h: Include <dl-procinfo.h> and <cpuid.h>. (elf_machine_runtime_setup): Use _dl_runtime_resolve_sse, _dl_runtime_resolve_avx, or _dl_runtime_resolve_avx512, _dl_runtime_profile_sse, _dl_runtime_profile_avx, or _dl_runtime_profile_avx512, based on HAS_ARCH_FEATURE. * sysdeps/x86_64/dl-trampoline.S: Rewrite. * sysdeps/x86_64/dl-trampoline.h: Likewise. * sysdeps/x86_64/ifuncmain8.c: New file. * sysdeps/x86_64/ifuncmod8.c: Likewise. * sysdeps/x86_64/nptl/tcb-offsets.sym (RTLD_SAVESPACE_SSE): Removed. * sysdeps/x86_64/nptl/tls.h (__128bits): Removed. (tcbhead_t): Change rtld_must_xmm_save to __glibc_unused1. Change rtld_savespace_sse to __glibc_unused2. (RTLD_CHECK_FOREIGN_CALL): Removed. (RTLD_ENABLE_FOREIGN_CALL): Likewise. (RTLD_PREPARE_FOREIGN_CALL): Likewise. (RTLD_FINALIZE_FOREIGN_CALL): Likewise.
Diffstat (limited to 'sysdeps/x86_64/nptl')
-rw-r--r-- | sysdeps/x86_64/nptl/tcb-offsets.sym | 1 | ||||
-rw-r--r-- | sysdeps/x86_64/nptl/tls.h | 42 |
2 files changed, 4 insertions, 39 deletions
diff --git a/sysdeps/x86_64/nptl/tcb-offsets.sym b/sysdeps/x86_64/nptl/tcb-offsets.sym index 729d1da38f..aeb752673a 100644 --- a/sysdeps/x86_64/nptl/tcb-offsets.sym +++ b/sysdeps/x86_64/nptl/tcb-offsets.sym @@ -16,7 +16,6 @@ VGETCPU_CACHE_OFFSET offsetof (tcbhead_t, vgetcpu_cache) #ifndef __ASSUME_PRIVATE_FUTEX PRIVATE_FUTEX offsetof (tcbhead_t, private_futex) #endif -RTLD_SAVESPACE_SSE offsetof (tcbhead_t, rtld_savespace_sse) -- Not strictly offsets, but these values are also used in the TCB. TCB_CANCELSTATE_BITMASK CANCELSTATE_BITMASK diff --git a/sysdeps/x86_64/nptl/tls.h b/sysdeps/x86_64/nptl/tls.h index d7543c651f..b73e7edf6c 100644 --- a/sysdeps/x86_64/nptl/tls.h +++ b/sysdeps/x86_64/nptl/tls.h @@ -67,14 +67,15 @@ typedef struct # else int __glibc_reserved1; # endif - int rtld_must_xmm_save; + int __glibc_unused1; /* Reservation of some values for the TM ABI. */ void *__private_tm[4]; /* GCC split stack support. */ void *__private_ss; long int __glibc_reserved2; - /* Have space for the post-AVX register size. */ - __128bits rtld_savespace_sse[8][4] __attribute__ ((aligned (32))); + /* Must be kept even if it is no longer used by glibc since programs, + like AddressSanitizer, depend on the size of tcbhead_t. */ + __128bits __glibc_unused2[8][4] __attribute__ ((aligned (32))); void *__padding[8]; } tcbhead_t; @@ -384,41 +385,6 @@ typedef struct # define THREAD_GSCOPE_WAIT() \ GL(dl_wait_lookup_done) () - -# ifdef SHARED -/* Defined in dl-trampoline.S. */ -extern void _dl_x86_64_save_sse (void); -extern void _dl_x86_64_restore_sse (void); - -# define RTLD_CHECK_FOREIGN_CALL \ - (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save) != 0) - -/* NB: Don't use the xchg operation because that would imply a lock - prefix which is expensive and unnecessary. The cache line is also - not contested at all. 
*/ -# define RTLD_ENABLE_FOREIGN_CALL \ - int old_rtld_must_xmm_save = THREAD_GETMEM (THREAD_SELF, \ - header.rtld_must_xmm_save); \ - THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, 1) - -# define RTLD_PREPARE_FOREIGN_CALL \ - do if (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save)) \ - { \ - _dl_x86_64_save_sse (); \ - THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, 0); \ - } \ - while (0) - -# define RTLD_FINALIZE_FOREIGN_CALL \ - do { \ - if (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save) == 0) \ - _dl_x86_64_restore_sse (); \ - THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, \ - old_rtld_must_xmm_save); \ - } while (0) -# endif - - #endif /* __ASSEMBLER__ */ #endif /* tls.h */ |