diff options
Diffstat (limited to 'sysdeps')
38 files changed, 910 insertions, 417 deletions
diff --git a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h index 282805e396..e86d8b5b63 100644 --- a/sysdeps/aarch64/dl-machine.h +++ b/sysdeps/aarch64/dl-machine.h @@ -172,8 +172,8 @@ _dl_start_user: \n\ cmp x0, #0 \n\ bne 1b \n\ // Update _dl_argv \n\ - adrp x3, _dl_argv \n\ - str x2, [x3, #:lo12:_dl_argv] \n\ + adrp x3, __GI__dl_argv \n\ + str x2, [x3, #:lo12:__GI__dl_argv] \n\ .L_done_stack_adjust: \n\ // compute envp \n\ add x3, x2, x1, lsl #3 \n\ diff --git a/sysdeps/gnu/glob64.c b/sysdeps/gnu/glob64.c index d1e4e6f0d5..52e97e2f6a 100644 --- a/sysdeps/gnu/glob64.c +++ b/sysdeps/gnu/glob64.c @@ -15,11 +15,8 @@ #undef __stat #define __stat(file, buf) __xstat64 (_STAT_VER, file, buf) -#define NO_GLOB_PATTERN_P 1 - #define COMPILE_GLOB64 1 #include <posix/glob.c> libc_hidden_def (glob64) -libc_hidden_def (globfree64) diff --git a/sysdeps/gnu/globfree64.c b/sysdeps/gnu/globfree64.c new file mode 100644 index 0000000000..f092d0bf8b --- /dev/null +++ b/sysdeps/gnu/globfree64.c @@ -0,0 +1,10 @@ +#include <dirent.h> +#include <glob.h> +#include <sys/stat.h> + +#define glob_t glob64_t +#define globfree(pglob) globfree64 (pglob) + +#include <posix/globfree.c> + +libc_hidden_def (globfree64) diff --git a/sysdeps/ieee754/dbl-64/e_pow.c b/sysdeps/ieee754/dbl-64/e_pow.c index 663fa392c2..bd758b5979 100644 --- a/sysdeps/ieee754/dbl-64/e_pow.c +++ b/sysdeps/ieee754/dbl-64/e_pow.c @@ -466,15 +466,15 @@ checkint (double x) return (n & 1) ? -1 : 1; /* odd or even */ if (k > 20) { - if (n << (k - 20)) + if (n << (k - 20) != 0) return 0; /* if not integer */ - return (n << (k - 21)) ? -1 : 1; + return (n << (k - 21) != 0) ? -1 : 1; } if (n) return 0; /*if not integer */ if (k == 20) return (m & 1) ? -1 : 1; - if (m << (k + 12)) + if (m << (k + 12) != 0) return 0; - return (m << (k + 11)) ? -1 : 1; + return (m << (k + 11) != 0) ? -1 : 1; } diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile index 35e1ed48d2..32beaa67d0 100644 --- a/sysdeps/unix/sysv/linux/Makefile +++ b/sysdeps/unix/sysv/linux/Makefile @@ -140,7 +140,7 @@ endif ifeq ($(subdir),posix) sysdep_headers += bits/initspin.h -sysdep_routines += sched_getcpu +sysdep_routines += sched_getcpu oldglob tests += tst-affinity tst-affinity-pid diff --git a/sysdeps/unix/sysv/linux/alpha/glob.c b/sysdeps/unix/sysv/linux/alpha/glob.c index c5dfb85468..19eb9b1c07 100644 --- a/sysdeps/unix/sysv/linux/alpha/glob.c +++ b/sysdeps/unix/sysv/linux/alpha/glob.c @@ -42,10 +42,6 @@ extern void __new_globfree (glob_t *__pglob); #undef globfree64 versioned_symbol (libc, __new_glob, glob, GLIBC_2_1); -versioned_symbol (libc, __new_globfree, globfree, GLIBC_2_1); libc_hidden_ver (__new_glob, glob) -libc_hidden_ver (__new_globfree, globfree) weak_alias (__new_glob, glob64) -weak_alias (__new_globfree, globfree64) -libc_hidden_ver (__new_globfree, globfree64) diff --git a/sysdeps/unix/sysv/linux/alpha/globfree.c b/sysdeps/unix/sysv/linux/alpha/globfree.c new file mode 100644 index 0000000000..98cf1c200b --- /dev/null +++ b/sysdeps/unix/sysv/linux/alpha/globfree.c @@ -0,0 +1,37 @@ +/* Compat globfree. Linux/alpha version. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define globfree64 __no_globfree64_decl +#include <sys/types.h> +#include <glob.h> +#include <shlib-compat.h> + +#define globfree(pglob) \ + __new_globfree (pglob) + +extern void __new_globfree (glob_t *__pglob); + +#include <posix/globfree.c> + +#undef globfree64 + +versioned_symbol (libc, __new_globfree, globfree, GLIBC_2_1); +libc_hidden_ver (__new_globfree, globfree) + +weak_alias (__new_globfree, globfree64) +libc_hidden_ver (__new_globfree, globfree64) diff --git a/sysdeps/unix/sysv/linux/i386/glob64.c b/sysdeps/unix/sysv/linux/i386/glob64.c index 802c957d6c..c2cc85741f 100644 --- a/sysdeps/unix/sysv/linux/i386/glob64.c +++ b/sysdeps/unix/sysv/linux/i386/glob64.c @@ -19,6 +19,7 @@ #include <dirent.h> #include <glob.h> #include <sys/stat.h> +#include <shlib-compat.h> #define dirent dirent64 #define __readdir(dirp) __readdir64 (dirp) @@ -33,44 +34,9 @@ #undef __stat #define __stat(file, buf) __xstat64 (_STAT_VER, file, buf) -#define NO_GLOB_PATTERN_P 1 - #define COMPILE_GLOB64 1 #include <posix/glob.c> -#include "shlib-compat.h" - -libc_hidden_def (globfree64) - versioned_symbol (libc, __glob64, glob64, GLIBC_2_2); libc_hidden_ver (__glob64, glob64) - -#if SHLIB_COMPAT(libc, GLIBC_2_1, GLIBC_2_2) - -#include <sysdeps/unix/sysv/linux/i386/olddirent.h> - -int __old_glob64 (const char *__pattern, int __flags, - int (*__errfunc) (const char *, int), - glob64_t *__pglob); - -#undef dirent -#define dirent __old_dirent64 -#undef GL_READDIR -# define GL_READDIR(pglob, stream) \ - ((struct __old_dirent64 *) (pglob)->gl_readdir (stream)) -#undef __readdir -#define __readdir(dirp) __old_readdir64 (dirp) -#undef glob -#define glob(pattern, flags, errfunc, pglob) \ - __old_glob64 (pattern, flags, errfunc, pglob) -#define convert_dirent __old_convert_dirent -#define glob_in_dir __old_glob_in_dir -#define GLOB_ATTRIBUTE attribute_compat_text_section - -#define GLOB_ONLY_P 1 - -#include <posix/glob.c> - -compat_symbol (libc, __old_glob64, glob64, GLIBC_2_1); -#endif diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n64/globfree64.c b/sysdeps/unix/sysv/linux/mips/mips64/n64/globfree64.c new file mode 100644 index 0000000000..abc35fdd2b --- /dev/null +++ b/sysdeps/unix/sysv/linux/mips/mips64/n64/globfree64.c @@ -0,0 +1 @@ +/* glob64 is in globfree64.c */ diff --git a/sysdeps/unix/sysv/linux/oldglob.c b/sysdeps/unix/sysv/linux/oldglob.c new file mode 100644 index 0000000000..8233e57ce9 --- /dev/null +++ b/sysdeps/unix/sysv/linux/oldglob.c @@ -0,0 +1,42 @@ +#include <shlib-compat.h> + +#if SHLIB_COMPAT(libc, GLIBC_2_1, GLIBC_2_2) + +#include <dirent.h> +#include <glob.h> +#include <sys/stat.h> + +#include <sysdeps/unix/sysv/linux/i386/olddirent.h> + +int __old_glob64 (const char *__pattern, int __flags, + int (*__errfunc) (const char *, int), + glob64_t *__pglob); +libc_hidden_proto (__old_glob64); + +#define dirent __old_dirent64 +#define GL_READDIR(pglob, stream) \ + ((struct __old_dirent64 *) (pglob)->gl_readdir (stream)) +#undef __readdir +#define __readdir(dirp) __old_readdir64 (dirp) + +#define glob_t glob64_t +#define glob(pattern, flags, errfunc, pglob) \ + __old_glob64 (pattern, flags, errfunc, pglob) +#define globfree(pglob) globfree64(pglob) + +#define convert_dirent __old_convert_dirent +#define glob_in_dir __old_glob_in_dir + +#undef stat +#define stat stat64 +#undef __stat +#define __stat(file, buf) __xstat64 (_STAT_VER, file, buf) + +#define GLOB_ATTRIBUTE attribute_compat_text_section + +#include <posix/glob.c> + +libc_hidden_def (__old_glob64); + +compat_symbol (libc, __old_glob64, glob64, GLIBC_2_1); +#endif diff --git a/sysdeps/unix/sysv/linux/sh/sh3/ucontext_i.sym b/sysdeps/unix/sysv/linux/sh/sh3/ucontext_i.sym index 17397c5511..25f914a93b 100644 --- a/sysdeps/unix/sysv/linux/sh/sh3/ucontext_i.sym +++ b/sysdeps/unix/sysv/linux/sh/sh3/ucontext_i.sym @@ -13,22 +13,22 @@ SIG_SETMASK oLINK ucontext (uc_link) oSS_SP ucontext (uc_stack.ss_sp) oSS_SIZE ucontext (uc_stack.ss_size) -oR0 mcontext (gregs[R0]) -oR1 mcontext (gregs[R1]) -oR2 mcontext (gregs[R2]) -oR3 mcontext (gregs[R3]) -oR4 mcontext (gregs[R4]) -oR5 mcontext (gregs[R5]) -oR6 mcontext (gregs[R6]) -oR7 mcontext (gregs[R7]) -oR8 mcontext (gregs[R8]) -oR9 mcontext (gregs[R9]) -oR10 mcontext (gregs[R10]) -oR11 mcontext (gregs[R11]) -oR12 mcontext (gregs[R12]) -oR13 mcontext (gregs[R13]) -oR14 mcontext (gregs[R14]) -oR15 mcontext (gregs[R15]) +oR0 mcontext (gregs[REG_R0]) +oR1 mcontext (gregs[REG_R1]) +oR2 mcontext (gregs[REG_R2]) +oR3 mcontext (gregs[REG_R3]) +oR4 mcontext (gregs[REG_R4]) +oR5 mcontext (gregs[REG_R5]) +oR6 mcontext (gregs[REG_R6]) +oR7 mcontext (gregs[REG_R7]) +oR8 mcontext (gregs[REG_R8]) +oR9 mcontext (gregs[REG_R9]) +oR10 mcontext (gregs[REG_R10]) +oR11 mcontext (gregs[REG_R11]) +oR12 mcontext (gregs[REG_R12]) +oR13 mcontext (gregs[REG_R13]) +oR14 mcontext (gregs[REG_R14]) +oR15 mcontext (gregs[REG_R15]) oPC mcontext (pc) oPR mcontext (pr) oSR mcontext (sr) diff --git a/sysdeps/unix/sysv/linux/sh/sh4/ucontext_i.sym b/sysdeps/unix/sysv/linux/sh/sh4/ucontext_i.sym index 65633fbcf4..130f60cd96 100644 --- a/sysdeps/unix/sysv/linux/sh/sh4/ucontext_i.sym +++ b/sysdeps/unix/sysv/linux/sh/sh4/ucontext_i.sym @@ -13,22 +13,22 @@ SIG_SETMASK oLINK ucontext (uc_link) oSS_SP ucontext (uc_stack.ss_sp) oSS_SIZE ucontext (uc_stack.ss_size) -oR0 mcontext (gregs[R0]) -oR1 mcontext (gregs[R1]) -oR2 mcontext (gregs[R2]) -oR3 mcontext (gregs[R3]) -oR4 mcontext (gregs[R4]) -oR5 mcontext (gregs[R5]) -oR6 mcontext (gregs[R6]) -oR7 mcontext (gregs[R7]) -oR8 mcontext (gregs[R8]) -oR9 mcontext (gregs[R9]) -oR10 mcontext (gregs[R10]) -oR11 mcontext (gregs[R11]) -oR12 mcontext (gregs[R12]) -oR13 mcontext (gregs[R13]) -oR14 mcontext (gregs[R14]) -oR15 mcontext (gregs[R15]) +oR0 mcontext (gregs[REG_R0]) +oR1 mcontext (gregs[REG_R1]) +oR2 mcontext (gregs[REG_R2]) +oR3 mcontext (gregs[REG_R3]) +oR4 mcontext (gregs[REG_R4]) +oR5 mcontext (gregs[REG_R5]) +oR6 mcontext (gregs[REG_R6]) +oR7 mcontext (gregs[REG_R7]) +oR8 mcontext (gregs[REG_R8]) +oR9 mcontext (gregs[REG_R9]) +oR10 mcontext (gregs[REG_R10]) +oR11 mcontext (gregs[REG_R11]) +oR12 mcontext (gregs[REG_R12]) +oR13 mcontext (gregs[REG_R13]) +oR14 mcontext (gregs[REG_R14]) +oR15 mcontext (gregs[REG_R15]) oPC mcontext (pc) oPR mcontext (pr) oSR mcontext (sr) diff --git a/sysdeps/unix/sysv/linux/sh/sys/ucontext.h b/sysdeps/unix/sysv/linux/sh/sys/ucontext.h index ab9a7e66bf..037fbb73e8 100644 --- a/sysdeps/unix/sysv/linux/sh/sys/ucontext.h +++ b/sysdeps/unix/sysv/linux/sh/sys/ucontext.h @@ -31,49 +31,47 @@ typedef int greg_t; /* Number of general registers. */ -#define NGPREG 16 +#define NGREG 16 /* Container for all general registers. */ -typedef greg_t gregset_t[NGPREG]; +typedef greg_t gregset_t[NGREG]; -#ifdef __USE_GNU /* Number of each register is the `gregset_t' array. */ enum { - R0 = 0, -#define R0 R0 - R1 = 1, -#define R1 R1 - R2 = 2, -#define R2 R2 - R3 = 3, -#define R3 R3 - R4 = 4, -#define R4 R4 - R5 = 5, -#define R5 R5 - R6 = 6, -#define R6 R6 - R7 = 7, -#define R7 R7 - R8 = 8, -#define R8 R8 - R9 = 9, -#define R9 R9 - R10 = 10, -#define R10 R10 - R11 = 11, -#define R11 R11 - R12 = 12, -#define R12 R12 - R13 = 13, -#define R13 R13 - R14 = 14, -#define R14 R14 - R15 = 15, -#define R15 R15 + REG_R0 = 0, +#define REG_R0 REG_R0 + REG_R1 = 1, +#define REG_R1 REG_R1 + REG_R2 = 2, +#define REG_R2 REG_R2 + REG_R3 = 3, +#define REG_R3 REG_R3 + REG_R4 = 4, +#define REG_R4 REG_R4 + REG_R5 = 5, +#define REG_R5 REG_R5 + REG_R6 = 6, +#define REG_R6 REG_R6 + REG_R7 = 7, +#define REG_R7 REG_R7 + REG_R8 = 8, +#define REG_R8 REG_R8 + REG_R9 = 9, +#define REG_R9 REG_R9 + REG_R10 = 10, +#define REG_R10 REG_R10 + REG_R11 = 11, +#define REG_R11 REG_R11 + REG_R12 = 12, +#define REG_R12 REG_R12 + REG_R13 = 13, +#define REG_R13 REG_R13 + REG_R14 = 14, +#define REG_R14 REG_R14 + REG_R15 = 15, +#define REG_R15 REG_R15 }; -#endif #if (defined(__SH4__) || defined(__SH4A__)) typedef int freg_t; diff --git a/sysdeps/unix/sysv/linux/wordsize-64/globfree64.c b/sysdeps/unix/sysv/linux/wordsize-64/globfree64.c new file mode 100644 index 0000000000..af035e1514 --- /dev/null +++ b/sysdeps/unix/sysv/linux/wordsize-64/globfree64.c @@ -0,0 +1,2 @@ +/* This file is here so sysdeps/gnu/glob64.c doesn't take precedence. */ +#include <sysdeps/wordsize-64/globfree64.c> diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/globfree.c b/sysdeps/unix/sysv/linux/x86_64/x32/globfree.c new file mode 100644 index 0000000000..b76a761c17 --- /dev/null +++ b/sysdeps/unix/sysv/linux/x86_64/x32/globfree.c @@ -0,0 +1 @@ +#include <sysdeps/wordsize-64/globfree.c> diff --git a/sysdeps/wordsize-64/glob.c b/sysdeps/wordsize-64/glob.c index 082faf1c70..954e8d37e2 100644 --- a/sysdeps/wordsize-64/glob.c +++ b/sysdeps/wordsize-64/glob.c @@ -4,5 +4,3 @@ #undef glob64 #undef globfree64 weak_alias (glob, glob64) -weak_alias (globfree, globfree64) -libc_hidden_ver (globfree, globfree64) diff --git a/sysdeps/wordsize-64/globfree.c b/sysdeps/wordsize-64/globfree.c new file mode 100644 index 0000000000..ec8c35b489 --- /dev/null +++ b/sysdeps/wordsize-64/globfree.c @@ -0,0 +1,5 @@ +#define globfree64 __no_globfree64_decl +#include <posix/globfree.c> +#undef globfree64 +weak_alias (globfree, globfree64) +libc_hidden_ver (globfree, globfree64) diff --git a/sysdeps/wordsize-64/globfree64.c b/sysdeps/wordsize-64/globfree64.c new file mode 100644 index 0000000000..a0f57ff4b3 --- /dev/null +++ b/sysdeps/wordsize-64/globfree64.c @@ -0,0 +1 @@ +/* globfree64 is in globfree.c */ diff --git a/sysdeps/x86/cpu-features-offsets.sym b/sysdeps/x86/cpu-features-offsets.sym index f6739fae81..33dd094e37 100644 --- a/sysdeps/x86/cpu-features-offsets.sym +++ b/sysdeps/x86/cpu-features-offsets.sym @@ -15,6 +15,7 @@ CPUID_ECX_OFFSET offsetof (struct cpuid_registers, ecx) CPUID_EDX_OFFSET offsetof (struct cpuid_registers, edx) FAMILY_OFFSET offsetof (struct cpu_features, family) MODEL_OFFSET offsetof (struct cpu_features, model) +XSAVE_STATE_SIZE_OFFSET offsetof (struct cpu_features, xsave_state_size) FEATURE_OFFSET offsetof (struct cpu_features, feature) FEATURE_SIZE sizeof (unsigned int) diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index d1ee922290..9eca98817d 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -18,6 +18,7 @@ #include <cpuid.h> #include <cpu-features.h> +#include <libc-internal.h> static void get_common_indeces (struct cpu_features *cpu_features, @@ -88,6 +89,71 @@ get_common_indeces (struct cpu_features *cpu_features, cpu_features->feature[index_arch_FMA_Usable] |= bit_arch_FMA_Usable; } + + /* For _dl_runtime_resolve, set xsave_state_size to xsave area + size + integer register save size and align it to 64 bytes. */ + if (cpu_features->max_cpuid >= 0xd) + { + unsigned int eax, ebx, ecx, edx; + + __cpuid_count (0xd, 0, eax, ebx, ecx, edx); + if (ebx != 0) + { + cpu_features->xsave_state_size + = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64); + + __cpuid_count (0xd, 1, eax, ebx, ecx, edx); + + /* Check if XSAVEC is available. */ + if ((eax & (1 << 1)) != 0) + { + unsigned int xstate_comp_offsets[32]; + unsigned int xstate_comp_sizes[32]; + unsigned int i; + + xstate_comp_offsets[0] = 0; + xstate_comp_offsets[1] = 160; + xstate_comp_offsets[2] = 576; + xstate_comp_sizes[0] = 160; + xstate_comp_sizes[1] = 256; + + for (i = 2; i < 32; i++) + { + if ((STATE_SAVE_MASK & (1 << i)) != 0) + { + __cpuid_count (0xd, i, eax, ebx, ecx, edx); + xstate_comp_sizes[i] = eax; + } + else + { + ecx = 0; + xstate_comp_sizes[i] = 0; + } + + if (i > 2) + { + xstate_comp_offsets[i] + = (xstate_comp_offsets[i - 1] + + xstate_comp_sizes[i -1]); + if ((ecx & (1 << 1)) != 0) + xstate_comp_offsets[i] + = ALIGN_UP (xstate_comp_offsets[i], 64); + } + } + + /* Use XSAVEC. */ + unsigned int size + = xstate_comp_offsets[31] + xstate_comp_sizes[31]; + if (size) + { + cpu_features->xsave_state_size + = ALIGN_UP (size + STATE_SAVE_OFFSET, 64); + cpu_features->feature[index_arch_XSAVEC_Usable] + |= bit_arch_XSAVEC_Usable; + } + } + } + } } } @@ -213,20 +279,6 @@ init_cpu_features (struct cpu_features *cpu_features) else cpu_features->feature[index_arch_Prefer_No_AVX512] |= bit_arch_Prefer_No_AVX512; - - /* To avoid SSE transition penalty, use _dl_runtime_resolve_slow. - If XGETBV suports ECX == 1, use _dl_runtime_resolve_opt. */ - cpu_features->feature[index_arch_Use_dl_runtime_resolve_slow] - |= bit_arch_Use_dl_runtime_resolve_slow; - if (cpu_features->max_cpuid >= 0xd) - { - unsigned int eax; - - __cpuid_count (0xd, 1, eax, ebx, ecx, edx); - if ((eax & (1 << 2)) != 0) - cpu_features->feature[index_arch_Use_dl_runtime_resolve_opt] - |= bit_arch_Use_dl_runtime_resolve_opt; - } } /* This spells out "AuthenticAMD". */ else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h index 2609ac0999..507a141414 100644 --- a/sysdeps/x86/cpu-features.h +++ b/sysdeps/x86/cpu-features.h @@ -37,9 +37,8 @@ #define bit_arch_Prefer_No_VZEROUPPER (1 << 17) #define bit_arch_Fast_Unaligned_Copy (1 << 18) #define bit_arch_Prefer_ERMS (1 << 19) -#define bit_arch_Use_dl_runtime_resolve_opt (1 << 20) -#define bit_arch_Use_dl_runtime_resolve_slow (1 << 21) -#define bit_arch_Prefer_No_AVX512 (1 << 22) +#define bit_arch_Prefer_No_AVX512 (1 << 20) +#define bit_arch_XSAVEC_Usable (1 << 21) /* CPUID Feature flags. */ @@ -82,6 +81,15 @@ /* The current maximum size of the feature integer bit array. */ #define FEATURE_INDEX_MAX 1 +/* Offset for fxsave/xsave area used by _dl_runtime_resolve. Also need + space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX. It must be + aligned to 16 bytes for fxsave and 64 bytes for xsave. */ +#define STATE_SAVE_OFFSET (8 * 7 + 8) + +/* Save SSE, AVX, AVX512, mask and bound registers. */ +#define STATE_SAVE_MASK \ + ((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7)) + #ifdef __ASSEMBLER__ # include <cpu-features-offsets.h> @@ -206,6 +214,12 @@ struct cpu_features } cpuid[COMMON_CPUID_INDEX_MAX]; unsigned int family; unsigned int model; + /* The type must be unsigned long int so that we use + + sub xsave_state_size_offset(%rip) %RSP_LP + + in _dl_runtime_resolve. */ + unsigned long int xsave_state_size; unsigned int feature[FEATURE_INDEX_MAX]; }; @@ -298,9 +312,8 @@ extern const struct cpu_features *__get_cpu_features (void) # define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1 # define index_arch_Fast_Unaligned_Copy FEATURE_INDEX_1 # define index_arch_Prefer_ERMS FEATURE_INDEX_1 -# define index_arch_Use_dl_runtime_resolve_opt FEATURE_INDEX_1 -# define index_arch_Use_dl_runtime_resolve_slow FEATURE_INDEX_1 # define index_arch_Prefer_No_AVX512 FEATURE_INDEX_1 +# define index_arch_XSAVEC_Usable FEATURE_INDEX_1 #endif /* !__ASSEMBLER__ */ diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile index 6d99284cd0..cc990a9685 100644 --- a/sysdeps/x86_64/Makefile +++ b/sysdeps/x86_64/Makefile @@ -27,7 +27,7 @@ ifeq ($(subdir),elf) CFLAGS-.os += $(if $(filter $(@F),$(patsubst %,%.os,$(all-rtld-routines))),\ -mno-mmx) -sysdep-dl-routines += tlsdesc dl-tlsdesc +sysdep-dl-routines += tlsdesc dl-tlsdesc tls_get_addr tests += ifuncmain8 modules-names += ifuncmod8 @@ -49,9 +49,12 @@ extra-test-objs += tst-quadmod1pie.o tst-quadmod2pie.o $(objpfx)tst-quad1pie: $(objpfx)tst-quadmod1pie.o $(objpfx)tst-quad2pie: $(objpfx)tst-quadmod2pie.o -tests += tst-audit3 tst-audit4 tst-audit5 tst-audit6 tst-audit7 tst-audit10 -test-extras += tst-audit4-aux tst-audit10-aux -extra-test-objs += tst-audit4-aux.o tst-audit10-aux.o +tests += tst-audit3 tst-audit4 tst-audit5 tst-audit6 tst-audit7 \ + tst-audit10 tst-sse tst-avx tst-avx512 +test-extras += tst-audit4-aux tst-audit10-aux \ + tst-avx-aux tst-avx512-aux +extra-test-objs += tst-audit4-aux.o tst-audit10-aux.o \ + tst-avx-aux.o tst-avx512-aux.o tests += tst-split-dynreloc LDFLAGS-tst-split-dynreloc = -Wl,-T,$(..)sysdeps/x86_64/tst-split-dynreloc.lds @@ -62,7 +65,8 @@ modules-names += tst-auditmod3a tst-auditmod3b \ tst-auditmod5a tst-auditmod5b \ tst-auditmod6a tst-auditmod6b tst-auditmod6c \ tst-auditmod7a tst-auditmod7b \ - tst-auditmod10a tst-auditmod10b + tst-auditmod10a tst-auditmod10b \ + tst-ssemod tst-avxmod tst-avx512mod $(objpfx)tst-audit3: $(objpfx)tst-auditmod3a.so $(objpfx)tst-audit3.out: $(objpfx)tst-auditmod3b.so @@ -89,6 +93,10 @@ $(objpfx)tst-audit10: $(objpfx)tst-audit10-aux.o $(objpfx)tst-auditmod10a.so $(objpfx)tst-audit10.out: $(objpfx)tst-auditmod10b.so tst-audit10-ENV = LD_AUDIT=$(objpfx)tst-auditmod10b.so +$(objpfx)tst-sse: $(objpfx)tst-ssemod.so +$(objpfx)tst-avx: $(objpfx)tst-avx-aux.o $(objpfx)tst-avxmod.so +$(objpfx)tst-avx512: $(objpfx)tst-avx512-aux.o $(objpfx)tst-avx512mod.so + AVX-CFLAGS=-mavx -mno-vzeroupper CFLAGS-tst-audit4-aux.c += $(AVX-CFLAGS) CFLAGS-tst-auditmod4a.c += $(AVX-CFLAGS) @@ -96,14 +104,18 @@ CFLAGS-tst-auditmod4b.c += $(AVX-CFLAGS) CFLAGS-tst-auditmod6b.c += $(AVX-CFLAGS) CFLAGS-tst-auditmod6c.c += $(AVX-CFLAGS) CFLAGS-tst-auditmod7b.c += $(AVX-CFLAGS) +CFLAGS-tst-avx-aux.c += $(AVX-CFLAGS) +CFLAGS-tst-avxmod.c += $(AVX-CFLAGS) ifeq (yes,$(config-cflags-avx512)) AVX512-CFLAGS = -mavx512f CFLAGS-tst-audit10-aux.c += $(AVX512-CFLAGS) CFLAGS-tst-auditmod10a.c += $(AVX512-CFLAGS) CFLAGS-tst-auditmod10b.c += $(AVX512-CFLAGS) +CFLAGS-tst-avx512-aux.c += $(AVX512-CFLAGS) +CFLAGS-tst-avx512mod.c += $(AVX512-CFLAGS) endif endif ifeq ($(subdir),csu) -gen-as-const-headers += tlsdesc.sym +gen-as-const-headers += tlsdesc.sym rtld-offsets.sym endif diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h index c0f0fa16a2..8355432dfc 100644 --- a/sysdeps/x86_64/dl-machine.h +++ b/sysdeps/x86_64/dl-machine.h @@ -66,12 +66,9 @@ static inline int __attribute__ ((unused, always_inline)) elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) { Elf64_Addr *got; - extern void _dl_runtime_resolve_sse (ElfW(Word)) attribute_hidden; - extern void _dl_runtime_resolve_avx (ElfW(Word)) attribute_hidden; - extern void _dl_runtime_resolve_avx_slow (ElfW(Word)) attribute_hidden; - extern void _dl_runtime_resolve_avx_opt (ElfW(Word)) attribute_hidden; - extern void _dl_runtime_resolve_avx512 (ElfW(Word)) attribute_hidden; - extern void _dl_runtime_resolve_avx512_opt (ElfW(Word)) attribute_hidden; + extern void _dl_runtime_resolve_fxsave (ElfW(Word)) attribute_hidden; + extern void _dl_runtime_resolve_xsave (ElfW(Word)) attribute_hidden; + extern void _dl_runtime_resolve_xsavec (ElfW(Word)) attribute_hidden; extern void _dl_runtime_profile_sse (ElfW(Word)) attribute_hidden; extern void _dl_runtime_profile_avx (ElfW(Word)) attribute_hidden; extern void _dl_runtime_profile_avx512 (ElfW(Word)) attribute_hidden; @@ -120,29 +117,14 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) /* This function will get called to fix up the GOT entry indicated by the offset on the stack, and then jump to the resolved address. */ - if (HAS_ARCH_FEATURE (AVX512F_Usable)) - { - if (HAS_ARCH_FEATURE (Use_dl_runtime_resolve_opt)) - *(ElfW(Addr) *) (got + 2) - = (ElfW(Addr)) &_dl_runtime_resolve_avx512_opt; - else - *(ElfW(Addr) *) (got + 2) - = (ElfW(Addr)) &_dl_runtime_resolve_avx512; - } - else if (HAS_ARCH_FEATURE (AVX_Usable)) - { - if (HAS_ARCH_FEATURE (Use_dl_runtime_resolve_opt)) - *(ElfW(Addr) *) (got + 2) - = (ElfW(Addr)) &_dl_runtime_resolve_avx_opt; - else if (HAS_ARCH_FEATURE (Use_dl_runtime_resolve_slow)) - *(ElfW(Addr) *) (got + 2) - = (ElfW(Addr)) &_dl_runtime_resolve_avx_slow; - else - *(ElfW(Addr) *) (got + 2) - = (ElfW(Addr)) &_dl_runtime_resolve_avx; - } + if (GLRO(dl_x86_cpu_features).xsave_state_size != 0) + *(ElfW(Addr) *) (got + 2) + = (HAS_ARCH_FEATURE (XSAVEC_Usable) + ? (ElfW(Addr)) &_dl_runtime_resolve_xsavec + : (ElfW(Addr)) &_dl_runtime_resolve_xsave); else - *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_resolve_sse; + *(ElfW(Addr) *) (got + 2) + = (ElfW(Addr)) &_dl_runtime_resolve_fxsave; } } diff --git a/sysdeps/x86_64/dl-tls.c b/sysdeps/x86_64/dl-tls.c new file mode 100644 index 0000000000..3584805c8e --- /dev/null +++ b/sysdeps/x86_64/dl-tls.c @@ -0,0 +1,53 @@ +/* Thread-local storage handling in the ELF dynamic linker. x86-64 version. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifdef SHARED +/* Work around GCC PR58066, due to which __tls_get_addr may be called + with an unaligned stack. The compat implementation is in + tls_get_addr-compat.S. */ + +# include <dl-tls.h> + +/* Define __tls_get_addr within elf/dl-tls.c under a different + name. */ +extern __typeof__ (__tls_get_addr) ___tls_get_addr; + +# define __tls_get_addr ___tls_get_addr +# include <elf/dl-tls.c> +# undef __tls_get_addr + +hidden_ver (___tls_get_addr, __tls_get_addr) + +/* Only handle slow paths for __tls_get_addr. */ +attribute_hidden +void * +__tls_get_addr_slow (GET_ADDR_ARGS) +{ + dtv_t *dtv = THREAD_DTV (); + + if (__glibc_unlikely (dtv[0].counter != GL(dl_tls_generation))) + return update_get_addr (GET_ADDR_PARAM); + + return tls_get_addr_tail (GET_ADDR_PARAM, dtv, NULL); +} +#else + +/* No compatibility symbol needed. */ +# include <elf/dl-tls.c> + +#endif diff --git a/sysdeps/x86_64/dl-tls.h b/sysdeps/x86_64/dl-tls.h index cf6c107f54..fa5bf6cd93 100644 --- a/sysdeps/x86_64/dl-tls.h +++ b/sysdeps/x86_64/dl-tls.h @@ -16,6 +16,9 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ +#ifndef _X86_64_DL_TLS_H +#define _X86_64_DL_TLS_H + #include <stdint.h> /* Type used for the representation of TLS information in the GOT. */ @@ -27,3 +30,5 @@ typedef struct dl_tls_index extern void *__tls_get_addr (tls_index *ti); + +#endif /* _X86_64_DL_TLS_H */ diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S index 50b23633e3..b4cda0f535 100644 --- a/sysdeps/x86_64/dl-trampoline.S +++ b/sysdeps/x86_64/dl-trampoline.S @@ -34,41 +34,24 @@ # define DL_STACK_ALIGNMENT 8 #endif -#ifndef DL_RUNTIME_UNALIGNED_VEC_SIZE -/* The maximum size in bytes of unaligned vector load and store in the - dynamic linker. Since SSE optimized memory/string functions with - aligned SSE register load and store are used in the dynamic linker, - we must set this to 8 so that _dl_runtime_resolve_sse will align the - stack before calling _dl_fixup. */ -# define DL_RUNTIME_UNALIGNED_VEC_SIZE 8 -#endif - -/* True if _dl_runtime_resolve should align stack to VEC_SIZE bytes. */ +/* True if _dl_runtime_resolve should align stack for STATE_SAVE or align + stack to 16 bytes before calling _dl_fixup. */ #define DL_RUNTIME_RESOLVE_REALIGN_STACK \ - (VEC_SIZE > DL_STACK_ALIGNMENT \ - && VEC_SIZE > DL_RUNTIME_UNALIGNED_VEC_SIZE) - -/* Align vector register save area to 16 bytes. */ -#define REGISTER_SAVE_VEC_OFF 0 + (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \ + || 16 > DL_STACK_ALIGNMENT) /* Area on stack to save and restore registers used for parameter passing when calling _dl_fixup. */ #ifdef __ILP32__ -# define REGISTER_SAVE_RAX (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 8) # define PRESERVE_BND_REGS_PREFIX #else -/* Align bound register save area to 16 bytes. */ -# define REGISTER_SAVE_BND0 (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 8) -# define REGISTER_SAVE_BND1 (REGISTER_SAVE_BND0 + 16) -# define REGISTER_SAVE_BND2 (REGISTER_SAVE_BND1 + 16) -# define REGISTER_SAVE_BND3 (REGISTER_SAVE_BND2 + 16) -# define REGISTER_SAVE_RAX (REGISTER_SAVE_BND3 + 16) # ifdef HAVE_MPX_SUPPORT # define PRESERVE_BND_REGS_PREFIX bnd # else # define PRESERVE_BND_REGS_PREFIX .byte 0xf2 # endif #endif +#define REGISTER_SAVE_RAX 0 #define REGISTER_SAVE_RCX (REGISTER_SAVE_RAX + 8) #define REGISTER_SAVE_RDX (REGISTER_SAVE_RCX + 8) #define REGISTER_SAVE_RSI (REGISTER_SAVE_RDX + 8) @@ -80,68 +63,56 @@ #define VEC_SIZE 64 #define VMOVA vmovdqa64 -#if DL_RUNTIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT -# define VMOV vmovdqa64 -#else -# define VMOV vmovdqu64 -#endif #define VEC(i) zmm##i -#define _dl_runtime_resolve _dl_runtime_resolve_avx512 #define _dl_runtime_profile _dl_runtime_profile_avx512 #include "dl-trampoline.h" -#undef _dl_runtime_resolve #undef _dl_runtime_profile #undef VEC -#undef VMOV #undef VMOVA #undef VEC_SIZE #define VEC_SIZE 32 #define VMOVA vmovdqa -#if DL_RUNTIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT -# define VMOV vmovdqa -#else -# define VMOV vmovdqu -#endif #define VEC(i) ymm##i -#define _dl_runtime_resolve _dl_runtime_resolve_avx -#define _dl_runtime_resolve_opt _dl_runtime_resolve_avx_opt #define _dl_runtime_profile _dl_runtime_profile_avx #include "dl-trampoline.h" -#undef _dl_runtime_resolve -#undef _dl_runtime_resolve_opt #undef _dl_runtime_profile #undef VEC -#undef VMOV #undef VMOVA #undef VEC_SIZE /* movaps/movups is 1-byte shorter. */ #define VEC_SIZE 16 #define VMOVA movaps -#if DL_RUNTIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT -# define VMOV movaps -#else -# define VMOV movups -#endif #define VEC(i) xmm##i -#define _dl_runtime_resolve _dl_runtime_resolve_sse #define _dl_runtime_profile _dl_runtime_profile_sse #undef RESTORE_AVX #include "dl-trampoline.h" -#undef _dl_runtime_resolve #undef _dl_runtime_profile -#undef VMOV +#undef VEC #undef VMOVA +#undef VEC_SIZE -/* Used by _dl_runtime_resolve_avx_opt/_dl_runtime_resolve_avx512_opt - to preserve the full vector registers with zero upper bits. */ -#define VMOVA vmovdqa -#if DL_RUNTIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT -# define VMOV vmovdqa -#else -# define VMOV vmovdqu -#endif -#define _dl_runtime_resolve _dl_runtime_resolve_sse_vex -#define _dl_runtime_resolve_opt _dl_runtime_resolve_avx512_opt +#define USE_FXSAVE +#define STATE_SAVE_ALIGNMENT 16 +#define _dl_runtime_resolve _dl_runtime_resolve_fxsave +#include "dl-trampoline.h" +#undef _dl_runtime_resolve +#undef USE_FXSAVE +#undef STATE_SAVE_ALIGNMENT + +#define USE_XSAVE +#define STATE_SAVE_ALIGNMENT 64 +#define _dl_runtime_resolve _dl_runtime_resolve_xsave +#include "dl-trampoline.h" +#undef _dl_runtime_resolve +#undef USE_XSAVE +#undef STATE_SAVE_ALIGNMENT + +#define USE_XSAVEC +#define STATE_SAVE_ALIGNMENT 64 +#define _dl_runtime_resolve _dl_runtime_resolve_xsavec #include "dl-trampoline.h" +#undef _dl_runtime_resolve +#undef USE_XSAVEC +#undef STATE_SAVE_ALIGNMENT diff --git a/sysdeps/x86_64/dl-trampoline.h b/sysdeps/x86_64/dl-trampoline.h index 32ad3af202..b9c2f1796f 100644 --- a/sysdeps/x86_64/dl-trampoline.h +++ b/sysdeps/x86_64/dl-trampoline.h @@ -16,140 +16,47 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#undef REGISTER_SAVE_AREA_RAW -#ifdef __ILP32__ -/* X32 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as VEC0 to - VEC7. */ -# define REGISTER_SAVE_AREA_RAW (8 * 7 + VEC_SIZE * 8) -#else -/* X86-64 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as - BND0, BND1, BND2, BND3 and VEC0 to VEC7. */ -# define REGISTER_SAVE_AREA_RAW (8 * 7 + 16 * 4 + VEC_SIZE * 8) -#endif + .text +#ifdef _dl_runtime_resolve -#undef REGISTER_SAVE_AREA -#undef LOCAL_STORAGE_AREA -#undef BASE -#if DL_RUNTIME_RESOLVE_REALIGN_STACK -# define REGISTER_SAVE_AREA (REGISTER_SAVE_AREA_RAW + 8) -/* Local stack area before jumping to function address: RBX. */ -# define LOCAL_STORAGE_AREA 8 -# define BASE rbx -# if (REGISTER_SAVE_AREA % VEC_SIZE) != 0 -# error REGISTER_SAVE_AREA must be multples of VEC_SIZE -# endif -#else -# define REGISTER_SAVE_AREA REGISTER_SAVE_AREA_RAW -/* Local stack area before jumping to function address: All saved - registers. */ -# define LOCAL_STORAGE_AREA REGISTER_SAVE_AREA -# define BASE rsp -# if (REGISTER_SAVE_AREA % 16) != 8 -# error REGISTER_SAVE_AREA must be odd multples of 8 +# undef REGISTER_SAVE_AREA +# undef LOCAL_STORAGE_AREA +# undef BASE + +# if (STATE_SAVE_ALIGNMENT % 16) != 0 +# error STATE_SAVE_ALIGNMENT must be multples of 16 # endif -#endif - .text -#ifdef _dl_runtime_resolve_opt -/* Use the smallest vector registers to preserve the full YMM/ZMM - registers to avoid SSE transition penalty. */ - -# if VEC_SIZE == 32 -/* Check if the upper 128 bits in %ymm0 - %ymm7 registers are non-zero - and preserve %xmm0 - %xmm7 registers with the zero upper bits. Since - there is no SSE transition penalty on AVX512 processors which don't - support XGETBV with ECX == 1, _dl_runtime_resolve_avx512_slow isn't - provided. */ - .globl _dl_runtime_resolve_avx_slow - .hidden _dl_runtime_resolve_avx_slow - .type _dl_runtime_resolve_avx_slow, @function - .align 16 -_dl_runtime_resolve_avx_slow: - cfi_startproc - cfi_adjust_cfa_offset(16) # Incorporate PLT - vorpd %ymm0, %ymm1, %ymm8 - vorpd %ymm2, %ymm3, %ymm9 - vorpd %ymm4, %ymm5, %ymm10 - vorpd %ymm6, %ymm7, %ymm11 - vorpd %ymm8, %ymm9, %ymm9 - vorpd %ymm10, %ymm11, %ymm10 - vpcmpeqd %xmm8, %xmm8, %xmm8 - vorpd %ymm9, %ymm10, %ymm10 - vptest %ymm10, %ymm8 - # Preserve %ymm0 - %ymm7 registers if the upper 128 bits of any - # %ymm0 - %ymm7 registers aren't zero. - PRESERVE_BND_REGS_PREFIX - jnc _dl_runtime_resolve_avx - # Use vzeroupper to avoid SSE transition penalty. - vzeroupper - # Preserve %xmm0 - %xmm7 registers with the zero upper 128 bits - # when the upper 128 bits of %ymm0 - %ymm7 registers are zero. - PRESERVE_BND_REGS_PREFIX - jmp _dl_runtime_resolve_sse_vex - cfi_adjust_cfa_offset(-16) # Restore PLT adjustment - cfi_endproc - .size _dl_runtime_resolve_avx_slow, .-_dl_runtime_resolve_avx_slow +# if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0 +# error STATE_SAVE_OFFSET must be multples of STATE_SAVE_ALIGNMENT # endif -/* Use XGETBV with ECX == 1 to check which bits in vector registers are - non-zero and only preserve the non-zero lower bits with zero upper - bits. */ - .globl _dl_runtime_resolve_opt - .hidden _dl_runtime_resolve_opt - .type _dl_runtime_resolve_opt, @function - .align 16 -_dl_runtime_resolve_opt: - cfi_startproc - cfi_adjust_cfa_offset(16) # Incorporate PLT - pushq %rax - cfi_adjust_cfa_offset(8) - cfi_rel_offset(%rax, 0) - pushq %rcx - cfi_adjust_cfa_offset(8) - cfi_rel_offset(%rcx, 0) - pushq %rdx - cfi_adjust_cfa_offset(8) - cfi_rel_offset(%rdx, 0) - movl $1, %ecx - xgetbv - movl %eax, %r11d - popq %rdx - cfi_adjust_cfa_offset(-8) - cfi_restore (%rdx) - popq %rcx - cfi_adjust_cfa_offset(-8) - cfi_restore (%rcx) - popq %rax - cfi_adjust_cfa_offset(-8) - cfi_restore (%rax) -# if VEC_SIZE == 32 - # For YMM registers, check if YMM state is in use. - andl $bit_YMM_state, %r11d - # Preserve %xmm0 - %xmm7 registers with the zero upper 128 bits if - # YMM state isn't in use. - PRESERVE_BND_REGS_PREFIX - jz _dl_runtime_resolve_sse_vex -# elif VEC_SIZE == 16 - # For ZMM registers, check if YMM state and ZMM state are in - # use. - andl $(bit_YMM_state | bit_ZMM0_15_state), %r11d - cmpl $bit_YMM_state, %r11d - # Preserve %zmm0 - %zmm7 registers if ZMM state is in use. - PRESERVE_BND_REGS_PREFIX - jg _dl_runtime_resolve_avx512 - # Preserve %ymm0 - %ymm7 registers with the zero upper 256 bits if - # ZMM state isn't in use. - PRESERVE_BND_REGS_PREFIX - je _dl_runtime_resolve_avx - # Preserve %xmm0 - %xmm7 registers with the zero upper 384 bits if - # neither YMM state nor ZMM state are in use. +# if DL_RUNTIME_RESOLVE_REALIGN_STACK +/* Local stack area before jumping to function address: RBX. */ +# define LOCAL_STORAGE_AREA 8 +# define BASE rbx +# ifdef USE_FXSAVE +/* Use fxsave to save XMM registers. */ +# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET) +# if (REGISTER_SAVE_AREA % 16) != 0 +# error REGISTER_SAVE_AREA must be multples of 16 +# endif +# endif # else -# error Unsupported VEC_SIZE! +# ifndef USE_FXSAVE +# error USE_FXSAVE must be defined +# endif +/* Use fxsave to save XMM registers. */ +# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET + 8) +/* Local stack area before jumping to function address: All saved + registers. */ +# define LOCAL_STORAGE_AREA REGISTER_SAVE_AREA +# define BASE rsp +# if (REGISTER_SAVE_AREA % 16) != 8 +# error REGISTER_SAVE_AREA must be odd multples of 8 +# endif # endif - cfi_adjust_cfa_offset(-16) # Restore PLT adjustment - cfi_endproc - .size _dl_runtime_resolve_opt, .-_dl_runtime_resolve_opt -#endif + .globl _dl_runtime_resolve .hidden _dl_runtime_resolve .type _dl_runtime_resolve, @function @@ -157,19 +64,30 @@ _dl_runtime_resolve_opt: cfi_startproc _dl_runtime_resolve: cfi_adjust_cfa_offset(16) # Incorporate PLT -#if DL_RUNTIME_RESOLVE_REALIGN_STACK -# if LOCAL_STORAGE_AREA != 8 -# error LOCAL_STORAGE_AREA must be 8 -# endif +# if DL_RUNTIME_RESOLVE_REALIGN_STACK +# if LOCAL_STORAGE_AREA != 8 +# error LOCAL_STORAGE_AREA must be 8 +# endif pushq %rbx # push subtracts stack by 8. cfi_adjust_cfa_offset(8) cfi_rel_offset(%rbx, 0) mov %RSP_LP, %RBX_LP cfi_def_cfa_register(%rbx) - and $-VEC_SIZE, %RSP_LP -#endif + and $-STATE_SAVE_ALIGNMENT, %RSP_LP +# endif +# ifdef REGISTER_SAVE_AREA sub $REGISTER_SAVE_AREA, %RSP_LP +# if !DL_RUNTIME_RESOLVE_REALIGN_STACK cfi_adjust_cfa_offset(REGISTER_SAVE_AREA) +# endif +# else + # Allocate stack space of the required size to save the state. +# if IS_IN (rtld) + sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP +# else + sub _dl_x86_cpu_features+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP +# endif +# endif # Preserve registers otherwise clobbered. movq %rax, REGISTER_SAVE_RAX(%rsp) movq %rcx, REGISTER_SAVE_RCX(%rsp) @@ -178,59 +96,42 @@ _dl_runtime_resolve: movq %rdi, REGISTER_SAVE_RDI(%rsp) movq %r8, REGISTER_SAVE_R8(%rsp) movq %r9, REGISTER_SAVE_R9(%rsp) - VMOV %VEC(0), (REGISTER_SAVE_VEC_OFF)(%rsp) - VMOV %VEC(1), (REGISTER_SAVE_VEC_OFF + VEC_SIZE)(%rsp) - VMOV %VEC(2), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 2)(%rsp) - VMOV %VEC(3), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 3)(%rsp) - VMOV %VEC(4), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 4)(%rsp) - VMOV %VEC(5), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 5)(%rsp) - VMOV %VEC(6), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 6)(%rsp) - VMOV %VEC(7), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 7)(%rsp) -#ifndef __ILP32__ - # We also have to preserve bound registers. These are nops if - # Intel MPX isn't available or disabled. -# ifdef HAVE_MPX_SUPPORT - bndmov %bnd0, REGISTER_SAVE_BND0(%rsp) - bndmov %bnd1, REGISTER_SAVE_BND1(%rsp) - bndmov %bnd2, REGISTER_SAVE_BND2(%rsp) - bndmov %bnd3, REGISTER_SAVE_BND3(%rsp) +# ifdef USE_FXSAVE + fxsave STATE_SAVE_OFFSET(%rsp) # else -# if REGISTER_SAVE_BND0 == 0 - .byte 0x66,0x0f,0x1b,0x04,0x24 + movl $STATE_SAVE_MASK, %eax + xorl %edx, %edx + # Clear the XSAVE Header. +# ifdef USE_XSAVE + movq %rdx, (STATE_SAVE_OFFSET + 512)(%rsp) + movq %rdx, (STATE_SAVE_OFFSET + 512 + 8)(%rsp) +# endif + movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 2)(%rsp) + movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 3)(%rsp) + movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 4)(%rsp) + movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 5)(%rsp) + movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 6)(%rsp) + movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 7)(%rsp) +# ifdef USE_XSAVE + xsave STATE_SAVE_OFFSET(%rsp) # else - .byte 0x66,0x0f,0x1b,0x44,0x24,REGISTER_SAVE_BND0 + xsavec STATE_SAVE_OFFSET(%rsp) # endif - .byte 0x66,0x0f,0x1b,0x4c,0x24,REGISTER_SAVE_BND1 - .byte 0x66,0x0f,0x1b,0x54,0x24,REGISTER_SAVE_BND2 - .byte 0x66,0x0f,0x1b,0x5c,0x24,REGISTER_SAVE_BND3 # endif -#endif # Copy args pushed by PLT in register. # %rdi: link_map, %rsi: reloc_index mov (LOCAL_STORAGE_AREA + 8)(%BASE), %RSI_LP mov LOCAL_STORAGE_AREA(%BASE), %RDI_LP call _dl_fixup # Call resolver. mov %RAX_LP, %R11_LP # Save return value -#ifndef __ILP32__ - # Restore bound registers. These are nops if Intel MPX isn't - # avaiable or disabled. -# ifdef HAVE_MPX_SUPPORT - bndmov REGISTER_SAVE_BND3(%rsp), %bnd3 - bndmov REGISTER_SAVE_BND2(%rsp), %bnd2 - bndmov REGISTER_SAVE_BND1(%rsp), %bnd1 - bndmov REGISTER_SAVE_BND0(%rsp), %bnd0 + # Get register content back. +# ifdef USE_FXSAVE + fxrstor STATE_SAVE_OFFSET(%rsp) # else - .byte 0x66,0x0f,0x1a,0x5c,0x24,REGISTER_SAVE_BND3 - .byte 0x66,0x0f,0x1a,0x54,0x24,REGISTER_SAVE_BND2 - .byte 0x66,0x0f,0x1a,0x4c,0x24,REGISTER_SAVE_BND1 -# if REGISTER_SAVE_BND0 == 0 - .byte 0x66,0x0f,0x1a,0x04,0x24 -# else - .byte 0x66,0x0f,0x1a,0x44,0x24,REGISTER_SAVE_BND0 -# endif + movl $STATE_SAVE_MASK, %eax + xorl %edx, %edx + xrstor STATE_SAVE_OFFSET(%rsp) # endif -#endif - # Get register content back. movq REGISTER_SAVE_R9(%rsp), %r9 movq REGISTER_SAVE_R8(%rsp), %r8 movq REGISTER_SAVE_RDI(%rsp), %rdi @@ -238,20 +139,12 @@ _dl_runtime_resolve: movq REGISTER_SAVE_RDX(%rsp), %rdx movq REGISTER_SAVE_RCX(%rsp), %rcx movq REGISTER_SAVE_RAX(%rsp), %rax - VMOV (REGISTER_SAVE_VEC_OFF)(%rsp), %VEC(0) - VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE)(%rsp), %VEC(1) - VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 2)(%rsp), %VEC(2) - VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 3)(%rsp), %VEC(3) - VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 4)(%rsp), %VEC(4) - VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 5)(%rsp), %VEC(5) - VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 6)(%rsp), %VEC(6) - VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 7)(%rsp), %VEC(7) -#if DL_RUNTIME_RESOLVE_REALIGN_STACK +# if DL_RUNTIME_RESOLVE_REALIGN_STACK mov %RBX_LP, %RSP_LP cfi_def_cfa_register(%rsp) movq (%rsp), %rbx cfi_restore(%rbx) -#endif +# endif # Adjust stack(PLT did 2 pushes) add $(LOCAL_STORAGE_AREA + 16), %RSP_LP cfi_adjust_cfa_offset(-(LOCAL_STORAGE_AREA + 16)) @@ -260,11 +153,9 @@ _dl_runtime_resolve: jmp *%r11 # Jump to function address. cfi_endproc .size _dl_runtime_resolve, .-_dl_runtime_resolve +#endif -/* To preserve %xmm0 - %xmm7 registers, dl-trampoline.h is included - twice, for _dl_runtime_resolve_sse and _dl_runtime_resolve_sse_vex. - But we don't need another _dl_runtime_profile for XMM registers. */ #if !defined PROF && defined _dl_runtime_profile # if (LR_VECTOR_OFFSET % VEC_SIZE) != 0 # error LR_VECTOR_OFFSET must be multples of VEC_SIZE diff --git a/sysdeps/x86_64/rtld-offsets.sym b/sysdeps/x86_64/rtld-offsets.sym new file mode 100644 index 0000000000..fd41b51521 --- /dev/null +++ b/sysdeps/x86_64/rtld-offsets.sym @@ -0,0 +1,6 @@ +#define SHARED +#include <ldsodefs.h> + +-- + +GL_TLS_GENERATION_OFFSET offsetof (struct rtld_global, _dl_tls_generation) diff --git a/sysdeps/x86_64/tls_get_addr.S b/sysdeps/x86_64/tls_get_addr.S new file mode 100644 index 0000000000..9d38fb3be5 --- /dev/null +++ b/sysdeps/x86_64/tls_get_addr.S @@ -0,0 +1,61 @@ +/* Stack-aligning implementation of __tls_get_addr. x86-64 version. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifdef SHARED + +# include <sysdep.h> +# include "tlsdesc.h" +# include "rtld-offsets.h" + +/* See __tls_get_addr and __tls_get_addr_slow in dl-tls.c. This function + call __tls_get_addr_slow on both slow paths. It realigns the stack + before the call to work around GCC PR58066. */ + +ENTRY (__tls_get_addr) + mov %fs:DTV_OFFSET, %RDX_LP + mov GL_TLS_GENERATION_OFFSET+_rtld_local(%rip), %RAX_LP + /* GL(dl_tls_generation) == dtv[0].counter */ + cmp %RAX_LP, (%rdx) + jne 1f + mov TI_MODULE_OFFSET(%rdi), %RAX_LP + /* dtv[ti->ti_module] */ +# ifdef __LP64__ + salq $4, %rax + movq (%rdx,%rax), %rax +# else + movl (%rdx,%rax, 8), %eax +# endif + cmp $-1, %RAX_LP + je 1f + add TI_OFFSET_OFFSET(%rdi), %RAX_LP + ret +1: + /* On the slow path, align the stack. */ + pushq %rbp + cfi_def_cfa_offset (16) + cfi_offset (%rbp, -16) + mov %RSP_LP, %RBP_LP + cfi_def_cfa_register (%rbp) + and $-16, %RSP_LP + call __tls_get_addr_slow + mov %RBP_LP, %RSP_LP + popq %rbp + cfi_def_cfa (%rsp, 8) + ret +END (__tls_get_addr) +#endif /* SHARED */ diff --git a/sysdeps/x86_64/tlsdesc.sym b/sysdeps/x86_64/tlsdesc.sym index 33854975d0..fc897ab4b5 100644 --- a/sysdeps/x86_64/tlsdesc.sym +++ b/sysdeps/x86_64/tlsdesc.sym @@ -15,3 +15,6 @@ TLSDESC_ARG offsetof(struct tlsdesc, arg) TLSDESC_GEN_COUNT offsetof(struct tlsdesc_dynamic_arg, gen_count) TLSDESC_MODID offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module) TLSDESC_MODOFF offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset) + +TI_MODULE_OFFSET offsetof(tls_index, ti_module) +TI_OFFSET_OFFSET offsetof(tls_index, ti_offset) diff --git a/sysdeps/x86_64/tst-avx-aux.c b/sysdeps/x86_64/tst-avx-aux.c new file mode 100644 index 0000000000..e3807de7bb --- /dev/null +++ b/sysdeps/x86_64/tst-avx-aux.c @@ -0,0 +1,47 @@ +/* Test case for preserved AVX registers in dynamic linker, -mavx part. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <immintrin.h> +#include <stdlib.h> +#include <string.h> + +int +tst_avx_aux (void) +{ +#ifdef __AVX__ + extern __m256i avx_test (__m256i, __m256i, __m256i, __m256i, + __m256i, __m256i, __m256i, __m256i); + + __m256i ymm0 = _mm256_set1_epi32 (0); + __m256i ymm1 = _mm256_set1_epi32 (1); + __m256i ymm2 = _mm256_set1_epi32 (2); + __m256i ymm3 = _mm256_set1_epi32 (3); + __m256i ymm4 = _mm256_set1_epi32 (4); + __m256i ymm5 = _mm256_set1_epi32 (5); + __m256i ymm6 = _mm256_set1_epi32 (6); + __m256i ymm7 = _mm256_set1_epi32 (7); + __m256i ret = avx_test (ymm0, ymm1, ymm2, ymm3, + ymm4, ymm5, ymm6, ymm7); + ymm0 = _mm256_set1_epi32 (0x12349876); + if (memcmp (&ymm0, &ret, sizeof (ret))) + abort (); + return 0; +#else /* __AVX__ */ + return 77; +#endif /* __AVX__ */ +} diff --git a/sysdeps/x86_64/tst-avx.c b/sysdeps/x86_64/tst-avx.c new file mode 100644 index 0000000000..ec2e3a79ff --- /dev/null +++ b/sysdeps/x86_64/tst-avx.c @@ -0,0 +1,49 @@ +/* Test case for preserved AVX registers in dynamic linker. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <cpuid.h> + +int tst_avx_aux (void); + +static int +avx_enabled (void) +{ + unsigned int eax, ebx, ecx, edx; + + if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0 + || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE)) + return 0; + + /* Check the OS has AVX and SSE saving enabled. */ + asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0)); + + return (eax & 6) == 6; +} + +static int +do_test (void) +{ + /* Run AVX test only if AVX is supported. */ + if (avx_enabled ()) + return tst_avx_aux (); + else + return 77; +} + +#define TEST_FUNCTION do_test () +#include "../../test-skeleton.c" diff --git a/sysdeps/x86_64/tst-avx512-aux.c b/sysdeps/x86_64/tst-avx512-aux.c new file mode 100644 index 0000000000..6cebc523f2 --- /dev/null +++ b/sysdeps/x86_64/tst-avx512-aux.c @@ -0,0 +1,48 @@ +/* Test case for preserved AVX512 registers in dynamic linker, + -mavx512 part. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <immintrin.h> +#include <stdlib.h> +#include <string.h> + +int +tst_avx512_aux (void) +{ +#ifdef __AVX512F__ + extern __m512i avx512_test (__m512i, __m512i, __m512i, __m512i, + __m512i, __m512i, __m512i, __m512i); + + __m512i zmm0 = _mm512_set1_epi32 (0); + __m512i zmm1 = _mm512_set1_epi32 (1); + __m512i zmm2 = _mm512_set1_epi32 (2); + __m512i zmm3 = _mm512_set1_epi32 (3); + __m512i zmm4 = _mm512_set1_epi32 (4); + __m512i zmm5 = _mm512_set1_epi32 (5); + __m512i zmm6 = _mm512_set1_epi32 (6); + __m512i zmm7 = _mm512_set1_epi32 (7); + __m512i ret = avx512_test (zmm0, zmm1, zmm2, zmm3, + zmm4, zmm5, zmm6, zmm7); + zmm0 = _mm512_set1_epi32 (0x12349876); + if (memcmp (&zmm0, &ret, sizeof (ret))) + abort (); + return 0; +#else /* __AVX512F__ */ + return 77; +#endif /* __AVX512F__ */ +} diff --git a/sysdeps/x86_64/tst-avx512.c b/sysdeps/x86_64/tst-avx512.c new file mode 100644 index 0000000000..a8e42ef553 --- /dev/null +++ b/sysdeps/x86_64/tst-avx512.c @@ -0,0 +1,57 @@ +/* Test case for preserved AVX512 registers in dynamic linker. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <cpuid.h> + +int tst_avx512_aux (void); + +static int +avx512_enabled (void) +{ +#ifdef bit_AVX512F + unsigned int eax, ebx, ecx, edx; + + if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0 + || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE)) + return 0; + + __cpuid_count (7, 0, eax, ebx, ecx, edx); + if (!(ebx & bit_AVX512F)) + return 0; + + asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0)); + + /* Verify that ZMM, YMM and XMM states are enabled. */ + return (eax & 0xe6) == 0xe6; +#else + return 0; +#endif +} + +static int +do_test (void) +{ + /* Run AVX512 test only if AVX512 is supported. */ + if (avx512_enabled ()) + return tst_avx512_aux (); + else + return 77; +} + +#define TEST_FUNCTION do_test () +#include "../../test-skeleton.c" diff --git a/sysdeps/x86_64/tst-avx512mod.c b/sysdeps/x86_64/tst-avx512mod.c new file mode 100644 index 0000000000..4cfb3a2c3d --- /dev/null +++ b/sysdeps/x86_64/tst-avx512mod.c @@ -0,0 +1,48 @@ +/* Test case for x86-64 preserved AVX512 registers in dynamic linker. */ + +#ifdef __AVX512F__ +#include <stdlib.h> +#include <string.h> +#include <immintrin.h> + +__m512i +avx512_test (__m512i x0, __m512i x1, __m512i x2, __m512i x3, + __m512i x4, __m512i x5, __m512i x6, __m512i x7) +{ + __m512i zmm; + + zmm = _mm512_set1_epi32 (0); + if (memcmp (&zmm, &x0, sizeof (zmm))) + abort (); + + zmm = _mm512_set1_epi32 (1); + if (memcmp (&zmm, &x1, sizeof (zmm))) + abort (); + + zmm = _mm512_set1_epi32 (2); + if (memcmp (&zmm, &x2, sizeof (zmm))) + abort (); + + zmm = _mm512_set1_epi32 (3); + if (memcmp (&zmm, &x3, sizeof (zmm))) + abort (); + + zmm = _mm512_set1_epi32 (4); + if (memcmp (&zmm, &x4, sizeof (zmm))) + abort (); + + zmm = _mm512_set1_epi32 (5); + if (memcmp (&zmm, &x5, sizeof (zmm))) + abort (); + + zmm = _mm512_set1_epi32 (6); + if (memcmp (&zmm, &x6, sizeof (zmm))) + abort (); + + zmm = _mm512_set1_epi32 (7); + if (memcmp (&zmm, &x7, sizeof (zmm))) + abort (); + + return _mm512_set1_epi32 (0x12349876); +} +#endif diff --git a/sysdeps/x86_64/tst-avxmod.c b/sysdeps/x86_64/tst-avxmod.c new file mode 100644 index 0000000000..6e5b154997 --- /dev/null +++ b/sysdeps/x86_64/tst-avxmod.c @@ -0,0 +1,48 @@ +/* Test case for x86-64 preserved AVX registers in dynamic linker. */ + +#ifdef __AVX__ +#include <stdlib.h> +#include <string.h> +#include <immintrin.h> + +__m256i +avx_test (__m256i x0, __m256i x1, __m256i x2, __m256i x3, + __m256i x4, __m256i x5, __m256i x6, __m256i x7) +{ + __m256i ymm; + + ymm = _mm256_set1_epi32 (0); + if (memcmp (&ymm, &x0, sizeof (ymm))) + abort (); + + ymm = _mm256_set1_epi32 (1); + if (memcmp (&ymm, &x1, sizeof (ymm))) + abort (); + + ymm = _mm256_set1_epi32 (2); + if (memcmp (&ymm, &x2, sizeof (ymm))) + abort (); + + ymm = _mm256_set1_epi32 (3); + if (memcmp (&ymm, &x3, sizeof (ymm))) + abort (); + + ymm = _mm256_set1_epi32 (4); + if (memcmp (&ymm, &x4, sizeof (ymm))) + abort (); + + ymm = _mm256_set1_epi32 (5); + if (memcmp (&ymm, &x5, sizeof (ymm))) + abort (); + + ymm = _mm256_set1_epi32 (6); + if (memcmp (&ymm, &x6, sizeof (ymm))) + abort (); + + ymm = _mm256_set1_epi32 (7); + if (memcmp (&ymm, &x7, sizeof (ymm))) + abort (); + + return _mm256_set1_epi32 (0x12349876); +} +#endif diff --git a/sysdeps/x86_64/tst-sse.c b/sysdeps/x86_64/tst-sse.c new file mode 100644 index 0000000000..dd1537cf27 --- /dev/null +++ b/sysdeps/x86_64/tst-sse.c @@ -0,0 +1,46 @@ +/* Test case for preserved SSE registers in dynamic linker. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <immintrin.h> +#include <stdlib.h> +#include <string.h> + +extern __m128i sse_test (__m128i, __m128i, __m128i, __m128i, + __m128i, __m128i, __m128i, __m128i); + +static int +do_test (void) +{ + __m128i xmm0 = _mm_set1_epi32 (0); + __m128i xmm1 = _mm_set1_epi32 (1); + __m128i xmm2 = _mm_set1_epi32 (2); + __m128i xmm3 = _mm_set1_epi32 (3); + __m128i xmm4 = _mm_set1_epi32 (4); + __m128i xmm5 = _mm_set1_epi32 (5); + __m128i xmm6 = _mm_set1_epi32 (6); + __m128i xmm7 = _mm_set1_epi32 (7); + __m128i ret = sse_test (xmm0, xmm1, xmm2, xmm3, + xmm4, xmm5, xmm6, xmm7); + xmm0 = _mm_set1_epi32 (0x12349876); + if (memcmp (&xmm0, &ret, sizeof (ret))) + abort (); + return 0; +} + +#define TEST_FUNCTION do_test () +#include "../../test-skeleton.c" diff --git a/sysdeps/x86_64/tst-ssemod.c b/sysdeps/x86_64/tst-ssemod.c new file mode 100644 index 0000000000..907a64c69e --- /dev/null +++ b/sysdeps/x86_64/tst-ssemod.c @@ -0,0 +1,46 @@ +/* Test case for x86-64 preserved SSE registers in dynamic linker. */ + +#include <stdlib.h> +#include <string.h> +#include <immintrin.h> + +__m128i +sse_test (__m128i x0, __m128i x1, __m128i x2, __m128i x3, + __m128i x4, __m128i x5, __m128i x6, __m128i x7) +{ + __m128i xmm; + + xmm = _mm_set1_epi32 (0); + if (memcmp (&xmm, &x0, sizeof (xmm))) + abort (); + + xmm = _mm_set1_epi32 (1); + if (memcmp (&xmm, &x1, sizeof (xmm))) + abort (); + + xmm = _mm_set1_epi32 (2); + if (memcmp (&xmm, &x2, sizeof (xmm))) + abort (); + + xmm = _mm_set1_epi32 (3); + if (memcmp (&xmm, &x3, sizeof (xmm))) + abort (); + + xmm = _mm_set1_epi32 (4); + if (memcmp (&xmm, &x4, sizeof (xmm))) + abort (); + + xmm = _mm_set1_epi32 (5); + if (memcmp (&xmm, &x5, sizeof (xmm))) + abort (); + + xmm = _mm_set1_epi32 (6); + if (memcmp (&xmm, &x6, sizeof (xmm))) + abort (); + + xmm = _mm_set1_epi32 (7); + if (memcmp (&xmm, &x7, sizeof (xmm))) + abort (); + + return _mm_set1_epi32 (0x12349876); +} |