From 14fda2e2ef67727228e3b7e2dea174cac4ffd19d Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Fri, 31 Jul 2015 07:30:04 -0700 Subject: Add _dl_x86_cpu_features to rtld_global in ld.so This patch adds _dl_x86_cpu_features to rtld_global in x86 ld.so and initializes it early before __libc_start_main is called so that cpu_features is always available when it is used and we can avoid calling __init_cpu_features in IFUNC selectors. * sysdeps/i386/dl-machine.h: Include . (dl_platform_init): Call init_cpu_features. * sysdeps/i386/dl-procinfo.c (_dl_x86_cpu_features): New. * sysdeps/i386/i686/cacheinfo.c (DISABLE_PREFERRED_MEMORY_INSTRUCTION): Removed. * sysdeps/i386/i686/multiarch/Makefile (aux): Remove init-arch. * sysdeps/i386/i686/multiarch/Versions: Removed. * sysdeps/i386/i686/multiarch/ifunc-defines.sym (KIND_OFFSET): Removed. * sysdeps/i386/ldsodefs.h: Include . * sysdeps/unix/sysv/linux/x86/Makefile (libpthread-sysdep_routines): Remove init-arch. * sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c: Include instead of sysdeps/generic/dl-procinfo.c>. * sysdeps/x86/Makefile [$(subdir) == csu] (gen-as-const-headers): Add cpu-features-offsets.sym and rtld-global-offsets.sym. [$(subdir) == elf] (sysdep-dl-routines): Add dl-get-cpu-features. [$(subdir) == elf] (sysdep-rtld-routines): Likewise. [$(subdir) == elf] (sysdep_routines): Likewise. [$(subdir) == elf] (elide-routines.os): Likewise. [$(subdir) == elf] (tests): Add tst-get-cpu-features. [$(subdir) == elf] (tests-static): Add tst-get-cpu-features-static. * sysdeps/x86/Versions: New file. * sysdeps/x86/cpu-features-offsets.sym: Likewise. * sysdeps/x86/cpu-features.c: Likewise. * sysdeps/x86/cpu-features.h: Likewise. * sysdeps/x86/dl-get-cpu-features.c: Likewise. * sysdeps/x86/libc-start.c: Likewise. * sysdeps/x86/rtld-global-offsets.sym: Likewise. * sysdeps/x86/tst-get-cpu-features-static.c: Likewise. * sysdeps/x86/tst-get-cpu-features.c: Likewise. * sysdeps/x86_64/dl-procinfo.c: Likewise. * sysdeps/x86_64/cacheinfo.c (__cpuid_count): Removed. Assume USE_MULTIARCH is defined and don't check it. (is_intel): Replace __cpu_features with GLRO(dl_x86_cpu_features). (is_amd): Likewise. (max_cpuid): Likewise. (intel_check_word): Likewise. (__cache_sysconf): Don't call __init_cpu_features. (__x86_preferred_memory_instruction): Removed. (init_cacheinfo): Don't call __init_cpu_features. Replace __cpu_features with GLRO(dl_x86_cpu_features). * sysdeps/x86_64/dl-machine.h: . (dl_platform_init): Call init_cpu_features. * sysdeps/x86_64/ldsodefs.h: Include . * sysdeps/x86_64/multiarch/Makefile (aux): Remove init-arch. * sysdeps/x86_64/multiarch/Versions: Removed. * sysdeps/x86_64/multiarch/cacheinfo.c: Likewise. * sysdeps/x86_64/multiarch/init-arch.c: Likewise. * sysdeps/x86_64/multiarch/ifunc-defines.sym (KIND_OFFSET): Removed. * sysdeps/x86_64/multiarch/init-arch.h: Rewrite. --- sysdeps/i386/dl-machine.h | 3 + sysdeps/i386/dl-procinfo.c | 16 ++ sysdeps/i386/i686/cacheinfo.c | 1 - sysdeps/i386/i686/multiarch/Makefile | 1 - sysdeps/i386/i686/multiarch/Versions | 5 - sysdeps/i386/i686/multiarch/ifunc-defines.sym | 1 - sysdeps/i386/ldsodefs.h | 1 + sysdeps/unix/sysv/linux/x86/Makefile | 1 - sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c | 2 +- sysdeps/x86/Makefile | 14 ++ sysdeps/x86/Versions | 5 + sysdeps/x86/cpu-features-offsets.sym | 7 + sysdeps/x86/cpu-features.c | 202 ++++++++++++++++++++++ sysdeps/x86/cpu-features.h | 240 ++++++++++++++++++++++++++ sysdeps/x86/dl-get-cpu-features.c | 27 +++ sysdeps/x86/libc-start.c | 41 +++++ sysdeps/x86/rtld-global-offsets.sym | 7 + sysdeps/x86/tst-get-cpu-features-static.c | 1 + sysdeps/x86/tst-get-cpu-features.c | 31 ++++ sysdeps/x86_64/cacheinfo.c | 150 ++++------------ sysdeps/x86_64/dl-machine.h | 3 + sysdeps/x86_64/dl-procinfo.c | 57 ++++++ sysdeps/x86_64/ldsodefs.h | 1 + sysdeps/x86_64/multiarch/Makefile | 1 - sysdeps/x86_64/multiarch/Versions | 5 - sysdeps/x86_64/multiarch/cacheinfo.c | 2 - sysdeps/x86_64/multiarch/ifunc-defines.sym | 1 - sysdeps/x86_64/multiarch/init-arch.c | 206 ---------------------- sysdeps/x86_64/multiarch/init-arch.h | 177 +------------------ 29 files changed, 692 insertions(+), 517 deletions(-) delete mode 100644 sysdeps/i386/i686/multiarch/Versions create mode 100644 sysdeps/x86/Versions create mode 100644 sysdeps/x86/cpu-features-offsets.sym create mode 100644 sysdeps/x86/cpu-features.c create mode 100644 sysdeps/x86/cpu-features.h create mode 100644 sysdeps/x86/dl-get-cpu-features.c create mode 100644 sysdeps/x86/libc-start.c create mode 100644 sysdeps/x86/rtld-global-offsets.sym create mode 100644 sysdeps/x86/tst-get-cpu-features-static.c create mode 100644 sysdeps/x86/tst-get-cpu-features.c create mode 100644 sysdeps/x86_64/dl-procinfo.c delete mode 100644 sysdeps/x86_64/multiarch/Versions delete mode 100644 sysdeps/x86_64/multiarch/cacheinfo.c delete mode 100644 sysdeps/x86_64/multiarch/init-arch.c diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h index a8a90f1145..fe75d594ba 100644 --- a/sysdeps/i386/dl-machine.h +++ b/sysdeps/i386/dl-machine.h @@ -25,6 +25,7 @@ #include #include #include +#include /* Return nonzero iff ELF header is compatible with the running host. */ static inline int __attribute__ ((unused)) @@ -235,6 +236,8 @@ dl_platform_init (void) if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0') /* Avoid an empty string which would disturb us. */ GLRO(dl_platform) = NULL; + + init_cpu_features (&GLRO(dl_x86_cpu_features)); } static inline Elf32_Addr diff --git a/sysdeps/i386/dl-procinfo.c b/sysdeps/i386/dl-procinfo.c index b673b3c848..e95f3352aa 100644 --- a/sysdeps/i386/dl-procinfo.c +++ b/sysdeps/i386/dl-procinfo.c @@ -43,6 +43,22 @@ # define PROCINFO_CLASS #endif +#if !IS_IN (ldconfig) +# if !defined PROCINFO_DECL && defined SHARED + ._dl_x86_cpu_features +# else +PROCINFO_CLASS struct cpu_features _dl_x86_cpu_features +# endif +# ifndef PROCINFO_DECL += { } +# endif +# if !defined SHARED || defined PROCINFO_DECL +; +# else +, +# endif +#endif + #if !defined PROCINFO_DECL && defined SHARED ._dl_x86_cap_flags #else diff --git a/sysdeps/i386/i686/cacheinfo.c b/sysdeps/i386/i686/cacheinfo.c index 0f869df4d8..0b50c6d346 100644 --- a/sysdeps/i386/i686/cacheinfo.c +++ b/sysdeps/i386/i686/cacheinfo.c @@ -1,4 +1,3 @@ #define DISABLE_PREFETCHW -#define DISABLE_PREFERRED_MEMORY_INSTRUCTION #include diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile index 11ce4ba1e3..31bfd39bae 100644 --- a/sysdeps/i386/i686/multiarch/Makefile +++ b/sysdeps/i386/i686/multiarch/Makefile @@ -1,5 +1,4 @@ ifeq ($(subdir),csu) -aux += init-arch tests += test-multiarch gen-as-const-headers += ifunc-defines.sym endif diff --git a/sysdeps/i386/i686/multiarch/Versions b/sysdeps/i386/i686/multiarch/Versions deleted file mode 100644 index 59b185ac8d..0000000000 --- a/sysdeps/i386/i686/multiarch/Versions +++ /dev/null @@ -1,5 +0,0 @@ -libc { - GLIBC_PRIVATE { - __get_cpu_features; - } -} diff --git a/sysdeps/i386/i686/multiarch/ifunc-defines.sym b/sysdeps/i386/i686/multiarch/ifunc-defines.sym index eb1538abcc..96e9cfaf61 100644 --- a/sysdeps/i386/i686/multiarch/ifunc-defines.sym +++ b/sysdeps/i386/i686/multiarch/ifunc-defines.sym @@ -4,7 +4,6 @@ -- CPU_FEATURES_SIZE sizeof (struct cpu_features) -KIND_OFFSET offsetof (struct cpu_features, kind) CPUID_OFFSET offsetof (struct cpu_features, cpuid) CPUID_SIZE sizeof (struct cpuid_registers) CPUID_EAX_OFFSET offsetof (struct cpuid_registers, eax) diff --git a/sysdeps/i386/ldsodefs.h b/sysdeps/i386/ldsodefs.h index d80cf0185e..dae2d04ed6 100644 --- a/sysdeps/i386/ldsodefs.h +++ b/sysdeps/i386/ldsodefs.h @@ -20,6 +20,7 @@ #define _I386_LDSODEFS_H 1 #include +#include struct La_i86_regs; struct La_i86_retval; diff --git a/sysdeps/unix/sysv/linux/x86/Makefile b/sysdeps/unix/sysv/linux/x86/Makefile index d6be472612..9e6ec44b3a 100644 --- a/sysdeps/unix/sysv/linux/x86/Makefile +++ b/sysdeps/unix/sysv/linux/x86/Makefile @@ -15,7 +15,6 @@ sysdep_headers += sys/elf.h sys/perm.h sys/reg.h sys/vm86.h sys/debugreg.h sys/i endif ifeq ($(subdir),nptl) -libpthread-sysdep_routines += init-arch libpthread-sysdep_routines += elision-lock elision-unlock elision-timed \ elision-trylock endif diff --git a/sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c b/sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c index 8ac351ec97..a3c0c1931e 100644 --- a/sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c +++ b/sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c @@ -1,5 +1,5 @@ #if IS_IN (ldconfig) # include #else -# include +# include #endif diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile index 19f5eca741..74d2b17f8f 100644 --- a/sysdeps/x86/Makefile +++ b/sysdeps/x86/Makefile @@ -8,3 +8,17 @@ $(objpfx)tst-ld-sse-use.out: ../sysdeps/x86/tst-ld-sse-use.sh $(objpfx)ld.so $(BASH) $< $(objpfx) '$(NM)' '$(OBJDUMP)' '$(READELF)' > $@; \ $(evaluate-test) endif + +ifeq ($(subdir),csu) +gen-as-const-headers += cpu-features-offsets.sym rtld-global-offsets.sym +endif + +ifeq ($(subdir),elf) +sysdep-ld-routines += dl-get-cpu-features +sysdep-rtld-routines += dl-get-cpu-features +sysdep_routines += dl-get-cpu-features +elide-routines.os += dl-get-cpu-features + +tests += tst-get-cpu-features +tests-static += tst-get-cpu-features-static +endif diff --git a/sysdeps/x86/Versions b/sysdeps/x86/Versions new file mode 100644 index 0000000000..e02923708e --- /dev/null +++ b/sysdeps/x86/Versions @@ -0,0 +1,5 @@ +ld { + GLIBC_PRIVATE { + __get_cpu_features; + } +} diff --git a/sysdeps/x86/cpu-features-offsets.sym b/sysdeps/x86/cpu-features-offsets.sym new file mode 100644 index 0000000000..a9d53d195f --- /dev/null +++ b/sysdeps/x86/cpu-features-offsets.sym @@ -0,0 +1,7 @@ +#define SHARED 1 + +#include + +#define rtld_global_ro_offsetof(mem) offsetof (struct rtld_global_ro, mem) + +RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET rtld_global_ro_offsetof (_dl_x86_cpu_features) diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c new file mode 100644 index 0000000000..587080c1a4 --- /dev/null +++ b/sysdeps/x86/cpu-features.c @@ -0,0 +1,202 @@ +/* Initialize CPU feature data. + This file is part of the GNU C Library. + Copyright (C) 2008-2015 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include + +static inline void +get_common_indeces (struct cpu_features *cpu_features, + unsigned int *family, unsigned int *model) +{ + unsigned int eax; + __cpuid (1, eax, cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx, + cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx, + cpu_features->cpuid[COMMON_CPUID_INDEX_1].edx); + GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].eax = eax; + *family = (eax >> 8) & 0x0f; + *model = (eax >> 4) & 0x0f; +} + +static inline void +init_cpu_features (struct cpu_features *cpu_features) +{ + unsigned int ebx, ecx, edx; + unsigned int family = 0; + unsigned int model = 0; + enum cpu_features_kind kind; + + __cpuid (0, cpu_features->max_cpuid, ebx, ecx, edx); + + /* This spells out "GenuineIntel". */ + if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) + { + kind = arch_kind_intel; + + get_common_indeces (cpu_features, &family, &model); + + unsigned int eax = cpu_features->cpuid[COMMON_CPUID_INDEX_1].eax; + unsigned int extended_family = (eax >> 20) & 0xff; + unsigned int extended_model = (eax >> 12) & 0xf0; + if (family == 0x0f) + { + family += extended_family; + model += extended_model; + } + else if (family == 0x06) + { + ecx = cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx; + model += extended_model; + switch (model) + { + case 0x1c: + case 0x26: + /* BSF is slow on Atom. */ + cpu_features->feature[index_Slow_BSF] |= bit_Slow_BSF; + break; + + case 0x37: + case 0x4a: + case 0x4d: + case 0x5a: + case 0x5d: + /* Unaligned load versions are faster than SSSE3 + on Silvermont. */ +#if index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop +# error index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop +#endif +#if index_Fast_Unaligned_Load != index_Slow_SSE4_2 +# error index_Fast_Unaligned_Load != index_Slow_SSE4_2 +#endif + cpu_features->feature[index_Fast_Unaligned_Load] + |= (bit_Fast_Unaligned_Load + | bit_Prefer_PMINUB_for_stringop + | bit_Slow_SSE4_2); + break; + + default: + /* Unknown family 0x06 processors. Assuming this is one + of Core i3/i5/i7 processors if AVX is available. */ + if ((ecx & bit_AVX) == 0) + break; + + case 0x1a: + case 0x1e: + case 0x1f: + case 0x25: + case 0x2c: + case 0x2e: + case 0x2f: + /* Rep string instructions, copy backward, unaligned loads + and pminub are fast on Intel Core i3, i5 and i7. */ +#if index_Fast_Rep_String != index_Fast_Copy_Backward +# error index_Fast_Rep_String != index_Fast_Copy_Backward +#endif +#if index_Fast_Rep_String != index_Fast_Unaligned_Load +# error index_Fast_Rep_String != index_Fast_Unaligned_Load +#endif +#if index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop +# error index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop +#endif + cpu_features->feature[index_Fast_Rep_String] + |= (bit_Fast_Rep_String + | bit_Fast_Copy_Backward + | bit_Fast_Unaligned_Load + | bit_Prefer_PMINUB_for_stringop); + break; + } + } + } + /* This spells out "AuthenticAMD". */ + else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) + { + kind = arch_kind_amd; + + get_common_indeces (cpu_features, &family, &model); + + ecx = cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx; + + unsigned int eax; + __cpuid (0x80000000, eax, ebx, ecx, edx); + if (eax >= 0x80000001) + __cpuid (0x80000001, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].eax, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ebx, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ecx, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].edx); + } + else + kind = arch_kind_other; + + if (cpu_features->max_cpuid >= 7) + __cpuid_count (7, 0, + cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax, + cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx, + cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx, + cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx); + + /* Can we call xgetbv? */ + if (HAS_CPU_FEATURE (OSXSAVE)) + { + unsigned int xcrlow; + unsigned int xcrhigh; + asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); + /* Is YMM and XMM state usable? */ + if ((xcrlow & (bit_YMM_state | bit_XMM_state)) == + (bit_YMM_state | bit_XMM_state)) + { + /* Determine if AVX is usable. */ + if (HAS_CPU_FEATURE (AVX)) + cpu_features->feature[index_AVX_Usable] |= bit_AVX_Usable; +#if index_AVX2_Usable != index_AVX_Fast_Unaligned_Load +# error index_AVX2_Usable != index_AVX_Fast_Unaligned_Load +#endif + /* Determine if AVX2 is usable. Unaligned load with 256-bit + AVX registers are faster on processors with AVX2. */ + if (HAS_CPU_FEATURE (AVX2)) + cpu_features->feature[index_AVX2_Usable] + |= bit_AVX2_Usable | bit_AVX_Fast_Unaligned_Load; + /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and + ZMM16-ZMM31 state are enabled. */ + if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state + | bit_ZMM16_31_state)) == + (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state)) + { + /* Determine if AVX512F is usable. */ + if (HAS_CPU_FEATURE (AVX512F)) + { + cpu_features->feature[index_AVX512F_Usable] + |= bit_AVX512F_Usable; + /* Determine if AVX512DQ is usable. */ + if (HAS_CPU_FEATURE (AVX512DQ)) + cpu_features->feature[index_AVX512DQ_Usable] + |= bit_AVX512DQ_Usable; + } + } + /* Determine if FMA is usable. */ + if (HAS_CPU_FEATURE (FMA)) + cpu_features->feature[index_FMA_Usable] |= bit_FMA_Usable; + /* Determine if FMA4 is usable. */ + if (HAS_CPU_FEATURE (FMA4)) + cpu_features->feature[index_FMA4_Usable] |= bit_FMA4_Usable; + } + } + + cpu_features->family = family; + cpu_features->model = model; + cpu_features->kind = kind; +} diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h new file mode 100644 index 0000000000..22e5abb919 --- /dev/null +++ b/sysdeps/x86/cpu-features.h @@ -0,0 +1,240 @@ +/* This file is part of the GNU C Library. + Copyright (C) 2008-2015 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef cpu_features_h +#define cpu_features_h + +#define bit_Fast_Rep_String (1 << 0) +#define bit_Fast_Copy_Backward (1 << 1) +#define bit_Slow_BSF (1 << 2) +#define bit_Fast_Unaligned_Load (1 << 4) +#define bit_Prefer_PMINUB_for_stringop (1 << 5) +#define bit_AVX_Usable (1 << 6) +#define bit_FMA_Usable (1 << 7) +#define bit_FMA4_Usable (1 << 8) +#define bit_Slow_SSE4_2 (1 << 9) +#define bit_AVX2_Usable (1 << 10) +#define bit_AVX_Fast_Unaligned_Load (1 << 11) +#define bit_AVX512F_Usable (1 << 12) +#define bit_AVX512DQ_Usable (1 << 13) + +/* CPUID Feature flags. */ + +/* COMMON_CPUID_INDEX_1. */ +#define bit_SSE2 (1 << 26) +#define bit_SSSE3 (1 << 9) +#define bit_SSE4_1 (1 << 19) +#define bit_SSE4_2 (1 << 20) +#define bit_OSXSAVE (1 << 27) +#define bit_AVX (1 << 28) +#define bit_POPCOUNT (1 << 23) +#define bit_FMA (1 << 12) +#define bit_FMA4 (1 << 16) + +/* COMMON_CPUID_INDEX_7. */ +#define bit_RTM (1 << 11) +#define bit_AVX2 (1 << 5) +#define bit_AVX512F (1 << 16) +#define bit_AVX512DQ (1 << 17) + +/* XCR0 Feature flags. */ +#define bit_XMM_state (1 << 1) +#define bit_YMM_state (2 << 1) +#define bit_Opmask_state (1 << 5) +#define bit_ZMM0_15_state (1 << 6) +#define bit_ZMM16_31_state (1 << 7) + +/* The integer bit array index for the first set of internal feature bits. */ +#define FEATURE_INDEX_1 0 + +/* The current maximum size of the feature integer bit array. */ +#define FEATURE_INDEX_MAX 1 + +#ifdef __ASSEMBLER__ + +# include +# include + +# define index_SSE2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET +# define index_SSSE3 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET +# define index_SSE4_1 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET +# define index_SSE4_2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET +# define index_AVX COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET +# define index_AVX2 COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET + +# define index_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE +# define index_Fast_Copy_Backward FEATURE_INDEX_1*FEATURE_SIZE +# define index_Slow_BSF FEATURE_INDEX_1*FEATURE_SIZE +# define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE +# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE +# define index_AVX_Usable FEATURE_INDEX_1*FEATURE_SIZE +# define index_FMA_Usable FEATURE_INDEX_1*FEATURE_SIZE +# define index_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE +# define index_Slow_SSE4_2 FEATURE_INDEX_1*FEATURE_SIZE +# define index_AVX2_Usable FEATURE_INDEX_1*FEATURE_SIZE +# define index_AVX_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE +# define index_AVX512F_Usable FEATURE_INDEX_1*FEATURE_SIZE +# define index_AVX512DQ_Usable FEATURE_INDEX_1*FEATURE_SIZE + +# if defined (_LIBC) && !IS_IN (nonlib) +# ifdef __x86_64__ +# ifdef SHARED +# if IS_IN (rtld) +# define LOAD_RTLD_GLOBAL_RO_RDX +# define HAS_FEATURE(offset, name) \ + testl $(bit_##name), _rtld_local_ro+offset+(index_##name)(%rip) +# else +# define LOAD_RTLD_GLOBAL_RO_RDX \ + mov _rtld_global_ro@GOTPCREL(%rip), %RDX_LP +# define HAS_FEATURE(offset, name) \ + testl $(bit_##name), \ + RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+offset+(index_##name)(%rdx) +# endif +# else /* SHARED */ +# define LOAD_RTLD_GLOBAL_RO_RDX +# define HAS_FEATURE(offset, name) \ + testl $(bit_##name), _dl_x86_cpu_features+offset+(index_##name)(%rip) +# endif /* !SHARED */ +# else /* __x86_64__ */ +# ifdef SHARED +# define LOAD_FUNC_GOT_EAX(func) \ + leal func@GOTOFF(%edx), %eax +# if IS_IN (rtld) +# define LOAD_GOT_AND_RTLD_GLOBAL_RO \ + LOAD_PIC_REG(dx) +# define HAS_FEATURE(offset, name) \ + testl $(bit_##name), offset+(index_##name)+_rtld_local_ro@GOTOFF(%edx) +# else +# define LOAD_GOT_AND_RTLD_GLOBAL_RO \ + LOAD_PIC_REG(dx); \ + mov _rtld_global_ro@GOT(%edx), %ecx +# define HAS_FEATURE(offset, name) \ + testl $(bit_##name), \ + RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+offset+(index_##name)(%ecx) +# endif +# else /* SHARED */ +# define LOAD_FUNC_GOT_EAX(func) \ + leal func, %eax +# define LOAD_GOT_AND_RTLD_GLOBAL_RO +# define HAS_FEATURE(offset, name) \ + testl $(bit_##name), _dl_x86_cpu_features+offset+(index_##name) +# endif /* !SHARED */ +# endif /* !__x86_64__ */ +# else /* _LIBC && !nonlib */ +# error "Sorry, is unimplemented for assembler" +# endif /* !_LIBC || nonlib */ + +/* HAS_* evaluates to true if we may use the feature at runtime. */ +# define HAS_CPU_FEATURE(name) HAS_FEATURE (CPUID_OFFSET, name) +# define HAS_ARCH_FEATURE(name) HAS_FEATURE (FEATURE_OFFSET, name) + +#else /* __ASSEMBLER__ */ + +enum + { + COMMON_CPUID_INDEX_1 = 0, + COMMON_CPUID_INDEX_7, + COMMON_CPUID_INDEX_80000001, /* for AMD */ + /* Keep the following line at the end. */ + COMMON_CPUID_INDEX_MAX + }; + +struct cpu_features +{ + enum cpu_features_kind + { + arch_kind_unknown = 0, + arch_kind_intel, + arch_kind_amd, + arch_kind_other + } kind; + int max_cpuid; + struct cpuid_registers + { + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + } cpuid[COMMON_CPUID_INDEX_MAX]; + unsigned int family; + unsigned int model; + unsigned int feature[FEATURE_INDEX_MAX]; +}; + +/* Used from outside of glibc to get access to the CPU features + structure. */ +extern const struct cpu_features *__get_cpu_features (void) + __attribute__ ((const)); + +# if defined (_LIBC) && !IS_IN (nonlib) +/* Unused for x86. */ +# define INIT_ARCH() +# define __get_cpu_features() (&GLRO(dl_x86_cpu_features)) +# endif + + +/* HAS_* evaluates to true if we may use the feature at runtime. */ +# define HAS_CPU_FEATURE(name) \ + ((__get_cpu_features ()->cpuid[index_##name].reg_##name & (bit_##name)) != 0) +# define HAS_ARCH_FEATURE(name) \ + ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0) + +# define index_SSE2 COMMON_CPUID_INDEX_1 +# define index_SSSE3 COMMON_CPUID_INDEX_1 +# define index_SSE4_1 COMMON_CPUID_INDEX_1 +# define index_SSE4_2 COMMON_CPUID_INDEX_1 +# define index_AVX COMMON_CPUID_INDEX_1 +# define index_AVX2 COMMON_CPUID_INDEX_7 +# define index_AVX512F COMMON_CPUID_INDEX_7 +# define index_AVX512DQ COMMON_CPUID_INDEX_7 +# define index_RTM COMMON_CPUID_INDEX_7 +# define index_FMA COMMON_CPUID_INDEX_1 +# define index_FMA4 COMMON_CPUID_INDEX_80000001 +# define index_POPCOUNT COMMON_CPUID_INDEX_1 +# define index_OSXSAVE COMMON_CPUID_INDEX_1 + +# define reg_SSE2 edx +# define reg_SSSE3 ecx +# define reg_SSE4_1 ecx +# define reg_SSE4_2 ecx +# define reg_AVX ecx +# define reg_AVX2 ebx +# define reg_AVX512F ebx +# define reg_AVX512DQ ebx +# define reg_RTM ebx +# define reg_FMA ecx +# define reg_FMA4 ecx +# define reg_POPCOUNT ecx +# define reg_OSXSAVE ecx + +# define index_Fast_Rep_String FEATURE_INDEX_1 +# define index_Fast_Copy_Backward FEATURE_INDEX_1 +# define index_Slow_BSF FEATURE_INDEX_1 +# define index_Fast_Unaligned_Load FEATURE_INDEX_1 +# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1 +# define index_AVX_Usable FEATURE_INDEX_1 +# define index_FMA_Usable FEATURE_INDEX_1 +# define index_FMA4_Usable FEATURE_INDEX_1 +# define index_Slow_SSE4_2 FEATURE_INDEX_1 +# define index_AVX2_Usable FEATURE_INDEX_1 +# define index_AVX_Fast_Unaligned_Load FEATURE_INDEX_1 +# define index_AVX512F_Usable FEATURE_INDEX_1 +# define index_AVX512DQ_Usable FEATURE_INDEX_1 + +#endif /* !__ASSEMBLER__ */ + +#endif /* cpu_features_h */ diff --git a/sysdeps/x86/dl-get-cpu-features.c b/sysdeps/x86/dl-get-cpu-features.c new file mode 100644 index 0000000000..080e5e8532 --- /dev/null +++ b/sysdeps/x86/dl-get-cpu-features.c @@ -0,0 +1,27 @@ +/* This file is part of the GNU C Library. + Copyright (C) 2015 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + + +#include + +#undef __get_cpu_features + +const struct cpu_features * +__get_cpu_features (void) +{ + return &GLRO(dl_x86_cpu_features); +} diff --git a/sysdeps/x86/libc-start.c b/sysdeps/x86/libc-start.c new file mode 100644 index 0000000000..9f0c0457c1 --- /dev/null +++ b/sysdeps/x86/libc-start.c @@ -0,0 +1,41 @@ +/* Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifdef SHARED +# include +# else +/* The main work is done in the generic function. */ +# define LIBC_START_DISABLE_INLINE +# define LIBC_START_MAIN generic_start_main +# include +# include +# include + +extern struct cpu_features _dl_x86_cpu_features; + +int +__libc_start_main (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL), + int argc, char **argv, + __typeof (main) init, + void (*fini) (void), + void (*rtld_fini) (void), void *stack_end) +{ + init_cpu_features (&_dl_x86_cpu_features); + return generic_start_main (main, argc, argv, init, fini, rtld_fini, + stack_end); +} +#endif diff --git a/sysdeps/x86/rtld-global-offsets.sym b/sysdeps/x86/rtld-global-offsets.sym new file mode 100644 index 0000000000..a9d53d195f --- /dev/null +++ b/sysdeps/x86/rtld-global-offsets.sym @@ -0,0 +1,7 @@ +#define SHARED 1 + +#include + +#define rtld_global_ro_offsetof(mem) offsetof (struct rtld_global_ro, mem) + +RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET rtld_global_ro_offsetof (_dl_x86_cpu_features) diff --git a/sysdeps/x86/tst-get-cpu-features-static.c b/sysdeps/x86/tst-get-cpu-features-static.c new file mode 100644 index 0000000000..03f59060c5 --- /dev/null +++ b/sysdeps/x86/tst-get-cpu-features-static.c @@ -0,0 +1 @@ +#include "tst-get-cpu-features.c" diff --git a/sysdeps/x86/tst-get-cpu-features.c b/sysdeps/x86/tst-get-cpu-features.c new file mode 100644 index 0000000000..c17060f5e1 --- /dev/null +++ b/sysdeps/x86/tst-get-cpu-features.c @@ -0,0 +1,31 @@ +/* Test case for x86 __get_cpu_features interface + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include + +static int +do_test (void) +{ + if (__get_cpu_features ()->kind == arch_kind_unknown) + abort (); + return 0; +} + +#define TEST_FUNCTION do_test () +#include "../../test-skeleton.c" diff --git a/sysdeps/x86_64/cacheinfo.c b/sysdeps/x86_64/cacheinfo.c index f1cbf50957..0ff5309e61 100644 --- a/sysdeps/x86_64/cacheinfo.c +++ b/sysdeps/x86_64/cacheinfo.c @@ -21,40 +21,11 @@ #include #include #include +#include "multiarch/init-arch.h" -#ifndef __cpuid_count -/* FIXME: Provide __cpuid_count if it isn't defined. Copied from gcc - 4.4.0. Remove this if gcc 4.4 is the minimum requirement. */ -# if defined(__i386__) && defined(__PIC__) -/* %ebx may be the PIC register. */ -# define __cpuid_count(level, count, a, b, c, d) \ - __asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \ - "cpuid\n\t" \ - "xchg{l}\t{%%}ebx, %1\n\t" \ - : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \ - : "0" (level), "2" (count)) -# else -# define __cpuid_count(level, count, a, b, c, d) \ - __asm__ ("cpuid\n\t" \ - : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \ - : "0" (level), "2" (count)) -# endif -#endif - -#ifdef USE_MULTIARCH -# include "multiarch/init-arch.h" - -# define is_intel __cpu_features.kind == arch_kind_intel -# define is_amd __cpu_features.kind == arch_kind_amd -# define max_cpuid __cpu_features.max_cpuid -#else - /* This spells out "GenuineIntel". */ -# define is_intel \ - ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69 - /* This spells out "AuthenticAMD". */ -# define is_amd \ - ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65 -#endif +#define is_intel GLRO(dl_x86_cpu_features).kind == arch_kind_intel +#define is_amd GLRO(dl_x86_cpu_features).kind == arch_kind_amd +#define max_cpuid GLRO(dl_x86_cpu_features).max_cpuid static const struct intel_02_cache_info { @@ -235,21 +206,8 @@ intel_check_word (int name, unsigned int value, bool *has_level_2, /* Intel reused this value. For family 15, model 6 it specifies the 3rd level cache. Otherwise the 2nd level cache. */ - unsigned int family; - unsigned int model; -#ifdef USE_MULTIARCH - family = __cpu_features.family; - model = __cpu_features.model; -#else - unsigned int eax; - unsigned int ebx; - unsigned int ecx; - unsigned int edx; - __cpuid (1, eax, ebx, ecx, edx); - - family = ((eax >> 20) & 0xff) + ((eax >> 8) & 0xf); - model = (((eax >>16) & 0xf) << 4) + ((eax >> 4) & 0xf); -#endif + unsigned int family = GLRO(dl_x86_cpu_features).family; + unsigned int model = GLRO(dl_x86_cpu_features).model; if (family == 15 && model == 6) { @@ -476,18 +434,6 @@ long int attribute_hidden __cache_sysconf (int name) { -#ifdef USE_MULTIARCH - if (__cpu_features.kind == arch_kind_unknown) - __init_cpu_features (); -#else - /* Find out what brand of processor. */ - unsigned int max_cpuid; - unsigned int ebx; - unsigned int ecx; - unsigned int edx; - __cpuid (0, max_cpuid, ebx, ecx, edx); -#endif - if (is_intel) return handle_intel (name, max_cpuid); @@ -523,18 +469,6 @@ long int __x86_raw_shared_cache_size attribute_hidden = 1024 * 1024; int __x86_prefetchw attribute_hidden; #endif -#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION -/* Instructions preferred for memory and string routines. - - 0: Regular instructions - 1: MMX instructions - 2: SSE2 instructions - 3: SSSE3 instructions - - */ -int __x86_preferred_memory_instruction attribute_hidden; -#endif - static void __attribute__((constructor)) @@ -551,14 +485,6 @@ init_cacheinfo (void) unsigned int level; unsigned int threads = 0; -#ifdef USE_MULTIARCH - if (__cpu_features.kind == arch_kind_unknown) - __init_cpu_features (); -#else - int max_cpuid; - __cpuid (0, max_cpuid, ebx, ecx, edx); -#endif - if (is_intel) { data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid); @@ -574,30 +500,13 @@ init_cacheinfo (void) shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid); } - unsigned int ebx_1; - -#ifdef USE_MULTIARCH - eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax; - ebx_1 = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx; - ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx; - edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx; -#else - __cpuid (1, eax, ebx_1, ecx, edx); -#endif - -#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION - /* Intel prefers SSSE3 instructions for memory/string routines - if they are available. */ - if ((ecx & 0x200)) - __x86_preferred_memory_instruction = 3; - else - __x86_preferred_memory_instruction = 2; -#endif - /* Figure out the number of logical threads that share the highest cache level. */ if (max_cpuid >= 4) { + unsigned int family = GLRO(dl_x86_cpu_features).family; + unsigned int model = GLRO(dl_x86_cpu_features).model; + int i = 0; /* Query until desired cache level is enumerated. */ @@ -647,13 +556,33 @@ init_cacheinfo (void) } } threads += 1; + if (threads > 2 && level == 2 && family == 6) + { + switch (model) + { + case 0x57: + /* Knights Landing has L2 cache shared by 2 cores. */ + case 0x37: + case 0x4a: + case 0x4d: + case 0x5a: + case 0x5d: + /* Silvermont has L2 cache shared by 2 cores. */ + threads = 2; + break; + default: + break; + } + } } else { intel_bug_no_cache_info: /* Assume that all logical threads share the highest cache level. */ - threads = (ebx_1 >> 16) & 0xff; + threads + = ((GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].ebx + >> 16) & 0xff); } /* Cap usage of highest cache level to the number of supported @@ -668,25 +597,6 @@ init_cacheinfo (void) long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE); shared = handle_amd (_SC_LEVEL3_CACHE_SIZE); -#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION -# ifdef USE_MULTIARCH - eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax; - ebx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx; - ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx; - edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx; -# else - __cpuid (1, eax, ebx, ecx, edx); -# endif - - /* AMD prefers SSSE3 instructions for memory/string routines - if they are avaiable, otherwise it prefers integer - instructions. */ - if ((ecx & 0x200)) - __x86_preferred_memory_instruction = 3; - else - __x86_preferred_memory_instruction = 0; -#endif - /* Get maximum extended function. */ __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx); diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h index fd7927501a..3fa5e614ac 100644 --- a/sysdeps/x86_64/dl-machine.h +++ b/sysdeps/x86_64/dl-machine.h @@ -26,6 +26,7 @@ #include #include #include +#include /* Return nonzero iff ELF header is compatible with the running host. */ static inline int __attribute__ ((unused)) @@ -205,6 +206,8 @@ dl_platform_init (void) if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0') /* Avoid an empty string which would disturb us. */ GLRO(dl_platform) = NULL; + + init_cpu_features (&GLRO(dl_x86_cpu_features)); } static inline ElfW(Addr) diff --git a/sysdeps/x86_64/dl-procinfo.c b/sysdeps/x86_64/dl-procinfo.c new file mode 100644 index 0000000000..851681af04 --- /dev/null +++ b/sysdeps/x86_64/dl-procinfo.c @@ -0,0 +1,57 @@ +/* Data for x86-64 version of processor capability information. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* If anything should be added here check whether the size of each string + is still ok with the given array size. + + All the #ifdefs in the definitions are quite irritating but + necessary if we want to avoid duplicating the information. There + are three different modes: + + - PROCINFO_DECL is defined. This means we are only interested in + declarations. + + - PROCINFO_DECL is not defined: + + + if SHARED is defined the file is included in an array + initializer. The .element = { ... } syntax is needed. + + + if SHARED is not defined a normal array initialization is + needed. + */ + +#ifndef PROCINFO_CLASS +# define PROCINFO_CLASS +#endif + +#if !defined PROCINFO_DECL && defined SHARED + ._dl_x86_cpu_features +#else +PROCINFO_CLASS struct cpu_features _dl_x86_cpu_features +#endif +#ifndef PROCINFO_DECL += { } +#endif +#if !defined SHARED || defined PROCINFO_DECL +; +#else +, +#endif + +#undef PROCINFO_DECL +#undef PROCINFO_CLASS diff --git a/sysdeps/x86_64/ldsodefs.h b/sysdeps/x86_64/ldsodefs.h index 84d36e82be..e3f2da24f1 100644 --- a/sysdeps/x86_64/ldsodefs.h +++ b/sysdeps/x86_64/ldsodefs.h @@ -20,6 +20,7 @@ #define _X86_64_LDSODEFS_H 1 #include +#include struct La_x86_64_regs; struct La_x86_64_retval; diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile index d7002a9df3..d10b4d4fb3 100644 --- a/sysdeps/x86_64/multiarch/Makefile +++ b/sysdeps/x86_64/multiarch/Makefile @@ -1,5 +1,4 @@ ifeq ($(subdir),csu) -aux += init-arch tests += test-multiarch gen-as-const-headers += ifunc-defines.sym endif diff --git a/sysdeps/x86_64/multiarch/Versions b/sysdeps/x86_64/multiarch/Versions deleted file mode 100644 index 59b185ac8d..0000000000 --- a/sysdeps/x86_64/multiarch/Versions +++ /dev/null @@ -1,5 +0,0 @@ -libc { - GLIBC_PRIVATE { - __get_cpu_features; - } -} diff --git a/sysdeps/x86_64/multiarch/cacheinfo.c b/sysdeps/x86_64/multiarch/cacheinfo.c deleted file mode 100644 index f87b8dce6b..0000000000 --- a/sysdeps/x86_64/multiarch/cacheinfo.c +++ /dev/null @@ -1,2 +0,0 @@ -#define DISABLE_PREFERRED_MEMORY_INSTRUCTION -#include "../cacheinfo.c" diff --git a/sysdeps/x86_64/multiarch/ifunc-defines.sym b/sysdeps/x86_64/multiarch/ifunc-defines.sym index a410d8808f..3df946f343 100644 --- a/sysdeps/x86_64/multiarch/ifunc-defines.sym +++ b/sysdeps/x86_64/multiarch/ifunc-defines.sym @@ -4,7 +4,6 @@ -- CPU_FEATURES_SIZE sizeof (struct cpu_features) -KIND_OFFSET offsetof (struct cpu_features, kind) CPUID_OFFSET offsetof (struct cpu_features, cpuid) CPUID_SIZE sizeof (struct cpuid_registers) CPUID_EAX_OFFSET offsetof (struct cpuid_registers, eax) diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c deleted file mode 100644 index 7dec21884d..0000000000 --- a/sysdeps/x86_64/multiarch/init-arch.c +++ /dev/null @@ -1,206 +0,0 @@ -/* Initialize CPU feature data. - This file is part of the GNU C Library. - Copyright (C) 2008-2015 Free Software Foundation, Inc. - Contributed by Ulrich Drepper . - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#include -#include -#include "init-arch.h" - - -struct cpu_features __cpu_features attribute_hidden; - - -static void -get_common_indeces (unsigned int *family, unsigned int *model) -{ - __cpuid (1, __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax, - __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx, - __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx, - __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx); - - unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax; - *family = (eax >> 8) & 0x0f; - *model = (eax >> 4) & 0x0f; -} - - -void -__init_cpu_features (void) -{ - unsigned int ebx; - unsigned int ecx; - unsigned int edx; - unsigned int family = 0; - unsigned int model = 0; - enum cpu_features_kind kind; - - __cpuid (0, __cpu_features.max_cpuid, ebx, ecx, edx); - - /* This spells out "GenuineIntel". */ - if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) - { - kind = arch_kind_intel; - - get_common_indeces (&family, &model); - - unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax; - unsigned int extended_family = (eax >> 20) & 0xff; - unsigned int extended_model = (eax >> 12) & 0xf0; - if (family == 0x0f) - { - family += extended_family; - model += extended_model; - } - else if (family == 0x06) - { - ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx; - model += extended_model; - switch (model) - { - case 0x1c: - case 0x26: - /* BSF is slow on Atom. */ - __cpu_features.feature[index_Slow_BSF] |= bit_Slow_BSF; - break; - - case 0x37: - case 0x4a: - case 0x4d: - case 0x5a: - case 0x5d: - /* Unaligned load versions are faster than SSSE3 - on Silvermont. */ -#if index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop -# error index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop -#endif -#if index_Fast_Unaligned_Load != index_Slow_SSE4_2 -# error index_Fast_Unaligned_Load != index_Slow_SSE4_2 -#endif - __cpu_features.feature[index_Fast_Unaligned_Load] - |= (bit_Fast_Unaligned_Load - | bit_Prefer_PMINUB_for_stringop - | bit_Slow_SSE4_2); - break; - - default: - /* Unknown family 0x06 processors. Assuming this is one - of Core i3/i5/i7 processors if AVX is available. */ - if ((ecx & bit_AVX) == 0) - break; - - case 0x1a: - case 0x1e: - case 0x1f: - case 0x25: - case 0x2c: - case 0x2e: - case 0x2f: - /* Rep string instructions, copy backward, unaligned loads - and pminub are fast on Intel Core i3, i5 and i7. */ -#if index_Fast_Rep_String != index_Fast_Copy_Backward -# error index_Fast_Rep_String != index_Fast_Copy_Backward -#endif -#if index_Fast_Rep_String != index_Fast_Unaligned_Load -# error index_Fast_Rep_String != index_Fast_Unaligned_Load -#endif -#if index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop -# error index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop -#endif - __cpu_features.feature[index_Fast_Rep_String] - |= (bit_Fast_Rep_String - | bit_Fast_Copy_Backward - | bit_Fast_Unaligned_Load - | bit_Prefer_PMINUB_for_stringop); - break; - } - } - } - /* This spells out "AuthenticAMD". */ - else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) - { - kind = arch_kind_amd; - - get_common_indeces (&family, &model); - - ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx; - - unsigned int eax; - __cpuid (0x80000000, eax, ebx, ecx, edx); - if (eax >= 0x80000001) - __cpuid (0x80000001, - __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].eax, - __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ebx, - __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx, - __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].edx); - } - else - kind = arch_kind_other; - - if (__cpu_features.max_cpuid >= 7) - __cpuid_count (7, 0, - __cpu_features.cpuid[COMMON_CPUID_INDEX_7].eax, - __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ebx, - __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ecx, - __cpu_features.cpuid[COMMON_CPUID_INDEX_7].edx); - - /* Can we call xgetbv? */ - if (CPUID_OSXSAVE) - { - unsigned int xcrlow; - unsigned int xcrhigh; - asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); - /* Is YMM and XMM state usable? */ - if ((xcrlow & (bit_YMM_state | bit_XMM_state)) == - (bit_YMM_state | bit_XMM_state)) - { - /* Determine if AVX is usable. */ - if (CPUID_AVX) - __cpu_features.feature[index_AVX_Usable] |= bit_AVX_Usable; -#if index_AVX2_Usable != index_AVX_Fast_Unaligned_Load -# error index_AVX2_Usable != index_AVX_Fast_Unaligned_Load -#endif - /* Determine if AVX2 is usable. Unaligned load with 256-bit - AVX registers are faster on processors with AVX2. */ - if (CPUID_AVX2) - __cpu_features.feature[index_AVX2_Usable] - |= bit_AVX2_Usable | bit_AVX_Fast_Unaligned_Load; - /* Determine if FMA is usable. */ - if (CPUID_FMA) - __cpu_features.feature[index_FMA_Usable] |= bit_FMA_Usable; - /* Determine if FMA4 is usable. */ - if (CPUID_FMA4) - __cpu_features.feature[index_FMA4_Usable] |= bit_FMA4_Usable; - } - } - - __cpu_features.family = family; - __cpu_features.model = model; - atomic_write_barrier (); - __cpu_features.kind = kind; -} - -#undef __get_cpu_features - -const struct cpu_features * -__get_cpu_features (void) -{ - if (__cpu_features.kind == arch_kind_unknown) - __init_cpu_features (); - - return &__cpu_features; -} diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h index e6b5ba5530..2b9988ebc3 100644 --- a/sysdeps/x86_64/multiarch/init-arch.h +++ b/sysdeps/x86_64/multiarch/init-arch.h @@ -15,175 +15,8 @@ License along with the GNU C Library; if not, see . */ -#define bit_Fast_Rep_String (1 << 0) -#define bit_Fast_Copy_Backward (1 << 1) -#define bit_Slow_BSF (1 << 2) -#define bit_Fast_Unaligned_Load (1 << 4) -#define bit_Prefer_PMINUB_for_stringop (1 << 5) -#define bit_AVX_Usable (1 << 6) -#define bit_FMA_Usable (1 << 7) -#define bit_FMA4_Usable (1 << 8) -#define bit_Slow_SSE4_2 (1 << 9) -#define bit_AVX2_Usable (1 << 10) -#define bit_AVX_Fast_Unaligned_Load (1 << 11) - -/* CPUID Feature flags. */ - -/* COMMON_CPUID_INDEX_1. */ -#define bit_SSE2 (1 << 26) -#define bit_SSSE3 (1 << 9) -#define bit_SSE4_1 (1 << 19) -#define bit_SSE4_2 (1 << 20) -#define bit_OSXSAVE (1 << 27) -#define bit_AVX (1 << 28) -#define bit_POPCOUNT (1 << 23) -#define bit_FMA (1 << 12) -#define bit_FMA4 (1 << 16) - -/* COMMON_CPUID_INDEX_7. */ -#define bit_RTM (1 << 11) -#define bit_AVX2 (1 << 5) - -/* XCR0 Feature flags. */ -#define bit_XMM_state (1 << 1) -#define bit_YMM_state (2 << 1) - -/* The integer bit array index for the first set of internal feature bits. */ -# define FEATURE_INDEX_1 0 - -/* The current maximum size of the feature integer bit array. */ -# define FEATURE_INDEX_MAX 1 - -#ifdef __ASSEMBLER__ - -# include - -# define index_SSE2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET -# define index_SSSE3 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET -# define index_SSE4_1 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET -# define index_SSE4_2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET -# define index_AVX COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET -# define index_AVX2 COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET - -# define index_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE -# define index_Fast_Copy_Backward FEATURE_INDEX_1*FEATURE_SIZE -# define index_Slow_BSF FEATURE_INDEX_1*FEATURE_SIZE -# define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE -# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE -# define index_AVX_Usable FEATURE_INDEX_1*FEATURE_SIZE -# define index_FMA_Usable FEATURE_INDEX_1*FEATURE_SIZE -# define index_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE -# define index_Slow_SSE4_2 FEATURE_INDEX_1*FEATURE_SIZE -# define index_AVX2_Usable FEATURE_INDEX_1*FEATURE_SIZE -# define index_AVX_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE - -#else /* __ASSEMBLER__ */ - -# include - -enum - { - COMMON_CPUID_INDEX_1 = 0, - COMMON_CPUID_INDEX_7, - COMMON_CPUID_INDEX_80000001, /* for AMD */ - /* Keep the following line at the end. */ - COMMON_CPUID_INDEX_MAX - }; - -extern struct cpu_features -{ - enum cpu_features_kind - { - arch_kind_unknown = 0, - arch_kind_intel, - arch_kind_amd, - arch_kind_other - } kind; - int max_cpuid; - struct cpuid_registers - { - unsigned int eax; - unsigned int ebx; - unsigned int ecx; - unsigned int edx; - } cpuid[COMMON_CPUID_INDEX_MAX]; - unsigned int family; - unsigned int model; - unsigned int feature[FEATURE_INDEX_MAX]; -} __cpu_features attribute_hidden; - - -extern void __init_cpu_features (void) attribute_hidden; -# define INIT_ARCH() \ - do \ - if (__cpu_features.kind == arch_kind_unknown) \ - __init_cpu_features (); \ - while (0) - -/* Used from outside libc.so to get access to the CPU features structure. */ -extern const struct cpu_features *__get_cpu_features (void) - __attribute__ ((const)); - -# if IS_IN (libc) -# define __get_cpu_features() (&__cpu_features) -# endif - -# define HAS_CPU_FEATURE(idx, reg, bit) \ - ((__get_cpu_features ()->cpuid[idx].reg & (bit)) != 0) - -/* Following are the feature tests used throughout libc. */ - -/* CPUID_* evaluates to true if the feature flag is enabled. - We always use &__cpu_features because the HAS_CPUID_* macros - are called only within __init_cpu_features, where we can't - call __get_cpu_features without infinite recursion. */ -# define HAS_CPUID_FLAG(idx, reg, bit) \ - (((&__cpu_features)->cpuid[idx].reg & (bit)) != 0) - -# define CPUID_OSXSAVE \ - HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_OSXSAVE) -# define CPUID_AVX \ - HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_AVX) -# define CPUID_FMA \ - HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_FMA) -# define CPUID_FMA4 \ - HAS_CPUID_FLAG (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4) -# define CPUID_RTM \ - HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_RTM) -# define CPUID_AVX2 \ - HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX2) - -/* HAS_* evaluates to true if we may use the feature at runtime. */ -# define HAS_SSE2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, bit_SSE2) -# define HAS_POPCOUNT HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_POPCOUNT) -# define HAS_SSSE3 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSSE3) -# define HAS_SSE4_1 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1) -# define HAS_SSE4_2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2) -# define HAS_RTM HAS_CPU_FEATURE (COMMON_CPUID_INDEX_7, ebx, bit_RTM) - -# define index_Fast_Rep_String FEATURE_INDEX_1 -# define index_Fast_Copy_Backward FEATURE_INDEX_1 -# define index_Slow_BSF FEATURE_INDEX_1 -# define index_Fast_Unaligned_Load FEATURE_INDEX_1 -# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1 -# define index_AVX_Usable FEATURE_INDEX_1 -# define index_FMA_Usable FEATURE_INDEX_1 -# define index_FMA4_Usable FEATURE_INDEX_1 -# define index_Slow_SSE4_2 FEATURE_INDEX_1 -# define index_AVX2_Usable FEATURE_INDEX_1 -# define index_AVX_Fast_Unaligned_Load FEATURE_INDEX_1 - -# define HAS_ARCH_FEATURE(name) \ - ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0) - -# define HAS_FAST_REP_STRING HAS_ARCH_FEATURE (Fast_Rep_String) -# define HAS_FAST_COPY_BACKWARD HAS_ARCH_FEATURE (Fast_Copy_Backward) -# define HAS_SLOW_BSF HAS_ARCH_FEATURE (Slow_BSF) -# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load) -# define HAS_AVX HAS_ARCH_FEATURE (AVX_Usable) -# define HAS_AVX2 HAS_ARCH_FEATURE (AVX2_Usable) -# define HAS_FMA HAS_ARCH_FEATURE (FMA_Usable) -# define HAS_FMA4 HAS_ARCH_FEATURE (FMA4_Usable) -# define HAS_AVX_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load) - -#endif /* __ASSEMBLER__ */ +#ifdef __ASSEMBLER__ +# include +#else +# include +#endif -- cgit 1.4.1