diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2015-08-13 03:37:47 -0700 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2015-08-13 03:41:22 -0700 |
commit | e2e4f56056adddc3c1efe676b40a4b4f2453103b (patch) | |
tree | c9f54be6f6e8b57c8e58bdfac594aa3927378231 /sysdeps/x86_64/multiarch/init-arch.c | |
parent | 63e952d9be87db68f0e4164d4a5760b32e77ebff (diff) | |
download | glibc-e2e4f56056adddc3c1efe676b40a4b4f2453103b.tar.gz glibc-e2e4f56056adddc3c1efe676b40a4b4f2453103b.tar.xz glibc-e2e4f56056adddc3c1efe676b40a4b4f2453103b.zip |
Add _dl_x86_cpu_features to rtld_global
This patch adds _dl_x86_cpu_features to rtld_global in x86 ld.so and initializes it early before __libc_start_main is called so that cpu_features is always available when it is used and we can avoid calling __init_cpu_features in IFUNC selectors. * sysdeps/i386/dl-machine.h: Include <cpu-features.c>. (dl_platform_init): Call init_cpu_features. * sysdeps/i386/dl-procinfo.c (_dl_x86_cpu_features): New. * sysdeps/i386/i686/cacheinfo.c (DISABLE_PREFERRED_MEMORY_INSTRUCTION): Removed. * sysdeps/i386/i686/multiarch/Makefile (aux): Remove init-arch. * sysdeps/i386/i686/multiarch/Versions: Removed. * sysdeps/i386/i686/multiarch/ifunc-defines.sym (KIND_OFFSET): Removed. * sysdeps/i386/ldsodefs.h: Include <cpu-features.h>. * sysdeps/unix/sysv/linux/x86/Makefile (libpthread-sysdep_routines): Remove init-arch. * sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c: Include <sysdeps/x86_64/dl-procinfo.c> instead of <sysdeps/generic/dl-procinfo.c>. * sysdeps/x86/Makefile [$(subdir) == csu] (gen-as-const-headers): Add cpu-features-offsets.sym and rtld-global-offsets.sym. [$(subdir) == elf] (sysdep-dl-routines): Add dl-get-cpu-features. [$(subdir) == elf] (tests): Add tst-get-cpu-features. [$(subdir) == elf] (tests-static): Add tst-get-cpu-features-static. * sysdeps/x86/Versions: New file. * sysdeps/x86/cpu-features-offsets.sym: Likewise. * sysdeps/x86/cpu-features.c: Likewise. * sysdeps/x86/cpu-features.h: Likewise. * sysdeps/x86/dl-get-cpu-features.c: Likewise. * sysdeps/x86/libc-start.c: Likewise. * sysdeps/x86/rtld-global-offsets.sym: Likewise. * sysdeps/x86/tst-get-cpu-features-static.c: Likewise. * sysdeps/x86/tst-get-cpu-features.c: Likewise. * sysdeps/x86_64/dl-procinfo.c: Likewise. * sysdeps/x86_64/cacheinfo.c (__cpuid_count): Removed. Assume USE_MULTIARCH is defined and don't check it. (is_intel): Replace __cpu_features with GLRO(dl_x86_cpu_features). (is_amd): Likewise. (max_cpuid): Likewise. (intel_check_word): Likewise. (__cache_sysconf): Don't call __init_cpu_features. 
(__x86_preferred_memory_instruction): Removed. (init_cacheinfo): Don't call __init_cpu_features. Replace __cpu_features with GLRO(dl_x86_cpu_features). * sysdeps/x86_64/dl-machine.h: Include <cpu-features.c>. (dl_platform_init): Call init_cpu_features. * sysdeps/x86_64/ldsodefs.h: Include <cpu-features.h>. * sysdeps/x86_64/multiarch/Makefile (aux): Remove init-arch. * sysdeps/x86_64/multiarch/Versions: Removed. * sysdeps/x86_64/multiarch/cacheinfo.c: Likewise. * sysdeps/x86_64/multiarch/init-arch.c: Likewise. * sysdeps/x86_64/multiarch/ifunc-defines.sym (KIND_OFFSET): Removed. * sysdeps/x86_64/multiarch/init-arch.h: Rewrite.
Diffstat (limited to 'sysdeps/x86_64/multiarch/init-arch.c')
-rw-r--r-- | sysdeps/x86_64/multiarch/init-arch.c | 223 |
1 files changed, 0 insertions, 223 deletions
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c deleted file mode 100644 index aaad5fa841..0000000000 --- a/sysdeps/x86_64/multiarch/init-arch.c +++ /dev/null @@ -1,223 +0,0 @@ -/* Initialize CPU feature data. - This file is part of the GNU C Library. - Copyright (C) 2008-2015 Free Software Foundation, Inc. - Contributed by Ulrich Drepper <drepper@redhat.com>. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <atomic.h> -#include <cpuid.h> -#include "init-arch.h" - - -struct cpu_features __cpu_features attribute_hidden; - - -static void -get_common_indeces (unsigned int *family, unsigned int *model) -{ - __cpuid (1, __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax, - __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx, - __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx, - __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx); - - unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax; - *family = (eax >> 8) & 0x0f; - *model = (eax >> 4) & 0x0f; -} - - -void -__init_cpu_features (void) -{ - unsigned int ebx; - unsigned int ecx; - unsigned int edx; - unsigned int family = 0; - unsigned int model = 0; - enum cpu_features_kind kind; - - __cpuid (0, __cpu_features.max_cpuid, ebx, ecx, edx); - - /* This spells out "GenuineIntel". 
*/ - if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) - { - kind = arch_kind_intel; - - get_common_indeces (&family, &model); - - unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax; - unsigned int extended_family = (eax >> 20) & 0xff; - unsigned int extended_model = (eax >> 12) & 0xf0; - if (family == 0x0f) - { - family += extended_family; - model += extended_model; - } - else if (family == 0x06) - { - ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx; - model += extended_model; - switch (model) - { - case 0x1c: - case 0x26: - /* BSF is slow on Atom. */ - __cpu_features.feature[index_Slow_BSF] |= bit_Slow_BSF; - break; - - case 0x37: - case 0x4a: - case 0x4d: - case 0x5a: - case 0x5d: - /* Unaligned load versions are faster than SSSE3 - on Silvermont. */ -#if index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop -# error index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop -#endif -#if index_Fast_Unaligned_Load != index_Slow_SSE4_2 -# error index_Fast_Unaligned_Load != index_Slow_SSE4_2 -#endif - __cpu_features.feature[index_Fast_Unaligned_Load] - |= (bit_Fast_Unaligned_Load - | bit_Prefer_PMINUB_for_stringop - | bit_Slow_SSE4_2); - break; - - default: - /* Unknown family 0x06 processors. Assuming this is one - of Core i3/i5/i7 processors if AVX is available. */ - if ((ecx & bit_AVX) == 0) - break; - - case 0x1a: - case 0x1e: - case 0x1f: - case 0x25: - case 0x2c: - case 0x2e: - case 0x2f: - /* Rep string instructions, copy backward, unaligned loads - and pminub are fast on Intel Core i3, i5 and i7. 
*/ -#if index_Fast_Rep_String != index_Fast_Copy_Backward -# error index_Fast_Rep_String != index_Fast_Copy_Backward -#endif -#if index_Fast_Rep_String != index_Fast_Unaligned_Load -# error index_Fast_Rep_String != index_Fast_Unaligned_Load -#endif -#if index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop -# error index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop -#endif - __cpu_features.feature[index_Fast_Rep_String] - |= (bit_Fast_Rep_String - | bit_Fast_Copy_Backward - | bit_Fast_Unaligned_Load - | bit_Prefer_PMINUB_for_stringop); - break; - } - } - } - /* This spells out "AuthenticAMD". */ - else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) - { - kind = arch_kind_amd; - - get_common_indeces (&family, &model); - - ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx; - - unsigned int eax; - __cpuid (0x80000000, eax, ebx, ecx, edx); - if (eax >= 0x80000001) - __cpuid (0x80000001, - __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].eax, - __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ebx, - __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx, - __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].edx); - } - else - kind = arch_kind_other; - - if (__cpu_features.max_cpuid >= 7) - __cpuid_count (7, 0, - __cpu_features.cpuid[COMMON_CPUID_INDEX_7].eax, - __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ebx, - __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ecx, - __cpu_features.cpuid[COMMON_CPUID_INDEX_7].edx); - - /* Can we call xgetbv? */ - if (CPUID_OSXSAVE) - { - unsigned int xcrlow; - unsigned int xcrhigh; - asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); - /* Is YMM and XMM state usable? */ - if ((xcrlow & (bit_YMM_state | bit_XMM_state)) == - (bit_YMM_state | bit_XMM_state)) - { - /* Determine if AVX is usable. 
*/ - if (CPUID_AVX) - __cpu_features.feature[index_AVX_Usable] |= bit_AVX_Usable; -#if index_AVX2_Usable != index_AVX_Fast_Unaligned_Load -# error index_AVX2_Usable != index_AVX_Fast_Unaligned_Load -#endif - /* Determine if AVX2 is usable. Unaligned load with 256-bit - AVX registers are faster on processors with AVX2. */ - if (CPUID_AVX2) - __cpu_features.feature[index_AVX2_Usable] - |= bit_AVX2_Usable | bit_AVX_Fast_Unaligned_Load; - /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and - ZMM16-ZMM31 state are enabled. */ - if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state - | bit_ZMM16_31_state)) == - (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state)) - { - /* Determine if AVX512F is usable. */ - if (CPUID_AVX512F) - { - __cpu_features.feature[index_AVX512F_Usable] - |= bit_AVX512F_Usable; - /* Determine if AVX512DQ is usable. */ - if (CPUID_AVX512DQ) - __cpu_features.feature[index_AVX512DQ_Usable] - |= bit_AVX512DQ_Usable; - } - } - /* Determine if FMA is usable. */ - if (CPUID_FMA) - __cpu_features.feature[index_FMA_Usable] |= bit_FMA_Usable; - /* Determine if FMA4 is usable. */ - if (CPUID_FMA4) - __cpu_features.feature[index_FMA4_Usable] |= bit_FMA4_Usable; - } - } - - __cpu_features.family = family; - __cpu_features.model = model; - atomic_write_barrier (); - __cpu_features.kind = kind; -} - -#undef __get_cpu_features - -const struct cpu_features * -__get_cpu_features (void) -{ - if (__cpu_features.kind == arch_kind_unknown) - __init_cpu_features (); - - return &__cpu_features; -} |