diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2018-12-03 05:54:43 -0800 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2018-12-03 05:54:56 -0800 |
commit | c22e4c2a1431c5e77bf4288d35bf7629f2f093aa (patch) | |
tree | 666ae70088b899ad87a250a08b3be7a24bbc0157 /sysdeps/x86/cpu-features.c | |
parent | 7b36d26b22d147ffc347f427f9fd584700578a94 (diff) | |
download | glibc-c22e4c2a1431c5e77bf4288d35bf7629f2f093aa.tar.gz glibc-c22e4c2a1431c5e77bf4288d35bf7629f2f093aa.tar.xz glibc-c22e4c2a1431c5e77bf4288d35bf7629f2f093aa.zip |
x86: Extend CPUID support in struct cpu_features
Extend CPUID support for all feature bits from CPUID. Add a new macro, CPU_FEATURE_USABLE, which can be used to check if a feature is usable at run-time, instead of HAS_CPU_FEATURE and HAS_ARCH_FEATURE. Add COMMON_CPUID_INDEX_D_ECX_1, COMMON_CPUID_INDEX_80000007 and COMMON_CPUID_INDEX_80000008 to check CPU feature bits in them. Tested on i686 and x86-64 as well as using build-many-glibcs.py with x86 targets. * sysdeps/x86/cacheinfo.c (intel_check_word): Updated for cpu_features_basic. (__cache_sysconf): Likewise. (init_cacheinfo): Likewise. * sysdeps/x86/cpu-features.c (get_extended_indices): Also populate COMMON_CPUID_INDEX_80000007 and COMMON_CPUID_INDEX_80000008. (get_common_indices): Also populate COMMON_CPUID_INDEX_D_ECX_1. Use CPU_FEATURES_CPU_P (cpu_features, XSAVEC) to check if XSAVEC is available. Set the bit_arch_XXX_Usable bits. (init_cpu_features): Use _Static_assert on index_arch_Fast_Unaligned_Load. __get_cpuid_registers and __get_arch_feature. Updated for cpu_features_basic. Set stepping in cpu_features. * sysdeps/x86/cpu-features.h (FEATURE_INDEX_1): Changed to enum. (FEATURE_INDEX_2): New. (FEATURE_INDEX_MAX): Changed to enum. (COMMON_CPUID_INDEX_D_ECX_1): New. (COMMON_CPUID_INDEX_80000007): Likewise. (COMMON_CPUID_INDEX_80000008): Likewise. (cpuid_registers): Likewise. (cpu_features_basic): Likewise. (CPU_FEATURE_USABLE): Likewise. (bit_arch_XXX_Usable): Likewise. (cpu_features): Use cpuid_registers and cpu_features_basic. (bit_arch_XXX): Rewritten. (bit_cpu_XXX): Likewise. (index_cpu_XXX): Likewise. (reg_XXX): Likewise. * sysdeps/x86/tst-get-cpu-features.c: Include <stdio.h> and <support/check.h>. (CHECK_CPU_FEATURE): New. (CHECK_CPU_FEATURE_USABLE): Likewise. (cpu_kinds): Likewise. (do_test): Print vendor, family, model and stepping. Check HAS_CPU_FEATURE and CPU_FEATURE_USABLE. (TEST_FUNCTION): Removed. Include <support/test-driver.c> instead of "../../test-skeleton.c". 
* sysdeps/x86_64/multiarch/sched_cpucount.c (__sched_cpucount): Check POPCNT instead of POPCOUNT. * sysdeps/x86_64/multiarch/test-multiarch.c (do_test): Likewise.
Diffstat (limited to 'sysdeps/x86/cpu-features.c')
-rw-r--r-- | sysdeps/x86/cpu-features.c | 141 |
1 files changed, 107 insertions, 34 deletions
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index 80b3054cf8..5f9eefd408 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -53,7 +53,18 @@ get_extended_indices (struct cpu_features *cpu_features) cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ebx, cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ecx, cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].edx); - + if (eax >= 0x80000007) + __cpuid (0x80000007, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].eax, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].ebx, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].ecx, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].edx); + if (eax >= 0x80000008) + __cpuid (0x80000008, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].eax, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].ebx, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].ecx, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].edx); } static void @@ -79,13 +90,20 @@ get_common_indices (struct cpu_features *cpu_features, } } - if (cpu_features->max_cpuid >= 7) + if (cpu_features->basic.max_cpuid >= 7) __cpuid_count (7, 0, cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax, cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx, cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx, cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx); + if (cpu_features->basic.max_cpuid >= 0xd) + __cpuid_count (0xd, 1, + cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].eax, + cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].ebx, + cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].ecx, + cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].edx); + /* Can we call xgetbv? */ if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE)) { @@ -117,6 +135,18 @@ get_common_indices (struct cpu_features *cpu_features, if (CPU_FEATURES_CPU_P (cpu_features, FMA)) cpu_features->feature[index_arch_FMA_Usable] |= bit_arch_FMA_Usable; + /* Determine if VAES is usable. 
*/ + if (CPU_FEATURES_CPU_P (cpu_features, VAES)) + cpu_features->feature[index_arch_VAES_Usable] + |= bit_arch_VAES_Usable; + /* Determine if VPCLMULQDQ is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, VPCLMULQDQ)) + cpu_features->feature[index_arch_VPCLMULQDQ_Usable] + |= bit_arch_VPCLMULQDQ_Usable; + /* Determine if XOP is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, XOP)) + cpu_features->feature[index_arch_XOP_Usable] + |= bit_arch_XOP_Usable; } /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and @@ -130,17 +160,69 @@ get_common_indices (struct cpu_features *cpu_features, { cpu_features->feature[index_arch_AVX512F_Usable] |= bit_arch_AVX512F_Usable; + /* Determine if AVX512CD is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512CD)) + cpu_features->feature[index_arch_AVX512CD_Usable] + |= bit_arch_AVX512CD_Usable; + /* Determine if AVX512ER is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER)) + cpu_features->feature[index_arch_AVX512ER_Usable] + |= bit_arch_AVX512ER_Usable; + /* Determine if AVX512PF is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512PF)) + cpu_features->feature[index_arch_AVX512PF_Usable] + |= bit_arch_AVX512PF_Usable; + /* Determine if AVX512VL is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512VL)) + cpu_features->feature[index_arch_AVX512VL_Usable] + |= bit_arch_AVX512VL_Usable; /* Determine if AVX512DQ is usable. */ if (CPU_FEATURES_CPU_P (cpu_features, AVX512DQ)) cpu_features->feature[index_arch_AVX512DQ_Usable] |= bit_arch_AVX512DQ_Usable; + /* Determine if AVX512BW is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512BW)) + cpu_features->feature[index_arch_AVX512BW_Usable] + |= bit_arch_AVX512BW_Usable; + /* Determine if AVX512_4FMAPS is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512_4FMAPS)) + cpu_features->feature[index_arch_AVX512_4FMAPS_Usable] + |= bit_arch_AVX512_4FMAPS_Usable; + /* Determine if AVX512_4VNNIW is usable. 
*/ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512_4VNNIW)) + cpu_features->feature[index_arch_AVX512_4VNNIW_Usable] + |= bit_arch_AVX512_4VNNIW_Usable; + /* Determine if AVX512_BITALG is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512_BITALG)) + cpu_features->feature[index_arch_AVX512_BITALG_Usable] + |= bit_arch_AVX512_BITALG_Usable; + /* Determine if AVX512_IFMA is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512_IFMA)) + cpu_features->feature[index_arch_AVX512_IFMA_Usable] + |= bit_arch_AVX512_IFMA_Usable; + /* Determine if AVX512_VBMI is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VBMI)) + cpu_features->feature[index_arch_AVX512_VBMI_Usable] + |= bit_arch_AVX512_VBMI_Usable; + /* Determine if AVX512_VBMI2 is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VBMI2)) + cpu_features->feature[index_arch_AVX512_VBMI2_Usable] + |= bit_arch_AVX512_VBMI2_Usable; + /* Determine if is AVX512_VNNI usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VNNI)) + cpu_features->feature[index_arch_AVX512_VNNI_Usable] + |= bit_arch_AVX512_VNNI_Usable; + /* Determine if AVX512_VPOPCNTDQ is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VPOPCNTDQ)) + cpu_features->feature[index_arch_AVX512_VPOPCNTDQ_Usable] + |= bit_arch_AVX512_VPOPCNTDQ_Usable; } } } /* For _dl_runtime_resolve, set xsave_state_size to xsave area size + integer register save size and align it to 64 bytes. */ - if (cpu_features->max_cpuid >= 0xd) + if (cpu_features->basic.max_cpuid >= 0xd) { unsigned int eax, ebx, ecx, edx; @@ -155,10 +237,8 @@ get_common_indices (struct cpu_features *cpu_features, cpu_features->xsave_state_full_size = xsave_state_full_size; - __cpuid_count (0xd, 1, eax, ebx, ecx, edx); - /* Check if XSAVEC is available. 
*/ - if ((eax & (1 << 1)) != 0) + if (CPU_FEATURES_CPU_P (cpu_features, XSAVEC)) { unsigned int xstate_comp_offsets[32]; unsigned int xstate_comp_sizes[32]; @@ -210,12 +290,25 @@ get_common_indices (struct cpu_features *cpu_features, } } +_Static_assert (((index_arch_Fast_Unaligned_Load + == index_arch_Fast_Unaligned_Copy) + && (index_arch_Fast_Unaligned_Load + == index_arch_Prefer_PMINUB_for_stringop) + && (index_arch_Fast_Unaligned_Load + == index_arch_Slow_SSE4_2) + && (index_arch_Fast_Unaligned_Load + == index_arch_Fast_Rep_String) + && (index_arch_Fast_Unaligned_Load + == index_arch_Fast_Copy_Backward)), + "Incorrect index_arch_Fast_Unaligned_Load"); + static inline void init_cpu_features (struct cpu_features *cpu_features) { unsigned int ebx, ecx, edx; unsigned int family = 0; unsigned int model = 0; + unsigned int stepping = 0; enum cpu_features_kind kind; #if !HAS_CPUID @@ -226,12 +319,12 @@ init_cpu_features (struct cpu_features *cpu_features) } #endif - __cpuid (0, cpu_features->max_cpuid, ebx, ecx, edx); + __cpuid (0, cpu_features->basic.max_cpuid, ebx, ecx, edx); /* This spells out "GenuineIntel". */ if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) { - unsigned int extended_model, stepping; + unsigned int extended_model; kind = arch_kind_intel; @@ -270,15 +363,6 @@ init_cpu_features (struct cpu_features *cpu_features) case 0x5d: /* Unaligned load versions are faster than SSSE3 on Silvermont. 
*/ -#if index_arch_Fast_Unaligned_Load != index_arch_Prefer_PMINUB_for_stringop -# error index_arch_Fast_Unaligned_Load != index_arch_Prefer_PMINUB_for_stringop -#endif -#if index_arch_Fast_Unaligned_Load != index_arch_Slow_SSE4_2 -# error index_arch_Fast_Unaligned_Load != index_arch_Slow_SSE4_2 -#endif -#if index_arch_Fast_Unaligned_Load != index_arch_Fast_Unaligned_Copy -# error index_arch_Fast_Unaligned_Load != index_arch_Fast_Unaligned_Copy -#endif cpu_features->feature[index_arch_Fast_Unaligned_Load] |= (bit_arch_Fast_Unaligned_Load | bit_arch_Fast_Unaligned_Copy @@ -301,15 +385,6 @@ init_cpu_features (struct cpu_features *cpu_features) case 0x2f: /* Rep string instructions, unaligned load, unaligned copy, and pminub are fast on Intel Core i3, i5 and i7. */ -#if index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Load -# error index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Load -#endif -#if index_arch_Fast_Rep_String != index_arch_Prefer_PMINUB_for_stringop -# error index_arch_Fast_Rep_String != index_arch_Prefer_PMINUB_for_stringop -#endif -#if index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Copy -# error index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Copy -#endif cpu_features->feature[index_arch_Fast_Rep_String] |= (bit_arch_Fast_Rep_String | bit_arch_Fast_Unaligned_Load @@ -353,7 +428,7 @@ init_cpu_features (struct cpu_features *cpu_features) /* This spells out "AuthenticAMD". 
*/ else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) { - unsigned int extended_model, stepping; + unsigned int extended_model; kind = arch_kind_amd; @@ -375,9 +450,6 @@ init_cpu_features (struct cpu_features *cpu_features) if (family == 0x15) { -#if index_arch_Fast_Unaligned_Load != index_arch_Fast_Copy_Backward -# error index_arch_Fast_Unaligned_Load != index_arch_Fast_Copy_Backward -#endif /* "Excavator" */ if (model >= 0x60 && model <= 0x7f) { @@ -409,9 +481,10 @@ init_cpu_features (struct cpu_features *cpu_features) no_cpuid: #endif - cpu_features->family = family; - cpu_features->model = model; - cpu_features->kind = kind; + cpu_features->basic.kind = kind; + cpu_features->basic.family = family; + cpu_features->basic.model = model; + cpu_features->basic.stepping = stepping; #if HAVE_TUNABLES TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps)); @@ -432,7 +505,7 @@ no_cpuid: #ifdef __x86_64__ GLRO(dl_hwcap) = HWCAP_X86_64; - if (cpu_features->kind == arch_kind_intel) + if (cpu_features->basic.kind == arch_kind_intel) { const char *platform = NULL; |