/* Initialize CPU features for use by the math library. This file is part of the GNU C Library. Copyright (C) 2008-2018 Free Software Foundation, Inc. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, see . */ #include #include #include #include #include static unsigned int features; unsigned int __x86_math_features (void) { unsigned int features_local = atomic_load_relaxed (&features); if (features_local != 0) /* At least the initialization bit is set, which means that we have a proper value. */ return features_local; /* Perform initialization. */ features_local = x86_math_feature_initialized; unsigned int eax, ebx, ecx, edx; unsigned int max_cpuid; __cpuid (0, max_cpuid, ebx, ecx, edx); bool cpu_amd = ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65; if (max_cpuid >= 7) { __cpuid (1, eax, ebx, ecx, edx); bool flag_fma = ecx & bit_cpu_FMA; bool flag_osxsave = ecx & bit_cpu_OSXSAVE; bool flag_avx = ecx & bit_cpu_AVX; bool flag_sse41 = ecx & bit_cpu_SSE4_1; if (flag_sse41) features_local |= x86_math_feature_sse41; __cpuid_count (7, 0, eax, ebx, ecx, edx); bool flag_avx2 = ebx & bit_cpu_AVX2; if (flag_osxsave) { unsigned int xcrlow; unsigned int xcrhigh; asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); bool ymm_xmm_usable = (xcrlow & (bit_YMM_state | bit_XMM_state)) == (bit_YMM_state | bit_XMM_state); /* Is YMM and XMM state usable? */ if (ymm_xmm_usable) { if (flag_avx) { features_local |= x86_math_feature_avx; if (flag_avx2) features_local |= x86_math_feature_avx2; if (flag_fma) features_local |= x86_math_feature_fma; if (cpu_amd) { __cpuid (0x80000000, eax, ebx, ecx, edx); if (eax >= 0x80000001) { __cpuid (0x80000001, eax, ebx, ecx, edx); bool flag_fma4 = ecx & bit_cpu_FMA4; if (flag_fma4) features_local |= x86_math_feature_fma4; } } } } } } atomic_store_relaxed (&features, features_local); return features_local; }