about summary refs log tree commit diff
path: root/sysdeps/x86_64/fpu/multiarch/x86-math-features.c
blob: e803b732290624cd7bf21ac04f656f82ce4a0433 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
/* Initialize CPU features for use by the math library.
   This file is part of the GNU C Library.
   Copyright (C) 2008-2018 Free Software Foundation, Inc.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <x86-math-features.h>
#include <atomic.h>
#include <cpuid.h>
#include <cpu-features.h>
#include <stdbool.h>

/* Cached result of __x86_math_features.  Zero (the initial value of
   this static object) means the feature mask has not been computed
   yet; __x86_math_features stores a non-zero mask here once detection
   has run.  Accessed only through relaxed atomic loads and stores.  */
static unsigned int features;

/* Return a bit mask of x86_math_feature_* flags describing which
   instruction set extensions may be used by the math library.

   The result is computed on the first call and cached in FEATURES.
   The returned mask always has x86_math_feature_initialized set, so a
   cached value of zero unambiguously means "not computed yet".  The
   cache is read and written with relaxed atomics only; if several
   threads make the first call concurrently, each of them may run the
   detection below and store its result.

   SSE4.1 is reported whenever CPUID leaf 1 advertises it.  AVX, AVX2,
   FMA and FMA4 are reported only if, in addition, the OS has enabled
   XSAVE and XCR0 shows that both XMM and YMM state are enabled.  */
unsigned int
__x86_math_features (void)
{
  unsigned int features_local = atomic_load_relaxed (&features);
  if (features_local != 0)
    /* At least the initialization bit is set, which means that we
       have a proper value.  */
    return features_local;

  /* Perform initialization.  */
  features_local = x86_math_feature_initialized;

  unsigned int eax, ebx, ecx, edx;
  unsigned int max_cpuid;
  __cpuid (0, max_cpuid, ebx, ecx, edx);
  /* Vendor string "AuthenticAMD" as returned in EBX, EDX, ECX.  */
  bool cpu_amd = ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65;

  /* Leaf 1 carries the SSE4.1, AVX, FMA and OSXSAVE bits.  It must be
     queried whenever it exists, independently of whether leaf 7 is
     available: the maximum basic leaf can be capped below 7 (for
     example by firmware or a hypervisor) while leaf 1 still
     advertises these features.  */
  if (max_cpuid >= 1)
    {
      __cpuid (1, eax, ebx, ecx, edx);
      bool flag_fma = ecx & bit_cpu_FMA;
      bool flag_osxsave = ecx & bit_cpu_OSXSAVE;
      bool flag_avx = ecx & bit_cpu_AVX;
      bool flag_sse41 = ecx & bit_cpu_SSE4_1;

      if (flag_sse41)
        features_local |= x86_math_feature_sse41;

      /* AVX2 is reported in leaf 7, which may only be queried if the
         CPU declares it as supported.  */
      bool flag_avx2 = false;
      if (max_cpuid >= 7)
        {
          __cpuid_count (7, 0, eax, ebx, ecx, edx);
          flag_avx2 = ebx & bit_cpu_AVX2;
        }

      /* XGETBV may only be executed if OSXSAVE is set.  */
      if (flag_osxsave)
        {
          unsigned int xcrlow;
          unsigned int xcrhigh;
          asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
          bool ymm_xmm_usable
            = (xcrlow & (bit_YMM_state | bit_XMM_state))
              == (bit_YMM_state | bit_XMM_state);

          /* Is YMM and XMM state usable?  All remaining features are
             only reported together with AVX.  */
          if (ymm_xmm_usable && flag_avx)
            {
              features_local |= x86_math_feature_avx;
              if (flag_avx2)
                features_local |= x86_math_feature_avx2;
              if (flag_fma)
                features_local |= x86_math_feature_fma;

              /* FMA4 is only checked on AMD CPUs; it is reported in
                 extended leaf 0x80000001, if that leaf exists.  */
              if (cpu_amd)
                {
                  __cpuid (0x80000000, eax, ebx, ecx, edx);
                  if (eax >= 0x80000001)
                    {
                      __cpuid (0x80000001, eax, ebx, ecx, edx);
                      bool flag_fma4 = ecx & bit_cpu_FMA4;
                      if (flag_fma4)
                        features_local |= x86_math_feature_fma4;
                    }
                }
            }
        }
    }

  atomic_store_relaxed (&features, features_local);
  return features_local;
}