about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/Makefile | 2
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/ifunc-avx-fma4.h | 13
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/ifunc-fma.h | 7
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h | 10
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/ifunc-sse4_1.h | 5
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/s_fma.c | 5
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/s_fmaf.c | 5
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/x86-math-features.c | 96
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/x86-math-features.h | 18
9 files changed, 140 insertions, 21 deletions
diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
index 9a89bfc286..9987e1bb2b 100644
--- a/sysdeps/x86_64/fpu/multiarch/Makefile
+++ b/sysdeps/x86_64/fpu/multiarch/Makefile
@@ -15,6 +15,8 @@ libm-sysdep_routines += e_exp-fma e_log-fma e_pow-fma s_atan-fma \
 			halfulp-fma mpexp-fma \
 			mpatan2-fma mpatan-fma mpsqrt-fma mptan-fma
 
+libm-sysdep_routines += x86-math-features
+
 CFLAGS-doasin-fma.c = -mfma -mavx2
 CFLAGS-dosincos-fma.c = -mfma -mavx2
 CFLAGS-e_asin-fma.c = -mfma -mavx2
diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-avx-fma4.h b/sysdeps/x86_64/fpu/multiarch/ifunc-avx-fma4.h
index a5f9375afc..c5924309c9 100644
--- a/sysdeps/x86_64/fpu/multiarch/ifunc-avx-fma4.h
+++ b/sysdeps/x86_64/fpu/multiarch/ifunc-avx-fma4.h
@@ -18,6 +18,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <init-arch.h>
+#include <x86-math-features.h>
 
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden;
@@ -27,16 +28,14 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (fma4) attribute_hidden;
 static inline void *
 IFUNC_SELECTOR (void)
 {
-  const struct cpu_features* cpu_features = __get_cpu_features ();
+  unsigned int features = __x86_math_features ();
 
-  if (CPU_FEATURES_ARCH_P (cpu_features, FMA_Usable)
-      && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable))
+  if ((features & x86_math_feature_fma)
+      && (features & x86_math_feature_avx2))
     return OPTIMIZE (fma);
-
-  if (CPU_FEATURES_ARCH_P (cpu_features, FMA4_Usable))
+  if (features & x86_math_feature_fma4)
     return OPTIMIZE (fma4);
-
-  if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Usable))
+  if (features & x86_math_feature_avx)
     return OPTIMIZE (avx);
 
   return OPTIMIZE (sse2);
diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-fma.h b/sysdeps/x86_64/fpu/multiarch/ifunc-fma.h
index 63a8cd221f..1b0a95db0d 100644
--- a/sysdeps/x86_64/fpu/multiarch/ifunc-fma.h
+++ b/sysdeps/x86_64/fpu/multiarch/ifunc-fma.h
@@ -17,6 +17,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <init-arch.h>
+#include <x86-math-features.h>
 
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (fma) attribute_hidden;
@@ -24,10 +25,10 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (fma) attribute_hidden;
 static inline void *
 IFUNC_SELECTOR (void)
 {
-  const struct cpu_features* cpu_features = __get_cpu_features ();
+  unsigned int features = __x86_math_features ();
 
-  if (CPU_FEATURES_ARCH_P (cpu_features, FMA_Usable)
-      && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable))
+  if ((features & x86_math_feature_fma)
+      && (features & x86_math_feature_avx2))
     return OPTIMIZE (fma);
 
   return OPTIMIZE (sse2);
diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h b/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h
index a2526a2ee0..6fb21ee024 100644
--- a/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h
+++ b/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h
@@ -18,6 +18,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <init-arch.h>
+#include <x86-math-features.h>
 
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (fma) attribute_hidden;
@@ -26,13 +27,12 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (fma4) attribute_hidden;
 static inline void *
 IFUNC_SELECTOR (void)
 {
-  const struct cpu_features* cpu_features = __get_cpu_features ();
+  unsigned int features = __x86_math_features ();
 
-  if (CPU_FEATURES_ARCH_P (cpu_features, FMA_Usable)
-      && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable))
+  if ((features & x86_math_feature_fma)
+      && (features & x86_math_feature_avx2))
     return OPTIMIZE (fma);
-
-  if (CPU_FEATURES_ARCH_P (cpu_features, FMA4_Usable))
+  if (features & x86_math_feature_fma4)
     return OPTIMIZE (fma4);
 
   return OPTIMIZE (sse2);
diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-sse4_1.h b/sysdeps/x86_64/fpu/multiarch/ifunc-sse4_1.h
index a8710ba802..ff136f4dc5 100644
--- a/sysdeps/x86_64/fpu/multiarch/ifunc-sse4_1.h
+++ b/sysdeps/x86_64/fpu/multiarch/ifunc-sse4_1.h
@@ -17,6 +17,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <init-arch.h>
+#include <x86-math-features.h>
 
 extern __typeof (REDIRECT_NAME) OPTIMIZE (c) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse41) attribute_hidden;
@@ -24,9 +25,9 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse41) attribute_hidden;
 static inline void *
 IFUNC_SELECTOR (void)
 {
-  const struct cpu_features* cpu_features = __get_cpu_features ();
+  unsigned int features = __x86_math_features ();
 
-  if (CPU_FEATURES_CPU_P (cpu_features, SSE4_1))
+  if (features & x86_math_feature_sse41)
     return OPTIMIZE (sse41);
 
   return OPTIMIZE (c);
diff --git a/sysdeps/x86_64/fpu/multiarch/s_fma.c b/sysdeps/x86_64/fpu/multiarch/s_fma.c
index 875c76d372..66da7ff132 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_fma.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_fma.c
@@ -20,6 +20,7 @@
 #include <config.h>
 #include <math.h>
 #include <init-arch.h>
+#include <x86-math-features.h>
 #include <libm-alias-double.h>
 
 extern double __fma_sse2 (double x, double y, double z) attribute_hidden;
@@ -41,8 +42,8 @@ __fma_fma4 (double x, double y, double z)
 }
 
 
-libm_ifunc (__fma, HAS_ARCH_FEATURE (FMA_Usable)
-	    ? __fma_fma3 : (HAS_ARCH_FEATURE (FMA4_Usable)
+libm_ifunc (__fma, __x86_math_features () & x86_math_feature_fma
+	    ? __fma_fma3 : (__x86_math_features () & x86_math_feature_fma4
 			    ? __fma_fma4 : __fma_sse2));
 libm_alias_double (__fma, fma)
 
diff --git a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c
index 5f4c2ec0be..d65aa9a16f 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c
@@ -19,6 +19,7 @@
 #include <config.h>
 #include <math.h>
 #include <init-arch.h>
+#include <x86-math-features.h>
 #include <libm-alias-float.h>
 
 extern float __fmaf_sse2 (float x, float y, float z) attribute_hidden;
@@ -40,8 +41,8 @@ __fmaf_fma4 (float x, float y, float z)
 }
 
 
-libm_ifunc (__fmaf, HAS_ARCH_FEATURE (FMA_Usable)
-	    ? __fmaf_fma3 : (HAS_ARCH_FEATURE (FMA4_Usable)
+libm_ifunc (__fmaf, __x86_math_features () & x86_math_feature_fma
+	    ? __fmaf_fma3 : (__x86_math_features () & x86_math_feature_fma4
 			     ? __fmaf_fma4 : __fmaf_sse2));
 libm_alias_float (__fma, fma)
 
diff --git a/sysdeps/x86_64/fpu/multiarch/x86-math-features.c b/sysdeps/x86_64/fpu/multiarch/x86-math-features.c
new file mode 100644
index 0000000000..e803b73229
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/x86-math-features.c
@@ -0,0 +1,96 @@
+/* Initialize CPU features for use by the math library.
+   This file is part of the GNU C Library.
+   Copyright (C) 2008-2018 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <x86-math-features.h>
+#include <atomic.h>
+#include <cpuid.h>
+#include <cpu-features.h>
+#include <stdbool.h>
+
+static unsigned int features;
+
+/* Return the x86_math_feature_* flags of the running CPU.  The value is
+   computed on the first call and cached in FEATURES afterwards.  */
+unsigned int
+__x86_math_features (void)
+{
+  unsigned int features_local = atomic_load_relaxed (&features);
+  if (features_local != 0)
+    /* At least the initialization bit is set, which means that we
+       have a proper value.  */
+    return features_local;
+
+  /* Perform initialization.  */
+  features_local = x86_math_feature_initialized;
+
+  unsigned int max_cpuid, eax, ebx, ecx, edx;
+  __cpuid (0, max_cpuid, ebx, ecx, edx);
+  /* The vendor string "AuthenticAMD" is split across EBX, EDX, ECX.  */
+  bool cpu_amd = ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65;
+
+  /* SSE4.1, AVX, FMA and OSXSAVE are reported by leaf 1.  Only AVX2
+     lives in leaf 7, so leaf 7 must not gate the other features.  */
+  if (max_cpuid >= 1)
+    {
+      __cpuid (1, eax, ebx, ecx, edx);
+      bool flag_fma = ecx & bit_cpu_FMA;
+      bool flag_osxsave = ecx & bit_cpu_OSXSAVE;
+      bool flag_avx = ecx & bit_cpu_AVX;
+      bool flag_sse41 = ecx & bit_cpu_SSE4_1;
+
+      if (flag_sse41)
+        features_local |= x86_math_feature_sse41;
+
+      if (max_cpuid >= 7)
+        __cpuid_count (7, 0, eax, ebx, ecx, edx);
+      bool flag_avx2 = max_cpuid >= 7 && (ebx & bit_cpu_AVX2);
+
+      if (flag_osxsave)
+        {
+          unsigned int xcrlow, xcrhigh;
+          asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
+          bool ymm_xmm_usable
+            = (xcrlow & (bit_YMM_state | bit_XMM_state))
+              == (bit_YMM_state | bit_XMM_state);
+
+          /* AVX-based features need usable YMM and XMM state.  */
+          if (ymm_xmm_usable && flag_avx)
+            {
+              features_local |= x86_math_feature_avx;
+              if (flag_avx2)
+                features_local |= x86_math_feature_avx2;
+              if (flag_fma)
+                features_local |= x86_math_feature_fma;
+
+              if (cpu_amd)
+                {
+                  __cpuid (0x80000000, eax, ebx, ecx, edx);
+                  if (eax >= 0x80000001)
+                    {
+                      __cpuid (0x80000001, eax, ebx, ecx, edx);
+                      if (ecx & bit_cpu_FMA4)
+                        features_local |= x86_math_feature_fma4;
+                    }
+                }
+            }
+        }
+    }
+
+  atomic_store_relaxed (&features, features_local);
+  return features_local;
+}
diff --git a/sysdeps/x86_64/fpu/multiarch/x86-math-features.h b/sysdeps/x86_64/fpu/multiarch/x86-math-features.h
new file mode 100644
index 0000000000..c43b66de7c
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/x86-math-features.h
@@ -0,0 +1,18 @@
+#ifndef X86_MATH_FEATURES_H
+#define X86_MATH_FEATURES_H
+
+enum
+  {
+    x86_math_feature_initialized = 1 << 0, /* Set in every returned value.  */
+    x86_math_feature_avx = 1 << 1,   /* AVX, with XMM/YMM state enabled.  */
+    x86_math_feature_avx2 = 1 << 2,  /* AVX2; only set together with avx.  */
+    x86_math_feature_fma = 1 << 3,   /* FMA; only set together with avx.  */
+    x86_math_feature_fma4 = 1 << 4,  /* FMA4; probed on AMD CPUs with avx.  */
+    x86_math_feature_sse41 = 1 << 5, /* SSE4.1.  */
+  };
+
+/* Return the bitwise OR of the x86_math_feature_* flags above.  */
+unsigned int __x86_math_features (void)
+  __attribute__ ((const)) attribute_hidden;
+
+#endif /* X86_MATH_FEATURES_H */