about summary refs log tree commit diff
path: root/sysdeps/x86_64
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@gmail.com> 2011-10-20 22:43:15 -0400
committer Ulrich Drepper <drepper@gmail.com> 2011-10-20 22:43:15 -0400
commit ed72b6545f6d20f2d29ed71d65394d4a75ad358e (patch)
tree e47730c47098dfbf1d41d95210009fd4a5fc0e5c /sysdeps/x86_64
parent 8d4f46c613c4397c5531b959744541862cf09ad0 (diff)
download glibc-ed72b6545f6d20f2d29ed71d65394d4a75ad358e.tar.gz
glibc-ed72b6545f6d20f2d29ed71d65394d4a75ad358e.tar.xz
glibc-ed72b6545f6d20f2d29ed71d65394d4a75ad358e.zip
Check for FMA4 support and generate appropriate fma functions
Diffstat (limited to 'sysdeps/x86_64')
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/s_fma.c 22
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/s_fmaf.c 22
-rw-r--r-- sysdeps/x86_64/multiarch/init-arch.c 10
-rw-r--r-- sysdeps/x86_64/multiarch/init-arch.h 2
4 files changed, 49 insertions, 7 deletions
diff --git a/sysdeps/x86_64/fpu/multiarch/s_fma.c b/sysdeps/x86_64/fpu/multiarch/s_fma.c
index 9a680c68fc..06f2d001d9 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_fma.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_fma.c
@@ -1,5 +1,5 @@
 /* FMA version of fma.
-   Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+   Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
@@ -28,13 +28,29 @@ extern double __fma_sse2 (double x, double y, double z) attribute_hidden;
 
 
 static double
-__fma_fma (double x, double y, double z)
+__fma_fma3 (double x, double y, double z)
 {
   asm ("vfmadd213sd %3, %2, %0" : "=x" (x) : "0" (x), "x" (y), "xm" (z));
   return x;
 }
 
-libm_ifunc (__fma, HAS_FMA ? __fma_fma : __fma_sse2);
+
+# ifdef HAVE_FMA4_SUPPORT
+static double
+__fma_fma4 (double x, double y, double z)
+{ /* FMA4 encodes at most one memory source, so only z may be "xm".  */
+  asm ("vfmaddsd %3, %2, %1, %0" : "=x" (x) : "x" (x), "x" (y), "xm" (z));
+  return x;
+}
+# else
+#  undef HAS_FMA4
+#  define HAS_FMA4 0
+#  define __fma_fma4 NULL
+# endif
+
+
+libm_ifunc (__fma, HAS_FMA
+	    ? __fma_fma3 : (HAS_FMA4 ? __fma_fma4 : __fma_sse2));
 weak_alias (__fma, fma)
 
 # define __fma __fma_sse2
diff --git a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c
index 85ef65a50e..53c08de47c 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c
@@ -1,5 +1,5 @@
 /* FMA version of fmaf.
-   Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+   Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -27,13 +27,29 @@ extern float __fmaf_sse2 (float x, float y, float z) attribute_hidden;
 
 
 static float
-__fmaf_fma (float x, float y, float z)
+__fmaf_fma3 (float x, float y, float z)
 {
   asm ("vfmadd213ss %3, %2, %0" : "=x" (x) : "0" (x), "x" (y), "xm" (z));
   return x;
 }
 
-libm_ifunc (__fmaf, HAS_FMA ? __fmaf_fma : __fmaf_sse2);
+
+# ifdef HAVE_FMA4_SUPPORT
+static float
+__fmaf_fma4 (float x, float y, float z)
+{ /* FMA4 encodes at most one memory source, so only z may be "xm".  */
+  asm ("vfmaddss %3, %2, %1, %0" : "=x" (x) : "x" (x), "x" (y), "xm" (z));
+  return x;
+}
+# else
+#  undef HAS_FMA4
+#  define HAS_FMA4 0
+#  define __fmaf_fma4 NULL
+# endif
+
+
+libm_ifunc (__fmaf, HAS_FMA
+	    ? __fmaf_fma3 : (HAS_FMA4 ? __fmaf_fma4 : __fmaf_sse2));
 weak_alias (__fmaf, fmaf)
 
 # define __fmaf __fmaf_sse2
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index 0a145ca259..3fde5d94ce 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -86,7 +86,7 @@ __init_cpu_features (void)
 
 	    default:
 	      /* Unknown family 0x06 processors.  Assuming this is one
-	         of Core i3/i5/i7 processors if AVX is available.  */
+		 of Core i3/i5/i7 processors if AVX is available.  */
 	      if ((ecx & bit_AVX) == 0)
 		break;
 
@@ -131,6 +131,14 @@ __init_cpu_features (void)
       if ((ecx & 0x200))
 	__cpu_features.feature[index_Prefer_SSE_for_memop]
 	  |= bit_Prefer_SSE_for_memop;
+
+      __cpuid (0x80000000, eax, ebx, ecx, edx);
+      if (eax >= 0x80000001)
+	__cpuid (0x80000001,
+		 __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].eax,
+		 __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ebx,
+		 __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx,
+		 __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].edx);
     }
   else
     kind = arch_kind_other;
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index e8d48c2456..2fb6f75b66 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -53,6 +53,7 @@
 enum
   {
     COMMON_CPUID_INDEX_1 = 0,
+    COMMON_CPUID_INDEX_80000001,	/* for AMD */
     /* Keep the following line at the end.  */
     COMMON_CPUID_INDEX_MAX
   };
@@ -113,6 +114,7 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define HAS_SSE4_1	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 19)
 # define HAS_SSE4_2	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 20)
 # define HAS_FMA	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 12)
+# define HAS_FMA4	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_80000001, ecx, 16)
 
 # define index_Fast_Rep_String		FEATURE_INDEX_1
 # define index_Fast_Copy_Backward	FEATURE_INDEX_1