about summary refs log tree commit diff
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@gmail.com>2011-10-20 22:43:15 -0400
committerUlrich Drepper <drepper@gmail.com>2011-10-20 22:43:15 -0400
commited72b6545f6d20f2d29ed71d65394d4a75ad358e (patch)
treee47730c47098dfbf1d41d95210009fd4a5fc0e5c
parent8d4f46c613c4397c5531b959744541862cf09ad0 (diff)
downloadglibc-ed72b6545f6d20f2d29ed71d65394d4a75ad358e.tar.gz
glibc-ed72b6545f6d20f2d29ed71d65394d4a75ad358e.tar.xz
glibc-ed72b6545f6d20f2d29ed71d65394d4a75ad358e.zip
Check for FMA4 support and generate appropriate fma functions
-rw-r--r--ChangeLog9
-rw-r--r--config.h.in3
-rw-r--r--sysdeps/i386/configure63
-rw-r--r--sysdeps/i386/configure.in11
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_fma.c22
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_fmaf.c22
-rw-r--r--sysdeps/x86_64/multiarch/init-arch.c10
-rw-r--r--sysdeps/x86_64/multiarch/init-arch.h2
8 files changed, 115 insertions, 27 deletions
diff --git a/ChangeLog b/ChangeLog
index 6cf990699a..b2d629b953 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,14 @@
 2011-10-20  Ulrich Drepper  <drepper@gmail.com>
 
+	* sysdeps/i386/configure.in: Test for -mfma4 option.
+	* config.h.in: Add HAVE_FMA4_SUPPORT entry.
+	* sysdeps/x86_64/multiarch/init-arch.h: Define HAS_FMA4 and
+	COMMON_CPUID_INDEX_80000001.
+	* sysdeps/x86_64/multiarch/init-arch.c: Read 80000001 leaf for AMD.
+	* sysdeps/x86_64/fpu/multiarch/s_fma.c: Test for FMA4 support and
+	use it if FMA3 is not supported.
+	* sysdeps/x86_64/fpu/multiarch/s_fmaf.c: Likewise.
+
 	* sysdeps/x86_64/multiarch/s_fma.c: Moved to ../fpu/multiarch.
 	* sysdeps/x86_64/multiarch/s_fmaf.c: Likewise.
 
diff --git a/config.h.in b/config.h.in
index 7b5095691d..7db663025a 100644
--- a/config.h.in
+++ b/config.h.in
@@ -118,6 +118,9 @@
 /* Define if gcc supports AVX.  */
 #undef	HAVE_AVX_SUPPORT
 
+/* Define if gcc supports FMA4.  */
+#undef	HAVE_FMA4_SUPPORT
+
 /* Define if the compiler's exception support is based on libunwind.  */
 #undef	HAVE_CC_WITH_LIBUNWIND
 
diff --git a/sysdeps/i386/configure b/sysdeps/i386/configure
index adffe3f9a9..ae494e2712 100644
--- a/sysdeps/i386/configure
+++ b/sysdeps/i386/configure
@@ -167,7 +167,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
 
 	ac_retval=1
 fi
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
   as_fn_set_status $ac_retval
 
 } # ac_fn_c_try_compile
@@ -193,7 +193,7 @@ $as_echo "$ac_try_echo"; } >&5
     mv -f conftest.er1 conftest.err
   fi
   $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; } >/dev/null && {
+  test $ac_status = 0; } > conftest.i && {
 	 test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
 	 test ! -s conftest.err
        }; then :
@@ -204,7 +204,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
 
     ac_retval=1
 fi
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
   as_fn_set_status $ac_retval
 
 } # ac_fn_c_try_cpp
@@ -217,10 +217,10 @@ fi
 ac_fn_c_check_header_mongrel ()
 {
   as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
-  if eval "test \"\${$3+set}\"" = set; then :
+  if eval \${$3+:} false; then :
   { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
 $as_echo_n "checking for $2... " >&6; }
-if eval "test \"\${$3+set}\"" = set; then :
+if eval \${$3+:} false; then :
   $as_echo_n "(cached) " >&6
 fi
 eval ac_res=\$$3
@@ -256,7 +256,7 @@ if ac_fn_c_try_cpp "$LINENO"; then :
 else
   ac_header_preproc=no
 fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5
 $as_echo "$ac_header_preproc" >&6; }
 
@@ -283,7 +283,7 @@ $as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
 esac
   { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
 $as_echo_n "checking for $2... " >&6; }
-if eval "test \"\${$3+set}\"" = set; then :
+if eval \${$3+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   eval "$3=\$ac_header_compiler"
@@ -292,7 +292,7 @@ eval ac_res=\$$3
 	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
 $as_echo "$ac_res" >&6; }
 fi
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
 
 } # ac_fn_c_check_header_mongrel
 
@@ -333,7 +333,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
        ac_retval=$ac_status
 fi
   rm -rf conftest.dSYM conftest_ipa8_conftest.oo
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
   as_fn_set_status $ac_retval
 
 } # ac_fn_c_try_run
@@ -347,7 +347,7 @@ ac_fn_c_check_header_compile ()
   as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
   { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
 $as_echo_n "checking for $2... " >&6; }
-if eval "test \"\${$3+set}\"" = set; then :
+if eval \${$3+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -365,7 +365,7 @@ fi
 eval ac_res=\$$3
 	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
 $as_echo "$ac_res" >&6; }
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
 
 } # ac_fn_c_check_header_compile
 # This file is generated from configure.in by Autoconf.  DO NOT EDIT!
@@ -375,7 +375,7 @@ $as_echo "$ac_res" >&6; }
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5
 $as_echo_n "checking for grep that handles long lines and -e... " >&6; }
-if test "${ac_cv_path_GREP+set}" = set; then :
+if ${ac_cv_path_GREP+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   if test -z "$GREP"; then
@@ -438,7 +438,7 @@ $as_echo "$ac_cv_path_GREP" >&6; }
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5
 $as_echo_n "checking for egrep... " >&6; }
-if test "${ac_cv_path_EGREP+set}" = set; then :
+if ${ac_cv_path_EGREP+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   if echo a | $GREP -E '(a|b)' >/dev/null 2>&1
@@ -505,7 +505,7 @@ $as_echo "$ac_cv_path_EGREP" >&6; }
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5
 $as_echo_n "checking for ANSI C header files... " >&6; }
-if test "${ac_cv_header_stdc+set}" = set; then :
+if ${ac_cv_header_stdc+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -633,7 +633,7 @@ done
 
 
 ac_fn_c_check_header_mongrel "$LINENO" "cpuid.h" "ac_cv_header_cpuid_h" "$ac_includes_default"
-if test "x$ac_cv_header_cpuid_h" = x""yes; then :
+if test "x$ac_cv_header_cpuid_h" = xyes; then :
 
 else
   as_fn_error $? "gcc must provide the <cpuid.h> header" "$LINENO" 5
@@ -643,7 +643,7 @@ fi
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking if -g produces usable source locations for assembler-with-cpp" >&5
 $as_echo_n "checking if -g produces usable source locations for assembler-with-cpp... " >&6; }
-if test "${libc_cv_cpp_asm_debuginfo+set}" = set; then :
+if ${libc_cv_cpp_asm_debuginfo+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   cat > conftest.S <<EOF
@@ -693,7 +693,7 @@ fi
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for SSE4 support" >&5
 $as_echo_n "checking for SSE4 support... " >&6; }
-if test "${libc_cv_cc_sse4+set}" = set; then :
+if ${libc_cv_cc_sse4+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   if { ac_try='${CC-cc} -msse4 -xc /dev/null -S -o /dev/null'
@@ -716,7 +716,7 @@ fi
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for assembler -mtune=i686 support" >&5
 $as_echo_n "checking for assembler -mtune=i686 support... " >&6; }
-if test "${libc_cv_as_i686+set}" = set; then :
+if ${libc_cv_as_i686+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   if { ac_try='${CC-cc} -Wa,-mtune=i686 -xc /dev/null -S -o /dev/null'
@@ -735,7 +735,7 @@ $as_echo "$libc_cv_as_i686" >&6; }
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX support" >&5
 $as_echo_n "checking for AVX support... " >&6; }
-if test "${libc_cv_cc_avx+set}" = set; then :
+if ${libc_cv_cc_avx+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   if { ac_try='${CC-cc} -mavx -xc /dev/null -S -o /dev/null'
@@ -756,9 +756,32 @@ if test $libc_cv_cc_avx = yes; then
 
 fi
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for FMA4 support" >&5
+$as_echo_n "checking for FMA4 support... " >&6; }
+if ${libc_cv_cc_fma4+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if { ac_try='${CC-cc} -mfma4 -xc /dev/null -S -o /dev/null'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then
+  libc_cv_cc_fma4=yes
+else
+  libc_cv_cc_fma4=no
+fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_fma4" >&5
+$as_echo "$libc_cv_cc_fma4" >&6; }
+if test $libc_cv_cc_fma4 = yes; then
+  $as_echo "#define HAVE_FMA4_SUPPORT 1" >>confdefs.h
+
+fi
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -mno-vzeroupper support" >&5
 $as_echo_n "checking for -mno-vzeroupper support... " >&6; }
-if test "${libc_cv_cc_novzeroupper+set}" = set; then :
+if ${libc_cv_cc_novzeroupper+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   if { ac_try='${CC-cc} -mno-vzeroupper -xc /dev/null -S -o /dev/null'
diff --git a/sysdeps/i386/configure.in b/sysdeps/i386/configure.in
index 67fd1d7df1..5a9840e16c 100644
--- a/sysdeps/i386/configure.in
+++ b/sysdeps/i386/configure.in
@@ -67,6 +67,17 @@ if test $libc_cv_cc_avx = yes; then
   AC_DEFINE(HAVE_AVX_SUPPORT)
 fi
 
+dnl Check if -mfma4 works.
+AC_CACHE_CHECK(for FMA4 support, libc_cv_cc_fma4, [dnl
+if AC_TRY_COMMAND([${CC-cc} -mfma4 -xc /dev/null -S -o /dev/null]); then
+  libc_cv_cc_fma4=yes
+else
+  libc_cv_cc_fma4=no
+fi])
+if test $libc_cv_cc_fma4 = yes; then
+  AC_DEFINE(HAVE_FMA4_SUPPORT)
+fi
+
 dnl Check if -mno-vzeroupper works.
 AC_CACHE_CHECK(for -mno-vzeroupper support, libc_cv_cc_novzeroupper, [dnl
 if AC_TRY_COMMAND([${CC-cc} -mno-vzeroupper -xc /dev/null -S -o /dev/null]); then
diff --git a/sysdeps/x86_64/fpu/multiarch/s_fma.c b/sysdeps/x86_64/fpu/multiarch/s_fma.c
index 9a680c68fc..06f2d001d9 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_fma.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_fma.c
@@ -1,5 +1,5 @@
 /* FMA version of fma.
-   Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+   Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
@@ -28,13 +28,29 @@ extern double __fma_sse2 (double x, double y, double z) attribute_hidden;
 
 
 static double
-__fma_fma (double x, double y, double z)
+__fma_fma3 (double x, double y, double z)
 {
   asm ("vfmadd213sd %3, %2, %0" : "=x" (x) : "0" (x), "x" (y), "xm" (z));
   return x;
 }
 
-libm_ifunc (__fma, HAS_FMA ? __fma_fma : __fma_sse2);
+
+# ifdef HAVE_FMA4_SUPPORT
+static double
+__fma_fma4 (double x, double y, double z)
+{
+  asm ("vfmaddsd %3, %2, %1, %0" : "=x" (x) : "x" (x), "xm" (y), "xm" (z));
+  return x;
+}
+# else
+#  undef HAS_FMA4
+#  define HAS_FMA4 0
+#  define __fma_fma4 NULL
+# endif
+
+
+libm_ifunc (__fma, HAS_FMA
+	    ? __fma_fma3 : (HAS_FMA4 ? __fma_fma4 : __fma_sse2));
 weak_alias (__fma, fma)
 
 # define __fma __fma_sse2
diff --git a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c
index 85ef65a50e..53c08de47c 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c
@@ -1,5 +1,5 @@
 /* FMA version of fmaf.
-   Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+   Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -27,13 +27,29 @@ extern float __fmaf_sse2 (float x, float y, float z) attribute_hidden;
 
 
 static float
-__fmaf_fma (float x, float y, float z)
+__fmaf_fma3 (float x, float y, float z)
 {
   asm ("vfmadd213ss %3, %2, %0" : "=x" (x) : "0" (x), "x" (y), "xm" (z));
   return x;
 }
 
-libm_ifunc (__fmaf, HAS_FMA ? __fmaf_fma : __fmaf_sse2);
+
+# ifdef HAVE_FMA4_SUPPORT
+static float
+__fmaf_fma4 (float x, float y, float z)
+{
+  asm ("vfmaddss %3, %2, %1, %0" : "=x" (x) : "x" (x), "xm" (y), "xm" (z));
+  return x;
+}
+# else
+#  undef HAS_FMA4
+#  define HAS_FMA4 0
+#  define __fmaf_fma4 NULL
+# endif
+
+
+libm_ifunc (__fmaf, HAS_FMA
+	    ? __fmaf_fma3 : (HAS_FMA4 ? __fmaf_fma4 : __fmaf_sse2));
 weak_alias (__fmaf, fmaf)
 
 # define __fmaf __fmaf_sse2
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index 0a145ca259..3fde5d94ce 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -86,7 +86,7 @@ __init_cpu_features (void)
 
 	    default:
 	      /* Unknown family 0x06 processors.  Assuming this is one
-	         of Core i3/i5/i7 processors if AVX is available.  */
+		 of Core i3/i5/i7 processors if AVX is available.  */
 	      if ((ecx & bit_AVX) == 0)
 		break;
 
@@ -131,6 +131,14 @@ __init_cpu_features (void)
       if ((ecx & 0x200))
 	__cpu_features.feature[index_Prefer_SSE_for_memop]
 	  |= bit_Prefer_SSE_for_memop;
+
+      __cpuid (0x80000000, eax, ebx, ecx, edx);
+      if (eax >= 0x80000001)
+	__cpuid (0x80000001,
+		 __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].eax,
+		 __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ebx,
+		 __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx,
+		 __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].edx);
     }
   else
     kind = arch_kind_other;
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index e8d48c2456..2fb6f75b66 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -53,6 +53,7 @@
 enum
   {
     COMMON_CPUID_INDEX_1 = 0,
+    COMMON_CPUID_INDEX_80000001,	/* for AMD */
     /* Keep the following line at the end.  */
     COMMON_CPUID_INDEX_MAX
   };
@@ -113,6 +114,7 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define HAS_SSE4_1	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 19)
 # define HAS_SSE4_2	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 20)
 # define HAS_FMA	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 12)
+# define HAS_FMA4	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_80000001, ecx, 16)
 
 # define index_Fast_Rep_String		FEATURE_INDEX_1
 # define index_Fast_Copy_Backward	FEATURE_INDEX_1