about summary refs log tree commit diff
path: root/sysdeps
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps')
-rw-r--r-- sysdeps/i386/configure | 23
-rw-r--r-- sysdeps/i386/configure.in | 11
-rw-r--r-- sysdeps/x86_64/fpu/bits/mathinline.h | 39
-rw-r--r-- sysdeps/x86_64/fpu/math_private.h | 38
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/Makefile | 22
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/e_atan2.c | 2
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/e_exp.c | 2
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/e_log.c | 3
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/s_atan.c | 2
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/s_sin.c | 4
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/s_tan.c | 2
11 files changed, 91 insertions, 57 deletions
diff --git a/sysdeps/i386/configure b/sysdeps/i386/configure
index ae494e2712..bc7900ee2e 100644
--- a/sysdeps/i386/configure
+++ b/sysdeps/i386/configure
@@ -756,6 +756,29 @@ if test $libc_cv_cc_avx = yes; then
 
 fi
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX encoding of SSE instructions" >&5
+$as_echo_n "checking for AVX encoding of SSE instructions... " >&6; }
+if ${libc_cv_cc_sse2avx+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if { ac_try='${CC-cc} -msse2avx -xc /dev/null -S -o /dev/null'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then
+  libc_cv_cc_sse2avx=yes
+else
+  libc_cv_cc_sse2avx=no
+fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_sse2avx" >&5
+$as_echo "$libc_cv_cc_sse2avx" >&6; }
+if test $libc_cv_cc_sse2avx = yes; then
+  $as_echo "#define HAVE_SSE2AVX_SUPPORT 1" >>confdefs.h
+
+fi
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for FMA4 support" >&5
 $as_echo_n "checking for FMA4 support... " >&6; }
 if ${libc_cv_cc_fma4+:} false; then :
diff --git a/sysdeps/i386/configure.in b/sysdeps/i386/configure.in
index 5a9840e16c..59a4cd6ea1 100644
--- a/sysdeps/i386/configure.in
+++ b/sysdeps/i386/configure.in
@@ -67,6 +67,17 @@ if test $libc_cv_cc_avx = yes; then
   AC_DEFINE(HAVE_AVX_SUPPORT)
 fi
 
+dnl Check if -msse2avx works.
+AC_CACHE_CHECK(for AVX encoding of SSE instructions, libc_cv_cc_sse2avx, [dnl
+if AC_TRY_COMMAND([${CC-cc} -msse2avx -xc /dev/null -S -o /dev/null]); then
+  libc_cv_cc_sse2avx=yes
+else
+  libc_cv_cc_sse2avx=no
+fi])
+if test $libc_cv_cc_sse2avx = yes; then
+  AC_DEFINE(HAVE_SSE2AVX_SUPPORT)
+fi
+
 dnl Check if -mfma4 works.
 AC_CACHE_CHECK(for FMA4 support, libc_cv_cc_fma4, [dnl
 if AC_TRY_COMMAND([${CC-cc} -mfma4 -xc /dev/null -S -o /dev/null]); then
diff --git a/sysdeps/x86_64/fpu/bits/mathinline.h b/sysdeps/x86_64/fpu/bits/mathinline.h
index 1a2c1ee997..038c64ee46 100644
--- a/sysdeps/x86_64/fpu/bits/mathinline.h
+++ b/sysdeps/x86_64/fpu/bits/mathinline.h
@@ -1,5 +1,5 @@
 /* Inline math functions for x86-64.
-   Copyright (C) 2002-2004, 2007, 2009, 2011 Free Software Foundation, Inc.
+   Copyright (C) 2002-2004,2007,2009,2011,2012 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -64,21 +64,20 @@ __MATH_INLINE int
 __NTH (__signbitl (long double __x))
 {
   __extension__ union { long double __l; int __i[3]; } __u = { __l: __x };
-  return (__u.__i[2] & 0x8000) != 0;
+  return __u.__i[2] & 0x8000;
 }
 
 __END_NAMESPACE_C99
 #endif
 
 
-#if (__GNUC_PREREQ (2, 8) && !defined __NO_MATH_INLINES \
-     && defined __OPTIMIZE__)
+#if __GNUC_PREREQ (2, 8) && !defined __NO_MATH_INLINES && defined __OPTIMIZE__
 
 # ifdef __USE_ISOC99
 __BEGIN_NAMESPACE_C99
 
 /* Round to nearest integer.  */
-#  if __WORDSIZE == 64 || defined __SSE_MATH__
+#  ifdef __SSE_MATH__
 __MATH_INLINE long int
 __NTH (lrintf (float __x))
 {
@@ -87,7 +86,7 @@ __NTH (lrintf (float __x))
   return __res;
 }
 #  endif
-#  if __WORDSIZE == 64 || defined __SSE2_MATH__
+#  ifdef __SSE2_MATH__
 __MATH_INLINE long int
 __NTH (lrint (double __x))
 {
@@ -114,40 +113,64 @@ __NTH (llrint (double __x))
 #  endif
 
 #  if defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ > 0 \
-      && (__WORDSIZE == 64 || defined __SSE2_MATH__)
+      && defined __SSE2_MATH__
 /* Determine maximum of two values.  */
 __MATH_INLINE float
 __NTH (fmaxf (float __x, float __y))
 {
+#   ifdef __AVX__
+  float __res;  /* Three-operand VEX form: separate destination.  */
+  __asm ("vmaxss %2, %1, %0" : "=x" (__res) : "x" (__x), "xm" (__y));
+  return __res;
+#   else
   __asm ("maxss %1, %0" : "+x" (__x) : "xm" (__y));
   return __x;
+#   endif
 }
 __MATH_INLINE double
 __NTH (fmax (double __x, double __y))
 {
+#   ifdef __AVX__
+  double __res;  /* Must be double: vmaxsd writes a 64-bit scalar.  */
+  __asm ("vmaxsd %2, %1, %0" : "=x" (__res) : "x" (__x), "xm" (__y));
+  return __res;
+#   else
   __asm ("maxsd %1, %0" : "+x" (__x) : "xm" (__y));
   return __x;
+#   endif
 }
 
 /* Determine minimum of two values.  */
 __MATH_INLINE float
 __NTH (fminf (float __x, float __y))
 {
+#   ifdef __AVX__
+  float __res;  /* Three-operand VEX form: separate destination.  */
+  __asm ("vminss %2, %1, %0" : "=x" (__res) : "x" (__x), "xm" (__y));
+  return __res;
+#   else
   __asm ("minss %1, %0" : "+x" (__x) : "xm" (__y));
   return __x;
+#   endif
 }
 __MATH_INLINE double
 __NTH (fmin (double __x, double __y))
 {
+#   ifdef __AVX__
+  double __res;  /* Must be double: vminsd writes a 64-bit scalar.  */
+  __asm ("vminsd %2, %1, %0" : "=x" (__res) : "x" (__x), "xm" (__y));
+  return __res;
+#   else
   __asm ("minsd %1, %0" : "+x" (__x) : "xm" (__y));
   return __x;
+#   endif
 }
 #  endif
 
 __END_NAMESPACE_C99
 # endif
 
-# if defined __SSE4_1__ && (__WORDSIZE == 64 || defined __SSE2_MATH__)
+# if defined __SSE4_1__ && defined __SSE2_MATH__
 #  if defined __USE_MISC || defined __USE_XOPEN_EXTENDED || defined __USE_ISOC99
 __BEGIN_NAMESPACE_C99
 
diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h
index 50f765ff2f..8e79718262 100644
--- a/sysdeps/x86_64/fpu/math_private.h
+++ b/sysdeps/x86_64/fpu/math_private.h
@@ -19,7 +19,7 @@
 
 /* We can do a few things better on x86-64.  */
 
-#ifdef __AVX__
+#if defined __AVX__ || defined SSE2AVX
 # define MOVD "vmovd"
 # define STMXCSR "vstmxcsr"
 # define LDMXCSR "vldmxcsr"
@@ -90,7 +90,7 @@
   ({ int __di; GET_FLOAT_WORD (__di, (float) d);			      \
      (__di & 0x7fffffff) < 0x7f800000; })
 
-#ifdef __AVX__
+#if defined __AVX__ || defined SSE2AVX
 # define __ieee754_sqrt(d) \
   ({ double __res;							      \
     asm ("vsqrtsd %1, %0, %0" : "=x" (__res) : "xm" ((double) (d)));	      \
@@ -116,7 +116,7 @@
 
 #ifdef __SSE4_1__
 # ifndef __rint
-#  ifdef __AVX__
+#  if defined __AVX__ || defined SSE2AVX
 #   define __rint(d) \
   ({ double __res; \
     asm ("vroundsd $4, %1, %0, %0" : "=x" (__res) : "xm" ((double) (d)));      \
@@ -129,7 +129,7 @@
 #  endif
 # endif
 # ifndef __rintf
-#  ifdef __AVX__
+#  if defined __AVX__ || defined SSE2AVX
 #   define __rintf(d) \
   ({ float __res; \
     asm ("vroundss $4, %1, %0, %0" : "=x" (__res) : "xm" ((float) (d)));      \
@@ -143,7 +143,7 @@
 # endif
 
 # ifndef __floor
-#  ifdef __AVX__
+#  if defined __AVX__ || defined SSE2AVX
 #   define __floor(d) \
   ({ double __res; \
     asm ("vroundsd $1, %1, %0, %0" : "=x" (__res) : "xm" ((double) (d)));      \
@@ -156,7 +156,7 @@
 #  endif
 # endif
 # ifndef __floorf
-#  ifdef __AVX__
+#  if defined __AVX__ || defined SSE2AVX
 #   define __floorf(d) \
   ({ float __res; \
     asm ("vroundss $1, %1, %0, %0" : "=x" (__res) : "xm" ((float) (d)));      \
@@ -173,29 +173,6 @@
 
 /* Specialized variants of the <fenv.h> interfaces which only handle
    either the FPU or the SSE unit.  */
-#undef libc_fegetround
-#define libc_fegetround() \
-  ({									      \
-     unsigned int mxcsr;						      \
-     asm volatile (STMXCSR " %0" : "=m" (*&mxcsr));			      \
-     (mxcsr & 0x6000) >> 3;						      \
-  })
-#undef libc_fegetroundf
-#define libc_fegetroundf() libc_fegetround ()
-// #define libc_fegetroundl() fegetround ()
-
-#undef libc_fesetround
-#define libc_fesetround(r) \
-  do {									      \
-     unsigned int mxcsr;						      \
-     asm (STMXCSR " %0" : "=m" (*&mxcsr));				      \
-     mxcsr = (mxcsr & ~0x6000) | ((r) << 3);				      \
-     asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));			      \
-  } while (0)
-#undef libc_fesetroundf
-#define libc_fesetroundf(r) libc_fesetround (r)
-// #define libc_fesetroundl(r) (void) fesetround (r)
-
 #undef libc_feholdexcept
 #define libc_feholdexcept(e) \
   do {									      \
@@ -224,7 +201,8 @@
 
 #undef libc_fetestexcept
 #define libc_fetestexcept(e) \
-  ({ unsigned int mxcsr; asm volatile (STMXCSR " %0" : "=m" (*&mxcsr));	      \
+  ({ unsigned int mxcsr;						      \
+     asm volatile (STMXCSR " %0" : "=m" (*&mxcsr));			      \
      mxcsr & (e) & FE_ALL_EXCEPT; })
 #undef libc_fetestexceptf
 #define libc_fetestexceptf(e) libc_fetestexcept (e)
diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
index be689036dc..4b5c173145 100644
--- a/sysdeps/x86_64/fpu/multiarch/Makefile
+++ b/sysdeps/x86_64/fpu/multiarch/Makefile
@@ -34,21 +34,21 @@ CFLAGS-s_sin-fma4.c = -mfma4
 CFLAGS-s_tan-fma4.c = -mfma4
 endif
 
-ifeq ($(config-cflags-avx),yes)
+ifeq ($(config-cflags-sse2avx),yes)
 libm-sysdep_routines += e_exp-avx e_log-avx s_atan-avx \
 			e_atan2-avx s_sin-avx s_tan-avx \
 			mplog-avx mpa-avx slowexp-avx \
 			mpexp-avx
 
-CFLAGS-e_atan2-avx.c = -mavx
-CFLAGS-e_exp-avx.c = -mavx
-CFLAGS-e_log-avx.c = -mavx
-CFLAGS-mpa-avx.c = -mavx
-CFLAGS-mpexp-avx.c = -mavx
-CFLAGS-mplog-avx.c = -mavx
-CFLAGS-s_atan-avx.c = -mavx
-CFLAGS-s_sin-avx.c = -mavx
-CFLAGS-slowexp-avx.c = -mavx
-CFLAGS-s_tan-avx.c = -mavx
+CFLAGS-e_atan2-avx.c = -msse2avx -DSSE2AVX
+CFLAGS-e_exp-avx.c = -msse2avx -DSSE2AVX
+CFLAGS-e_log-avx.c = -msse2avx -DSSE2AVX
+CFLAGS-mpa-avx.c = -msse2avx -DSSE2AVX
+CFLAGS-mpexp-avx.c = -msse2avx -DSSE2AVX
+CFLAGS-mplog-avx.c = -msse2avx -DSSE2AVX
+CFLAGS-s_atan-avx.c = -msse2avx -DSSE2AVX
+CFLAGS-s_sin-avx.c = -msse2avx -DSSE2AVX
+CFLAGS-slowexp-avx.c = -msse2avx -DSSE2AVX
+CFLAGS-s_tan-avx.c = -msse2avx -DSSE2AVX
 endif
 endif
diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2.c b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
index 3a615fc9d1..6867c6e64e 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_atan2.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
@@ -14,7 +14,7 @@ extern double __ieee754_atan2_fma4 (double, double);
 
 libm_ifunc (__ieee754_atan2,
 	    HAS_FMA4 ? __ieee754_atan2_fma4
-	    : (HAS_YMM_USABLE ? __ieee754_atan2_avx : __ieee754_atan2_sse2));
+	    : (HAS_AVX ? __ieee754_atan2_avx : __ieee754_atan2_sse2));
 strong_alias (__ieee754_atan2, __atan2_finite)
 
 # define __ieee754_atan2 __ieee754_atan2_sse2
diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp.c b/sysdeps/x86_64/fpu/multiarch/e_exp.c
index 7b2320a223..3c650287c5 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_exp.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_exp.c
@@ -14,7 +14,7 @@ extern double __ieee754_exp_fma4 (double);
 
 libm_ifunc (__ieee754_exp,
 	    HAS_FMA4 ? __ieee754_exp_fma4
-	    : (HAS_YMM_USABLE ? __ieee754_exp_avx : __ieee754_exp_sse2));
+	    : (HAS_AVX ? __ieee754_exp_avx : __ieee754_exp_sse2));
 strong_alias (__ieee754_exp, __exp_finite)
 
 # define __ieee754_exp __ieee754_exp_sse2
diff --git a/sysdeps/x86_64/fpu/multiarch/e_log.c b/sysdeps/x86_64/fpu/multiarch/e_log.c
index ab277d67fe..05f36680be 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_log.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_log.c
@@ -14,8 +14,7 @@ extern double __ieee754_log_fma4 (double);
 
 libm_ifunc (__ieee754_log,
 	    HAS_FMA4 ? __ieee754_log_fma4
-	    : (HAS_YMM_USABLE ? __ieee754_log_avx
-	       : __ieee754_log_sse2));
+	    : (HAS_AVX ? __ieee754_log_avx : __ieee754_log_sse2));
 strong_alias (__ieee754_log, __log_finite)
 
 # define __ieee754_log __ieee754_log_sse2
diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan.c b/sysdeps/x86_64/fpu/multiarch/s_atan.c
index 78c7e099cb..ae16d7c9bb 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_atan.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_atan.c
@@ -13,7 +13,7 @@ extern double __atan_fma4 (double);
 # endif
 
 libm_ifunc (atan, (HAS_FMA4 ? __atan_fma4 :
-		   HAS_YMM_USABLE ? __atan_avx : __atan_sse2));
+		   HAS_AVX ? __atan_avx : __atan_sse2));
 
 # define atan __atan_sse2
 #endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin.c b/sysdeps/x86_64/fpu/multiarch/s_sin.c
index 417acd0683..a0c2521c98 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_sin.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_sin.c
@@ -18,11 +18,11 @@ extern double __sin_fma4 (double);
 # endif
 
 libm_ifunc (__cos, (HAS_FMA4 ? __cos_fma4 :
-		    HAS_YMM_USABLE ? __cos_avx : __cos_sse2));
+		    HAS_AVX ? __cos_avx : __cos_sse2));
 weak_alias (__cos, cos)
 
 libm_ifunc (__sin, (HAS_FMA4 ? __sin_fma4 :
-		    HAS_YMM_USABLE ? __sin_avx : __sin_sse2));
+		    HAS_AVX ? __sin_avx : __sin_sse2));
 weak_alias (__sin, sin)
 
 # define __cos __cos_sse2
diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan.c b/sysdeps/x86_64/fpu/multiarch/s_tan.c
index 3047155cec..904308fada 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_tan.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_tan.c
@@ -13,7 +13,7 @@ extern double __tan_fma4 (double);
 # endif
 
 libm_ifunc (tan, (HAS_FMA4 ? __tan_fma4 :
-		  HAS_YMM_USABLE ? __tan_avx : __tan_sse2));
+		  HAS_AVX ? __tan_avx : __tan_sse2));
 
 # define tan __tan_sse2
 #endif