about summary refs log tree commit diff
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@gmail.com>2012-01-28 21:20:06 -0500
committerUlrich Drepper <drepper@gmail.com>2012-01-28 21:20:06 -0500
commit96bc5b45a671de1e266b841edba2516afc46d84a (patch)
treeedb645d2ef40b80fb2827aa2abf7a6e575a1f20e
parent56f6f6a2403cfa7267cad722597113be35ecf70d (diff)
downloadglibc-96bc5b45a671de1e266b841edba2516afc46d84a.tar.gz
glibc-96bc5b45a671de1e266b841edba2516afc46d84a.tar.xz
glibc-96bc5b45a671de1e266b841edba2516afc46d84a.zip
Optimize x86-64 math inline header a bit
-rw-r--r--ChangeLog7
-rw-r--r--sysdeps/x86_64/fpu/bits/mathinline.h39
2 files changed, 38 insertions, 8 deletions
diff --git a/ChangeLog b/ChangeLog
index 6702ad3485..5866bacb34 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,12 @@
 2012-01-28  Ulrich Drepper  <drepper@gmail.com>
 
+	* sysdeps/x86_64/fpu/bits/mathinline.h (__signbitl): Optimize a bit.
+	Avoid unnecessary __WORDSIZE == 64 test.
+	(fmaxf): Use VEX format if possible.
+	(fmax): Likewise.
+	(fminf): Likewise.
+	(fmin): Likewise.
+
 	* config.h.in: Define HAVE_SSE2AVX_SUPPORT.
 	* math/math_private.h: Remove libc_fegetround* and
 	libc_fesetround*.
diff --git a/sysdeps/x86_64/fpu/bits/mathinline.h b/sysdeps/x86_64/fpu/bits/mathinline.h
index 1a2c1ee997..038c64ee46 100644
--- a/sysdeps/x86_64/fpu/bits/mathinline.h
+++ b/sysdeps/x86_64/fpu/bits/mathinline.h
@@ -1,5 +1,5 @@
 /* Inline math functions for x86-64.
-   Copyright (C) 2002-2004, 2007, 2009, 2011 Free Software Foundation, Inc.
+   Copyright (C) 2002-2004,2007,2009,2011,2012 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -64,21 +64,20 @@ __MATH_INLINE int
 __NTH (__signbitl (long double __x))
 {
   __extension__ union { long double __l; int __i[3]; } __u = { __l: __x };
-  return (__u.__i[2] & 0x8000) != 0;
+  return __u.__i[2] & 0x8000;
 }
 
 __END_NAMESPACE_C99
 #endif
 
 
-#if (__GNUC_PREREQ (2, 8) && !defined __NO_MATH_INLINES \
-     && defined __OPTIMIZE__)
+#if __GNUC_PREREQ (2, 8) && !defined __NO_MATH_INLINES && defined __OPTIMIZE__
 
 # ifdef __USE_ISOC99
 __BEGIN_NAMESPACE_C99
 
 /* Round to nearest integer.  */
-#  if __WORDSIZE == 64 || defined __SSE_MATH__
+#  ifdef __SSE_MATH__
 __MATH_INLINE long int
 __NTH (lrintf (float __x))
 {
@@ -87,7 +86,7 @@ __NTH (lrintf (float __x))
   return __res;
 }
 #  endif
-#  if __WORDSIZE == 64 || defined __SSE2_MATH__
+#  ifdef __SSE2_MATH__
 __MATH_INLINE long int
 __NTH (lrint (double __x))
 {
@@ -114,40 +113,64 @@ __NTH (llrint (double __x))
 #  endif
 
 #  if defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ > 0 \
-      && (__WORDSIZE == 64 || defined __SSE2_MATH__)
+      && defined __SSE2_MATH__
 /* Determine maximum of two values.  */
 __MATH_INLINE float
 __NTH (fmaxf (float __x, float __y))
 {
+#   ifdef __AVX__
+  float __res;  /* VEX form writes a separate destination operand.  */
+  __asm ("vmaxss %2, %1, %0" : "=x" (__res) : "x" (__x), "xm" (__y));
+  return __res;
+#   else
   __asm ("maxss %1, %0" : "+x" (__x) : "xm" (__y));
   return __x;
+#   endif
 }
 __MATH_INLINE double
 __NTH (fmax (double __x, double __y))
 {
+#   ifdef __AVX__
+  double __res;  /* Must be double: vmaxsd yields a 64-bit result.  */
+  __asm ("vmaxsd %2, %1, %0" : "=x" (__res) : "x" (__x), "xm" (__y));
+  return __res;
+#   else
   __asm ("maxsd %1, %0" : "+x" (__x) : "xm" (__y));
   return __x;
+#   endif
 }
 
 /* Determine minimum of two values.  */
 __MATH_INLINE float
 __NTH (fminf (float __x, float __y))
 {
+#   ifdef __AVX__
+  float __res;  /* VEX form writes a separate destination operand.  */
+  __asm ("vminss %2, %1, %0" : "=x" (__res) : "x" (__x), "xm" (__y));
+  return __res;
+#   else
   __asm ("minss %1, %0" : "+x" (__x) : "xm" (__y));
   return __x;
+#   endif
 }
 __MATH_INLINE double
 __NTH (fmin (double __x, double __y))
 {
+#   ifdef __AVX__
+  double __res;  /* Must be double: vminsd yields a 64-bit result.  */
+  __asm ("vminsd %2, %1, %0" : "=x" (__res) : "x" (__x), "xm" (__y));
+  return __res;
+#   else
   __asm ("minsd %1, %0" : "+x" (__x) : "xm" (__y));
   return __x;
+#   endif
 }
 #  endif
 
 __END_NAMESPACE_C99
 # endif
 
-# if defined __SSE4_1__ && (__WORDSIZE == 64 || defined __SSE2_MATH__)
+# if defined __SSE4_1__ && defined __SSE2_MATH__
 #  if defined __USE_MISC || defined __USE_XOPEN_EXTENDED || defined __USE_ISOC99
 __BEGIN_NAMESPACE_C99