about summary refs log tree commit diff
path: root/sysdeps
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@gmail.com>2011-10-17 11:23:40 -0400
committerUlrich Drepper <drepper@gmail.com>2011-10-17 11:23:40 -0400
commited22dcf691326d148222eb9a215d6d98bb8073a4 (patch)
tree9a7f83e6295b8ac6610e6637defc66f693ea252c /sysdeps
parentb171c137687dd4328f756d141d17f30bae750079 (diff)
downloadglibc-ed22dcf691326d148222eb9a215d6d98bb8073a4.tar.gz
glibc-ed22dcf691326d148222eb9a215d6d98bb8073a4.tar.xz
glibc-ed22dcf691326d148222eb9a215d6d98bb8073a4.zip
Provide internal optimizations on x86-64 with SSE4.1
Provide macros so that the internal users can, if possible, directly use
the new instructions.

Also fix up the mathinline.h header when compiling with SSE4.1 enabled.
Diffstat (limited to 'sysdeps')
-rw-r--r--sysdeps/x86_64/fpu/bits/mathinline.h31
-rw-r--r--sysdeps/x86_64/fpu/math_private.h28
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_floor-c.c1
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_floorf-c.c1
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_rint-c.c1
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_rintf-c.c1
6 files changed, 51 insertions, 12 deletions
diff --git a/sysdeps/x86_64/fpu/bits/mathinline.h b/sysdeps/x86_64/fpu/bits/mathinline.h
index 210bef8102..721f6e4731 100644
--- a/sysdeps/x86_64/fpu/bits/mathinline.h
+++ b/sysdeps/x86_64/fpu/bits/mathinline.h
@@ -30,34 +30,35 @@
 #endif
 
 
-#if defined __GNUC__ && __GNUC__ >= 2
-# ifdef __USE_ISOC99
+/* The gcc, version 2.7 or below, has problems with all this inlining
+   code.  So disable it for this version of the compiler.  */
+#if __GNUC_PREREQ (2, 8) && defined __USE_ISOC99
 __BEGIN_NAMESPACE_C99
 
 /* Test for negative number.  Used in the signbit() macro.  */
 __MATH_INLINE int
 __NTH (__signbitf (float __x))
 {
-#  if __WORDSIZE == 32
+# if __WORDSIZE == 32
   __extension__ union { float __f; int __i; } __u = { __f: __x };
   return __u.__i < 0;
-#  else
+# else
   int __m;
   __asm ("pmovmskb %1, %0" : "=r" (__m) : "x" (__x));
   return __m & 0x8;
-#  endif
+# endif
 }
 __MATH_INLINE int
 __NTH (__signbit (double __x))
 {
-#  if __WORDSIZE == 32
+# if __WORDSIZE == 32
   __extension__ union { double __d; int __i[2]; } __u = { __d: __x };
   return __u.__i[1] < 0;
-#  else
+# else
   int __m;
   __asm ("pmovmskb %1, %0" : "=r" (__m) : "x" (__x));
   return __m & 0x80;
-#  endif
+# endif
 }
 __MATH_INLINE int
 __NTH (__signbitl (long double __x))
@@ -66,6 +67,16 @@ __NTH (__signbitl (long double __x))
   return (__u.__i[2] & 0x8000) != 0;
 }
 
+__END_NAMESPACE_C99
+#endif
+
+
+#if (__GNUC_PREREQ (2, 8) && !defined __NO_MATH_INLINES \
+     && defined __OPTIMIZE__)
+
+# ifdef __USE_ISOC99
+__BEGIN_NAMESPACE_C99
+
 /* Round to nearest integer.  */
 #  if __WORDSIZE == 64 || defined __SSE_MATH__
 __MATH_INLINE long int
@@ -100,14 +111,10 @@ __NTH (llrint (double __x))
   __asm ("cvtsd2si %1, %0" : "=r" (__res) : "xm" (__x));
   return __res;
 }
-
-__END_NAMESPACE_C99
 #  endif
 
 #  if defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ > 0 \
       && (__WORDSIZE == 64 || defined __SSE2_MATH__)
-__BEGIN_NAMESPACE_C99
-
 /* Determine maximum of two values.  */
 __MATH_INLINE float
 __NTH (fmaxf (float __x, float __y))
diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h
index 523ec549ac..71eb41664f 100644
--- a/sysdeps/x86_64/fpu/math_private.h
+++ b/sysdeps/x86_64/fpu/math_private.h
@@ -90,3 +90,31 @@ do {								\
   ({ long double __res;							      \
      asm ("fsqrt" : "=t" (__res) : "0" ((long double) d));		      \
      __res; })
+
+#ifdef __SSE4_1__
+# ifndef __rint
+#  define __rint(d) \
+  ({ double __res; \
+     asm ("roundsd $4, %1, %0" : "=x" (__res) : "x" ((double) d));	      \
+     __res; })
+# endif
+# ifndef __rintf
+#  define __rintf(d) \
+  ({ float __res; \
+     asm ("roundss $4, %1, %0" : "=x" (__res) : "x" ((float) d));	      \
+     __res; })
+# endif
+
+# ifndef __floor
+#  define __floor(d) \
+  ({ double __res; \
+     asm ("roundsd $1, %1, %0" : "=x" (__res) : "x" ((double) d));	      \
+     __res; })
+# endif
+# ifndef __floorf
+#  define __floorf(d) \
+  ({ float __res; \
+     asm ("roundss $1, %1, %0" : "=x" (__res) : "x" ((float) d));	      \
+     __res; })
+# endif
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor-c.c b/sysdeps/x86_64/fpu/multiarch/s_floor-c.c
index 8b8c31d945..68733b69ef 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_floor-c.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_floor-c.c
@@ -1,2 +1,3 @@
+#undef __floor
 #define __floor __floor_c
 #include <sysdeps/ieee754/dbl-64/wordsize-64/s_floor.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf-c.c b/sysdeps/x86_64/fpu/multiarch/s_floorf-c.c
index 3f367863a7..2386362328 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_floorf-c.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_floorf-c.c
@@ -1,2 +1,3 @@
+#undef __floorf
 #define __floorf __floorf_c
 #include <sysdeps/ieee754/flt-32/s_floorf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint-c.c b/sysdeps/x86_64/fpu/multiarch/s_rint-c.c
index f29f45b062..162a630ff9 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_rint-c.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_rint-c.c
@@ -1,2 +1,3 @@
+#undef __rint
 #define __rint __rint_c
 #include <sysdeps/ieee754/dbl-64/wordsize-64/s_rint.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf-c.c b/sysdeps/x86_64/fpu/multiarch/s_rintf-c.c
index 30ed42a656..8505249f34 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_rintf-c.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_rintf-c.c
@@ -1,2 +1,3 @@
+#undef __rintf
 #define __rintf __rintf_c
 #include <sysdeps/ieee754/flt-32/s_rintf.c>