about summary refs log tree commit diff
path: root/sysdeps/x86_64/fpu/math_private.h
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@gmail.com> 2011-10-17 11:23:40 -0400
committer Ulrich Drepper <drepper@gmail.com> 2011-10-17 11:23:40 -0400
commit ed22dcf691326d148222eb9a215d6d98bb8073a4 (patch)
tree 9a7f83e6295b8ac6610e6637defc66f693ea252c /sysdeps/x86_64/fpu/math_private.h
parent b171c137687dd4328f756d141d17f30bae750079 (diff)
download glibc-ed22dcf691326d148222eb9a215d6d98bb8073a4.tar.gz
glibc-ed22dcf691326d148222eb9a215d6d98bb8073a4.tar.xz
glibc-ed22dcf691326d148222eb9a215d6d98bb8073a4.zip
Provide internal optimizations on x86-64 with SSE4.1
Provide macros so that the internal users can, if possible, directly use
the new instructions.

Also fix up the mathinline.h header when compiling with SSE4.1 enabled.
Diffstat (limited to 'sysdeps/x86_64/fpu/math_private.h')
-rw-r--r-- sysdeps/x86_64/fpu/math_private.h 28
1 files changed, 28 insertions, 0 deletions
diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h
index 523ec549ac..71eb41664f 100644
--- a/sysdeps/x86_64/fpu/math_private.h
+++ b/sysdeps/x86_64/fpu/math_private.h
@@ -90,3 +90,31 @@ do {								\
   ({ long double __res;							      \
      asm ("fsqrt" : "=t" (__res) : "0" ((long double) d));		      \
      __res; })
+
+#ifdef __SSE4_1__
+# ifndef __rint /* Inline double rint via roundsd unless __rint is already a macro.  */
+#  define __rint(d) /* imm8 4: round in the current MXCSR mode (Intel SDM ROUNDSD).  */ \
+  ({ double __res; /* (d) is parenthesized so the cast covers the whole argument.  */ \
+     asm ("roundsd $4, %1, %0" : "=x" (__res) : "x" ((double) (d)));	      \
+     __res; })
+# endif
+# ifndef __rintf /* Inline float rint via roundss unless __rintf is already a macro.  */
+#  define __rintf(d) /* imm8 4: round in the current MXCSR mode (Intel SDM ROUNDSS).  */ \
+  ({ float __res; /* (d) is parenthesized so the cast covers the whole argument.  */ \
+     asm ("roundss $4, %1, %0" : "=x" (__res) : "x" ((float) (d)));	      \
+     __res; })
+# endif
+
+# ifndef __floor /* Inline double floor via roundsd unless __floor is already a macro.  */
+#  define __floor(d) /* imm8 1: round toward negative infinity (Intel SDM ROUNDSD).  */ \
+  ({ double __res; /* (d) is parenthesized so the cast covers the whole argument.  */ \
+     asm ("roundsd $1, %1, %0" : "=x" (__res) : "x" ((double) (d)));	      \
+     __res; })
+# endif
+# ifndef __floorf /* Inline float floor via roundss unless __floorf is already a macro.  */
+#  define __floorf(d) /* imm8 1: round toward negative infinity (Intel SDM ROUNDSS).  */ \
+  ({ float __res; /* (d) is parenthesized so the cast covers the whole argument.  */ \
+     asm ("roundss $1, %1, %0" : "=x" (__res) : "x" ((float) (d)));	      \
+     __res; })
+# endif
+#endif