PowerPC: Arithmetic function optimizations for POWER

This patch adds inline assembly macros that use native PowerPC floating-point instructions when the target platform supports them, and falls back to the internal GLIBC functions when those instructions are not implemented (for instance, on POWER4).
author: Adhemerval Zanella <azanella@linux.vnet.ibm.com> 2011-12-29 18:36:35 -0500
committer: Adhemerval Zanella <azanella@linux.vnet.ibm.com> 2011-12-29 18:37:37 -0500
commit: da5da702f3b6ecf48281a53b48257c02c7fe9c03 (patch)
tree: 3101067ed908771a23a40b5fc8998f7785bb073e
parent: 41f2ffe0862d97aa84f1495d6f6be3a8030a2a5f (diff)
download: glibc-da5da702f3b6ecf48281a53b48257c02c7fe9c03.tar.gz
glibc-da5da702f3b6ecf48281a53b48257c02c7fe9c03.tar.xz
glibc-da5da702f3b6ecf48281a53b48257c02c7fe9c03.zip
6 files changed, 149 insertions, 4 deletions
diff --git a/ChangeLog b/ChangeLog
index db727b3727..63e7ed699a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2011-11-07  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>
+
+	* sysdeps/powerpc/fpu/math_private.h: Use inline assembly versions
+	of math functions ceil, trunc, floor, round, and sqrt, when
+	available on the platform.
+	* sysdeps/powerpc/fpu/e_sqrt.c: Undefine __ieee754_sqrt to avoid
+	name clash.
+	* sysdeps/powerpc/fpu/e_sqrtf.c: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/e_sqrt.c: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c: Likewise.
+
 2011-10-12  Ulrich Drepper  <drepper@gmail.com>
 
 	Add integration with gcc's -ffinite-math-only and optimize wrapper
diff --git a/sysdeps/powerpc/fpu/e_sqrt.c b/sysdeps/powerpc/fpu/e_sqrt.c
index f9ded25717..d59bd08d5c 100644
--- a/sysdeps/powerpc/fpu/e_sqrt.c
+++ b/sysdeps/powerpc/fpu/e_sqrt.c
@@ -154,6 +154,7 @@ __slow_ieee754_sqrt (double x)
   return f_wash (x);
 }
 
+#undef __ieee754_sqrt
 double
 __ieee754_sqrt (double x)
 {
diff --git a/sysdeps/powerpc/fpu/e_sqrtf.c b/sysdeps/powerpc/fpu/e_sqrtf.c
index 965faee842..9c6b860c96 100644
--- a/sysdeps/powerpc/fpu/e_sqrtf.c
+++ b/sysdeps/powerpc/fpu/e_sqrtf.c
@@ -130,7 +130,7 @@ __slow_ieee754_sqrtf (float x)
   return f_washf (x);
 }
 
-
+#undef __ieee754_sqrtf
 float
 __ieee754_sqrtf (float x)
 {
diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h
index 90021c6d3c..c4dd217d1d 100644
--- a/sysdeps/powerpc/fpu/math_private.h
+++ b/sysdeps/powerpc/fpu/math_private.h
@@ -1,5 +1,5 @@
 /* Private inline math functions for powerpc.
-   Copyright (C) 2006
+   Copyright (C) 2006, 2011
    Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -25,12 +25,145 @@
 #include <ldsodefs.h>
 #include <dl-procinfo.h>
 
+#include <math/math_private.h>
+
 # if __WORDSIZE == 64 || defined _ARCH_PWR4
 #  define __CPU_HAS_FSQRT 1
+
+#ifndef __ieee754_sqrt
+# define __ieee754_sqrt(x)		\
+  ({ double __z;			\
+     __asm __volatile (			\
+	"	fsqrt %0,%1\n"		\
+		: "=f" (__z)		\
+		: "f"(x));		\
+     __z; })
+#endif
+#ifndef __ieee754_sqrtf
+# define __ieee754_sqrtf(x)		\
+  ({ float __z;				\
+     __asm __volatile (			\
+	"	fsqrts %0,%1\n"		\
+		: "=f" (__z)		\
+		: "f"(x));		\
+     __z; })
+#endif
+
 # else
 #  define __CPU_HAS_FSQRT ((GLRO(dl_hwcap) & PPC_FEATURE_64) != 0)
+# endif	// __WORDSIZE == 64 || defined _ARCH_PWR4
+
+
+#if defined _ARCH_PWR5X
+
+# ifndef __round
+#  define __round(x)			\
+    ({ double __z;			\
+      __asm __volatile (		\
+	"	frin %0,%1\n"		\
+		: "=f" (__z)		\
+		: "f" (x));		\
+     __z; })
+# endif
+# ifndef __roundf
+#  define __roundf(x)			\
+    ({ float __z;			\
+     __asm __volatile (			\
+	"	frin %0,%1\n"		\
+	"	frsp %0,%0\n"		\
+		: "=f" (__z)		\
+		: "f" (x));		\
+     __z; })
+# endif
+
+# ifndef __trunc
+#  define __trunc(x)			\
+    ({ double __z;			\
+     __asm __volatile (			\
+	"	friz %0,%1\n"		\
+		: "=f" (__z)		\
+		: "f" (x));		\
+     __z; })
+# endif
+# ifndef __truncf
+#  define __truncf(x)			\
+    ({ float __z;			\
+     __asm __volatile (			\
+	"	friz %0,%1\n"		\
+	"	frsp %0,%0\n"		\
+		: "=f" (__z)		\
+		: "f" (x));		\
+     __z; })
+# endif
+
+# ifndef __ceil
+#  define __ceil(x)			\
+    ({ double __z;			\
+     __asm __volatile (			\
+	"	frip %0,%1\n"		\
+		: "=f" (__z)		\
+		: "f" (x));		\
+     __z; })
+# endif
+# ifndef __ceilf
+#  define __ceilf(x)			\
+    ({ float __z;			\
+     __asm __volatile (			\
+	"	frip %0,%1\n"		\
+	"	frsp %0,%0\n"		\
+		: "=f" (__z)		\
+		: "f" (x));		\
+     __z; })
 # endif
 
+# ifndef __floor
+#  define __floor(x)			\
+    ({ double __z;			\
+     __asm __volatile (			\
+	"	frim %0,%1\n"		\
+		: "=f" (__z)		\
+		: "f" (x));		\
+     __z; })
+# endif
+# ifndef __floorf
+#  define __floorf(x)			\
+    ({ float __z;			\
+     __asm __volatile (			\
+	"	frim %0,%1\n"		\
+	"	frsp %0,%0\n"		\
+		: "=f" (__z)		\
+		: "f" (x));		\
+     __z; })
+# endif
+
+#endif	/* defined _ARCH_PWR5X */
+
+
+#if defined _ARCH_PWR6
+
+# ifndef __copysign
+#  define __copysign(x, y)		\
+    ({ double __z;			\
+     __asm __volatile (			\
+	"	fcpsgn %0,%1,%2\n"	\
+		: "=f" (__z)		\
+		: "f" (y), "f" (x));	\
+     __z; })
+# endif
+# ifndef __copysignf
+#  define __copysignf(x, y)		\
+    ({ float __z;			\
+     __asm __volatile (			\
+	"	fcpsgn %0,%1,%2\n"	\
+	"	frsp %0,%0\n"		\
+		: "=f" (__z)		\
+		: "f" (y), "f" (x));	\
+     __z; })
+# endif
+
+#endif /* defined _ARCH_PWR6 */
+
+
 # ifndef __LIBC_INTERNAL_MATH_INLINES
 extern double __slow_ieee754_sqrt (double);
 __inline double
@@ -78,6 +211,4 @@ __ieee754_sqrtf (float __x)
 }
 #endif /* __LIBC_INTERNAL_MATH_INLINES */
 
-#include <math/math_private.h>
-
 #endif /* _PPC_MATH_PRIVATE_H_ */
diff --git a/sysdeps/powerpc/powerpc64/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc64/fpu/e_sqrt.c
index 314abba3b2..66d04ceb77 100644
--- a/sysdeps/powerpc/powerpc64/fpu/e_sqrt.c
+++ b/sysdeps/powerpc/powerpc64/fpu/e_sqrt.c
@@ -20,6 +20,7 @@
 #include <math.h>
 #include <math_private.h>
 
+#undef __ieee754_sqrt
 double
 __ieee754_sqrt (double x)
 {
diff --git a/sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c
index 7157214394..847a2e4ad6 100644
--- a/sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c
+++ b/sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c
@@ -20,6 +20,7 @@
 #include <math.h>
 #include <math_private.h>
 
+#undef __ieee754_sqrtf
 float
 __ieee754_sqrtf (float x)
 {
author	Adhemerval Zanella <azanella@linux.vnet.ibm.com>	2011-12-29 18:36:35 -0500
committer	Adhemerval Zanella <azanella@linux.vnet.ibm.com>	2011-12-29 18:37:37 -0500
commit	da5da702f3b6ecf48281a53b48257c02c7fe9c03 (patch)
tree	3101067ed908771a23a40b5fc8998f7785bb073e
parent	41f2ffe0862d97aa84f1495d6f6be3a8030a2a5f (diff)
download	glibc-da5da702f3b6ecf48281a53b48257c02c7fe9c03.tar.gz glibc-da5da702f3b6ecf48281a53b48257c02c7fe9c03.tar.xz glibc-da5da702f3b6ecf48281a53b48257c02c7fe9c03.zip