PowerPC - Optimization for nearbyint[f]

This patch provides a throughput boost for the nearbyint[f] functions on POWER. On POWER7, it makes nearbyintf 5 (ppc32) to 6 (ppc64) times faster and nearbyint 2.5 to 5 times faster. On POWER6, it makes nearbyintf up to 2x faster (ppc64) and nearbyint up to 4x faster.
author: Adhemerval Zanella <azanella@linux.vnet.ibm.com> 2011-12-29 19:41:37 -0500
committer: Adhemerval Zanella <azanella@linux.vnet.ibm.com> 2011-12-29 19:41:37 -0500
commit: a960be3fad233bd721f64dfe62d3f0ee798d26e3 (patch)
tree: c2edb02f8d6150d6cdcc15568b12fc642abc161d
parent: e73988fd47a8ab815e4a4427683ac1098c8ff31f (diff)
download: glibc-a960be3fad233bd721f64dfe62d3f0ee798d26e3.tar.gz
glibc-a960be3fad233bd721f64dfe62d3f0ee798d26e3.tar.xz
glibc-a960be3fad233bd721f64dfe62d3f0ee798d26e3.zip
6 files changed, 293 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index a4b9cb04b2..ce94df9271 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2011-11-18  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>
+
+	* math/libm-test.inc: Added more nearbyint tests.
+	* sysdeps/powerpc/powerpc32/fpu/s_nearbyint.S: New file: nearbyint
+	optimized for PPC32.
+	* sysdeps/powerpc/powerpc32/fpu/s_nearbyintf.S: New file: nearbyintf
+	optimized for PPC32.
+	* sysdeps/powerpc/powerpc64/fpu/s_nearbyint.S: New file: nearbyint
+	optimized for PPC64.
+	* sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S: New file: nearbyintf
+	optimized for PPC64.
+
 2011-10-22  Andreas Schwab  <schwab@linux-m68k.org>
 
 	* sysdeps/ieee754/ldbl-128ibm/s_atanl.c: Include <math.h> before
diff --git a/math/libm-test.inc b/math/libm-test.inc
index c6ed7a39fe..54dbdf100b 100644
--- a/math/libm-test.inc
+++ b/math/libm-test.inc
@@ -4607,6 +4607,10 @@ nearbyint_test (void)
   TEST_f_f (nearbyint, minus_infty, minus_infty);
   TEST_f_f (nearbyint, nan_value, nan_value);
 
+  /* Values with extreme exponents (about -2**1023 and -2**-1021).  */
+  TEST_f_f (nearbyint, -8.98847e+307, -8.98847e+307);
+  TEST_f_f (nearbyint, -4.45015e-308, minus_zero);
+
   /* Default rounding mode is round to nearest.  */
   TEST_f_f (nearbyint, 0.5, 0.0);
   TEST_f_f (nearbyint, 1.5, 2.0);
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_nearbyint.S b/sysdeps/powerpc/powerpc32/fpu/s_nearbyint.S
new file mode 100644
index 0000000000..0102aa3318
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/fpu/s_nearbyint.S
@@ -0,0 +1,80 @@
+/* Round to int floating-point values.  PowerPC32 version.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 1 Franklin Street, Fifth Floor, Boston MA
+   02110-1301 USA.  */
+
+/* This has been coded in assembler because GCC makes such a mess of it
+   when it's coded in C.  */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+
+/* double [fp1] nearbyint(double [fp1] x) */
+
+	.section	.rodata.cst4,"aM",@progbits,4
+	.align	2
+.LC0:	/* 2**52, stored as an IEEE single and loaded into fp13 with lfs */
+	.long 0x59800000	/* TWO52: sign 0, biased exponent 179 (= 127 + 52), zero fraction */
+
+	.section	".text"
+/* double __nearbyint (double x)
+
+   In:       fp1 = x
+   Out:      fp1 = x rounded to an integral value using the rounding mode
+	      currently selected in the FPSCR
+   Scratch:  fp0, fp11, fp12, fp13, cr7, r9 (plus r11 when SHARED)
+
+   Inputs with |x| >= 2**52 (already integral), NaNs and +-0.0 are
+   returned unchanged.  Any other x is rounded by adding and then
+   removing 2**52 carrying the sign of x, which makes the FPU round at
+   the units place.  x itself (not |x|) goes through the arithmetic, so
+   directed rounding modes round in the right direction for negative
+   inputs.  The FPSCR is saved before and restored after the arithmetic:
+   the inexact status bit is neither raised nor cleared, and the
+   caller's inexact exception enable bit is left as it was.  */
+ENTRY (__nearbyint)
+#ifdef SHARED
+	mflr	r11
+	cfi_register(lr,r11)
+	bcl	20,31,1f	/* PIC: put the address of 1: into LR.  */
+1:	mflr	r9
+	addis	r9,r9,.LC0-1b@ha
+	lfs	fp13,.LC0-1b@l(r9)	/* fp13 = TWO52  */
+	mtlr	r11
+	cfi_same_value (lr)
+#else
+	lis	r9,.LC0@ha
+	lfs	fp13,.LC0@l(r9)	/* fp13 = TWO52  */
+#endif
+	fabs	fp0,fp1
+	fsub	fp12,fp13,fp13	/* generate 0.0  */
+	fcmpu	cr7,fp0,fp13	/* if (fabs(x) >= TWO52 || isnan(x))  */
+	bgelr	cr7		/*   return x;  */
+	fcmpu	cr7,fp1,fp12	/* if (x > 0.0)  */
+	ble	cr7,L(lessthanzero)
+	mffs	fp11		/* Save the caller's FPSCR.  */
+	mtfsb0	4*cr7+lt	/* FPSCR bit 28 (XE) = 0: no inexact trap.  */
+	fadd	fp1,fp1,fp13	/* x += TWO52  */
+	fsub	fp1,fp1,fp13	/* x -= TWO52  */
+	fabs	fp1,fp1		/* if (x == 0.0) x = 0.0;  */
+	mtfsf	0xff,fp11	/* Restore FPSCR: drop any new inexact.  */
+	blr
+L(lessthanzero):
+	bgelr	cr7		/* x == +-0.0: return it unchanged.  */
+	mffs	fp11		/* Save the caller's FPSCR.  */
+	mtfsb0	4*cr7+lt	/* FPSCR bit 28 (XE) = 0: no inexact trap.  */
+	fsub	fp1,fp1,fp13	/* x -= TWO52  */
+	fadd	fp1,fp1,fp13	/* x += TWO52  */
+	fnabs	fp1,fp1		/* if (x == 0.0) x = -0.0;  */
+	mtfsf	0xff,fp11	/* Restore FPSCR: drop any new inexact.  */
+	blr
+END (__nearbyint)
+
+weak_alias (__nearbyint, nearbyint)	/* Public name for the double routine.  */
+
+#ifdef NO_LONG_DOUBLE	/* presumably: long double has the double format here -- confirm */
+weak_alias (__nearbyint, nearbyintl)	/* long double names reuse the double code.  */
+strong_alias (__nearbyint, __nearbyintl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0)	/* NOTE(review): looks like a guard for the old nearbyintl symbol version -- confirm against math_ldbl_opt.h */
+compat_symbol (libm, __nearbyint, nearbyintl, GLIBC_2_0)
+#endif
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_nearbyintf.S b/sysdeps/powerpc/powerpc32/fpu/s_nearbyintf.S
new file mode 100644
index 0000000000..519022806b
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/fpu/s_nearbyintf.S
@@ -0,0 +1,70 @@
+/* Round to int floating-point values.  PowerPC32 version.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 1 Franklin Street, Fifth Floor, Boston MA
+   02110-1301 USA.  */
+
+/* This has been coded in assembler because GCC makes such a mess of it
+   when it's coded in C.  */
+
+#include <sysdep.h>
+
+
+/* float [fp1] nearbyintf(float [fp1] x) */
+
+	.section	.rodata.cst4,"aM",@progbits,4
+	.align	2
+.LC0:	/* Rounding constant, loaded into fp13 with lfs */
+	.long 0x4B000000	/* TWO23: 2**23 as an IEEE single (biased exponent 150 = 127 + 23, zero fraction) */
+
+	.section	".text"
+/* float __nearbyintf (float x)
+
+   In:       fp1 = x
+   Out:      fp1 = x rounded to an integral value using the rounding mode
+	      currently selected in the FPSCR
+   Scratch:  fp0, fp11, fp12, fp13, cr7, r9 (plus r11 when SHARED)
+
+   Inputs with |x| >= 2**23 (already integral), NaNs and +-0.0 are
+   returned unchanged.  Any other x is rounded by adding and then
+   removing 2**23 carrying the sign of x, in single precision, which
+   makes the FPU round at the units place.  x itself (not |x|) goes
+   through the arithmetic, so directed rounding modes round in the right
+   direction for negative inputs, and the final fabs/fnabs gives a zero
+   result the sign of x.  The FPSCR is saved before and restored after
+   the arithmetic: the inexact status bit is neither raised nor cleared,
+   and the caller's inexact exception enable bit is left as it was.  */
+ENTRY (__nearbyintf)
+#ifdef SHARED
+	mflr	r11
+	cfi_register(lr,r11)
+	bcl	20,31,1f		/* PIC: put the address of 1: into LR.  */
+1:	mflr	r9
+	addis	r9,r9,.LC0-1b@ha
+	lfs	fp13,.LC0-1b@l(r9)	/* fp13 = TWO23  */
+	mtlr	r11
+	cfi_same_value (lr)
+#else
+	lis	r9,.LC0@ha
+	lfs	fp13,.LC0@l(r9)		/* fp13 = TWO23  */
+#endif
+	fabs	fp0,fp1
+	fsub	fp12,fp13,fp13		/* generate 0.0  */
+	fcmpu	cr7,fp0,fp13		/* if (fabs(x) >= TWO23 || isnan(x))  */
+	bgelr	cr7			/*   return x;  */
+	fcmpu	cr7,fp1,fp12		/* if (x > 0.0)  */
+	ble	cr7,L(lessthanzero)
+	mffs	fp11			/* Save the caller's FPSCR.  */
+	mtfsb0	4*cr7+lt		/* FPSCR bit 28 (XE) = 0: no inexact trap.  */
+	fadds	fp1,fp1,fp13		/* x += TWO23  */
+	fsubs	fp1,fp1,fp13		/* x -= TWO23  */
+	fabs	fp1,fp1			/* if (x == 0.0) x = 0.0;  */
+	mtfsf	0xff,fp11		/* Restore FPSCR: drop any new inexact.  */
+	blr
+L(lessthanzero):
+	bgelr	cr7			/* x == +-0.0: return it unchanged.  */
+	mffs	fp11			/* Save the caller's FPSCR.  */
+	mtfsb0	4*cr7+lt		/* FPSCR bit 28 (XE) = 0: no inexact trap.  */
+	fsubs	fp1,fp1,fp13		/* x -= TWO23  */
+	fadds	fp1,fp1,fp13		/* x += TWO23  */
+	fnabs	fp1,fp1			/* if (x == 0.0) x = -0.0;  */
+	mtfsf	0xff,fp11		/* Restore FPSCR: drop any new inexact.  */
+	blr
+END (__nearbyintf)
+
+weak_alias (__nearbyintf, nearbyintf)
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_nearbyint.S b/sysdeps/powerpc/powerpc64/fpu/s_nearbyint.S
new file mode 100644
index 0000000000..436160f48e
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/fpu/s_nearbyint.S
@@ -0,0 +1,68 @@
+/* Round to int floating-point values.  PowerPC64 version.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* This has been coded in assembler because GCC makes such a mess of it
+   when it's coded in C.  */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+
+/* double [fp1] nearbyint(double [fp1] x) */
+
+	.section	".toc","aw"
+.LC0:	/* 2**52 as an IEEE double: biased exponent 0x433 (= 1023 + 52), zero fraction */
+	.tc FD_43300000_0[TC],0x4330000000000000
+	.section	".text"
+
+/* double __nearbyint (double x)
+
+   In:       fp1 = x
+   Out:      fp1 = x rounded to an integral value using the rounding mode
+	      currently selected in the FPSCR
+   Scratch:  fp0, fp11, fp12, fp13, cr7
+
+   Inputs with |x| >= 2**52 (already integral), NaNs and +-0.0 are
+   returned unchanged.  Any other x is rounded by adding and then
+   removing 2**52 carrying the sign of x, which makes the FPU round at
+   the units place; the final fabs/fnabs gives a zero result the sign of
+   x.  The FPSCR is saved before and restored after the arithmetic: the
+   inexact status bit is neither raised nor cleared, and the caller's
+   inexact exception enable bit is left as it was.  */
+EALIGN (__nearbyint, 4, 0)
+	CALL_MCOUNT 0
+	fabs	fp0,fp1
+	lfd	fp13,.LC0@toc(2)	/* fp13 = TWO52  */
+	fcmpu	cr7,fp0,fp13	/* if (fabs(x) >= TWO52 || isnan(x))  */
+	bgelr	cr7		/*   return x;  */
+	fsub	fp12,fp13,fp13	/* generate 0.0 */
+	fcmpu	cr7,fp1,fp12	/* if (x > 0.0) */
+	ble	cr7, L(lessthanzero)
+	mffs	fp11		/* Save the caller's FPSCR.  */
+	mtfsb0	4*cr7+lt	/* FPSCR bit 28 (XE) = 0: no inexact trap.  */
+	fadd	fp1,fp1,fp13	/* x += TWO52 */
+	fsub	fp1,fp1,fp13	/* x -= TWO52 */
+	fabs	fp1,fp1		/* if (x == 0.0) x = 0.0;  */
+	mtfsf	0xff,fp11	/* Restore FPSCR: drop any new inexact.  */
+	blr
+L(lessthanzero):
+	bgelr	cr7		/* x == +-0.0: return it unchanged.  */
+	mffs	fp11		/* Save the caller's FPSCR.  */
+	mtfsb0	4*cr7+lt	/* FPSCR bit 28 (XE) = 0: no inexact trap.  */
+	fsub	fp1,fp1,fp13	/* x -= TWO52 */
+	fadd	fp1,fp1,fp13	/* x += TWO52 */
+	fnabs	fp1,fp1		/* if (x == 0.0) x = -0.0;  */
+	mtfsf	0xff,fp11	/* Restore FPSCR: drop any new inexact.  */
+	blr
+END (__nearbyint)
+
+/* Public name for the double routine.  */
+weak_alias (__nearbyint, nearbyint)
+
+/* With NO_LONG_DOUBLE the long double entry points reuse the double
+   code, so the weak alias here must be nearbyintl, not a second
+   definition of nearbyint.  */
+#ifdef NO_LONG_DOUBLE
+weak_alias (__nearbyint, nearbyintl)
+strong_alias (__nearbyint, __nearbyintl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0)
+compat_symbol (libm, __nearbyint, nearbyintl, GLIBC_2_0)
+#endif
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S b/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S
new file mode 100644
index 0000000000..c337e7937f
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S
@@ -0,0 +1,59 @@
+/* Round to int floating-point values.  PowerPC64 version.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* This has been coded in assembler because GCC makes such a mess of it
+   when it's coded in C.  */
+
+#include <sysdep.h>
+
+
+/* float [fp1] nearbyintf(float [fp1]) */
+
+	.section	".toc","aw"
+.LC0:	/* 2**23 as an IEEE single (0x4b000000) in the high word of a doubleword TOC entry */
+	.tc FD_4b000000_0[TC],0x4b00000000000000	/* NOTE(review): lfs reads the first 4 bytes, which are the high word only on big-endian -- confirm before any little-endian use */
+	.section	".text"
+
+/* float __nearbyintf (float x)
+
+   In:       fp1 = x
+   Out:      fp1 = x rounded to an integral value using the rounding mode
+	      currently selected in the FPSCR
+   Scratch:  fp0, fp11, fp12, fp13, cr7
+
+   Inputs with |x| >= 2**23 (already integral), NaNs and +-0.0 are
+   returned unchanged.  Any other x is rounded by adding and then
+   removing 2**23 carrying the sign of x, in single precision, which
+   makes the FPU round at the units place; the final fabs/fnabs gives a
+   zero result the sign of x.  The FPSCR is saved before and restored
+   after the arithmetic: the inexact status bit is neither raised nor
+   cleared, and the caller's inexact exception enable bit is left as it
+   was.  */
+EALIGN (__nearbyintf, 4, 0)
+	CALL_MCOUNT 0
+	fabs	fp0,fp1
+	lfs	fp13,.LC0@toc(2)	/* fp13 = TWO23  */
+	fcmpu	cr7,fp0,fp13	/* if (fabs(x) >= TWO23 || isnan(x))  */
+	bgelr	cr7		/*   return x;  */
+	fsubs	fp12,fp13,fp13	/* generate 0.0 */
+	fcmpu	cr7,fp1,fp12	/* if (x > 0.0)  */
+	ble	cr7, L(lessthanzero)
+	mffs	fp11		/* Save the caller's FPSCR.  */
+	mtfsb0	4*cr7+lt	/* FPSCR bit 28 (XE) = 0: no inexact trap.  */
+	fadds	fp1,fp1,fp13	/* x += TWO23 */
+	fsubs	fp1,fp1,fp13	/* x -= TWO23 */
+	fabs	fp1,fp1		/* if (x == 0.0) x = 0.0;  */
+	mtfsf	0xff,fp11	/* Restore FPSCR: drop any new inexact.  */
+	blr
+L(lessthanzero):
+	bgelr	cr7		/* x == +-0.0: return it unchanged.  */
+	mffs	fp11		/* Save the caller's FPSCR.  */
+	mtfsb0	4*cr7+lt	/* FPSCR bit 28 (XE) = 0: no inexact trap.  */
+	fsubs	fp1,fp1,fp13	/* x -= TWO23 */
+	fadds	fp1,fp1,fp13	/* x += TWO23 */
+	fnabs	fp1,fp1		/* if (x == 0.0) x = -0.0;  */
+	mtfsf	0xff,fp11	/* Restore FPSCR: drop any new inexact.  */
+	blr
+END (__nearbyintf)
+
+weak_alias (__nearbyintf, nearbyintf)
author	Adhemerval Zanella <azanella@linux.vnet.ibm.com>	2011-12-29 19:41:37 -0500
committer	Adhemerval Zanella <azanella@linux.vnet.ibm.com>	2011-12-29 19:41:37 -0500
commit	a960be3fad233bd721f64dfe62d3f0ee798d26e3 (patch)
tree	c2edb02f8d6150d6cdcc15568b12fc642abc161d
parent	e73988fd47a8ab815e4a4427683ac1098c8ff31f (diff)
download	glibc-a960be3fad233bd721f64dfe62d3f0ee798d26e3.tar.gz glibc-a960be3fad233bd721f64dfe62d3f0ee798d26e3.tar.xz glibc-a960be3fad233bd721f64dfe62d3f0ee798d26e3.zip