about summary refs log tree commit diff
diff options
context:
space:
mode:
authorAdhemerval Zanella <azanella@linux.vnet.ibm.com>2013-03-29 18:15:28 -0500
committerAdhemerval Zanella <azanella@linux.vnet.ibm.com>2013-04-01 06:36:51 -0500
commit60c414c346a1d5ef0510ffbdc0ab75f288ee4d3f (patch)
tree479eb937a9496619acfb24db9a6a48831f9c3fb3
parent6142896d53d0e8e8abe5dd0494380c45acc1a919 (diff)
downloadglibc-60c414c346a1d5ef0510ffbdc0ab75f288ee4d3f.tar.gz
glibc-60c414c346a1d5ef0510ffbdc0ab75f288ee4d3f.tar.xz
glibc-60c414c346a1d5ef0510ffbdc0ab75f288ee4d3f.zip
PowerPC: remove branch prediction from rint implementation
The branch prediction hints actually hurt performance in this case.
The assembly implementation makes two assumptions: 1. 'fabs (x) < 2^52'
is unlikely and 2. 'x > 0.0' is unlikely (if 1. is true). Since it is a
general floating-point function, the expected input is not bounded, so
it is better to let the hardware handle the branches.
-rw-r--r--benchtests/Makefile7
-rw-r--r--benchtests/rint-inputs4
-rw-r--r--sysdeps/powerpc/powerpc32/fpu/s_rint.S6
-rw-r--r--sysdeps/powerpc/powerpc32/fpu/s_rintf.S6
-rw-r--r--sysdeps/powerpc/powerpc64/fpu/s_rint.S6
-rw-r--r--sysdeps/powerpc/powerpc64/fpu/s_rintf.S6
6 files changed, 22 insertions, 13 deletions
diff --git a/benchtests/Makefile b/benchtests/Makefile
index 74938b925c..cc54b81faa 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -43,7 +43,7 @@
 #   See pow-inputs for an example.
 
 subdir := benchtests
-bench := exp pow
+bench := exp pow rint
 
 exp-ITER = 100000
 exp-ARGLIST = double
@@ -55,5 +55,10 @@ pow-ARGLIST = double:double
 pow-RET = double
 LDFLAGS-bench-pow = -lm
 
+rint-ITER = 250000000
+rint-ARGLIST = double
+rint-RET = double
+LDFLAGS-bench-rint = -lm
+
 include ../Makeconfig
 include ../Rules
diff --git a/benchtests/rint-inputs b/benchtests/rint-inputs
new file mode 100644
index 0000000000..a5f83dc8f9
--- /dev/null
+++ b/benchtests/rint-inputs
@@ -0,0 +1,4 @@
+78.5
+-78.5
+4503599627370497.0
+-4503599627370497.0
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_rint.S b/sysdeps/powerpc/powerpc32/fpu/s_rint.S
index f3cd036680..f04055f461 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_rint.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_rint.S
@@ -45,14 +45,14 @@ ENTRY (__rint)
 	fsub	fp12,fp13,fp13	/* generate 0.0  */
 	fcmpu	cr7,fp0,fp13	/* if (fabs(x) > TWO52)  */
 	fcmpu	cr6,fp1,fp12	/* if (x > 0.0)  */
-	bnllr-	cr7
-	bng-	cr6,.L4
+	bnllr	cr7
+	bng	cr6,.L4
 	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
 	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
 	fabs	fp1,fp1		/* if (x == 0.0)  */
 	blr			/* x = 0.0; */
 .L4:
-	bnllr-	cr6		/* if (x < 0.0)  */
+	bnllr	cr6		/* if (x < 0.0)  */
 	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
 	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
 	fnabs	fp1,fp1		/* if (x == 0.0)  */
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_rintf.S b/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
index 247dd4a14d..e0301af2e7 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
@@ -41,14 +41,14 @@ ENTRY (__rintf)
 	fsubs	fp12,fp13,fp13	/* generate 0.0  */
 	fcmpu	cr7,fp0,fp13	/* if (fabs(x) > TWO23)  */
 	fcmpu	cr6,fp1,fp12	/* if (x > 0.0)  */
-	bnllr-	cr7
-	bng-	cr6,.L4
+	bnllr	cr7
+	bng	cr6,.L4
 	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
 	fsubs	fp1,fp1,fp13	/* x-= TWO23;  */
 	fabs	fp1,fp1		/* if (x == 0.0)  */
 	blr			/* x = 0.0; */
 .L4:
-	bnllr-	cr6		/* if (x < 0.0)  */
+	bnllr	cr6		/* if (x < 0.0)  */
 	fsubs	fp1,fp1,fp13	/* x-= TWO23;  */
 	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
 	fnabs	fp1,fp1		/* if (x == 0.0)  */
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_rint.S b/sysdeps/powerpc/powerpc64/fpu/s_rint.S
index f3339727f4..57e3759bf0 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_rint.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_rint.S
@@ -34,14 +34,14 @@ EALIGN (__rint, 4, 0)
 	fsub	fp12,fp13,fp13	/* generate 0.0  */
 	fcmpu	cr7,fp0,fp13	/* if (fabs(x) > TWO52)  */
 	fcmpu	cr6,fp1,fp12	/* if (x > 0.0)  */
-	bnllr-	cr7
-	bng-	cr6,.L4
+	bnllr	cr7
+	bng	cr6,.L4
 	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
 	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
 	fabs	fp1,fp1		/* if (x == 0.0)  */
 	blr			/* x = 0.0; */
 .L4:
-	bnllr-	cr6		/* if (x < 0.0)  */
+	bnllr	cr6		/* if (x < 0.0)  */
 	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
 	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
 	fnabs	fp1,fp1		/* if (x == 0.0)  */
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_rintf.S b/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
index 26b08721c7..cb28ec748d 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
@@ -30,14 +30,14 @@ EALIGN (__rintf, 4, 0)
 	fsubs	fp12,fp13,fp13	/* generate 0.0  */
 	fcmpu	cr7,fp0,fp13	/* if (fabs(x) > TWO23)  */
 	fcmpu	cr6,fp1,fp12	/* if (x > 0.0)  */
-	bnllr-	cr7
-	bng-	cr6,.L4
+	bnllr	cr7
+	bng	cr6,.L4
 	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
 	fsubs	fp1,fp1,fp13	/* x-= TWO23;  */
 	fabs	fp1,fp1		/* if (x == 0.0)  */
 	blr			/* x = 0.0; */
 .L4:
-	bnllr-	cr6		/* if (x < 0.0)  */
+	bnllr	cr6		/* if (x < 0.0)  */
 	fsubs	fp1,fp1,fp13	/* x-= TWO23;  */
 	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
 	fnabs	fp1,fp1		/* if (x == 0.0)  */