diff options
author | Adhemerval Zanella <azanella@linux.vnet.ibm.com> | 2013-03-29 18:15:28 -0500 |
---|---|---|
committer | Ryan S. Arnold <rsa@linux.vnet.ibm.com> | 2013-07-25 15:35:22 -0500 |
commit | 987322bc0b170570a7bd539480252453fcc7a6f5 (patch) | |
tree | 7270e0359a3f15e2d3938d6ece41f5cfe1245ec0 | |
parent | ace889170837e550871cf0d33f88ba2470c7d834 (diff) | |
download | glibc-987322bc0b170570a7bd539480252453fcc7a6f5.tar.gz glibc-987322bc0b170570a7bd539480252453fcc7a6f5.tar.xz glibc-987322bc0b170570a7bd539480252453fcc7a6f5.zip |
PowerPC: remove branch prediction from rint implementation
The branch prediction hints is actually hurts performance in this case. The assembly implementation make two assumptions: 1. 'fabs (x) < 2^52' is unlikely and 2. 'x > 0.0' is unlike (if 1. is true). Since it a general floating point function, expected input is not bounded and then it is better to let the hardware handle the branches. (backported from commit 60c414c346a1d5ef0510ffbdc0ab75f288ee4d3f) This backport does not include the benchmark tests from the original commit.
-rw-r--r-- | sysdeps/powerpc/powerpc32/fpu/s_rint.S | 6 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/fpu/s_rintf.S | 6 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/fpu/s_rint.S | 6 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/fpu/s_rintf.S | 6 |
4 files changed, 12 insertions, 12 deletions
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_rint.S b/sysdeps/powerpc/powerpc32/fpu/s_rint.S index 0ab9e6cee9..c28e7f65e7 100644 --- a/sysdeps/powerpc/powerpc32/fpu/s_rint.S +++ b/sysdeps/powerpc/powerpc32/fpu/s_rint.S @@ -45,14 +45,14 @@ ENTRY (__rint) fsub fp12,fp13,fp13 /* generate 0.0 */ fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ - bnllr- cr7 - bng- cr6,.L4 + bnllr cr7 + bng cr6,.L4 fadd fp1,fp1,fp13 /* x+= TWO52; */ fsub fp1,fp1,fp13 /* x-= TWO52; */ fabs fp1,fp1 /* if (x == 0.0) */ blr /* x = 0.0; */ .L4: - bnllr- cr6 /* if (x < 0.0) */ + bnllr cr6 /* if (x < 0.0) */ fsub fp1,fp1,fp13 /* x-= TWO52; */ fadd fp1,fp1,fp13 /* x+= TWO52; */ fnabs fp1,fp1 /* if (x == 0.0) */ diff --git a/sysdeps/powerpc/powerpc32/fpu/s_rintf.S b/sysdeps/powerpc/powerpc32/fpu/s_rintf.S index ddb47db62f..69aed9c116 100644 --- a/sysdeps/powerpc/powerpc32/fpu/s_rintf.S +++ b/sysdeps/powerpc/powerpc32/fpu/s_rintf.S @@ -41,14 +41,14 @@ ENTRY (__rintf) fsubs fp12,fp13,fp13 /* generate 0.0 */ fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ - bnllr- cr7 - bng- cr6,.L4 + bnllr cr7 + bng cr6,.L4 fadds fp1,fp1,fp13 /* x+= TWO23; */ fsubs fp1,fp1,fp13 /* x-= TWO23; */ fabs fp1,fp1 /* if (x == 0.0) */ blr /* x = 0.0; */ .L4: - bnllr- cr6 /* if (x < 0.0) */ + bnllr cr6 /* if (x < 0.0) */ fsubs fp1,fp1,fp13 /* x-= TWO23; */ fadds fp1,fp1,fp13 /* x+= TWO23; */ fnabs fp1,fp1 /* if (x == 0.0) */ diff --git a/sysdeps/powerpc/powerpc64/fpu/s_rint.S b/sysdeps/powerpc/powerpc64/fpu/s_rint.S index db624051a8..560905a297 100644 --- a/sysdeps/powerpc/powerpc64/fpu/s_rint.S +++ b/sysdeps/powerpc/powerpc64/fpu/s_rint.S @@ -34,14 +34,14 @@ EALIGN (__rint, 4, 0) fsub fp12,fp13,fp13 /* generate 0.0 */ fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ - bnllr- cr7 - bng- cr6,.L4 + bnllr cr7 + bng cr6,.L4 fadd fp1,fp1,fp13 /* x+= TWO52; */ fsub fp1,fp1,fp13 /* x-= TWO52; */ fabs fp1,fp1 /* if (x == 0.0) */ blr /* x = 0.0; */ .L4: - bnllr- cr6 /* if (x < 0.0) */ + bnllr cr6 /* if (x < 0.0) */ fsub fp1,fp1,fp13 /* x-= TWO52; */ fadd fp1,fp1,fp13 /* x+= TWO52; */ fnabs fp1,fp1 /* if (x == 0.0) */ diff --git a/sysdeps/powerpc/powerpc64/fpu/s_rintf.S b/sysdeps/powerpc/powerpc64/fpu/s_rintf.S index 248649dc5c..c120d913b0 100644 --- a/sysdeps/powerpc/powerpc64/fpu/s_rintf.S +++ b/sysdeps/powerpc/powerpc64/fpu/s_rintf.S @@ -30,14 +30,14 @@ EALIGN (__rintf, 4, 0) fsubs fp12,fp13,fp13 /* generate 0.0 */ fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ - bnllr- cr7 - bng- cr6,.L4 + bnllr cr7 + bng cr6,.L4 fadds fp1,fp1,fp13 /* x+= TWO23; */ fsubs fp1,fp1,fp13 /* x-= TWO23; */ fabs fp1,fp1 /* if (x == 0.0) */ blr /* x = 0.0; */ .L4: - bnllr- cr6 /* if (x < 0.0) */ + bnllr cr6 /* if (x < 0.0) */ fsubs fp1,fp1,fp13 /* x-= TWO23; */ fadds fp1,fp1,fp13 /* x+= TWO23; */ fnabs fp1,fp1 /* if (x == 0.0) */ |