diff options
author | Adhemerval Zanella <azanella@linux.vnet.ibm.com> | 2011-12-29 19:41:37 -0500 |
---|---|---|
committer | Adhemerval Zanella <azanella@linux.vnet.ibm.com> | 2011-12-29 19:41:37 -0500 |
commit | a960be3fad233bd721f64dfe62d3f0ee798d26e3 (patch) | |
tree | c2edb02f8d6150d6cdcc15568b12fc642abc161d | |
parent | e73988fd47a8ab815e4a4427683ac1098c8ff31f (diff) | |
download | glibc-a960be3fad233bd721f64dfe62d3f0ee798d26e3.tar.gz glibc-a960be3fad233bd721f64dfe62d3f0ee798d26e3.tar.xz glibc-a960be3fad233bd721f64dfe62d3f0ee798d26e3.zip |
PowerPC - Optimization for nearbyint[f]
This patch provides a throughput boost for the nearbyint[f] functions on POWER. On POWER7, it improves nearbyintf performance by 5 times (ppc32) to 6 times (ppc64) and nearbyint performance by 2.5 to 5 times. On POWER6, it improves nearbyintf by up to 2x (ppc64) and nearbyint by up to 4x.
-rw-r--r-- | ChangeLog | 12 | ||||
-rw-r--r-- | math/libm-test.inc | 4 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/fpu/s_nearbyint.S | 80 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/fpu/s_nearbyintf.S | 70 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/fpu/s_nearbyint.S | 68 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S | 59 |
6 files changed, 293 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog index a4b9cb04b2..ce94df9271 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +2011-11-18 Adhemerval Zanella <azanella@linux.vnet.ibm.com> + + * math/libm-test.inc: Added more nearbyint tests. + * sysdeps/powerpc/powerpc32/fpu/s_nearbyint.S: New file: nearbyint + optimized for PPC32. + * sysdeps/powerpc/powerpc32/fpu/s_nearbyintf.S: New file: nearbyintf + optimized for PPC32. + * sysdeps/powerpc/powerpc64/fpu/s_nearbyint.S: New file: nearbyint + optimized for PPC64. + * sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S: New file: nearbyintf + optimized for PPC64. + 2011-10-22 Andreas Schwab <schwab@linux-m68k.org> * sysdeps/ieee754/ldbl-128ibm/s_atanl.c: Include <math.h> before diff --git a/math/libm-test.inc b/math/libm-test.inc index c6ed7a39fe..54dbdf100b 100644 --- a/math/libm-test.inc +++ b/math/libm-test.inc @@ -4607,6 +4607,10 @@ nearbyint_test (void) TEST_f_f (nearbyint, minus_infty, minus_infty); TEST_f_f (nearbyint, nan_value, nan_value); + /* Very large and very small (but still normal) magnitudes */ + TEST_f_f (nearbyint, -8.98847e+307, -8.98847e+307); + TEST_f_f (nearbyint, -4.45015e-308, minus_zero); + /* Default rounding mode is round to nearest. */ TEST_f_f (nearbyint, 0.5, 0.0); TEST_f_f (nearbyint, 1.5, 2.0); diff --git a/sysdeps/powerpc/powerpc32/fpu/s_nearbyint.S b/sysdeps/powerpc/powerpc32/fpu/s_nearbyint.S new file mode 100644 index 0000000000..0102aa3318 --- /dev/null +++ b/sysdeps/powerpc/powerpc32/fpu/s_nearbyint.S @@ -0,0 +1,80 @@ +/* Round to int floating-point values. PowerPC32 version. + Copyright (C) 2011 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 1 Franklin Street, Fifth Floor, Boston MA + 02110-1301 USA. */ + +/* This has been coded in assembler because GCC makes such a mess of it + when it's coded in C. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + +/* double [fp1] nearbyint(double [fp1] x): round x to an integral value by adding and then subtracting 2**52, with the inexact exception suppressed. The argument arrives in fp1 and the result is returned in fp1. */ + + .section .rodata.cst4,"aM",@progbits,4 + .align 2 +.LC0: /* 2**52 */ + .long 0x59800000 /* TWO52: 2**52 stored in single-precision format (read with lfs) */ + + .section ".text" +ENTRY (__nearbyint) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + bcl 20,31,1f +1: mflr r9 + addis r9,r9,.LC0-1b@ha + lfs fp13,.LC0-1b@l(r9) + mtlr r11 + cfi_same_value (lr) +#else + lis r9,.LC0@ha + lfs fp13,.LC0@l(r9) +#endif + fabs fp0,fp1 /* fp0 = fabs(x) */ + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (!(fabs(x) < TWO52)), which includes NaN */ + bgelr cr7 /* return x unchanged */ + fcmpu cr7,fp1,fp12 /* if (x > 0.0) */ + ble cr7,L(lessthanzero) + mtfsb0 4*cr7+lt /* Clear FPSCR bit 28 (XE): disable the FE_INEXACT exception. NOTE(review): the bit is not set again before returning -- confirm this is intended. */ + fadd fp0,fp1,fp13 /* fp0 = x + TWO52 */ + fsub fp1,fp0,fp13 /* x = fp0 - TWO52 */ + fabs fp1,fp1 /* x = fabs(x), so a zero result is +0.0 */ + mtfsb0 4*cr1+eq /* Clear FPSCR bit 6 (XX): drop any FE_INEXACT flag. NOTE(review): this also drops a flag that was already set on entry -- confirm. */ + blr +L(lessthanzero): + bgelr cr7 /* x is not < 0.0, i.e. x is zero: return x unchanged */ + mtfsb0 4*cr7+lt /* Clear FPSCR bit 28 (XE): disable the FE_INEXACT exception */ + fsub fp0,fp13,fp1 /* fp0 = TWO52 - x */ + fsub fp0,fp0,fp13 /* fp0 = fp0 - TWO52 */ + fneg fp1,fp0 /* x = -fp0 */ + mtfsb0 4*cr1+eq /* Clear FPSCR bit 6 (XX): drop any FE_INEXACT flag */ + blr +END (__nearbyint) + +weak_alias (__nearbyint, nearbyint) + +#ifdef NO_LONG_DOUBLE +weak_alias (__nearbyint, nearbyintl) +strong_alias (__nearbyint, __nearbyintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __nearbyint, nearbyintl, GLIBC_2_0) +#endif diff --git 
a/sysdeps/powerpc/powerpc32/fpu/s_nearbyintf.S b/sysdeps/powerpc/powerpc32/fpu/s_nearbyintf.S new file mode 100644 index 0000000000..519022806b --- /dev/null +++ b/sysdeps/powerpc/powerpc32/fpu/s_nearbyintf.S @@ -0,0 +1,70 @@ +/* Round to int floating-point values. PowerPC32 version. + Copyright (C) 2011 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 1 Franklin Street, Fifth Floor, Boston MA + 02110-1301 USA. */ + +/* This has been coded in assembler because GCC makes such a mess of it + when it's coded in C. 
*/ + +#include <sysdep.h> + + +/* float [fp1] nearbyintf(float [fp1] x): round x to an integral value by adding and then subtracting 2**23 in single precision, with the inexact exception suppressed. The argument arrives in fp1 and the result is returned in fp1. */ + + .section .rodata.cst4,"aM",@progbits,4 + .align 2 +.LC0: + .long 0x4B000000 /* TWO23: 2**23 in single-precision format */ + + .section ".text" +ENTRY (__nearbyintf) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + bcl 20,31,1f +1: mflr r9 + addis r9,r9,.LC0-1b@ha + lfs fp13,.LC0-1b@l(r9) + mtlr r11 + cfi_same_value (lr) +#else + lis r9,.LC0@ha + lfs fp13,.LC0@l(r9) +#endif + fabs fp0,fp1 /* fp0 = fabs(x) */ + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (!(fabs(x) < TWO23)), which includes NaN */ + bgelr cr7 /* return x unchanged */ + fcmpu cr7,fp1,fp12 /* if (x > 0.0) */ + ble cr7,L(lessthanzero) + mtfsb0 4*cr7+lt /* Clear FPSCR bit 28 (XE): disable the FE_INEXACT exception. NOTE(review): the bit is not set again before returning -- confirm this is intended. */ + fadds fp0,fp1,fp13 /* fp0 = x + TWO23 */ + fsubs fp1,fp0,fp13 /* x = fp0 - TWO23. NOTE(review): the other nearbyint variants in this patch apply fabs here to force a +0.0 result; this path does not -- confirm. */ + mtfsb0 4*cr1+eq /* Clear FPSCR bit 6 (XX): drop any FE_INEXACT flag */ + blr +L(lessthanzero): + bgelr cr7 /* x is not < 0.0, i.e. x is zero: return x unchanged */ + mtfsb0 4*cr7+lt /* Clear FPSCR bit 28 (XE): disable the FE_INEXACT exception */ + fsubs fp0,fp13,fp1 /* fp0 = TWO23 - x */ + fsubs fp0,fp0,fp13 /* fp0 = fp0 - TWO23 */ + fneg fp1,fp0 /* x = -fp0 */ + mtfsb0 4*cr1+eq /* Clear FPSCR bit 6 (XX): drop any FE_INEXACT flag */ + blr +END (__nearbyintf) + +weak_alias (__nearbyintf, nearbyintf) diff --git a/sysdeps/powerpc/powerpc64/fpu/s_nearbyint.S b/sysdeps/powerpc/powerpc64/fpu/s_nearbyint.S new file mode 100644 index 0000000000..436160f48e --- /dev/null +++ b/sysdeps/powerpc/powerpc64/fpu/s_nearbyint.S @@ -0,0 +1,68 @@ +/* Round to int floating-point values. PowerPC64 version. + Copyright (C) 2011 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* This has been coded in assembler because GCC makes such a mess of it + when it's coded in C. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + +/* double [fp1] nearbyint(double [fp1] x): round x to an integral value by adding and then subtracting 2**52, with the inexact exception suppressed. The argument arrives in fp1 and the result is returned in fp1. */ + + .section ".toc","aw" +.LC0: /* 2**52 */ + .tc FD_43300000_0[TC],0x4330000000000000 + .section ".text" + +EALIGN (__nearbyint, 4, 0) + CALL_MCOUNT 0 + fabs fp0,fp1 /* fp0 = fabs(x) */ + lfd fp13,.LC0@toc(2) /* fp13 = TWO52 */ + fcmpu cr7,fp0,fp13 /* if (!(fabs(x) < TWO52)), which includes NaN */ + bgelr cr7 /* return x unchanged */ + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp1,fp12 /* if (x > 0.0) */ + ble cr7, L(lessthanzero) + mtfsb0 4*cr7+lt /* Clear FPSCR bit 28 (XE): disable the FE_INEXACT exception */ + fadd fp1,fp1,fp13 /* x += TWO52 */ + fsub fp1,fp1,fp13 /* x -= TWO52 */ + fabs fp1,fp1 /* x = fabs(x), so a zero result is +0.0 */ + mtfsb0 4*cr1+eq /* Clear FPSCR bit 6 (XX): drop any FE_INEXACT flag */ + blr +L(lessthanzero): + bgelr cr7 /* x is not < 0.0, i.e. x is zero: return x unchanged */ + mtfsb0 4*cr7+lt /* Clear FPSCR bit 28 (XE): disable the FE_INEXACT exception */ + fsub fp1,fp1,fp13 /* x -= TWO52 */ + fadd fp1,fp1,fp13 /* x += TWO52 */ + fnabs fp1,fp1 /* x = -fabs(x), so a zero result is -0.0 */ + mtfsb0 4*cr1+eq /* Clear FPSCR bit 6 (XX): drop any FE_INEXACT flag */ + blr +END (__nearbyint) + +weak_alias (__nearbyint, nearbyint) + +#ifdef NO_LONG_DOUBLE +weak_alias (__nearbyint, nearbyintl) +strong_alias (__nearbyint, __nearbyintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __nearbyint, nearbyintl, GLIBC_2_0) +#endif diff --git a/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S b/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S new file mode 100644 index 0000000000..c337e7937f --- /dev/null +++ b/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S @@ -0,0 
+1,59 @@ +/* Round to int floating-point values. PowerPC64 version. + Copyright (C) 2011 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* This has been coded in assembler because GCC makes such a mess of it + when it's coded in C. 
*/ + +#include <sysdep.h> + + +/* float [fp1] nearbyintf(float [fp1] x): round x to an integral value by adding and then subtracting 2**23 in single precision, with the inexact exception suppressed. The argument arrives in fp1 and the result is returned in fp1. */ + + .section ".toc","aw" +.LC0: /* 2**23 */ + .tc FD_4b000000_0[TC],0x4b00000000000000 /* Single-precision 2**23 pattern in the upper 32 bits of the doubleword. NOTE(review): the lfs below reads the first four bytes, which hold 0x4b000000 only with big-endian byte order -- confirm if little-endian is ever targeted. */ + .section ".text" + +EALIGN (__nearbyintf, 4, 0) + CALL_MCOUNT 0 + fabs fp0,fp1 /* fp0 = fabs(x) */ + lfs fp13,.LC0@toc(2) /* fp13 = TWO23 */ + fcmpu cr7,fp0,fp13 /* if (!(fabs(x) < TWO23)), which includes NaN */ + bgelr cr7 /* return x unchanged */ + fsubs fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp1,fp12 /* if (x > 0.0) */ + ble cr7, L(lessthanzero) + mtfsb0 4*cr7+lt /* Clear FPSCR bit 28 (XE): disable the FE_INEXACT exception */ + fadds fp1,fp1,fp13 /* x += TWO23 */ + fsubs fp1,fp1,fp13 /* x -= TWO23 */ + fabs fp1,fp1 /* x = fabs(x), so a zero result is +0.0 */ + mtfsb0 4*cr1+eq /* Clear FPSCR bit 6 (XX): drop any pending FE_INEXACT flag */ + blr +L(lessthanzero): + bgelr cr7 /* x is not < 0.0, i.e. x is zero: return x unchanged */ + mtfsb0 4*cr7+lt /* Clear FPSCR bit 28 (XE): disable the FE_INEXACT exception */ + fsubs fp1,fp1,fp13 /* x -= TWO23 */ + fadds fp1,fp1,fp13 /* x += TWO23 */ + fnabs fp1,fp1 /* x = -fabs(x), so a zero result is -0.0 */ + mtfsb0 4*cr1+eq /* Clear FPSCR bit 6 (XX): drop any pending FE_INEXACT flag */ + blr +END (__nearbyintf) + +weak_alias (__nearbyintf, nearbyintf) |