diff options
author | Joseph Myers <joseph@codesourcery.com> | 2017-08-25 11:33:04 +0000 |
---|---|---|
committer | Joseph Myers <joseph@codesourcery.com> | 2017-08-25 11:33:04 +0000 |
commit | bba5c930947e2b440b7d1eafdd3c9e7ff2d28978 (patch) | |
tree | 8cad09406dad17a6a12d7ccfdcd87a978970e407 /sysdeps/powerpc/powerpc32/power5 | |
parent | 3d7b66f66cb223e899a7ebc0f4c20f13e711c9e0 (diff) | |
download | glibc-bba5c930947e2b440b7d1eafdd3c9e7ff2d28978.tar.gz glibc-bba5c930947e2b440b7d1eafdd3c9e7ff2d28978.tar.xz glibc-bba5c930947e2b440b7d1eafdd3c9e7ff2d28978.zip |
Remove powerpc32 sqrt wrappers.
This patch removes the powerpc32-specific wrappers for sqrt and sqrtf. These wrappers, by adding architecture-specific uses of _LIB_VERSION and __kernel_standard, unnecessarily complicate cleanups of libm error handling. They also do not serve a useful optimization purpose. GCC knows about sqrt as a built-in function, and can generate direct calls to a hardware square root instruction, either on its own, in the -fno-math-errno case, or together with an inline check for the argument being negative and a call to the out-of-line sqrt function for error handling only in that case (and has been able to do so for a long time). Thus in practice the wrapper will be called only in the case of negative arguments, which is not a case it is useful to optimize for. Tested with build-many-glibcs.py for powerpc-linux-gnu-power4. * sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrt_compat-power5.S: Remove file. * sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrt_compat-ppc32.S: Likewise. * sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrt_compat.c: Likewise. * sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrtf_compat-power5.S: Likewise. * sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrtf_compat-ppc32.S: Likewise. * sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrtf_compat.c: Likewise. * sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt_compat.S: Likewise. * sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf_compat.S: Likewise. * sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt_compat.S: Likewise. * sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf_compat.S: Likewise. * sysdeps/powerpc/powerpc32/power4/fpu/multiarch/Makefile (libm-sysdep-routines): Remove w_sqrt_compat-power5, w_sqrt_compat-ppc32, w_sqrtf_compat-power5 and w_sqrtf_compat-ppc32.
Diffstat (limited to 'sysdeps/powerpc/powerpc32/power5')
-rw-r--r-- | sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt_compat.S | 107 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf_compat.S | 99 |
2 files changed, 0 insertions, 206 deletions
diff --git a/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt_compat.S b/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt_compat.S deleted file mode 100644 index ecefdc3283..0000000000 --- a/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt_compat.S +++ /dev/null @@ -1,107 +0,0 @@ -/* sqrt function. PowerPC32 version. - Copyright (C) 2007-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <math_ldbl_opt.h> -#include <math-svid-compat.h> - -/* double [fp1] sqrt (double x [fp1]) - Power4 (ISA V2.0) and above implement sqrt in hardware (not optional). - The fsqrt instruction generates the correct value for all inputs and - sets the appropriate floating point exceptions. Extended checking is - only needed to set errno (via __kernel_standard) if the input value - is negative. - - So compare the input value against the absolute value of itself. - This will compare equal unless the value is negative (EDOM) or a NAN, - in which case we branch to the extend wrapper. If equal we can return - the result directly. - - This part of the function looks like a leaf routine, so no need to - stack a frame or execute prologue/epilogue code. It is safe to - branch directly to w_sqrt as long as the input value (f1) is - preserved. 
Putting the sqrt result into f2 (float parameter 2) - allows passing both the input value and sqrt result into the extended - wrapper so there is no need to recompute. - - This tactic avoids the overhead of stacking a frame for the normal - (non-error) case. Until gcc supports prologue shrink-wrapping - this is the best we can do. */ - - .section ".text" - .machine power4 -EALIGN (__sqrt, 5, 0) - fabs fp0,fp1 - fsqrt fp2,fp1 - fcmpu cr1,fp0,fp1 - bne- cr1,.Lw_sqrt - fmr fp1,fp2 - blr - .align 4 -.Lw_sqrt: - mflr r0 - stwu r1,-16(r1) - cfi_adjust_cfa_offset(16) - fmr fp12,fp2 - stw r0,20(r1) - stw r30,8(r1) - cfi_offset(lr,20-16) - cfi_offset(r30,8-16) -#ifdef SHARED - SETUP_GOT_ACCESS(r30,got_label) - addis r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@ha - addi r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@l - lwz r9,_LIB_VERSION@got(30) - lwz r0,0(r9) -#else - lis r9,_LIB_VERSION@ha - lwz r0,_LIB_VERSION@l(r9) -#endif -/* if (_LIB_VERSION == _IEEE_) return z; */ - cmpwi cr7,r0,-1 - beq- cr7,.L4 -/* if (x != x) return z; !isnan*/ - fcmpu cr7,fp1,fp1 - bne- cr7,.L4 -/* if (x < 0.0) - return __kernel_standard (x, x, 26) */ - fmr fp2,fp1 - li r3,26 - bne- cr1,.L11 -.L4: - lwz r0,20(r1) - fmr fp1,fp12 - lwz r30,8(r1) - addi r1,r1,16 - mtlr r0 - blr -.L11: - bl __kernel_standard@plt - fmr fp12,fp1 - b .L4 - END (__sqrt) - -weak_alias (__sqrt, sqrt) - -#ifdef NO_LONG_DOUBLE -weak_alias (__sqrt, sqrtl) -strong_alias (__sqrt, __sqrtl) -#endif -#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) -compat_symbol (libm, __sqrt, sqrtl, GLIBC_2_0) -#endif diff --git a/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf_compat.S b/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf_compat.S deleted file mode 100644 index 9ae0c5f47d..0000000000 --- a/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf_compat.S +++ /dev/null @@ -1,99 +0,0 @@ -/* sqrtf function. PowerPC32 version. - Copyright (C) 2007-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <math_ldbl_opt.h> -#include <math-svid-compat.h> - -/* float [fp1] sqrts (float x [fp1]) - Power4 (ISA V2.0) and above implement sqrt in hardware (not optional). - The fsqrts instruction generates the correct value for all inputs and - sets the appropriate floating point exceptions. Extended checking is - only needed to set errno (via __kernel_standard) if the input value - is negative. - - So compare the input value against the absolute value of itself. - This will compare equal unless the value is negative (EDOM) or a NAN, - in which case we branch to the extend wrapper. If equal we can return - the result directly. - - This part of the function looks like a leaf routine, so no need to - stack a frame or execute prologue/epilogue code. It is safe to - branch directly to w_sqrt as long as the input value (f1) is - preserved. Putting the sqrt result into f2 (float parameter 2) - allows passing both the input value and sqrt result into the extended - wrapper so there is no need to recompute. - - This tactic avoids the overhead of stacking a frame for the normal - (non-error) case. Until gcc supports prologue shrink-wrapping - this is the best we can do. 
*/ - - .section ".text" - .machine power4 -EALIGN (__sqrtf, 5, 0) - fabs fp0,fp1 - fsqrts fp2,fp1 - fcmpu cr1,fp0,fp1 - bne- cr1,.Lw_sqrtf - fmr fp1,fp2 - blr - .align 4 -.Lw_sqrtf: - mflr r0 - stwu r1,-16(r1) - cfi_adjust_cfa_offset(16) - fmr fp12,fp2 - stw r0,20(r1) - stw r30,8(r1) - cfi_offset(lr,20-16) - cfi_offset(r30,8-16) -#ifdef SHARED - SETUP_GOT_ACCESS(r30,got_label) - addis r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@ha - addi r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@l - lwz r9,_LIB_VERSION@got(30) - lwz r0,0(r9) -#else - lis r9,_LIB_VERSION@ha - lwz r0,_LIB_VERSION@l(r9) -#endif -/* if (_LIB_VERSION == _IEEE_) return z; */ - cmpwi cr7,r0,-1 - beq- cr7,.L4 -/* if (x != x, 0) return z; !isnan */ - fcmpu cr7,fp1,fp1 - bne- cr7,.L4 -/* if (x < 0.0) - return __kernel_standard (x, x, 126) */ - fmr fp2,fp1 - li r3,126 - bne- cr1,.L11 -.L4: - lwz r0,20(r1) - fmr fp1,fp12 - lwz r30,8(r1) - addi r1,r1,16 - mtlr r0 - blr -.L11: - bl __kernel_standard@plt - fmr fp12,fp1 - b .L4 - END (__sqrtf) - -weak_alias (__sqrtf, sqrtf) |