Remove slow paths from pow

Remove the slow paths from pow. Like several other double precision math functions, pow is exactly rounded. This is not required from math functions and causes major overheads as it requires multiple fallbacks using higher precision arithmetic if a result is close to 0.5ULP. Ridiculous slowdowns of up to 100000x have been reported when the highest precision path triggers. All GLIBC math tests pass on AArch64 and x64 (with ULP of pow set to 1). The worst case error is ~0.506ULP. A simple test over a few hundred million values shows pow is 10% faster on average. This fixes BZ #13932. [BZ #13932] * sysdeps/ieee754/dbl-64/uexp.h (err_1): Remove. * benchtests/pow-inputs: Update comment for slow path cases. * manual/probes.texi (slowpow_p10): Delete removed probe. (slowpow_p10): Likewise. * math/Makefile: Remove halfulp.c and slowpow.c. * sysdeps/aarch64/libm-test-ulps: Set ULP of pow to 1. * sysdeps/generic/math_private.h (__exp1): Remove error argument. (__halfulp): Remove. (__slowpow): Remove. * sysdeps/i386/fpu/halfulp.c: Delete file. * sysdeps/i386/fpu/slowpow.c: Likewise. * sysdeps/ia64/fpu/halfulp.c: Likewise. * sysdeps/ia64/fpu/slowpow.c: Likewise. * sysdeps/ieee754/dbl-64/e_exp.c (__exp1): Remove error argument, improve comments and add error analysis. * sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Add error analysis. (power1): Remove function: (log1): Remove error argument, add error analysis. (my_log2): Remove function. * sysdeps/ieee754/dbl-64/halfulp.c: Delete file. * sysdeps/ieee754/dbl-64/slowpow.c: Likewise. * sysdeps/m68k/m680x0/fpu/halfulp.c: Likewise. * sysdeps/m68k/m680x0/fpu/slowpow.c: Likewise. * sysdeps/powerpc/power4/fpu/Makefile: Remove CPPFLAGS-slowpow.c. * sysdeps/x86_64/fpu/libm-test-ulps: Set ULP of pow to 1. * sysdeps/x86_64/fpu/multiarch/Makefile: Remove slowpow-fma.c, slowpow-fma4.c, halfulp-fma.c, halfulp-fma4.c. * sysdeps/x86_64/fpu/multiarch/e_pow-fma.c (__slowpow): Remove define. * sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c (__slowpow): Likewise. * sysdeps/x86_64/fpu/multiarch/halfulp-fma.c: Delete file. * sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c: Likewise. * sysdeps/x86_64/fpu/multiarch/slowpow-fma.c: Likewise. * sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c: Likewise.
author: Wilco Dijkstra <wdijkstr@arm.com> 2018-02-12 10:42:42 +0000
committer: Wilco Dijkstra <wdijkstr@arm.com> 2018-02-12 10:47:09 +0000
commit: c3d466cba1692708a19c6ff829d0386c83a0c6e5 (patch)
tree: d01ce6103dc25d3b662898c3429b8b103b8d3155 /sysdeps/ieee754/dbl-64/slowpow.c
parent: 7bb087bd7bfe3616c4c0974a3f7352b593353ea5 (diff)
download: glibc-c3d466cba1692708a19c6ff829d0386c83a0c6e5.tar.gz
glibc-c3d466cba1692708a19c6ff829d0386c83a0c6e5.tar.xz
glibc-c3d466cba1692708a19c6ff829d0386c83a0c6e5.zip
1 files changed, 0 insertions, 125 deletions
diff --git a/sysdeps/ieee754/dbl-64/slowpow.c b/sysdeps/ieee754/dbl-64/slowpow.c
deleted file mode 100644
index d7c7fb3eb8..0000000000
--- a/sysdeps/ieee754/dbl-64/slowpow.c
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * IBM Accurate Mathematical Library
- * written by International Business Machines Corp.
- * Copyright (C) 2001-2018 Free Software Foundation, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-/*************************************************************************/
-/* MODULE_NAME:slowpow.c                                                 */
-/*                                                                       */
-/* FUNCTION:slowpow                                                      */
-/*                                                                       */
-/*FILES NEEDED:mpa.h                                                     */
-/*             mpa.c mpexp.c mplog.c halfulp.c                           */
-/*                                                                       */
-/* Given two IEEE double machine numbers y,x , routine  computes the     */
-/* correctly  rounded (to nearest) value of x^y. Result calculated  by   */
-/* multiplication (in halfulp.c) or if result isn't accurate enough      */
-/* then routine converts x and y into multi-precision doubles     and    */
-/* calls to mpexp routine                                                */
-/*************************************************************************/
-
-#include "mpa.h"
-#include <math_private.h>
-
-#include <stap-probe.h>
-
-#ifndef SECTION
-# define SECTION
-#endif
-
-void __mpexp (mp_no *x, mp_no *y, int p);
-void __mplog (mp_no *x, mp_no *y, int p);
-double ulog (double);
-double __halfulp (double x, double y);
-
-double
-SECTION
-__slowpow (double x, double y, double z)
-{
-  double res, res1;
-  mp_no mpx, mpy, mpz, mpw, mpp, mpr, mpr1;
-  static const mp_no eps = {-3, {1.0, 4.0}};
-  int p;
-
-  /* __HALFULP returns -10 or X^Y.  */
-  res = __halfulp (x, y);
-
-  /* Return if the result was computed by __HALFULP.  */
-  if (res >= 0)
-    return res;
-
-  /* Compute pow as long double.  This is currently only used by powerpc, where
-     one may get 106 bits of accuracy.  */
-#ifdef USE_LONG_DOUBLE_FOR_MP
-  long double ldw, ldz, ldpp;
-  static const long double ldeps = 0x4.0p-96;
-
-  ldz = __ieee754_logl ((long double) x);
-  ldw = (long double) y *ldz;
-  ldpp = __ieee754_expl (ldw);
-  res = (double) (ldpp + ldeps);
-  res1 = (double) (ldpp - ldeps);
-
-  /* Return the result if it is accurate enough.  */
-  if (res == res1)
-    return res;
-#endif
-
-  /* Or else, calculate using multiple precision.  P = 10 implies accuracy of
-     240 bits accuracy, since MP_NO has a radix of 2^24.  */
-  p = 10;
-  __dbl_mp (x, &mpx, p);
-  __dbl_mp (y, &mpy, p);
-  __dbl_mp (z, &mpz, p);
-
-  /* z = x ^ y
-     log (z) = y * log (x)
-     z = exp (y * log (x))  */
-  __mplog (&mpx, &mpz, p);
-  __mul (&mpy, &mpz, &mpw, p);
-  __mpexp (&mpw, &mpp, p);
-
-  /* Add and subtract EPS to ensure that the result remains unchanged, i.e. we
-     have last bit accuracy.  */
-  __add (&mpp, &eps, &mpr, p);
-  __mp_dbl (&mpr, &res, p);
-  __sub (&mpp, &eps, &mpr1, p);
-  __mp_dbl (&mpr1, &res1, p);
-  if (res == res1)
-    {
-      /* Track how often we get to the slow pow code plus
-	 its input/output values.  */
-      LIBC_PROBE (slowpow_p10, 4, &x, &y, &z, &res);
-      return res;
-    }
-
-  /* If we don't, then we repeat using a higher precision.  768 bits of
-     precision ought to be enough for anybody.  */
-  p = 32;
-  __dbl_mp (x, &mpx, p);
-  __dbl_mp (y, &mpy, p);
-  __dbl_mp (z, &mpz, p);
-  __mplog (&mpx, &mpz, p);
-  __mul (&mpy, &mpz, &mpw, p);
-  __mpexp (&mpw, &mpp, p);
-  __mp_dbl (&mpp, &res, p);
-
-  /* Track how often we get to the uber-slow pow code plus
-     its input/output values.  */
-  LIBC_PROBE (slowpow_p32, 4, &x, &y, &z, &res);
-
-  return res;
-}
author	Wilco Dijkstra <wdijkstr@arm.com>	2018-02-12 10:42:42 +0000
committer	Wilco Dijkstra <wdijkstr@arm.com>	2018-02-12 10:47:09 +0000
commit	c3d466cba1692708a19c6ff829d0386c83a0c6e5 (patch)
tree	d01ce6103dc25d3b662898c3429b8b103b8d3155 /sysdeps/ieee754/dbl-64/slowpow.c
parent	7bb087bd7bfe3616c4c0974a3f7352b593353ea5 (diff)
download	glibc-c3d466cba1692708a19c6ff829d0386c83a0c6e5.tar.gz glibc-c3d466cba1692708a19c6ff829d0386c83a0c6e5.tar.xz glibc-c3d466cba1692708a19c6ff829d0386c83a0c6e5.zip