From e898cd1593cc530b0fb29d46a2854dbc552302c0 Mon Sep 17 00:00:00 2001
From: Wilco Dijkstra <wilco.dijkstra@arm.com>
Date: Wed, 10 Mar 2021 12:40:56 +0000
Subject: math: Remove slow paths from atan [BZ #15267]

Remove slow paths from atan. Add ULP annotations.

Reviewed-By: Paul Zimmermann <Paul.Zimmermann@inria.fr>
---
 sysdeps/ieee754/dbl-64/s_atan.c | 196 +++++-----------------------------------
 1 file changed, 21 insertions(+), 175 deletions(-)

(limited to 'sysdeps/ieee754/dbl-64/s_atan.c')

diff --git a/sysdeps/ieee754/dbl-64/s_atan.c b/sysdeps/ieee754/dbl-64/s_atan.c
index 291b988318..977183eb92 100644
--- a/sysdeps/ieee754/dbl-64/s_atan.c
+++ b/sysdeps/ieee754/dbl-64/s_atan.c
@@ -20,25 +20,15 @@
 /*  MODULE_NAME: atnat.c                                                */
 /*                                                                      */
 /*  FUNCTIONS:  uatan                                                   */
-/*              atanMp                                                  */
 /*              signArctan                                              */
 /*                                                                      */
-/*                                                                      */
-/*  FILES NEEDED: dla.h endian.h mpa.h mydefs.h atnat.h                 */
-/*                mpatan.c mpatan2.c mpsqrt.c                           */
+/*  FILES NEEDED: dla.h endian.h mydefs.h atnat.h                       */
 /*                uatan.tbl                                             */
 /*                                                                      */
-/* An ultimate atan() routine. Given an IEEE double machine number x    */
-/* it computes the correctly rounded (to nearest) value of atan(x).     */
-/*                                                                      */
-/* Assumption: Machine arithmetic operations are performed in           */
-/* round to nearest mode of IEEE 754 standard.                          */
-/*                                                                      */
 /************************************************************************/
 
 #include <dla.h>
-#include "mpa.h"
-#include "MathLib.h"
+#include "mydefs.h"
 #include "uatan.tbl"
 #include "atnat.h"
 #include <fenv.h>
@@ -47,10 +37,8 @@
 #include <math.h>
 #include <fenv_private.h>
 #include <math-underflow.h>
-#include <stap-probe.h>
 
-void __mpatan (mp_no *, mp_no *, int);	/* see definition in mpatan.c */
-static double atanMp (double, const int[]);
+#define  TWO52     0x1.0p52
 
   /* Fix the sign of y and return */
 static double
@@ -59,17 +47,14 @@ __signArctan (double x, double y)
   return copysign (y, x);
 }
 
-
-/* An ultimate atan() routine. Given an IEEE double machine number x,    */
-/* routine computes the correctly rounded (to nearest) value of atan(x). */
+/* atan with max ULP of ~0.523 based on random sampling.  */
 double
 __atan (double x)
 {
-  double cor, s1, ss1, s2, ss2, t1, t2, t3, t4, u, u2, u3,
-	 v, vv, w, ww, y, yy, z, zz;
+  double cor, t1, t2, t3, u,
+	 v, w, ww, y, yy, z;
   int i, ux, dx;
-  static const int pr[M] = { 6, 8, 10, 32 };
-  number num;
+  mynumber num;
 
   num.d = x;
   ux = num.i[HIGH_HALF];
@@ -102,36 +87,14 @@ __atan (double x)
 	      yy = d3.d + v * yy;
 	      yy *= x * v;
 
-	      if ((y = x + (yy - U1 * x)) == x + (yy + U1 * x))
-		return y;
-
-	      EMULV (x, x, v, vv);	/* v+vv=x^2 */
-
-	      s1 = f17.d + v * f19.d;
-	      s1 = f15.d + v * s1;
-	      s1 = f13.d + v * s1;
-	      s1 = f11.d + v * s1;
-	      s1 *= v;
-
-	      ADD2 (f9.d, ff9.d, s1, 0, s2, ss2, t1, t2);
-	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2);
-	      ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2);
-	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2);
-	      ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2);
-	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2);
-	      ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2);
-	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2);
-	      MUL2 (x, 0, s1, ss1, s2, ss2, t1, t2);
-	      ADD2 (x, 0, s2, ss2, s1, ss1, t1, t2);
-	      if ((y = s1 + (ss1 - U5 * s1)) == s1 + (ss1 + U5 * s1))
-		return y;
-
-	      return atanMp (x, pr);
+	      y = x + yy;
+	      /* Max ULP is 0.511.  */
+	      return y;
 	    }
 	}
       else
 	{			/* B <= u < C */
-	  i = (TWO52 + TWO8 * u) - TWO52;
+	  i = (TWO52 + 256 * u) - TWO52;
 	  i -= 16;
 	  z = u - cij[i][0].d;
 	  yy = cij[i][5].d + z * cij[i][6].d;
@@ -141,44 +104,9 @@ __atan (double x)
 	  yy *= z;
 
 	  t1 = cij[i][1].d;
-	  if (i < 112)
-	    {
-	      if (i < 48)
-		u2 = U21;	/* u < 1/4        */
-	      else
-		u2 = U22;
-	    }			/* 1/4 <= u < 1/2 */
-	  else
-	    {
-	      if (i < 176)
-		u2 = U23;	/* 1/2 <= u < 3/4 */
-	      else
-		u2 = U24;
-	    }			/* 3/4 <= u <= 1  */
-	  if ((y = t1 + (yy - u2 * t1)) == t1 + (yy + u2 * t1))
-	    return __signArctan (x, y);
-
-	  z = u - hij[i][0].d;
-
-	  s1 = hij[i][14].d + z * hij[i][15].d;
-	  s1 = hij[i][13].d + z * s1;
-	  s1 = hij[i][12].d + z * s1;
-	  s1 = hij[i][11].d + z * s1;
-	  s1 *= z;
-
-	  ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2);
-	  MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2);
-	  ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2);
-	  MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2);
-	  ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2);
-	  MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2);
-	  ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2);
-	  MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2);
-	  ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2);
-	  if ((y = s2 + (ss2 - U6 * s2)) == s2 + (ss2 + U6 * s2))
-	    return __signArctan (x, y);
-
-	  return atanMp (x, pr);
+	  y = t1 + yy;
+	  /* Max ULP is 0.56.  */
+	  return __signArctan (x, y);
 	}
     }
   else
@@ -188,7 +116,7 @@ __atan (double x)
 	  w = 1 / u;
 	  EMULV (w, u, t1, t2);
 	  ww = w * ((1 - t1) - t2);
-	  i = (TWO52 + TWO8 * w) - TWO52;
+	  i = (TWO52 + 256 * w) - TWO52;
 	  i -= 16;
 	  z = (w - cij[i][0].d) + ww;
 
@@ -199,37 +127,9 @@ __atan (double x)
 	  yy = HPI1 - z * yy;
 
 	  t1 = HPI - cij[i][1].d;
-	  if (i < 112)
-	    u3 = U31;           /* w <  1/2 */
-	  else
-	    u3 = U32;           /* w >= 1/2 */
-	  if ((y = t1 + (yy - u3)) == t1 + (yy + u3))
-	    return __signArctan (x, y);
-
-	  DIV2 (1, 0, u, 0, w, ww, t1, t2, t3, t4);
-	  t1 = w - hij[i][0].d;
-	  EADD (t1, ww, z, zz);
-
-	  s1 = hij[i][14].d + z * hij[i][15].d;
-	  s1 = hij[i][13].d + z * s1;
-	  s1 = hij[i][12].d + z * s1;
-	  s1 = hij[i][11].d + z * s1;
-	  s1 *= z;
-
-	  ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2);
-	  MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2);
-	  ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2);
-	  MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2);
-	  ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2);
-	  MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2);
-	  ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2);
-	  MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2);
-	  ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2);
-	  SUB2 (HPI, HPI1, s2, ss2, s1, ss1, t1, t2);
-	  if ((y = s1 + (ss1 - U7)) == s1 + (ss1 + U7))
-	    return __signArctan (x, y);
-
-	  return atanMp (x, pr);
+	  y = t1 + yy;
+	  /* Max ULP is 0.503.  */
+	  return __signArctan (x, y);
 	}
       else
 	{
@@ -249,34 +149,9 @@ __atan (double x)
 	      ww = w * ((1 - t1) - t2);
 	      ESUB (HPI, w, t3, cor);
 	      yy = ((HPI1 + cor) - ww) - yy;
-	      if ((y = t3 + (yy - U4)) == t3 + (yy + U4))
-		return __signArctan (x, y);
-
-	      DIV2 (1, 0, u, 0, w, ww, t1, t2, t3, t4);
-	      MUL2 (w, ww, w, ww, v, vv, t1, t2);
-
-	      s1 = f17.d + v * f19.d;
-	      s1 = f15.d + v * s1;
-	      s1 = f13.d + v * s1;
-	      s1 = f11.d + v * s1;
-	      s1 *= v;
-
-	      ADD2 (f9.d, ff9.d, s1, 0, s2, ss2, t1, t2);
-	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2);
-	      ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2);
-	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2);
-	      ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2);
-	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2);
-	      ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2);
-	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2);
-	      MUL2 (w, ww, s1, ss1, s2, ss2, t1, t2);
-	      ADD2 (w, ww, s2, ss2, s1, ss1, t1, t2);
-	      SUB2 (HPI, HPI1, s1, ss1, s2, ss2, t1, t2);
-
-	      if ((y = s2 + (ss2 - U8)) == s2 + (ss2 + U8))
-		return __signArctan (x, y);
-
-	      return atanMp (x, pr);
+	      y = t3 + yy;
+	      /* Max ULP is 0.5003.  */
+	      return __signArctan (x, y);
 	    }
 	  else
 	    {
@@ -290,35 +165,6 @@ __atan (double x)
     }
 }
 
- /* Final stages. Compute atan(x) by multiple precision arithmetic */
-static double
-atanMp (double x, const int pr[])
-{
-  mp_no mpx, mpy, mpy2, mperr, mpt1, mpy1;
-  double y1, y2;
-  int i, p;
-
-  for (i = 0; i < M; i++)
-    {
-      p = pr[i];
-      __dbl_mp (x, &mpx, p);
-      __mpatan (&mpx, &mpy, p);
-      __dbl_mp (u9[i].d, &mpt1, p);
-      __mul (&mpy, &mpt1, &mperr, p);
-      __add (&mpy, &mperr, &mpy1, p);
-      __sub (&mpy, &mperr, &mpy2, p);
-      __mp_dbl (&mpy1, &y1, p);
-      __mp_dbl (&mpy2, &y2, p);
-      if (y1 == y2)
-	{
-	  LIBC_PROBE (slowatan, 3, &p, &x, &y1);
-	  return y1;
-	}
-    }
-  LIBC_PROBE (slowatan_inexact, 3, &p, &x, &y1);
-  return y1;			/*if impossible to do exact computing */
-}
-
 #ifndef __atan
 libm_alias_double (__atan, atan)
 #endif
-- 
cgit 1.4.1