From e898cd1593cc530b0fb29d46a2854dbc552302c0 Mon Sep 17 00:00:00 2001 From: Wilco Dijkstra Date: Wed, 10 Mar 2021 12:40:56 +0000 Subject: math: Remove slow paths from atan [BZ #15267] Remove slow paths from atan. Add ULP annotations. Reviewed-By: Paul Zimmermann --- sysdeps/ieee754/dbl-64/s_atan.c | 196 +++++----------------------------------- 1 file changed, 21 insertions(+), 175 deletions(-) (limited to 'sysdeps/ieee754/dbl-64/s_atan.c') diff --git a/sysdeps/ieee754/dbl-64/s_atan.c b/sysdeps/ieee754/dbl-64/s_atan.c index 291b988318..977183eb92 100644 --- a/sysdeps/ieee754/dbl-64/s_atan.c +++ b/sysdeps/ieee754/dbl-64/s_atan.c @@ -20,25 +20,15 @@ /* MODULE_NAME: atnat.c */ /* */ /* FUNCTIONS: uatan */ -/* atanMp */ /* signArctan */ /* */ -/* */ -/* FILES NEEDED: dla.h endian.h mpa.h mydefs.h atnat.h */ -/* mpatan.c mpatan2.c mpsqrt.c */ +/* FILES NEEDED: dla.h endian.h mydefs.h atnat.h */ /* uatan.tbl */ /* */ -/* An ultimate atan() routine. Given an IEEE double machine number x */ -/* it computes the correctly rounded (to nearest) value of atan(x). */ -/* */ -/* Assumption: Machine arithmetic operations are performed in */ -/* round to nearest mode of IEEE 754 standard. */ -/* */ /************************************************************************/ #include -#include "mpa.h" -#include "MathLib.h" +#include "mydefs.h" #include "uatan.tbl" #include "atnat.h" #include @@ -47,10 +37,8 @@ #include #include #include -#include -void __mpatan (mp_no *, mp_no *, int); /* see definition in mpatan.c */ -static double atanMp (double, const int[]); +#define TWO52 0x1.0p52 /* Fix the sign of y and return */ static double @@ -59,17 +47,14 @@ __signArctan (double x, double y) return copysign (y, x); } - -/* An ultimate atan() routine. Given an IEEE double machine number x, */ -/* routine computes the correctly rounded (to nearest) value of atan(x). */ +/* atan with max ULP of ~0.523 based on random sampling. */ double __atan (double x) { - double cor, s1, ss1, s2, ss2, t1, t2, t3, t4, u, u2, u3, - v, vv, w, ww, y, yy, z, zz; + double cor, t1, t2, t3, u, + v, w, ww, y, yy, z; int i, ux, dx; - static const int pr[M] = { 6, 8, 10, 32 }; - number num; + mynumber num; num.d = x; ux = num.i[HIGH_HALF]; @@ -102,36 +87,14 @@ __atan (double x) yy = d3.d + v * yy; yy *= x * v; - if ((y = x + (yy - U1 * x)) == x + (yy + U1 * x)) - return y; - - EMULV (x, x, v, vv); /* v+vv=x^2 */ - - s1 = f17.d + v * f19.d; - s1 = f15.d + v * s1; - s1 = f13.d + v * s1; - s1 = f11.d + v * s1; - s1 *= v; - - ADD2 (f9.d, ff9.d, s1, 0, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - MUL2 (x, 0, s1, ss1, s2, ss2, t1, t2); - ADD2 (x, 0, s2, ss2, s1, ss1, t1, t2); - if ((y = s1 + (ss1 - U5 * s1)) == s1 + (ss1 + U5 * s1)) - return y; - - return atanMp (x, pr); + y = x + yy; + /* Max ULP is 0.511. */ + return y; } } else { /* B <= u < C */ - i = (TWO52 + TWO8 * u) - TWO52; + i = (TWO52 + 256 * u) - TWO52; i -= 16; z = u - cij[i][0].d; yy = cij[i][5].d + z * cij[i][6].d; @@ -141,44 +104,9 @@ __atan (double x) yy *= z; t1 = cij[i][1].d; - if (i < 112) - { - if (i < 48) - u2 = U21; /* u < 1/4 */ - else - u2 = U22; - } /* 1/4 <= u < 1/2 */ - else - { - if (i < 176) - u2 = U23; /* 1/2 <= u < 3/4 */ - else - u2 = U24; - } /* 3/4 <= u <= 1 */ - if ((y = t1 + (yy - u2 * t1)) == t1 + (yy + u2 * t1)) - return __signArctan (x, y); - - z = u - hij[i][0].d; - - s1 = hij[i][14].d + z * hij[i][15].d; - s1 = hij[i][13].d + z * s1; - s1 = hij[i][12].d + z * s1; - s1 = hij[i][11].d + z * s1; - s1 *= z; - - ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2); - MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2); - MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2); - MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2); - MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2); - if ((y = s2 + (ss2 - U6 * s2)) == s2 + (ss2 + U6 * s2)) - return __signArctan (x, y); - - return atanMp (x, pr); + y = t1 + yy; + /* Max ULP is 0.56. */ + return __signArctan (x, y); } } else @@ -188,7 +116,7 @@ __atan (double x) w = 1 / u; EMULV (w, u, t1, t2); ww = w * ((1 - t1) - t2); - i = (TWO52 + TWO8 * w) - TWO52; + i = (TWO52 + 256 * w) - TWO52; i -= 16; z = (w - cij[i][0].d) + ww; @@ -199,37 +127,9 @@ __atan (double x) yy = HPI1 - z * yy; t1 = HPI - cij[i][1].d; - if (i < 112) - u3 = U31; /* w < 1/2 */ - else - u3 = U32; /* w >= 1/2 */ - if ((y = t1 + (yy - u3)) == t1 + (yy + u3)) - return __signArctan (x, y); - - DIV2 (1, 0, u, 0, w, ww, t1, t2, t3, t4); - t1 = w - hij[i][0].d; - EADD (t1, ww, z, zz); - - s1 = hij[i][14].d + z * hij[i][15].d; - s1 = hij[i][13].d + z * s1; - s1 = hij[i][12].d + z * s1; - s1 = hij[i][11].d + z * s1; - s1 *= z; - - ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2); - MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2); - MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2); - MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2); - MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2); - SUB2 (HPI, HPI1, s2, ss2, s1, ss1, t1, t2); - if ((y = s1 + (ss1 - U7)) == s1 + (ss1 + U7)) - return __signArctan (x, y); - - return atanMp (x, pr); + y = t1 + yy; + /* Max ULP is 0.503. */ + return __signArctan (x, y); } else { @@ -249,34 +149,9 @@ __atan (double x) ww = w * ((1 - t1) - t2); ESUB (HPI, w, t3, cor); yy = ((HPI1 + cor) - ww) - yy; - if ((y = t3 + (yy - U4)) == t3 + (yy + U4)) - return __signArctan (x, y); - - DIV2 (1, 0, u, 0, w, ww, t1, t2, t3, t4); - MUL2 (w, ww, w, ww, v, vv, t1, t2); - - s1 = f17.d + v * f19.d; - s1 = f15.d + v * s1; - s1 = f13.d + v * s1; - s1 = f11.d + v * s1; - s1 *= v; - - ADD2 (f9.d, ff9.d, s1, 0, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - MUL2 (w, ww, s1, ss1, s2, ss2, t1, t2); - ADD2 (w, ww, s2, ss2, s1, ss1, t1, t2); - SUB2 (HPI, HPI1, s1, ss1, s2, ss2, t1, t2); - - if ((y = s2 + (ss2 - U8)) == s2 + (ss2 + U8)) - return __signArctan (x, y); - - return atanMp (x, pr); + y = t3 + yy; + /* Max ULP is 0.5003. */ + return __signArctan (x, y); } else { @@ -290,35 +165,6 @@ __atan (double x) } } - /* Final stages. Compute atan(x) by multiple precision arithmetic */ -static double -atanMp (double x, const int pr[]) -{ - mp_no mpx, mpy, mpy2, mperr, mpt1, mpy1; - double y1, y2; - int i, p; - - for (i = 0; i < M; i++) - { - p = pr[i]; - __dbl_mp (x, &mpx, p); - __mpatan (&mpx, &mpy, p); - __dbl_mp (u9[i].d, &mpt1, p); - __mul (&mpy, &mpt1, &mperr, p); - __add (&mpy, &mperr, &mpy1, p); - __sub (&mpy, &mperr, &mpy2, p); - __mp_dbl (&mpy1, &y1, p); - __mp_dbl (&mpy2, &y2, p); - if (y1 == y2) - { - LIBC_PROBE (slowatan, 3, &p, &x, &y1); - return y1; - } - } - LIBC_PROBE (slowatan_inexact, 3, &p, &x, &y1); - return y1; /*if impossible to do exact computing */ -} - #ifndef __atan libm_alias_double (__atan, atan) #endif -- cgit 1.4.1