diff options
author | Wilco Dijkstra <wilco.dijkstra@arm.com> | 2021-03-10 12:40:56 +0000 |
---|---|---|
committer | Wilco Dijkstra <wdijkstr@arm.com> | 2021-03-11 14:26:36 +0000 |
commit | e898cd1593cc530b0fb29d46a2854dbc552302c0 (patch) | |
tree | ee9287ef70c608cb4dbf41d4a2392beaf00ae7fc | |
parent | 476d692e8a80bb770a3f74876f64bcfa2998a09d (diff) | |
download | glibc-e898cd1593cc530b0fb29d46a2854dbc552302c0.tar.gz glibc-e898cd1593cc530b0fb29d46a2854dbc552302c0.tar.xz glibc-e898cd1593cc530b0fb29d46a2854dbc552302c0.zip |
math: Remove slow paths from atan [BZ #15267]
Remove slow paths from atan. Add ULP annotations.

Reviewed-By: Paul Zimmermann <Paul.Zimmermann@inria.fr>
-rw-r--r-- | sysdeps/ieee754/dbl-64/atnat.h | 4 | ||||
-rw-r--r-- | sysdeps/ieee754/dbl-64/s_atan.c | 196 | ||||
-rw-r--r-- | sysdeps/ieee754/dbl-64/uatan.tbl | 8 |
3 files changed, 27 insertions, 181 deletions
diff --git a/sysdeps/ieee754/dbl-64/atnat.h b/sysdeps/ieee754/dbl-64/atnat.h index af4374019f..2b12481f04 100644 --- a/sysdeps/ieee754/dbl-64/atnat.h +++ b/sysdeps/ieee754/dbl-64/atnat.h @@ -29,7 +29,7 @@ #define M 4 #ifdef BIG_ENDI - static const number + static const mynumber /* polynomial I */ /**/ d3 = {{0xbfd55555, 0x55555555} }, /* -0.333... */ /**/ d5 = {{0x3fc99999, 0x999997fd} }, /* 0.199... */ @@ -79,7 +79,7 @@ #else #ifdef LITTLE_ENDI - static const number + static const mynumber /* polynomial I */ /**/ d3 = {{0x55555555, 0xbfd55555} }, /* -0.333... */ /**/ d5 = {{0x999997fd, 0x3fc99999} }, /* 0.199... */ diff --git a/sysdeps/ieee754/dbl-64/s_atan.c b/sysdeps/ieee754/dbl-64/s_atan.c index 291b988318..977183eb92 100644 --- a/sysdeps/ieee754/dbl-64/s_atan.c +++ b/sysdeps/ieee754/dbl-64/s_atan.c @@ -20,25 +20,15 @@ /* MODULE_NAME: atnat.c */ /* */ /* FUNCTIONS: uatan */ -/* atanMp */ /* signArctan */ /* */ -/* */ -/* FILES NEEDED: dla.h endian.h mpa.h mydefs.h atnat.h */ -/* mpatan.c mpatan2.c mpsqrt.c */ +/* FILES NEEDED: dla.h endian.h mydefs.h atnat.h */ /* uatan.tbl */ /* */ -/* An ultimate atan() routine. Given an IEEE double machine number x */ -/* it computes the correctly rounded (to nearest) value of atan(x). */ -/* */ -/* Assumption: Machine arithmetic operations are performed in */ -/* round to nearest mode of IEEE 754 standard. 
*/ -/* */ /************************************************************************/ #include <dla.h> -#include "mpa.h" -#include "MathLib.h" +#include "mydefs.h" #include "uatan.tbl" #include "atnat.h" #include <fenv.h> @@ -47,10 +37,8 @@ #include <math.h> #include <fenv_private.h> #include <math-underflow.h> -#include <stap-probe.h> -void __mpatan (mp_no *, mp_no *, int); /* see definition in mpatan.c */ -static double atanMp (double, const int[]); +#define TWO52 0x1.0p52 /* Fix the sign of y and return */ static double @@ -59,17 +47,14 @@ __signArctan (double x, double y) return copysign (y, x); } - -/* An ultimate atan() routine. Given an IEEE double machine number x, */ -/* routine computes the correctly rounded (to nearest) value of atan(x). */ +/* atan with max ULP of ~0.523 based on random sampling. */ double __atan (double x) { - double cor, s1, ss1, s2, ss2, t1, t2, t3, t4, u, u2, u3, - v, vv, w, ww, y, yy, z, zz; + double cor, t1, t2, t3, u, + v, w, ww, y, yy, z; int i, ux, dx; - static const int pr[M] = { 6, 8, 10, 32 }; - number num; + mynumber num; num.d = x; ux = num.i[HIGH_HALF]; @@ -102,36 +87,14 @@ __atan (double x) yy = d3.d + v * yy; yy *= x * v; - if ((y = x + (yy - U1 * x)) == x + (yy + U1 * x)) - return y; - - EMULV (x, x, v, vv); /* v+vv=x^2 */ - - s1 = f17.d + v * f19.d; - s1 = f15.d + v * s1; - s1 = f13.d + v * s1; - s1 = f11.d + v * s1; - s1 *= v; - - ADD2 (f9.d, ff9.d, s1, 0, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - MUL2 (x, 0, s1, ss1, s2, ss2, t1, t2); - ADD2 (x, 0, s2, ss2, s1, ss1, t1, t2); - if ((y = s1 + (ss1 - U5 * s1)) == s1 + (ss1 + U5 * s1)) - return y; - - return atanMp (x, pr); + y = x + yy; + /* Max ULP is 0.511. 
*/ + return y; } } else { /* B <= u < C */ - i = (TWO52 + TWO8 * u) - TWO52; + i = (TWO52 + 256 * u) - TWO52; i -= 16; z = u - cij[i][0].d; yy = cij[i][5].d + z * cij[i][6].d; @@ -141,44 +104,9 @@ __atan (double x) yy *= z; t1 = cij[i][1].d; - if (i < 112) - { - if (i < 48) - u2 = U21; /* u < 1/4 */ - else - u2 = U22; - } /* 1/4 <= u < 1/2 */ - else - { - if (i < 176) - u2 = U23; /* 1/2 <= u < 3/4 */ - else - u2 = U24; - } /* 3/4 <= u <= 1 */ - if ((y = t1 + (yy - u2 * t1)) == t1 + (yy + u2 * t1)) - return __signArctan (x, y); - - z = u - hij[i][0].d; - - s1 = hij[i][14].d + z * hij[i][15].d; - s1 = hij[i][13].d + z * s1; - s1 = hij[i][12].d + z * s1; - s1 = hij[i][11].d + z * s1; - s1 *= z; - - ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2); - MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2); - MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2); - MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2); - MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2); - if ((y = s2 + (ss2 - U6 * s2)) == s2 + (ss2 + U6 * s2)) - return __signArctan (x, y); - - return atanMp (x, pr); + y = t1 + yy; + /* Max ULP is 0.56. 
*/ + return __signArctan (x, y); } } else @@ -188,7 +116,7 @@ __atan (double x) w = 1 / u; EMULV (w, u, t1, t2); ww = w * ((1 - t1) - t2); - i = (TWO52 + TWO8 * w) - TWO52; + i = (TWO52 + 256 * w) - TWO52; i -= 16; z = (w - cij[i][0].d) + ww; @@ -199,37 +127,9 @@ __atan (double x) yy = HPI1 - z * yy; t1 = HPI - cij[i][1].d; - if (i < 112) - u3 = U31; /* w < 1/2 */ - else - u3 = U32; /* w >= 1/2 */ - if ((y = t1 + (yy - u3)) == t1 + (yy + u3)) - return __signArctan (x, y); - - DIV2 (1, 0, u, 0, w, ww, t1, t2, t3, t4); - t1 = w - hij[i][0].d; - EADD (t1, ww, z, zz); - - s1 = hij[i][14].d + z * hij[i][15].d; - s1 = hij[i][13].d + z * s1; - s1 = hij[i][12].d + z * s1; - s1 = hij[i][11].d + z * s1; - s1 *= z; - - ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2); - MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2); - MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2); - MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2); - MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2); - SUB2 (HPI, HPI1, s2, ss2, s1, ss1, t1, t2); - if ((y = s1 + (ss1 - U7)) == s1 + (ss1 + U7)) - return __signArctan (x, y); - - return atanMp (x, pr); + y = t1 + yy; + /* Max ULP is 0.503. 
*/ + return __signArctan (x, y); } else { @@ -249,34 +149,9 @@ __atan (double x) ww = w * ((1 - t1) - t2); ESUB (HPI, w, t3, cor); yy = ((HPI1 + cor) - ww) - yy; - if ((y = t3 + (yy - U4)) == t3 + (yy + U4)) - return __signArctan (x, y); - - DIV2 (1, 0, u, 0, w, ww, t1, t2, t3, t4); - MUL2 (w, ww, w, ww, v, vv, t1, t2); - - s1 = f17.d + v * f19.d; - s1 = f15.d + v * s1; - s1 = f13.d + v * s1; - s1 = f11.d + v * s1; - s1 *= v; - - ADD2 (f9.d, ff9.d, s1, 0, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - MUL2 (w, ww, s1, ss1, s2, ss2, t1, t2); - ADD2 (w, ww, s2, ss2, s1, ss1, t1, t2); - SUB2 (HPI, HPI1, s1, ss1, s2, ss2, t1, t2); - - if ((y = s2 + (ss2 - U8)) == s2 + (ss2 + U8)) - return __signArctan (x, y); - - return atanMp (x, pr); + y = t3 + yy; + /* Max ULP is 0.5003. */ + return __signArctan (x, y); } else { @@ -290,35 +165,6 @@ __atan (double x) } } - /* Final stages. 
Compute atan(x) by multiple precision arithmetic */ -static double -atanMp (double x, const int pr[]) -{ - mp_no mpx, mpy, mpy2, mperr, mpt1, mpy1; - double y1, y2; - int i, p; - - for (i = 0; i < M; i++) - { - p = pr[i]; - __dbl_mp (x, &mpx, p); - __mpatan (&mpx, &mpy, p); - __dbl_mp (u9[i].d, &mpt1, p); - __mul (&mpy, &mpt1, &mperr, p); - __add (&mpy, &mperr, &mpy1, p); - __sub (&mpy, &mperr, &mpy2, p); - __mp_dbl (&mpy1, &y1, p); - __mp_dbl (&mpy2, &y2, p); - if (y1 == y2) - { - LIBC_PROBE (slowatan, 3, &p, &x, &y1); - return y1; - } - } - LIBC_PROBE (slowatan_inexact, 3, &p, &x, &y1); - return y1; /*if impossible to do exact computing */ -} - #ifndef __atan libm_alias_double (__atan, atan) #endif diff --git a/sysdeps/ieee754/dbl-64/uatan.tbl b/sysdeps/ieee754/dbl-64/uatan.tbl index 8a477caa38..fdc3e53304 100644 --- a/sysdeps/ieee754/dbl-64/uatan.tbl +++ b/sysdeps/ieee754/dbl-64/uatan.tbl @@ -25,7 +25,7 @@ #ifdef BIG_ENDI - static const number + static const mynumber cij[241][7] = { /* x0,cij for (1/16,1) */ /**/ {{{0X3FB04006, 0X65E0244E} }, /**/ {{0X3FB03A73, 0X7B53DD20} }, @@ -1716,7 +1716,7 @@ /**/ {{0XBF99B9A7, 0X18A3BA58} } }, }; - static const number + static const mynumber hij[241][16] = { /* x0,hij for (1/16,1) */ /**/ {{{0x3fb04000, 0x00000000} }, /**/ {{0x3fb03a6d, 0x1c06693d} }, @@ -5579,7 +5579,7 @@ #else #ifdef LITTLE_ENDI - static const number + static const mynumber cij[241][7] = { /* x0,cij for (1/16,1) */ /**/ {{{0X65E0244E, 0X3FB04006} }, /**/ {{0X7B53DD20, 0X3FB03A73} }, @@ -7270,7 +7270,7 @@ /**/ {{0X18A3BA58, 0XBF99B9A7} } }, }; - static const number + static const mynumber hij[241][16] = { /* x0,hij for (1/16,1) */ /**/ {{{0x00000000, 0x3fb04000} }, /**/ {{0x1c06693d, 0x3fb03a6d} }, |