diff options
author | Wilco Dijkstra <wilco.dijkstra@arm.com> | 2021-03-10 12:41:20 +0000 |
---|---|---|
committer | Wilco Dijkstra <wdijkstr@arm.com> | 2021-03-11 14:26:36 +0000 |
commit | 4e1a870b9a4c664c9bc79afd07276ab660abd73f (patch) | |
tree | 3a03af4ce446e9c419718300b744305ffef8c66f | |
parent | e898cd1593cc530b0fb29d46a2854dbc552302c0 (diff) | |
download | glibc-4e1a870b9a4c664c9bc79afd07276ab660abd73f.tar.gz glibc-4e1a870b9a4c664c9bc79afd07276ab660abd73f.tar.xz glibc-4e1a870b9a4c664c9bc79afd07276ab660abd73f.zip |
math: Remove slow paths from atan2 [BZ #15267]
Remove slow paths from atan2. Add ULP annotations. Reviewed-By: Paul Zimmermann <Paul.Zimmermann@inria.fr>
-rw-r--r-- | sysdeps/ieee754/dbl-64/atnat2.h | 4 | ||||
-rw-r--r-- | sysdeps/ieee754/dbl-64/e_atan2.c | 330 |
2 files changed, 40 insertions, 294 deletions
diff --git a/sysdeps/ieee754/dbl-64/atnat2.h b/sysdeps/ieee754/dbl-64/atnat2.h index f1c0d23f9e..f9f8debd55 100644 --- a/sysdeps/ieee754/dbl-64/atnat2.h +++ b/sysdeps/ieee754/dbl-64/atnat2.h @@ -34,7 +34,7 @@ #define MM 5 #ifdef BIG_ENDI - static const number + static const mynumber /* polynomial I */ /**/ d3 = {{0xbfd55555, 0x55555555} }, /* -0.333... */ /**/ d5 = {{0x3fc99999, 0x999997fd} }, /* 0.199... */ @@ -96,7 +96,7 @@ #else #ifdef LITTLE_ENDI - static const number + static const mynumber /* polynomial I */ /**/ d3 = {{0x55555555, 0xbfd55555} }, /* -0.333... */ /**/ d5 = {{0x999997fd, 0x3fc99999} }, /* 0.199... */ diff --git a/sysdeps/ieee754/dbl-64/e_atan2.c b/sysdeps/ieee754/dbl-64/e_atan2.c index e6b98142fb..b77ad2dc6b 100644 --- a/sysdeps/ieee754/dbl-64/e_atan2.c +++ b/sysdeps/ieee754/dbl-64/e_atan2.c @@ -20,25 +20,14 @@ /* MODULE_NAME: atnat2.c */ /* */ /* FUNCTIONS: uatan2 */ -/* atan2Mp */ /* signArctan2 */ -/* normalized */ /* */ -/* FILES NEEDED: dla.h endian.h mpa.h mydefs.h atnat2.h */ -/* mpatan.c mpatan2.c mpsqrt.c */ +/* FILES NEEDED: dla.h endian.h mydefs.h atnat2.h */ /* uatan.tbl */ /* */ -/* An ultimate atan2() routine. Given two IEEE double machine numbers y,*/ -/* x it computes the correctly rounded (to nearest) value of atan2(y,x).*/ -/* */ -/* Assumption: Machine arithmetic operations are performed in */ -/* round to nearest mode of IEEE 754 standard. */ -/* */ /************************************************************************/ #include <dla.h> -#include "mpa.h" -#include "MathLib.h" #include "mydefs.h" #include "uatan.tbl" #include "atnat2.h" @@ -48,20 +37,15 @@ #include <math-barriers.h> #include <math_private.h> #include <fenv_private.h> -#include <stap-probe.h> #include <libm-alias-finite.h> #ifndef SECTION # define SECTION #endif -/************************************************************************/ -/* An ultimate atan2 routine. Given two IEEE double machine numbers y,x */ -/* it computes the correctly rounded (to nearest) value of atan2(y,x). */ -/* Assumption: Machine arithmetic operations are performed in */ -/* round to nearest mode of IEEE 754 standard. */ -/************************************************************************/ -static double atan2Mp (double, double, const int[]); +#define TWO52 0x1.0p52 +#define TWOM1022 0x1.0p-1022 + /* Fix the sign and return after stage 1 or stage 2 */ static double signArctan2 (double y, double z) @@ -69,18 +53,15 @@ signArctan2 (double y, double z) return copysign (z, y); } -static double normalized (double, double, double, double); -void __mpatan2 (mp_no *, mp_no *, mp_no *, int); - +/* atan2 with max ULP of ~0.524 based on random sampling. */ double SECTION __ieee754_atan2 (double y, double x) { int i, de, ux, dx, uy, dy; - static const int pr[MM] = { 6, 8, 10, 20, 32 }; - double ax, ay, u, du, u9, ua, v, vv, dv, t1, t2, t3, - z, zz, cor, s1, ss1, s2, ss2; - number num; + double ax, ay, u, du, v, vv, dv, t1, t2, t3, + z, zz, cor; + mynumber num; static const int ep = 59768832, /* 57*16**5 */ em = -59768832; /* -57*16**5 */ @@ -208,10 +189,8 @@ __ieee754_atan2 (double y, double x) if (x > 0) { double ret; - if ((z = ay / ax) < TWOM1022) - ret = normalized (ax, ay, y, z); - else - ret = signArctan2 (y, z); + z = ay / ax; + ret = signArctan2 (y, z); if (fabs (ret) < DBL_MIN) { double vret = ret ? ret : DBL_MIN; @@ -270,30 +249,12 @@ __ieee754_atan2 (double y, double x) + v * (d11.d + v * d13.d))))); - if ((z = u + (zz - u1.d * u)) == u + (zz + u1.d * u)) - return signArctan2 (y, z); - - MUL2 (u, du, u, du, v, vv, t1, t2); - s1 = v * (f11.d + v * (f13.d - + v * (f15.d + v * (f17.d + v * f19.d)))); - ADD2 (f9.d, ff9.d, s1, 0, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - MUL2 (u, du, s1, ss1, s2, ss2, t1, t2); - ADD2 (u, du, s2, ss2, s1, ss1, t1, t2); - - if ((z = s1 + (ss1 - u5.d * s1)) == s1 + (ss1 + u5.d * s1)) - return signArctan2 (y, z); - - return atan2Mp (x, y, pr); + z = u + zz; + /* Max ULP is 0.504. */ + return signArctan2 (y, z); } - i = (TWO52 + TWO8 * u) - TWO52; + i = (TWO52 + 256 * u) - TWO52; i -= 16; t3 = u - cij[i][0].d; EADD (t3, du, v, dv); @@ -304,43 +265,9 @@ __ieee754_atan2 (double y, double x) + v * (cij[i][4].d + v * (cij[i][5].d + v * cij[i][6].d)))); - if (i < 112) - { - if (i < 48) - u9 = u91.d; /* u < 1/4 */ - else - u9 = u92.d; - } /* 1/4 <= u < 1/2 */ - else - { - if (i < 176) - u9 = u93.d; /* 1/2 <= u < 3/4 */ - else - u9 = u94.d; - } /* 3/4 <= u <= 1 */ - if ((z = t1 + (zz - u9 * t1)) == t1 + (zz + u9 * t1)) - return signArctan2 (y, z); - - t1 = u - hij[i][0].d; - EADD (t1, du, v, vv); - s1 = v * (hij[i][11].d - + v * (hij[i][12].d - + v * (hij[i][13].d - + v * (hij[i][14].d - + v * hij[i][15].d)))); - ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2); - - if ((z = s2 + (ss2 - ub.d * s2)) == s2 + (ss2 + ub.d * s2)) - return signArctan2 (y, z); - return atan2Mp (x, y, pr); + z = t1 + zz; + /* Max ULP is 0.56. */ + return signArctan2 (y, z); } /* (ii) x>0, abs(x)<=abs(y): pi/2-atan(ax/ay) */ @@ -355,31 +282,12 @@ __ieee754_atan2 (double y, double x) + v * d13.d))))); ESUB (hpi.d, u, t2, cor); t3 = ((hpi1.d + cor) - du) - zz; - if ((z = t2 + (t3 - u2.d)) == t2 + (t3 + u2.d)) - return signArctan2 (y, z); - - MUL2 (u, du, u, du, v, vv, t1, t2); - s1 = v * (f11.d - + v * (f13.d - + v * (f15.d + v * (f17.d + v * f19.d)))); - ADD2 (f9.d, ff9.d, s1, 0, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - MUL2 (u, du, s1, ss1, s2, ss2, t1, t2); - ADD2 (u, du, s2, ss2, s1, ss1, t1, t2); - SUB2 (hpi.d, hpi1.d, s1, ss1, s2, ss2, t1, t2); - - if ((z = s2 + (ss2 - u6.d)) == s2 + (ss2 + u6.d)) - return signArctan2 (y, z); - return atan2Mp (x, y, pr); + z = t2 + t3; + /* Max ULP is 0.501. */ + return signArctan2 (y, z); } - i = (TWO52 + TWO8 * u) - TWO52; + i = (TWO52 + 256 * u) - TWO52; i -= 16; v = (u - cij[i][0].d) + du; @@ -389,36 +297,9 @@ __ieee754_atan2 (double y, double x) + v * (cij[i][5].d + v * cij[i][6].d)))); t1 = hpi.d - cij[i][1].d; - if (i < 112) - ua = ua1.d; /* w < 1/2 */ - else - ua = ua2.d; /* w >= 1/2 */ - if ((z = t1 + (zz - ua)) == t1 + (zz + ua)) - return signArctan2 (y, z); - - t1 = u - hij[i][0].d; - EADD (t1, du, v, vv); - - s1 = v * (hij[i][11].d - + v * (hij[i][12].d - + v * (hij[i][13].d - + v * (hij[i][14].d - + v * hij[i][15].d)))); - - ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2); - SUB2 (hpi.d, hpi1.d, s2, ss2, s1, ss1, t1, t2); - - if ((z = s1 + (ss1 - uc.d)) == s1 + (ss1 + uc.d)) - return signArctan2 (y, z); - return atan2Mp (x, y, pr); + z = t1 + zz; + /* Max ULP is 0.503. */ + return signArctan2 (y, z); } /* (iii) x<0, abs(x)< abs(y): pi/2+atan(ax/ay) */ @@ -434,30 +315,12 @@ __ieee754_atan2 (double y, double x) + v * (d11.d + v * d13.d))))); EADD (hpi.d, u, t2, cor); t3 = ((hpi1.d + cor) + du) + zz; - if ((z = t2 + (t3 - u3.d)) == t2 + (t3 + u3.d)) - return signArctan2 (y, z); - - MUL2 (u, du, u, du, v, vv, t1, t2); - s1 = v * (f11.d - + v * (f13.d + v * (f15.d + v * (f17.d + v * f19.d)))); - ADD2 (f9.d, ff9.d, s1, 0, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - MUL2 (u, du, s1, ss1, s2, ss2, t1, t2); - ADD2 (u, du, s2, ss2, s1, ss1, t1, t2); - ADD2 (hpi.d, hpi1.d, s1, ss1, s2, ss2, t1, t2); - - if ((z = s2 + (ss2 - u7.d)) == s2 + (ss2 + u7.d)) - return signArctan2 (y, z); - return atan2Mp (x, y, pr); + z = t2 + t3; + /* Max ULP is 0.501. */ + return signArctan2 (y, z); } - i = (TWO52 + TWO8 * u) - TWO52; + i = (TWO52 + 256 * u) - TWO52; i -= 16; v = (u - cij[i][0].d) + du; zz = hpi1.d + v * (cij[i][2].d @@ -466,34 +329,9 @@ __ieee754_atan2 (double y, double x) + v * (cij[i][5].d + v * cij[i][6].d)))); t1 = hpi.d + cij[i][1].d; - if (i < 112) - ua = ua1.d; /* w < 1/2 */ - else - ua = ua2.d; /* w >= 1/2 */ - if ((z = t1 + (zz - ua)) == t1 + (zz + ua)) - return signArctan2 (y, z); - - t1 = u - hij[i][0].d; - EADD (t1, du, v, vv); - s1 = v * (hij[i][11].d - + v * (hij[i][12].d - + v * (hij[i][13].d - + v * (hij[i][14].d - + v * hij[i][15].d)))); - ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2); - ADD2 (hpi.d, hpi1.d, s2, ss2, s1, ss1, t1, t2); - - if ((z = s1 + (ss1 - uc.d)) == s1 + (ss1 + uc.d)) - return signArctan2 (y, z); - return atan2Mp (x, y, pr); + z = t1 + zz; + /* Max ULP is 0.503. */ + return signArctan2 (y, z); } /* (iv) x<0, abs(y)<=abs(x): pi-atan(ax/ay) */ @@ -506,29 +344,12 @@ __ieee754_atan2 (double y, double x) + v * (d9.d + v * (d11.d + v * d13.d))))); ESUB (opi.d, u, t2, cor); t3 = ((opi1.d + cor) - du) - zz; - if ((z = t2 + (t3 - u4.d)) == t2 + (t3 + u4.d)) - return signArctan2 (y, z); - - MUL2 (u, du, u, du, v, vv, t1, t2); - s1 = v * (f11.d + v * (f13.d + v * (f15.d + v * (f17.d + v * f19.d)))); - ADD2 (f9.d, ff9.d, s1, 0, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - MUL2 (u, du, s1, ss1, s2, ss2, t1, t2); - ADD2 (u, du, s2, ss2, s1, ss1, t1, t2); - SUB2 (opi.d, opi1.d, s1, ss1, s2, ss2, t1, t2); - - if ((z = s2 + (ss2 - u8.d)) == s2 + (ss2 + u8.d)) - return signArctan2 (y, z); - return atan2Mp (x, y, pr); + z = t2 + t3; + /* Max ULP is 0.501. */ + return signArctan2 (y, z); } - i = (TWO52 + TWO8 * u) - TWO52; + i = (TWO52 + 256 * u) - TWO52; i -= 16; v = (u - cij[i][0].d) + du; zz = opi1.d - v * (cij[i][2].d @@ -536,86 +357,11 @@ __ieee754_atan2 (double y, double x) + v * (cij[i][4].d + v * (cij[i][5].d + v * cij[i][6].d)))); t1 = opi.d - cij[i][1].d; - if (i < 112) - ua = ua1.d; /* w < 1/2 */ - else - ua = ua2.d; /* w >= 1/2 */ - if ((z = t1 + (zz - ua)) == t1 + (zz + ua)) - return signArctan2 (y, z); - - t1 = u - hij[i][0].d; - - EADD (t1, du, v, vv); - - s1 = v * (hij[i][11].d - + v * (hij[i][12].d - + v * (hij[i][13].d - + v * (hij[i][14].d + v * hij[i][15].d)))); - - ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2); - MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2); - ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2); - SUB2 (opi.d, opi1.d, s2, ss2, s1, ss1, t1, t2); - - if ((z = s1 + (ss1 - uc.d)) == s1 + (ss1 + uc.d)) - return signArctan2 (y, z); - return atan2Mp (x, y, pr); + z = t1 + zz; + /* Max ULP is 0.502. */ + return signArctan2 (y, z); } #ifndef __ieee754_atan2 libm_alias_finite (__ieee754_atan2, __atan2) #endif - -/* Treat the Denormalized case */ -static double -SECTION -normalized (double ax, double ay, double y, double z) -{ - int p; - mp_no mpx, mpy, mpz, mperr, mpz2, mpt1; - p = 6; - __dbl_mp (ax, &mpx, p); - __dbl_mp (ay, &mpy, p); - __dvd (&mpy, &mpx, &mpz, p); - __dbl_mp (ue.d, &mpt1, p); - __mul (&mpz, &mpt1, &mperr, p); - __sub (&mpz, &mperr, &mpz2, p); - __mp_dbl (&mpz2, &z, p); - return signArctan2 (y, z); -} - -/* Stage 3: Perform a multi-Precision computation */ -static double -SECTION -atan2Mp (double x, double y, const int pr[]) -{ - double z1, z2; - int i, p; - mp_no mpx, mpy, mpz, mpz1, mpz2, mperr, mpt1; - for (i = 0; i < MM; i++) - { - p = pr[i]; - __dbl_mp (x, &mpx, p); - __dbl_mp (y, &mpy, p); - __mpatan2 (&mpy, &mpx, &mpz, p); - __dbl_mp (ud[i].d, &mpt1, p); - __mul (&mpz, &mpt1, &mperr, p); - __add (&mpz, &mperr, &mpz1, p); - __sub (&mpz, &mperr, &mpz2, p); - __mp_dbl (&mpz1, &z1, p); - __mp_dbl (&mpz2, &z2, p); - if (z1 == z2) - { - LIBC_PROBE (slowatan2, 4, &p, &x, &y, &z1); - return z1; - } - } - LIBC_PROBE (slowatan2_inexact, 4, &p, &x, &y, &z1); - return z1; /*if impossible to do exact computing */ -} |