about summary refs log tree commit diff
path: root/sysdeps/ieee754/dbl-64
diff options
context:
space:
mode:
authorPaul Zimmermann <Paul.Zimmermann@inria.fr>2021-04-02 08:21:06 +0200
committerPaul Zimmermann <Paul.Zimmermann@inria.fr>2021-04-07 13:23:39 +0200
commit43576de04afc6a0896a3ecc094e1581069a0652a (patch)
tree42b35efc19ae2a9f22c354176d83173f74818268 /sysdeps/ieee754/dbl-64
parentd1a3dcabf2f89233a99a4a9be08f9f407da0b6b4 (diff)
downloadglibc-43576de04afc6a0896a3ecc094e1581069a0652a.tar.gz
glibc-43576de04afc6a0896a3ecc094e1581069a0652a.tar.xz
glibc-43576de04afc6a0896a3ecc094e1581069a0652a.zip
Improve the accuracy of tgamma (BZ #26983)
With this patch, the maximal known error for tgamma is now reduced to 9 ulps
for dbl-64, for all rounding modes. Since exhaustive testing is not possible
for dbl-64, it might be that there are still cases with an error larger than
9 ulps, but all known cases are fixed (intensive tests were done to find cases
with large errors).

Tested on x86_64 and powerpc (and by Adhemerval Zanella on aarch64, arm,
s390x, sparc, and i686).
Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
Diffstat (limited to 'sysdeps/ieee754/dbl-64')
-rw-r--r--sysdeps/ieee754/dbl-64/e_gamma_r.c37
1 files changed, 26 insertions, 11 deletions
diff --git a/sysdeps/ieee754/dbl-64/e_gamma_r.c b/sysdeps/ieee754/dbl-64/e_gamma_r.c
index fe69920c76..eb36a9f464 100644
--- a/sysdeps/ieee754/dbl-64/e_gamma_r.c
+++ b/sysdeps/ieee754/dbl-64/e_gamma_r.c
@@ -24,6 +24,7 @@
 #include <math-underflow.h>
 #include <float.h>
 #include <libm-alias-finite.h>
+#include <mul_split.h>
 
 /* Coefficients B_2k / 2k(2k-1) of x^-(2k-1) inside exp in Stirling's
    approximation to gamma function.  */
@@ -88,7 +89,6 @@ gamma_positive (double x, int *exp2_adj)
 	 Compute gamma (X_ADJ + X_EPS) using Stirling's approximation,
 	 starting by computing pow (X_ADJ, X_ADJ) with a power of 2
 	 factored out.  */
-      double exp_adj = -eps;
       double x_adj_int = round (x_adj);
       double x_adj_frac = x_adj - x_adj_int;
       int x_adj_log2;
@@ -99,18 +99,22 @@ gamma_positive (double x, int *exp2_adj)
 	  x_adj_mant *= 2.0;
 	}
       *exp2_adj = x_adj_log2 * (int) x_adj_int;
-      double ret = (__ieee754_pow (x_adj_mant, x_adj)
-		    * __ieee754_exp2 (x_adj_log2 * x_adj_frac)
-		    * __ieee754_exp (-x_adj)
-		    * sqrt (2 * M_PI / x_adj)
-		    / prod);
-      exp_adj += x_eps * __ieee754_log (x_adj);
+      double h1, l1, h2, l2;
+      mul_split (&h1, &l1, __ieee754_pow (x_adj_mant, x_adj),
+			   __ieee754_exp2 (x_adj_log2 * x_adj_frac));
+      mul_split (&h2, &l2, __ieee754_exp (-x_adj), sqrt (2 * M_PI / x_adj));
+      mul_expansion (&h1, &l1, h1, l1, h2, l2);
+      /* Divide by prod * (1 + eps).  */
+      div_expansion (&h1, &l1, h1, l1, prod, prod * eps);
+      double exp_adj = x_eps * __ieee754_log (x_adj);
       double bsum = gamma_coeff[NCOEFF - 1];
       double x_adj2 = x_adj * x_adj;
       for (size_t i = 1; i <= NCOEFF - 1; i++)
 	bsum = bsum / x_adj2 + gamma_coeff[NCOEFF - 1 - i];
       exp_adj += bsum / x_adj;
-      return ret + ret * __expm1 (exp_adj);
+      /* Now return (h1+l1) * exp(exp_adj), where exp_adj is small.  */
+      l1 += h1 * __expm1 (exp_adj);
+      return h1 + l1;
     }
 }
 
@@ -188,9 +192,20 @@ __ieee754_gamma_r (double x, int *signgamp)
 			       ? __sin (M_PI * frac)
 			       : __cos (M_PI * (0.5 - frac)));
 	      int exp2_adj;
-	      double tret = M_PI / (-x * sinpix
-				    * gamma_positive (-x, &exp2_adj));
-	      ret = __scalbn (tret, -exp2_adj);
+	      double h1, l1, h2, l2;
+	      h2 = gamma_positive (-x, &exp2_adj);
+	      mul_split (&h1, &l1, sinpix, h2);
+	      /* sinpix*gamma_positive(.) = h1 + l1 */
+	      mul_split (&h2, &l2, h1, x);
+	      /* h1*x = h2 + l2 */
+	      /* (h1 + l1) * x = h1*x + l1*x = h2 + l2 + l1*x */
+	      l2 += l1 * x;
+	      /* x*sinpix*gamma_positive(.) ~ h2 + l2 */
+	      h1 = 0x3.243f6a8885a3p+0;   /* binary64 approximation of Pi */
+	      l1 = 0x8.d313198a2e038p-56; /* |h1+l1-Pi| < 3e-33 */
+	      /* Now we divide h1 + l1 by h2 + l2.  */
+	      div_expansion (&h1, &l1, h1, l1, h2, l2);
+	      ret = __scalbn (-h1, -exp2_adj);
 	      math_check_force_underflow_nonneg (ret);
 	    }
 	}