From e02920bc029019443326eecaa7b267b78ff2892e Mon Sep 17 00:00:00 2001
From: Joseph Myers <joseph@codesourcery.com>
Date: Mon, 29 Jun 2015 23:29:35 +0000
Subject: Improve tgamma accuracy (bug 18613).

In non-default rounding modes, tgamma can be slightly less accurate
than permitted by glibc's accuracy goals.

Part of the problem is error accumulation, addressed in this patch by
setting round-to-nearest for internal computations.  However, there
was also a bug in the code dealing with computing pow (x + n, x + n)
where x + n is not exactly representable, providing another source of
error even in round-to-nearest mode; it was necessary to address both
bugs to get errors for all testcases within glibc's accuracy goals.
Given this second fix, accuracy in round-to-nearest mode is also
improved (hence regeneration of ulps for tgamma should be from scratch
- truncate libm-test-ulps or at least remove existing tgamma entries -
so that the expected ulps can be reduced).

Some additional complications also arose.  Certain tgamma tests should
strictly, according to IEEE semantics, overflow or not depending on
the rounding mode; this is beyond the scope of glibc's accuracy goals
for any function without exactly-determined results, but
gen-auto-libm-tests doesn't handle being lax there as it does for
underflow.  (libm-test.inc also doesn't handle being lax about whether
the result in cases very close to the overflow threshold is infinity
or a finite value close to overflow, but that doesn't cause problems
in this case, though I've seen it cause problems with random test
generation for some functions.)  Thus, spurious-overflow markings,
with a comment, are added to auto-libm-test-in (no bug in Bugzilla
because the issue is with the testsuite, not a user-visible bug in
glibc).  And on x86, after the patch I saw ERANGE issues as previously
reported by Carlos (see my commentary in
<https://sourceware.org/ml/libc-alpha/2015-01/msg00485.html>), which
needed addressing by ensuring excess range and precision were
eliminated at various points if FLT_EVAL_METHOD != 0.

I also noticed and fixed a cosmetic issue where 1.0f was used in long
double functions and should have been 1.0L.

This completes the move of all functions to testing in all rounding
modes with ALL_RM_TEST, so gen-libm-have-vector-test.sh is updated to
remove the workaround for some functions not using ALL_RM_TEST.

Tested for x86_64, x86, mips64 and powerpc.

	[BZ #18613]
	* sysdeps/ieee754/dbl-64/e_gamma_r.c (gamma_positive): Take log of
	X_ADJ not X when adjusting exponent.
	(__ieee754_gamma_r): Do intermediate computations in
	round-to-nearest then adjust overflowing and underflowing results
	as needed.
	* sysdeps/ieee754/flt-32/e_gammaf_r.c (gammaf_positive): Take log
	of X_ADJ not X when adjusting exponent.
	(__ieee754_gammaf_r): Do intermediate computations in
	round-to-nearest then adjust overflowing and underflowing results
	as needed.
	* sysdeps/ieee754/ldbl-128/e_gammal_r.c (gammal_positive): Take
	log of X_ADJ not X when adjusting exponent.
	(__ieee754_gammal_r): Do intermediate computations in
	round-to-nearest then adjust overflowing and underflowing results
	as needed.  Use 1.0L not 1.0f as numerator of division.
	* sysdeps/ieee754/ldbl-128ibm/e_gammal_r.c (gammal_positive): Take
	log of X_ADJ not X when adjusting exponent.
	(__ieee754_gammal_r): Do intermediate computations in
	round-to-nearest then adjust overflowing and underflowing results
	as needed.  Use 1.0L not 1.0f as numerator of division.
	* sysdeps/ieee754/ldbl-96/e_gammal_r.c (gammal_positive): Take log
	of X_ADJ not X when adjusting exponent.
	(__ieee754_gammal_r): Do intermediate computations in
	round-to-nearest then adjust overflowing and underflowing results
	as needed.  Use 1.0L not 1.0f as numerator of division.
	* math/libm-test.inc (tgamma_test_data): Remove one test.  Moved
	to auto-libm-test-in.
	(tgamma_test): Use ALL_RM_TEST.
	* math/auto-libm-test-in: Add one test of tgamma.  Mark some other
	tests of tgamma with spurious-overflow.
	* math/auto-libm-test-out: Regenerated.
	* math/gen-libm-have-vector-test.sh: Do not check for START.
	* sysdeps/i386/fpu/libm-test-ulps: Update.
	* sysdeps/x86_64/fpu/libm-test-ulps: Likewise.
---
 sysdeps/ieee754/dbl-64/e_gamma_r.c | 87 +++++++++++++++++++++++++++-----------
 1 file changed, 62 insertions(+), 25 deletions(-)

(limited to 'sysdeps/ieee754/dbl-64')

diff --git a/sysdeps/ieee754/dbl-64/e_gamma_r.c b/sysdeps/ieee754/dbl-64/e_gamma_r.c
index 7a2af31c46..adeb61a248 100644
--- a/sysdeps/ieee754/dbl-64/e_gamma_r.c
+++ b/sysdeps/ieee754/dbl-64/e_gamma_r.c
@@ -104,7 +104,7 @@ gamma_positive (double x, int *exp2_adj)
 		    * __ieee754_exp (-x_adj)
 		    * __ieee754_sqrt (2 * M_PI / x_adj)
 		    / prod);
-      exp_adj += x_eps * __ieee754_log (x);
+      exp_adj += x_eps * __ieee754_log (x_adj);
       double bsum = gamma_coeff[NCOEFF - 1];
       double x_adj2 = x_adj * x_adj;
       for (size_t i = 1; i <= NCOEFF - 1; i++)
@@ -119,6 +119,10 @@ __ieee754_gamma_r (double x, int *signgamp)
 {
   int32_t hx;
   u_int32_t lx;
+#if FLT_EVAL_METHOD != 0
+  volatile
+#endif
+  double ret;
 
   EXTRACT_WORDS (hx, lx, x);
 
@@ -153,36 +157,69 @@ __ieee754_gamma_r (double x, int *signgamp)
     {
       /* Overflow.  */
       *signgamp = 0;
-      return DBL_MAX * DBL_MAX;
+      ret = DBL_MAX * DBL_MAX;
+      return ret;
     }
-  else if (x > 0.0)
+  else
     {
-      *signgamp = 0;
-      int exp2_adj;
-      double ret = gamma_positive (x, &exp2_adj);
-      return __scalbn (ret, exp2_adj);
+      SET_RESTORE_ROUND (FE_TONEAREST);
+      if (x > 0.0)
+	{
+	  *signgamp = 0;
+	  int exp2_adj;
+	  double tret = gamma_positive (x, &exp2_adj);
+	  ret = __scalbn (tret, exp2_adj);
+	}
+      else if (x >= -DBL_EPSILON / 4.0)
+	{
+	  *signgamp = 0;
+	  ret = 1.0 / x;
+	}
+      else
+	{
+	  double tx = __trunc (x);
+	  *signgamp = (tx == 2.0 * __trunc (tx / 2.0)) ? -1 : 1;
+	  if (x <= -184.0)
+	    /* Underflow.  */
+	    ret = DBL_MIN * DBL_MIN;
+	  else
+	    {
+	      double frac = tx - x;
+	      if (frac > 0.5)
+		frac = 1.0 - frac;
+	      double sinpix = (frac <= 0.25
+			       ? __sin (M_PI * frac)
+			       : __cos (M_PI * (0.5 - frac)));
+	      int exp2_adj;
+	      double tret = M_PI / (-x * sinpix
+				    * gamma_positive (-x, &exp2_adj));
+	      ret = __scalbn (tret, -exp2_adj);
+	    }
+	}
     }
-  else if (x >= -DBL_EPSILON / 4.0)
+  if (isinf (ret) && x != 0)
     {
-      *signgamp = 0;
-      return 1.0 / x;
+      if (*signgamp < 0)
+	{
+	  ret = -__copysign (DBL_MAX, ret) * DBL_MAX;
+	  ret = -ret;
+	}
+      else
+	ret = __copysign (DBL_MAX, ret) * DBL_MAX;
+      return ret;
     }
-  else
+  else if (ret == 0)
     {
-      double tx = __trunc (x);
-      *signgamp = (tx == 2.0 * __trunc (tx / 2.0)) ? -1 : 1;
-      if (x <= -184.0)
-	/* Underflow.  */
-	return DBL_MIN * DBL_MIN;
-      double frac = tx - x;
-      if (frac > 0.5)
-	frac = 1.0 - frac;
-      double sinpix = (frac <= 0.25
-		       ? __sin (M_PI * frac)
-		       : __cos (M_PI * (0.5 - frac)));
-      int exp2_adj;
-      double ret = M_PI / (-x * sinpix * gamma_positive (-x, &exp2_adj));
-      return __scalbn (ret, -exp2_adj);
+      if (*signgamp < 0)
+	{
+	  ret = -__copysign (DBL_MIN, ret) * DBL_MIN;
+	  ret = -ret;
+	}
+      else
+	ret = __copysign (DBL_MIN, ret) * DBL_MIN;
+      return ret;
     }
+  else
+    return ret;
 }
 strong_alias (__ieee754_gamma_r, __gamma_r_finite)
-- 
cgit 1.4.1