diff options
author | Joseph Myers <joseph@codesourcery.com> | 2015-06-29 23:29:35 +0000 |
---|---|---|
committer | Joseph Myers <joseph@codesourcery.com> | 2015-06-29 23:29:35 +0000 |
commit | e02920bc029019443326eecaa7b267b78ff2892e (patch) | |
tree | ed2c714fa65b145dc0a8eed3c3ac1d090d5ca525 /sysdeps/ieee754 | |
parent | 4aa10d01b654b7395c6e856b8e1e0959cfc80739 (diff) | |
download | glibc-e02920bc029019443326eecaa7b267b78ff2892e.tar.gz glibc-e02920bc029019443326eecaa7b267b78ff2892e.tar.xz glibc-e02920bc029019443326eecaa7b267b78ff2892e.zip |
Improve tgamma accuracy (bug 18613).
In non-default rounding modes, tgamma can be slightly less accurate than permitted by glibc's accuracy goals. Part of the problem is error accumulation, addressed in this patch by setting round-to-nearest for internal computations. However, there was also a bug in the code dealing with computing pow (x + n, x + n) where x + n is not exactly representable, providing another source of error even in round-to-nearest mode; it was necessary to address both bugs to get errors for all testcases within glibc's accuracy goals. Given this second fix, accuracy in round-to-nearest mode is also improved (hence regeneration of ulps for tgamma should be from scratch - truncate libm-test-ulps or at least remove existing tgamma entries - so that the expected ulps can be reduced). Some additional complications also arose. Certain tgamma tests should strictly, according to IEEE semantics, overflow or not depending on the rounding mode; this is beyond the scope of glibc's accuracy goals for any function without exactly-determined results, but gen-auto-libm-tests doesn't handle being lax there as it does for underflow. (libm-test.inc also doesn't handle being lax about whether the result in cases very close to the overflow threshold is infinity or a finite value close to overflow, but that doesn't cause problems in this case though I've seen it cause problems with random test generation for some functions.) Thus, spurious-overflow markings, with a comment, are added to auto-libm-test-in (no bug in Bugzilla because the issue is with the testsuite, not a user-visible bug in glibc). And on x86, after the patch I saw ERANGE issues as previously reported by Carlos (see my commentary in <https://sourceware.org/ml/libc-alpha/2015-01/msg00485.html>), which needed addressing by ensuring excess range and precision were eliminated at various points if FLT_EVAL_METHOD != 0. I also noticed and fixed a cosmetic issue where 1.0f was used in long double functions and should have been 1.0L. 
This completes the move of all functions to testing in all rounding modes with ALL_RM_TEST, so gen-libm-have-vector-test.sh is updated to remove the workaround for some functions not using ALL_RM_TEST.

Tested for x86_64, x86, mips64 and powerpc.

[BZ #18613]
* sysdeps/ieee754/dbl-64/e_gamma_r.c (gamma_positive): Take log of X_ADJ not X when adjusting exponent.
(__ieee754_gamma_r): Do intermediate computations in round-to-nearest then adjust overflowing and underflowing results as needed.
* sysdeps/ieee754/flt-32/e_gammaf_r.c (gammaf_positive): Take log of X_ADJ not X when adjusting exponent.
(__ieee754_gammaf_r): Do intermediate computations in round-to-nearest then adjust overflowing and underflowing results as needed.
* sysdeps/ieee754/ldbl-128/e_gammal_r.c (gammal_positive): Take log of X_ADJ not X when adjusting exponent.
(__ieee754_gammal_r): Do intermediate computations in round-to-nearest then adjust overflowing and underflowing results as needed. Use 1.0L not 1.0f as numerator of division.
* sysdeps/ieee754/ldbl-128ibm/e_gammal_r.c (gammal_positive): Take log of X_ADJ not X when adjusting exponent.
(__ieee754_gammal_r): Do intermediate computations in round-to-nearest then adjust overflowing and underflowing results as needed. Use 1.0L not 1.0f as numerator of division.
* sysdeps/ieee754/ldbl-96/e_gammal_r.c (gammal_positive): Take log of X_ADJ not X when adjusting exponent.
(__ieee754_gammal_r): Do intermediate computations in round-to-nearest then adjust overflowing and underflowing results as needed. Use 1.0L not 1.0f as numerator of division.
* math/libm-test.inc (tgamma_test_data): Remove one test. Moved to auto-libm-test-in.
(tgamma_test): Use ALL_RM_TEST.
* math/auto-libm-test-in: Add one test of tgamma. Mark some other tests of tgamma with spurious-overflow.
* math/auto-libm-test-out: Regenerated.
* math/gen-libm-have-vector-test.sh: Do not check for START.
* sysdeps/i386/fpu/libm-test-ulps: Update.
* sysdeps/x86_64/fpu/libm-test-ulps: Likewise.
Diffstat (limited to 'sysdeps/ieee754')
-rw-r--r-- | sysdeps/ieee754/dbl-64/e_gamma_r.c | 87 | ||||
-rw-r--r-- | sysdeps/ieee754/flt-32/e_gammaf_r.c | 88 | ||||
-rw-r--r-- | sysdeps/ieee754/ldbl-128/e_gammal_r.c | 74 | ||||
-rw-r--r-- | sysdeps/ieee754/ldbl-128ibm/e_gammal_r.c | 74 | ||||
-rw-r--r-- | sysdeps/ieee754/ldbl-96/e_gammal_r.c | 74 |
5 files changed, 271 insertions, 126 deletions
diff --git a/sysdeps/ieee754/dbl-64/e_gamma_r.c b/sysdeps/ieee754/dbl-64/e_gamma_r.c index 7a2af31c46..adeb61a248 100644 --- a/sysdeps/ieee754/dbl-64/e_gamma_r.c +++ b/sysdeps/ieee754/dbl-64/e_gamma_r.c @@ -104,7 +104,7 @@ gamma_positive (double x, int *exp2_adj) * __ieee754_exp (-x_adj) * __ieee754_sqrt (2 * M_PI / x_adj) / prod); - exp_adj += x_eps * __ieee754_log (x); + exp_adj += x_eps * __ieee754_log (x_adj); double bsum = gamma_coeff[NCOEFF - 1]; double x_adj2 = x_adj * x_adj; for (size_t i = 1; i <= NCOEFF - 1; i++) @@ -119,6 +119,10 @@ __ieee754_gamma_r (double x, int *signgamp) { int32_t hx; u_int32_t lx; +#if FLT_EVAL_METHOD != 0 + volatile +#endif + double ret; EXTRACT_WORDS (hx, lx, x); @@ -153,36 +157,69 @@ __ieee754_gamma_r (double x, int *signgamp) { /* Overflow. */ *signgamp = 0; - return DBL_MAX * DBL_MAX; + ret = DBL_MAX * DBL_MAX; + return ret; } - else if (x > 0.0) + else { - *signgamp = 0; - int exp2_adj; - double ret = gamma_positive (x, &exp2_adj); - return __scalbn (ret, exp2_adj); + SET_RESTORE_ROUND (FE_TONEAREST); + if (x > 0.0) + { + *signgamp = 0; + int exp2_adj; + double tret = gamma_positive (x, &exp2_adj); + ret = __scalbn (tret, exp2_adj); + } + else if (x >= -DBL_EPSILON / 4.0) + { + *signgamp = 0; + ret = 1.0 / x; + } + else + { + double tx = __trunc (x); + *signgamp = (tx == 2.0 * __trunc (tx / 2.0)) ? -1 : 1; + if (x <= -184.0) + /* Underflow. */ + ret = DBL_MIN * DBL_MIN; + else + { + double frac = tx - x; + if (frac > 0.5) + frac = 1.0 - frac; + double sinpix = (frac <= 0.25 + ? 
__sin (M_PI * frac) + : __cos (M_PI * (0.5 - frac))); + int exp2_adj; + double tret = M_PI / (-x * sinpix + * gamma_positive (-x, &exp2_adj)); + ret = __scalbn (tret, -exp2_adj); + } + } } - else if (x >= -DBL_EPSILON / 4.0) + if (isinf (ret) && x != 0) { - *signgamp = 0; - return 1.0 / x; + if (*signgamp < 0) + { + ret = -__copysign (DBL_MAX, ret) * DBL_MAX; + ret = -ret; + } + else + ret = __copysign (DBL_MAX, ret) * DBL_MAX; + return ret; } - else + else if (ret == 0) { - double tx = __trunc (x); - *signgamp = (tx == 2.0 * __trunc (tx / 2.0)) ? -1 : 1; - if (x <= -184.0) - /* Underflow. */ - return DBL_MIN * DBL_MIN; - double frac = tx - x; - if (frac > 0.5) - frac = 1.0 - frac; - double sinpix = (frac <= 0.25 - ? __sin (M_PI * frac) - : __cos (M_PI * (0.5 - frac))); - int exp2_adj; - double ret = M_PI / (-x * sinpix * gamma_positive (-x, &exp2_adj)); - return __scalbn (ret, -exp2_adj); + if (*signgamp < 0) + { + ret = -__copysign (DBL_MIN, ret) * DBL_MIN; + ret = -ret; + } + else + ret = __copysign (DBL_MIN, ret) * DBL_MIN; + return ret; } + else + return ret; } strong_alias (__ieee754_gamma_r, __gamma_r_finite) diff --git a/sysdeps/ieee754/flt-32/e_gammaf_r.c b/sysdeps/ieee754/flt-32/e_gammaf_r.c index ff67eca7d3..29fe8b46c2 100644 --- a/sysdeps/ieee754/flt-32/e_gammaf_r.c +++ b/sysdeps/ieee754/flt-32/e_gammaf_r.c @@ -97,7 +97,7 @@ gammaf_positive (float x, int *exp2_adj) * __ieee754_expf (-x_adj) * __ieee754_sqrtf (2 * (float) M_PI / x_adj) / prod); - exp_adj += x_eps * __ieee754_logf (x); + exp_adj += x_eps * __ieee754_logf (x_adj); float bsum = gamma_coeff[NCOEFF - 1]; float x_adj2 = x_adj * x_adj; for (size_t i = 1; i <= NCOEFF - 1; i++) @@ -111,6 +111,10 @@ float __ieee754_gammaf_r (float x, int *signgamp) { int32_t hx; +#if FLT_EVAL_METHOD != 0 + volatile +#endif + float ret; GET_FLOAT_WORD (hx, x); @@ -145,37 +149,69 @@ __ieee754_gammaf_r (float x, int *signgamp) { /* Overflow. 
*/ *signgamp = 0; - return FLT_MAX * FLT_MAX; + ret = FLT_MAX * FLT_MAX; + return ret; } - else if (x > 0.0f) + else { - *signgamp = 0; - int exp2_adj; - float ret = gammaf_positive (x, &exp2_adj); - return __scalbnf (ret, exp2_adj); + SET_RESTORE_ROUNDF (FE_TONEAREST); + if (x > 0.0f) + { + *signgamp = 0; + int exp2_adj; + float tret = gammaf_positive (x, &exp2_adj); + ret = __scalbnf (tret, exp2_adj); + } + else if (x >= -FLT_EPSILON / 4.0f) + { + *signgamp = 0; + ret = 1.0f / x; + } + else + { + float tx = __truncf (x); + *signgamp = (tx == 2.0f * __truncf (tx / 2.0f)) ? -1 : 1; + if (x <= -42.0f) + /* Underflow. */ + ret = FLT_MIN * FLT_MIN; + else + { + float frac = tx - x; + if (frac > 0.5f) + frac = 1.0f - frac; + float sinpix = (frac <= 0.25f + ? __sinf ((float) M_PI * frac) + : __cosf ((float) M_PI * (0.5f - frac))); + int exp2_adj; + float tret = (float) M_PI / (-x * sinpix + * gammaf_positive (-x, &exp2_adj)); + ret = __scalbnf (tret, -exp2_adj); + } + } } - else if (x >= -FLT_EPSILON / 4.0f) + if (isinf (ret) && x != 0) { - *signgamp = 0; - return 1.0f / x; + if (*signgamp < 0) + { + ret = -__copysignf (FLT_MAX, ret) * FLT_MAX; + ret = -ret; + } + else + ret = __copysignf (FLT_MAX, ret) * FLT_MAX; + return ret; } - else + else if (ret == 0) { - float tx = __truncf (x); - *signgamp = (tx == 2.0f * __truncf (tx / 2.0f)) ? -1 : 1; - if (x <= -42.0f) - /* Underflow. */ - return FLT_MIN * FLT_MIN; - float frac = tx - x; - if (frac > 0.5f) - frac = 1.0f - frac; - float sinpix = (frac <= 0.25f - ? 
__sinf ((float) M_PI * frac) - : __cosf ((float) M_PI * (0.5f - frac))); - int exp2_adj; - float ret = (float) M_PI / (-x * sinpix - * gammaf_positive (-x, &exp2_adj)); - return __scalbnf (ret, -exp2_adj); + if (*signgamp < 0) + { + ret = -__copysignf (FLT_MIN, ret) * FLT_MIN; + ret = -ret; + } + else + ret = __copysignf (FLT_MIN, ret) * FLT_MIN; + return ret; } + else + return ret; } strong_alias (__ieee754_gammaf_r, __gammaf_r_finite) diff --git a/sysdeps/ieee754/ldbl-128/e_gammal_r.c b/sysdeps/ieee754/ldbl-128/e_gammal_r.c index 39c0638ac3..c51b050e0e 100644 --- a/sysdeps/ieee754/ldbl-128/e_gammal_r.c +++ b/sysdeps/ieee754/ldbl-128/e_gammal_r.c @@ -109,7 +109,7 @@ gammal_positive (long double x, int *exp2_adj) * __ieee754_expl (-x_adj) * __ieee754_sqrtl (2 * M_PIl / x_adj) / prod); - exp_adj += x_eps * __ieee754_logl (x); + exp_adj += x_eps * __ieee754_logl (x_adj); long double bsum = gamma_coeff[NCOEFF - 1]; long double x_adj2 = x_adj * x_adj; for (size_t i = 1; i <= NCOEFF - 1; i++) @@ -124,6 +124,7 @@ __ieee754_gammal_r (long double x, int *signgamp) { int64_t hx; u_int64_t lx; + long double ret; GET_LDOUBLE_WORDS64 (hx, lx, x); @@ -159,35 +160,58 @@ __ieee754_gammal_r (long double x, int *signgamp) *signgamp = 0; return LDBL_MAX * LDBL_MAX; } - else if (x > 0.0L) + else { - *signgamp = 0; - int exp2_adj; - long double ret = gammal_positive (x, &exp2_adj); - return __scalbnl (ret, exp2_adj); + SET_RESTORE_ROUNDL (FE_TONEAREST); + if (x > 0.0L) + { + *signgamp = 0; + int exp2_adj; + ret = gammal_positive (x, &exp2_adj); + ret = __scalbnl (ret, exp2_adj); + } + else if (x >= -LDBL_EPSILON / 4.0L) + { + *signgamp = 0; + ret = 1.0L / x; + } + else + { + long double tx = __truncl (x); + *signgamp = (tx == 2.0L * __truncl (tx / 2.0L)) ? -1 : 1; + if (x <= -1775.0L) + /* Underflow. */ + ret = LDBL_MIN * LDBL_MIN; + else + { + long double frac = tx - x; + if (frac > 0.5L) + frac = 1.0L - frac; + long double sinpix = (frac <= 0.25L + ? 
__sinl (M_PIl * frac) + : __cosl (M_PIl * (0.5L - frac))); + int exp2_adj; + ret = M_PIl / (-x * sinpix + * gammal_positive (-x, &exp2_adj)); + ret = __scalbnl (ret, -exp2_adj); + } + } } - else if (x >= -LDBL_EPSILON / 4.0L) + if (isinf (ret) && x != 0) { - *signgamp = 0; - return 1.0f / x; + if (*signgamp < 0) + return -(-__copysignl (LDBL_MAX, ret) * LDBL_MAX); + else + return __copysignl (LDBL_MAX, ret) * LDBL_MAX; } - else + else if (ret == 0) { - long double tx = __truncl (x); - *signgamp = (tx == 2.0L * __truncl (tx / 2.0L)) ? -1 : 1; - if (x <= -1775.0L) - /* Underflow. */ - return LDBL_MIN * LDBL_MIN; - long double frac = tx - x; - if (frac > 0.5L) - frac = 1.0L - frac; - long double sinpix = (frac <= 0.25L - ? __sinl (M_PIl * frac) - : __cosl (M_PIl * (0.5L - frac))); - int exp2_adj; - long double ret = M_PIl / (-x * sinpix - * gammal_positive (-x, &exp2_adj)); - return __scalbnl (ret, -exp2_adj); + if (*signgamp < 0) + return -(-__copysignl (LDBL_MIN, ret) * LDBL_MIN); + else + return __copysignl (LDBL_MIN, ret) * LDBL_MIN; } + else + return ret; } strong_alias (__ieee754_gammal_r, __gammal_r_finite) diff --git a/sysdeps/ieee754/ldbl-128ibm/e_gammal_r.c b/sysdeps/ieee754/ldbl-128ibm/e_gammal_r.c index 1322bb0ea3..48098c18f6 100644 --- a/sysdeps/ieee754/ldbl-128ibm/e_gammal_r.c +++ b/sysdeps/ieee754/ldbl-128ibm/e_gammal_r.c @@ -108,7 +108,7 @@ gammal_positive (long double x, int *exp2_adj) * __ieee754_expl (-x_adj) * __ieee754_sqrtl (2 * M_PIl / x_adj) / prod); - exp_adj += x_eps * __ieee754_logl (x); + exp_adj += x_eps * __ieee754_logl (x_adj); long double bsum = gamma_coeff[NCOEFF - 1]; long double x_adj2 = x_adj * x_adj; for (size_t i = 1; i <= NCOEFF - 1; i++) @@ -123,6 +123,7 @@ __ieee754_gammal_r (long double x, int *signgamp) { int64_t hx; double xhi; + long double ret; xhi = ldbl_high (x); EXTRACT_WORDS64 (hx, xhi); @@ -159,35 +160,58 @@ __ieee754_gammal_r (long double x, int *signgamp) *signgamp = 0; return LDBL_MAX * LDBL_MAX; } - else if (x > 
0.0L) + else { - *signgamp = 0; - int exp2_adj; - long double ret = gammal_positive (x, &exp2_adj); - return __scalbnl (ret, exp2_adj); + SET_RESTORE_ROUNDL (FE_TONEAREST); + if (x > 0.0L) + { + *signgamp = 0; + int exp2_adj; + ret = gammal_positive (x, &exp2_adj); + ret = __scalbnl (ret, exp2_adj); + } + else if (x >= -0x1p-110L) + { + *signgamp = 0; + ret = 1.0L / x; + } + else + { + long double tx = __truncl (x); + *signgamp = (tx == 2.0L * __truncl (tx / 2.0L)) ? -1 : 1; + if (x <= -191.0L) + /* Underflow. */ + ret = LDBL_MIN * LDBL_MIN; + else + { + long double frac = tx - x; + if (frac > 0.5L) + frac = 1.0L - frac; + long double sinpix = (frac <= 0.25L + ? __sinl (M_PIl * frac) + : __cosl (M_PIl * (0.5L - frac))); + int exp2_adj; + ret = M_PIl / (-x * sinpix + * gammal_positive (-x, &exp2_adj)); + ret = __scalbnl (ret, -exp2_adj); + } + } } - else if (x >= -0x1p-110L) + if (isinf (ret) && x != 0) { - *signgamp = 0; - return 1.0f / x; + if (*signgamp < 0) + return -(-__copysignl (LDBL_MAX, ret) * LDBL_MAX); + else + return __copysignl (LDBL_MAX, ret) * LDBL_MAX; } - else + else if (ret == 0) { - long double tx = __truncl (x); - *signgamp = (tx == 2.0L * __truncl (tx / 2.0L)) ? -1 : 1; - if (x <= -191.0L) - /* Underflow. */ - return LDBL_MIN * LDBL_MIN; - long double frac = tx - x; - if (frac > 0.5L) - frac = 1.0L - frac; - long double sinpix = (frac <= 0.25L - ? 
__sinl (M_PIl * frac) - : __cosl (M_PIl * (0.5L - frac))); - int exp2_adj; - long double ret = M_PIl / (-x * sinpix - * gammal_positive (-x, &exp2_adj)); - return __scalbnl (ret, -exp2_adj); + if (*signgamp < 0) + return -(-__copysignl (LDBL_MIN, ret) * LDBL_MIN); + else + return __copysignl (LDBL_MIN, ret) * LDBL_MIN; } + else + return ret; } strong_alias (__ieee754_gammal_r, __gammal_r_finite) diff --git a/sysdeps/ieee754/ldbl-96/e_gammal_r.c b/sysdeps/ieee754/ldbl-96/e_gammal_r.c index 800522b7c8..9da5db33f0 100644 --- a/sysdeps/ieee754/ldbl-96/e_gammal_r.c +++ b/sysdeps/ieee754/ldbl-96/e_gammal_r.c @@ -102,7 +102,7 @@ gammal_positive (long double x, int *exp2_adj) * __ieee754_expl (-x_adj) * __ieee754_sqrtl (2 * M_PIl / x_adj) / prod); - exp_adj += x_eps * __ieee754_logl (x); + exp_adj += x_eps * __ieee754_logl (x_adj); long double bsum = gamma_coeff[NCOEFF - 1]; long double x_adj2 = x_adj * x_adj; for (size_t i = 1; i <= NCOEFF - 1; i++) @@ -116,6 +116,7 @@ long double __ieee754_gammal_r (long double x, int *signgamp) { u_int32_t es, hx, lx; + long double ret; GET_LDOUBLE_WORDS (es, hx, lx, x); @@ -151,35 +152,58 @@ __ieee754_gammal_r (long double x, int *signgamp) *signgamp = 0; return LDBL_MAX * LDBL_MAX; } - else if (x > 0.0L) + else { - *signgamp = 0; - int exp2_adj; - long double ret = gammal_positive (x, &exp2_adj); - return __scalbnl (ret, exp2_adj); + SET_RESTORE_ROUNDL (FE_TONEAREST); + if (x > 0.0L) + { + *signgamp = 0; + int exp2_adj; + ret = gammal_positive (x, &exp2_adj); + ret = __scalbnl (ret, exp2_adj); + } + else if (x >= -LDBL_EPSILON / 4.0L) + { + *signgamp = 0; + ret = 1.0L / x; + } + else + { + long double tx = __truncl (x); + *signgamp = (tx == 2.0L * __truncl (tx / 2.0L)) ? -1 : 1; + if (x <= -1766.0L) + /* Underflow. */ + ret = LDBL_MIN * LDBL_MIN; + else + { + long double frac = tx - x; + if (frac > 0.5L) + frac = 1.0L - frac; + long double sinpix = (frac <= 0.25L + ? 
__sinl (M_PIl * frac) + : __cosl (M_PIl * (0.5L - frac))); + int exp2_adj; + ret = M_PIl / (-x * sinpix + * gammal_positive (-x, &exp2_adj)); + ret = __scalbnl (ret, -exp2_adj); + } + } } - else if (x >= -LDBL_EPSILON / 4.0L) + if (isinf (ret) && x != 0) { - *signgamp = 0; - return 1.0f / x; + if (*signgamp < 0) + return -(-__copysignl (LDBL_MAX, ret) * LDBL_MAX); + else + return __copysignl (LDBL_MAX, ret) * LDBL_MAX; } - else + else if (ret == 0) { - long double tx = __truncl (x); - *signgamp = (tx == 2.0L * __truncl (tx / 2.0L)) ? -1 : 1; - if (x <= -1766.0L) - /* Underflow. */ - return LDBL_MIN * LDBL_MIN; - long double frac = tx - x; - if (frac > 0.5L) - frac = 1.0L - frac; - long double sinpix = (frac <= 0.25L - ? __sinl (M_PIl * frac) - : __cosl (M_PIl * (0.5L - frac))); - int exp2_adj; - long double ret = M_PIl / (-x * sinpix - * gammal_positive (-x, &exp2_adj)); - return __scalbnl (ret, -exp2_adj); + if (*signgamp < 0) + return -(-__copysignl (LDBL_MIN, ret) * LDBL_MIN); + else + return __copysignl (LDBL_MIN, ret) * LDBL_MIN; } + else + return ret; } strong_alias (__ieee754_gammal_r, __gammal_r_finite) |