diff options
Diffstat (limited to 'sysdeps')
-rw-r--r-- | sysdeps/generic/math_private.h | 16 | ||||
-rw-r--r-- | sysdeps/i386/fpu/libm-test-ulps | 96 | ||||
-rw-r--r-- | sysdeps/ieee754/dbl-64/e_lgamma_r.c | 2 | ||||
-rw-r--r-- | sysdeps/ieee754/dbl-64/lgamma_neg.c | 399 | ||||
-rw-r--r-- | sysdeps/ieee754/dbl-64/lgamma_product.c | 82 | ||||
-rw-r--r-- | sysdeps/ieee754/flt-32/e_lgammaf_r.c | 3 | ||||
-rw-r--r-- | sysdeps/ieee754/flt-32/lgamma_negf.c | 288 | ||||
-rw-r--r-- | sysdeps/ieee754/flt-32/lgamma_productf.c | 1 | ||||
-rw-r--r-- | sysdeps/ieee754/ldbl-128/e_lgammal_r.c | 2 | ||||
-rw-r--r-- | sysdeps/ieee754/ldbl-128/lgamma_negl.c | 551 | ||||
-rw-r--r-- | sysdeps/ieee754/ldbl-128/lgamma_productl.c | 82 | ||||
-rw-r--r-- | sysdeps/ieee754/ldbl-128ibm/lgamma_negl.c | 532 | ||||
-rw-r--r-- | sysdeps/ieee754/ldbl-128ibm/lgamma_productl.c | 38 | ||||
-rw-r--r-- | sysdeps/ieee754/ldbl-96/e_lgammal_r.c | 2 | ||||
-rw-r--r-- | sysdeps/ieee754/ldbl-96/lgamma_negl.c | 418 | ||||
-rw-r--r-- | sysdeps/ieee754/ldbl-96/lgamma_product.c | 37 | ||||
-rw-r--r-- | sysdeps/ieee754/ldbl-96/lgamma_productl.c | 82 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/libm-test-ulps | 96 |
18 files changed, 2631 insertions, 96 deletions
diff --git a/sysdeps/generic/math_private.h b/sysdeps/generic/math_private.h index 0ab547d82f..6aea8643da 100644 --- a/sysdeps/generic/math_private.h +++ b/sysdeps/generic/math_private.h @@ -382,6 +382,22 @@ extern double __gamma_product (double x, double x_eps, int n, double *eps); extern long double __gamma_productl (long double x, long double x_eps, int n, long double *eps); +/* Compute lgamma of a negative argument X, if it is in a range + (depending on the floating-point format) for which expansion around + zeros is used, setting *SIGNGAMP accordingly. */ +extern float __lgamma_negf (float x, int *signgamp); +extern double __lgamma_neg (double x, int *signgamp); +extern long double __lgamma_negl (long double x, int *signgamp); + +/* Compute the product of 1 + (T / (X + X_EPS)), 1 + (T / (X + X_EPS + + 1)), ..., 1 + (T / (X + X_EPS + N - 1)), minus 1. X is such that + all the values X + 1, ..., X + N - 1 are exactly representable, and + X_EPS / X is small enough that factors quadratic in it can be + neglected. */ +extern double __lgamma_product (double t, double x, double x_eps, int n); +extern long double __lgamma_productl (long double t, long double x, + long double x_eps, int n); + #ifndef math_opt_barrier # define math_opt_barrier(x) \ ({ __typeof (x) __x = (x); __asm ("" : "+m" (__x)); __x; }) diff --git a/sysdeps/i386/fpu/libm-test-ulps b/sysdeps/i386/fpu/libm-test-ulps index 1cbf0db898..2b81704baa 100644 --- a/sysdeps/i386/fpu/libm-test-ulps +++ b/sysdeps/i386/fpu/libm-test-ulps @@ -1558,36 +1558,36 @@ ildouble: 4 ldouble: 4 Function: "gamma": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 3 +ldouble: 3 Function: "gamma_downward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 6 -ldouble: 6 +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 7 +ldouble: 7 Function: "gamma_towardzero": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 6 -ldouble: 6 +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 7 +ldouble: 7 Function: "gamma_upward": -double: 2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 4 -ldouble: 4 +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 5 +ldouble: 5 Function: "hypot": double: 1 @@ -1710,36 +1710,36 @@ ildouble: 5 ldouble: 5 Function: "lgamma": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 3 +ldouble: 3 Function: "lgamma_downward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 6 -ldouble: 6 +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 7 +ldouble: 7 Function: "lgamma_towardzero": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 6 -ldouble: 6 +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 7 +ldouble: 7 Function: "lgamma_upward": -double: 2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 4 -ldouble: 4 +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 5 +ldouble: 5 Function: "log": double: 1 diff --git a/sysdeps/ieee754/dbl-64/e_lgamma_r.c b/sysdeps/ieee754/dbl-64/e_lgamma_r.c index fc6f594d62..ea8a9b42fb 100644 --- a/sysdeps/ieee754/dbl-64/e_lgamma_r.c +++ b/sysdeps/ieee754/dbl-64/e_lgamma_r.c @@ -226,6 +226,8 @@ __ieee754_lgamma_r(double x, int *signgamp) if(__builtin_expect(ix>=0x43300000, 0)) /* |x|>=2**52, must be -integer */ return x/zero; + if (x < -2.0 && x > -28.0) + return __lgamma_neg (x, signgamp); t = sin_pi(x); if(t==zero) return one/fabsf(t); /* -integer */ nadj = __ieee754_log(pi/fabs(t*x)); diff --git a/sysdeps/ieee754/dbl-64/lgamma_neg.c b/sysdeps/ieee754/dbl-64/lgamma_neg.c new file mode 100644 index 0000000000..8f54a0f98e --- /dev/null +++ b/sysdeps/ieee754/dbl-64/lgamma_neg.c @@ -0,0 +1,399 @@ +/* lgamma expanding around zeros. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <float.h> +#include <math.h> +#include <math_private.h> + +static const double lgamma_zeros[][2] = + { + { -0x2.74ff92c01f0d8p+0, -0x2.abec9f315f1ap-56 }, + { -0x2.bf6821437b202p+0, 0x6.866a5b4b9be14p-56 }, + { -0x3.24c1b793cb35ep+0, -0xf.b8be699ad3d98p-56 }, + { -0x3.f48e2a8f85fcap+0, -0x1.70d4561291237p-56 }, + { -0x4.0a139e1665604p+0, 0xf.3c60f4f21e7fp-56 }, + { -0x4.fdd5de9bbabf4p+0, 0xa.ef2f55bf89678p-56 }, + { -0x5.021a95fc2db64p+0, -0x3.2a4c56e595394p-56 }, + { -0x5.ffa4bd647d034p+0, -0x1.7dd4ed62cbd32p-52 }, + { -0x6.005ac9625f234p+0, 0x4.9f83d2692e9c8p-56 }, + { -0x6.fff2fddae1bcp+0, 0xc.29d949a3dc03p-60 }, + { -0x7.000cff7b7f87cp+0, 0x1.20bb7d2324678p-52 }, + { -0x7.fffe5fe05673cp+0, -0x3.ca9e82b522b0cp-56 }, + { -0x8.0001a01459fc8p+0, -0x1.f60cb3cec1cedp-52 }, + { -0x8.ffffd1c425e8p+0, -0xf.fc864e9574928p-56 }, + { -0x9.00002e3bb47d8p+0, -0x6.d6d843fedc35p-56 }, + { -0x9.fffffb606bep+0, 0x2.32f9d51885afap-52 }, + { -0xa.0000049f93bb8p+0, -0x1.927b45d95e154p-52 }, + { -0xa.ffffff9466eap+0, 0xe.4c92532d5243p-56 }, + { -0xb.0000006b9915p+0, -0x3.15d965a6ffea4p-52 }, + { -0xb.fffffff708938p+0, -0x7.387de41acc3d4p-56 }, + { -0xc.00000008f76c8p+0, 0x8.cea983f0fdafp-56 }, + { -0xc.ffffffff4f6ep+0, 0x3.09e80685a0038p-52 }, + { -0xd.00000000b092p+0, -0x3.09c06683dd1bap-52 }, + { -0xd.fffffffff3638p+0, 0x3.a5461e7b5c1f6p-52 }, + { -0xe.000000000c9c8p+0, -0x3.a545e94e75ec6p-52 }, + { -0xe.ffffffffff29p+0, 0x3.f9f399fb10cfcp-52 }, + { -0xf.0000000000d7p+0, -0x3.f9f399bd0e42p-52 }, + { -0xf.fffffffffff28p+0, -0xc.060c6621f513p-56 }, + { -0x1.000000000000dp+4, -0x7.3f9f399da1424p-52 }, + { -0x1.0ffffffffffffp+4, -0x3.569c47e7a93e2p-52 }, + { -0x1.1000000000001p+4, 0x3.569c47e7a9778p-52 }, + { -0x1.2p+4, 0xb.413c31dcbecdp-56 }, + { -0x1.2p+4, -0xb.413c31dcbeca8p-56 }, + { -0x1.3p+4, 0x9.7a4da340a0ab8p-60 }, + { -0x1.3p+4, -0x9.7a4da340a0ab8p-60 }, + { -0x1.4p+4, 0x7.950ae90080894p-64 }, + { -0x1.4p+4, -0x7.950ae90080894p-64 }, + { -0x1.5p+4, 0x5.c6e3bdb73d5c8p-68 }, + { -0x1.5p+4, -0x5.c6e3bdb73d5c8p-68 }, + { -0x1.6p+4, 0x4.338e5b6dfe14cp-72 }, + { -0x1.6p+4, -0x4.338e5b6dfe14cp-72 }, + { -0x1.7p+4, 0x2.ec368262c7034p-76 }, + { -0x1.7p+4, -0x2.ec368262c7034p-76 }, + { -0x1.8p+4, 0x1.f2cf01972f578p-80 }, + { -0x1.8p+4, -0x1.f2cf01972f578p-80 }, + { -0x1.9p+4, 0x1.3f3ccdd165fa9p-84 }, + { -0x1.9p+4, -0x1.3f3ccdd165fa9p-84 }, + { -0x1.ap+4, 0xc.4742fe35272dp-92 }, + { -0x1.ap+4, -0xc.4742fe35272dp-92 }, + { -0x1.bp+4, 0x7.46ac70b733a8cp-96 }, + { -0x1.bp+4, -0x7.46ac70b733a8cp-96 }, + { -0x1.cp+4, 0x4.2862898d42174p-100 }, + }; + +static const double e_hi = 0x2.b7e151628aed2p+0, e_lo = 0xa.6abf7158809dp-56; + +/* Coefficients B_2k / 2k(2k-1) of x^-(2k-1) in Stirling's + approximation to lgamma function. */ + +static const double lgamma_coeff[] = + { + 0x1.5555555555555p-4, + -0xb.60b60b60b60b8p-12, + 0x3.4034034034034p-12, + -0x2.7027027027028p-12, + 0x3.72a3c5631fe46p-12, + -0x7.daac36664f1f4p-12, + 0x1.a41a41a41a41ap-8, + -0x7.90a1b2c3d4e6p-8, + 0x2.dfd2c703c0dp-4, + -0x1.6476701181f3ap+0, + 0xd.672219167003p+0, + -0x9.cd9292e6660d8p+4, + }; + +#define NCOEFF (sizeof (lgamma_coeff) / sizeof (lgamma_coeff[0])) + +/* Polynomial approximations to (|gamma(x)|-1)(x-n)/(x-x0), where n is + the integer end-point of the half-integer interval containing x and + x0 is the zero of lgamma in that half-integer interval. Each + polynomial is expressed in terms of x-xm, where xm is the midpoint + of the interval for which the polynomial applies. */ + +static const double poly_coeff[] = + { + /* Interval [-2.125, -2] (polynomial degree 10). */ + -0x1.0b71c5c54d42fp+0, + -0xc.73a1dc05f3758p-4, + -0x1.ec84140851911p-4, + -0xe.37c9da23847e8p-4, + -0x1.03cd87cdc0ac6p-4, + -0xe.ae9aedce12eep-4, + 0x9.b11a1780cfd48p-8, + -0xe.f25fc460bdebp-4, + 0x2.6e984c61ca912p-4, + -0xf.83fea1c6d35p-4, + 0x4.760c8c8909758p-4, + /* Interval [-2.25, -2.125] (polynomial degree 11). */ + -0xf.2930890d7d678p-4, + -0xc.a5cfde054eaa8p-4, + 0x3.9c9e0fdebd99cp-4, + -0x1.02a5ad35619d9p+0, + 0x9.6e9b1167c164p-4, + -0x1.4d8332eba090ap+0, + 0x1.1c0c94b1b2b6p+0, + -0x1.c9a70d138c74ep+0, + 0x1.d7d9cf1d4c196p+0, + -0x2.91fbf4cd6abacp+0, + 0x2.f6751f74b8ff8p+0, + -0x3.e1bb7b09e3e76p+0, + /* Interval [-2.375, -2.25] (polynomial degree 12). */ + -0xd.7d28d505d618p-4, + -0xe.69649a3040958p-4, + 0xb.0d74a2827cd6p-4, + -0x1.924b09228a86ep+0, + 0x1.d49b12bcf6175p+0, + -0x3.0898bb530d314p+0, + 0x4.207a6be8fda4cp+0, + -0x6.39eef56d4e9p+0, + 0x8.e2e42acbccec8p+0, + -0xd.0d91c1e596a68p+0, + 0x1.2e20d7099c585p+4, + -0x1.c4eb6691b4ca9p+4, + 0x2.96a1a11fd85fep+4, + /* Interval [-2.5, -2.375] (polynomial degree 13). */ + -0xb.74ea1bcfff948p-4, + -0x1.2a82bd590c376p+0, + 0x1.88020f828b81p+0, + -0x3.32279f040d7aep+0, + 0x5.57ac8252ce868p+0, + -0x9.c2aedd093125p+0, + 0x1.12c132716e94cp+4, + -0x1.ea94dfa5c0a6dp+4, + 0x3.66b61abfe858cp+4, + -0x6.0cfceb62a26e4p+4, + 0xa.beeba09403bd8p+4, + -0x1.3188d9b1b288cp+8, + 0x2.37f774dd14c44p+8, + -0x3.fdf0a64cd7136p+8, + /* Interval [-2.625, -2.5] (polynomial degree 13). */ + -0x3.d10108c27ebbp-4, + 0x1.cd557caff7d2fp+0, + 0x3.819b4856d36cep+0, + 0x6.8505cbacfc42p+0, + 0xb.c1b2e6567a4dp+0, + 0x1.50a53a3ce6c73p+4, + 0x2.57adffbb1ec0cp+4, + 0x4.2b15549cf400cp+4, + 0x7.698cfd82b3e18p+4, + 0xd.2decde217755p+4, + 0x1.7699a624d07b9p+8, + 0x2.98ecf617abbfcp+8, + 0x4.d5244d44d60b4p+8, + 0x8.e962bf7395988p+8, + /* Interval [-2.75, -2.625] (polynomial degree 12). */ + -0x6.b5d252a56e8a8p-4, + 0x1.28d60383da3a6p+0, + 0x1.db6513ada89bep+0, + 0x2.e217118fa8c02p+0, + 0x4.450112c651348p+0, + 0x6.4af990f589b8cp+0, + 0x9.2db5963d7a238p+0, + 0xd.62c03647da19p+0, + 0x1.379f81f6416afp+4, + 0x1.c5618b4fdb96p+4, + 0x2.9342d0af2ac4ep+4, + 0x3.d9cdf56d2b186p+4, + 0x5.ab9f91d5a27a4p+4, + /* Interval [-2.875, -2.75] (polynomial degree 11). */ + -0x8.a41b1e4f36ff8p-4, + 0xc.da87d3b69dbe8p-4, + 0x1.1474ad5c36709p+0, + 0x1.761ecb90c8c5cp+0, + 0x1.d279bff588826p+0, + 0x2.4e5d003fb36a8p+0, + 0x2.d575575566842p+0, + 0x3.85152b0d17756p+0, + 0x4.5213d921ca13p+0, + 0x5.55da7dfcf69c4p+0, + 0x6.acef729b9404p+0, + 0x8.483cc21dd0668p+0, + /* Interval [-3, -2.875] (polynomial degree 11). */ + -0xa.046d667e468f8p-4, + 0x9.70b88dcc006cp-4, + 0xa.a8a39421c94dp-4, + 0xd.2f4d1363f98ep-4, + 0xd.ca9aa19975b7p-4, + 0xf.cf09c2f54404p-4, + 0x1.04b1365a9adfcp+0, + 0x1.22b54ef213798p+0, + 0x1.2c52c25206bf5p+0, + 0x1.4aa3d798aace4p+0, + 0x1.5c3f278b504e3p+0, + 0x1.7e08292cc347bp+0, + }; + +static const size_t poly_deg[] = + { + 10, + 11, + 12, + 13, + 13, + 12, + 11, + 11, + }; + +static const size_t poly_end[] = + { + 10, + 22, + 35, + 49, + 63, + 76, + 88, + 100, + }; + +/* Compute sin (pi * X) for -0.25 <= X <= 0.5. */ + +static double +lg_sinpi (double x) +{ + if (x <= 0.25) + return __sin (M_PI * x); + else + return __cos (M_PI * (0.5 - x)); +} + +/* Compute cos (pi * X) for -0.25 <= X <= 0.5. */ + +static double +lg_cospi (double x) +{ + if (x <= 0.25) + return __cos (M_PI * x); + else + return __sin (M_PI * (0.5 - x)); +} + +/* Compute cot (pi * X) for -0.25 <= X <= 0.5. */ + +static double +lg_cotpi (double x) +{ + return lg_cospi (x) / lg_sinpi (x); +} + +/* Compute lgamma of a negative argument -28 < X < -2, setting + *SIGNGAMP accordingly. */ + +double +__lgamma_neg (double x, int *signgamp) +{ + /* Determine the half-integer region X lies in, handle exact + integers and determine the sign of the result. */ + int i = __floor (-2 * x); + if ((i & 1) == 0 && i == -2 * x) + return 1.0 / 0.0; + double xn = ((i & 1) == 0 ? -i / 2 : (-i - 1) / 2); + i -= 4; + *signgamp = ((i & 2) == 0 ? -1 : 1); + + SET_RESTORE_ROUND (FE_TONEAREST); + + /* Expand around the zero X0 = X0_HI + X0_LO. */ + double x0_hi = lgamma_zeros[i][0], x0_lo = lgamma_zeros[i][1]; + double xdiff = x - x0_hi - x0_lo; + + /* For arguments in the range -3 to -2, use polynomial + approximations to an adjusted version of the gamma function. */ + if (i < 2) + { + int j = __floor (-8 * x) - 16; + double xm = (-33 - 2 * j) * 0.0625; + double x_adj = x - xm; + size_t deg = poly_deg[j]; + size_t end = poly_end[j]; + double g = poly_coeff[end]; + for (size_t j = 1; j <= deg; j++) + g = g * x_adj + poly_coeff[end - j]; + return __log1p (g * xdiff / (x - xn)); + } + + /* The result we want is log (sinpi (X0) / sinpi (X)) + + log (gamma (1 - X0) / gamma (1 - X)). */ + double x_idiff = fabs (xn - x), x0_idiff = fabs (xn - x0_hi - x0_lo); + double log_sinpi_ratio; + if (x0_idiff < x_idiff * 0.5) + /* Use log not log1p to avoid inaccuracy from log1p of arguments + close to -1. */ + log_sinpi_ratio = __ieee754_log (lg_sinpi (x0_idiff) + / lg_sinpi (x_idiff)); + else + { + /* Use log1p not log to avoid inaccuracy from log of arguments + close to 1. X0DIFF2 has positive sign if X0 is further from + XN than X is from XN, negative sign otherwise. */ + double x0diff2 = ((i & 1) == 0 ? xdiff : -xdiff) * 0.5; + double sx0d2 = lg_sinpi (x0diff2); + double cx0d2 = lg_cospi (x0diff2); + log_sinpi_ratio = __log1p (2 * sx0d2 + * (-sx0d2 + cx0d2 * lg_cotpi (x_idiff))); + } + + double log_gamma_ratio; +#if FLT_EVAL_METHOD != 0 + volatile +#endif + double y0_tmp = 1 - x0_hi; + double y0 = y0_tmp; + double y0_eps = -x0_hi + (1 - y0) - x0_lo; +#if FLT_EVAL_METHOD != 0 + volatile +#endif + double y_tmp = 1 - x; + double y = y_tmp; + double y_eps = -x + (1 - y); + /* We now wish to compute LOG_GAMMA_RATIO + = log (gamma (Y0 + Y0_EPS) / gamma (Y + Y_EPS)). XDIFF + accurately approximates the difference Y0 + Y0_EPS - Y - + Y_EPS. Use Stirling's approximation. First, we may need to + adjust into the range where Stirling's approximation is + sufficiently accurate. */ + double log_gamma_adj = 0; + if (i < 6) + { + int n_up = (7 - i) / 2; + double ny0, ny0_eps, ny, ny_eps; +#if FLT_EVAL_METHOD != 0 + volatile +#endif + double y0_tmp = y0 + n_up; + ny0 = y0_tmp; + ny0_eps = y0 - (ny0 - n_up) + y0_eps; + y0 = ny0; + y0_eps = ny0_eps; +#if FLT_EVAL_METHOD != 0 + volatile +#endif + double y_tmp = y + n_up; + ny = y_tmp; + ny_eps = y - (ny - n_up) + y_eps; + y = ny; + y_eps = ny_eps; + double prodm1 = __lgamma_product (xdiff, y - n_up, y_eps, n_up); + log_gamma_adj = -__log1p (prodm1); + } + double log_gamma_high + = (xdiff * __log1p ((y0 - e_hi - e_lo + y0_eps) / e_hi) + + (y - 0.5 + y_eps) * __log1p (xdiff / y) + log_gamma_adj); + /* Compute the sum of (B_2k / 2k(2k-1))(Y0^-(2k-1) - Y^-(2k-1)). */ + double y0r = 1 / y0, yr = 1 / y; + double y0r2 = y0r * y0r, yr2 = yr * yr; + double rdiff = -xdiff / (y * y0); + double bterm[NCOEFF]; + double dlast = rdiff, elast = rdiff * yr * (yr + y0r); + bterm[0] = dlast * lgamma_coeff[0]; + for (size_t j = 1; j < NCOEFF; j++) + { + double dnext = dlast * y0r2 + elast; + double enext = elast * yr2; + bterm[j] = dnext * lgamma_coeff[j]; + dlast = dnext; + elast = enext; + } + double log_gamma_low = 0; + for (size_t j = 0; j < NCOEFF; j++) + log_gamma_low += bterm[NCOEFF - 1 - j]; + log_gamma_ratio = log_gamma_high + log_gamma_low; + + return log_sinpi_ratio + log_gamma_ratio; +} diff --git a/sysdeps/ieee754/dbl-64/lgamma_product.c b/sysdeps/ieee754/dbl-64/lgamma_product.c new file mode 100644 index 0000000000..8f877a8069 --- /dev/null +++ b/sysdeps/ieee754/dbl-64/lgamma_product.c @@ -0,0 +1,82 @@ +/* Compute a product of 1 + (T/X), 1 + (T/(X+1)), .... + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_private.h> +#include <float.h> + +/* Calculate X * Y exactly and store the result in *HI + *LO. It is + given that the values are small enough that no overflow occurs and + large enough (or zero) that no underflow occurs. */ + +static void +mul_split (double *hi, double *lo, double x, double y) +{ +#ifdef __FP_FAST_FMA + /* Fast built-in fused multiply-add. */ + *hi = x * y; + *lo = __builtin_fma (x, y, -*hi); +#elif defined FP_FAST_FMA + /* Fast library fused multiply-add, compiler before GCC 4.6. */ + *hi = x * y; + *lo = __fma (x, y, -*hi); +#else + /* Apply Dekker's algorithm. */ + *hi = x * y; +# define C ((1 << (DBL_MANT_DIG + 1) / 2) + 1) + double x1 = x * C; + double y1 = y * C; +# undef C + x1 = (x - x1) + x1; + y1 = (y - y1) + y1; + double x2 = x - x1; + double y2 = y - y1; + *lo = (((x1 * y1 - *hi) + x1 * y2) + x2 * y1) + x2 * y2; +#endif +} + +/* Compute the product of 1 + (T / (X + X_EPS)), 1 + (T / (X + X_EPS + + 1)), ..., 1 + (T / (X + X_EPS + N - 1)), minus 1. X is such that + all the values X + 1, ..., X + N - 1 are exactly representable, and + X_EPS / X is small enough that factors quadratic in it can be + neglected. */ + +double +__lgamma_product (double t, double x, double x_eps, int n) +{ + double ret = 0, ret_eps = 0; + for (int i = 0; i < n; i++) + { + double xi = x + i; + double quot = t / xi; + double mhi, mlo; + mul_split (&mhi, &mlo, quot, xi); + double quot_lo = (t - mhi - mlo) / xi - t * x_eps / (xi * xi); + /* We want (1 + RET + RET_EPS) * (1 + QUOT + QUOT_LO) - 1. */ + double rhi, rlo; + mul_split (&rhi, &rlo, ret, quot); + double rpq = ret + quot; + double rpq_eps = (ret - rpq) + quot; + double nret = rpq + rhi; + double nret_eps = (rpq - nret) + rhi; + ret_eps += (rpq_eps + nret_eps + rlo + ret_eps * quot + + quot_lo + quot_lo * (ret + ret_eps)); + ret = nret; + } + return ret + ret_eps; +} diff --git a/sysdeps/ieee754/flt-32/e_lgammaf_r.c b/sysdeps/ieee754/flt-32/e_lgammaf_r.c index c0bf4156ff..424c4e7358 100644 --- a/sysdeps/ieee754/flt-32/e_lgammaf_r.c +++ b/sysdeps/ieee754/flt-32/e_lgammaf_r.c @@ -161,6 +161,9 @@ __ieee754_lgammaf_r(float x, int *signgamp) if(hx<0) { if(ix>=0x4b000000) /* |x|>=2**23, must be -integer */ return x/zero; + if (ix > 0x40000000 /* X < 2.0f. */ + && ix < 0x41700000 /* X > -15.0f. */) + return __lgamma_negf (x, signgamp); t = sin_pif(x); if(t==zero) return one/fabsf(t); /* -integer */ nadj = __ieee754_logf(pi/fabsf(t*x)); diff --git a/sysdeps/ieee754/flt-32/lgamma_negf.c b/sysdeps/ieee754/flt-32/lgamma_negf.c new file mode 100644 index 0000000000..ed9659801e --- /dev/null +++ b/sysdeps/ieee754/flt-32/lgamma_negf.c @@ -0,0 +1,288 @@ +/* lgammaf expanding around zeros. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <float.h> +#include <math.h> +#include <math_private.h> + +static const float lgamma_zeros[][2] = + { + { -0x2.74ff94p+0f, 0x1.3fe0f2p-24f }, + { -0x2.bf682p+0f, -0x1.437b2p-24f }, + { -0x3.24c1b8p+0f, 0x6.c34cap-28f }, + { -0x3.f48e2cp+0f, 0x1.707a04p-24f }, + { -0x4.0a13ap+0f, 0x1.e99aap-24f }, + { -0x4.fdd5ep+0f, 0x1.64454p-24f }, + { -0x5.021a98p+0f, 0x2.03d248p-24f }, + { -0x5.ffa4cp+0f, 0x2.9b82fcp-24f }, + { -0x6.005ac8p+0f, -0x1.625f24p-24f }, + { -0x6.fff3p+0f, 0x2.251e44p-24f }, + { -0x7.000dp+0f, 0x8.48078p-28f }, + { -0x7.fffe6p+0f, 0x1.fa98c4p-28f }, + { -0x8.0001ap+0f, -0x1.459fcap-28f }, + { -0x8.ffffdp+0f, -0x1.c425e8p-24f }, + { -0x9.00003p+0f, 0x1.c44b82p-24f }, + { -0xap+0f, 0x4.9f942p-24f }, + { -0xap+0f, -0x4.9f93b8p-24f }, + { -0xbp+0f, 0x6.b9916p-28f }, + { -0xbp+0f, -0x6.b9915p-28f }, + { -0xcp+0f, 0x8.f76c8p-32f }, + { -0xcp+0f, -0x8.f76c7p-32f }, + { -0xdp+0f, 0xb.09231p-36f }, + { -0xdp+0f, -0xb.09231p-36f }, + { -0xep+0f, 0xc.9cba5p-40f }, + { -0xep+0f, -0xc.9cba5p-40f }, + { -0xfp+0f, 0xd.73f9fp-44f }, + }; + +static const float e_hi = 0x2.b7e15p+0f, e_lo = 0x1.628aeep-24f; + +/* Coefficients B_2k / 2k(2k-1) of x^-(2k-1) in Stirling's + approximation to lgamma function. */ + +static const float lgamma_coeff[] = + { + 0x1.555556p-4f, + -0xb.60b61p-12f, + 0x3.403404p-12f, + }; + +#define NCOEFF (sizeof (lgamma_coeff) / sizeof (lgamma_coeff[0])) + +/* Polynomial approximations to (|gamma(x)|-1)(x-n)/(x-x0), where n is + the integer end-point of the half-integer interval containing x and + x0 is the zero of lgamma in that half-integer interval. Each + polynomial is expressed in terms of x-xm, where xm is the midpoint + of the interval for which the polynomial applies. */ + +static const float poly_coeff[] = + { + /* Interval [-2.125, -2] (polynomial degree 5). */ + -0x1.0b71c6p+0f, + -0xc.73a1ep-4f, + -0x1.ec8462p-4f, + -0xe.37b93p-4f, + -0x1.02ed36p-4f, + -0xe.cbe26p-4f, + /* Interval [-2.25, -2.125] (polynomial degree 5). */ + -0xf.29309p-4f, + -0xc.a5cfep-4f, + 0x3.9c93fcp-4f, + -0x1.02a2fp+0f, + 0x9.896bep-4f, + -0x1.519704p+0f, + /* Interval [-2.375, -2.25] (polynomial degree 5). */ + -0xd.7d28dp-4f, + -0xe.6964cp-4f, + 0xb.0d4f1p-4f, + -0x1.9240aep+0f, + 0x1.dadabap+0f, + -0x3.1778c4p+0f, + /* Interval [-2.5, -2.375] (polynomial degree 6). */ + -0xb.74ea2p-4f, + -0x1.2a82cp+0f, + 0x1.880234p+0f, + -0x3.320c4p+0f, + 0x5.572a38p+0f, + -0x9.f92bap+0f, + 0x1.1c347ep+4f, + /* Interval [-2.625, -2.5] (polynomial degree 6). */ + -0x3.d10108p-4f, + 0x1.cd5584p+0f, + 0x3.819c24p+0f, + 0x6.84cbb8p+0f, + 0xb.bf269p+0f, + 0x1.57fb12p+4f, + 0x2.7b9854p+4f, + /* Interval [-2.75, -2.625] (polynomial degree 6). */ + -0x6.b5d25p-4f, + 0x1.28d604p+0f, + 0x1.db6526p+0f, + 0x2.e20b38p+0f, + 0x4.44c378p+0f, + 0x6.62a08p+0f, + 0x9.6db3ap+0f, + /* Interval [-2.875, -2.75] (polynomial degree 5). */ + -0x8.a41b2p-4f, + 0xc.da87fp-4f, + 0x1.147312p+0f, + 0x1.7617dap+0f, + 0x1.d6c13p+0f, + 0x2.57a358p+0f, + /* Interval [-3, -2.875] (polynomial degree 5). */ + -0xa.046d6p-4f, + 0x9.70b89p-4f, + 0xa.a89a6p-4f, + 0xd.2f2d8p-4f, + 0xd.e32b4p-4f, + 0xf.fb741p-4f, + }; + +static const size_t poly_deg[] = + { + 5, + 5, + 5, + 6, + 6, + 6, + 5, + 5, + }; + +static const size_t poly_end[] = + { + 5, + 11, + 17, + 24, + 31, + 38, + 44, + 50, + }; + +/* Compute sin (pi * X) for -0.25 <= X <= 0.5. */ + +static float +lg_sinpi (float x) +{ + if (x <= 0.25f) + return __sinf ((float) M_PI * x); + else + return __cosf ((float) M_PI * (0.5f - x)); +} + +/* Compute cos (pi * X) for -0.25 <= X <= 0.5. */ + +static float +lg_cospi (float x) +{ + if (x <= 0.25f) + return __cosf ((float) M_PI * x); + else + return __sinf ((float) M_PI * (0.5f - x)); +} + +/* Compute cot (pi * X) for -0.25 <= X <= 0.5. */ + +static float +lg_cotpi (float x) +{ + return lg_cospi (x) / lg_sinpi (x); +} + +/* Compute lgamma of a negative argument -15 < X < -2, setting + *SIGNGAMP accordingly. */ + +float +__lgamma_negf (float x, int *signgamp) +{ + /* Determine the half-integer region X lies in, handle exact + integers and determine the sign of the result. */ + int i = __floorf (-2 * x); + if ((i & 1) == 0 && i == -2 * x) + return 1.0f / 0.0f; + float xn = ((i & 1) == 0 ? -i / 2 : (-i - 1) / 2); + i -= 4; + *signgamp = ((i & 2) == 0 ? -1 : 1); + + SET_RESTORE_ROUNDF (FE_TONEAREST); + + /* Expand around the zero X0 = X0_HI + X0_LO. */ + float x0_hi = lgamma_zeros[i][0], x0_lo = lgamma_zeros[i][1]; + float xdiff = x - x0_hi - x0_lo; + + /* For arguments in the range -3 to -2, use polynomial + approximations to an adjusted version of the gamma function. */ + if (i < 2) + { + int j = __floorf (-8 * x) - 16; + float xm = (-33 - 2 * j) * 0.0625f; + float x_adj = x - xm; + size_t deg = poly_deg[j]; + size_t end = poly_end[j]; + float g = poly_coeff[end]; + for (size_t j = 1; j <= deg; j++) + g = g * x_adj + poly_coeff[end - j]; + return __log1pf (g * xdiff / (x - xn)); + } + + /* The result we want is log (sinpi (X0) / sinpi (X)) + + log (gamma (1 - X0) / gamma (1 - X)). */ + float x_idiff = fabsf (xn - x), x0_idiff = fabsf (xn - x0_hi - x0_lo); + float log_sinpi_ratio; + if (x0_idiff < x_idiff * 0.5f) + /* Use log not log1p to avoid inaccuracy from log1p of arguments + close to -1. */ + log_sinpi_ratio = __ieee754_logf (lg_sinpi (x0_idiff) + / lg_sinpi (x_idiff)); + else + { + /* Use log1p not log to avoid inaccuracy from log of arguments + close to 1. X0DIFF2 has positive sign if X0 is further from + XN than X is from XN, negative sign otherwise. */ + float x0diff2 = ((i & 1) == 0 ? xdiff : -xdiff) * 0.5f; + float sx0d2 = lg_sinpi (x0diff2); + float cx0d2 = lg_cospi (x0diff2); + log_sinpi_ratio = __log1pf (2 * sx0d2 + * (-sx0d2 + cx0d2 * lg_cotpi (x_idiff))); + } + + float log_gamma_ratio; +#if FLT_EVAL_METHOD != 0 + volatile +#endif + float y0_tmp = 1 - x0_hi; + float y0 = y0_tmp; + float y0_eps = -x0_hi + (1 - y0) - x0_lo; +#if FLT_EVAL_METHOD != 0 + volatile +#endif + float y_tmp = 1 - x; + float y = y_tmp; + float y_eps = -x + (1 - y); + /* We now wish to compute LOG_GAMMA_RATIO + = log (gamma (Y0 + Y0_EPS) / gamma (Y + Y_EPS)). XDIFF + accurately approximates the difference Y0 + Y0_EPS - Y - + Y_EPS. Use Stirling's approximation. */ + float log_gamma_high + = (xdiff * __log1pf ((y0 - e_hi - e_lo + y0_eps) / e_hi) + + (y - 0.5f + y_eps) * __log1pf (xdiff / y)); + /* Compute the sum of (B_2k / 2k(2k-1))(Y0^-(2k-1) - Y^-(2k-1)). */ + float y0r = 1 / y0, yr = 1 / y; + float y0r2 = y0r * y0r, yr2 = yr * yr; + float rdiff = -xdiff / (y * y0); + float bterm[NCOEFF]; + float dlast = rdiff, elast = rdiff * yr * (yr + y0r); + bterm[0] = dlast * lgamma_coeff[0]; + for (size_t j = 1; j < NCOEFF; j++) + { + float dnext = dlast * y0r2 + elast; + float enext = elast * yr2; + bterm[j] = dnext * lgamma_coeff[j]; + dlast = dnext; + elast = enext; + } + float log_gamma_low = 0; + for (size_t j = 0; j < NCOEFF; j++) + log_gamma_low += bterm[NCOEFF - 1 - j]; + log_gamma_ratio = log_gamma_high + log_gamma_low; + + return log_sinpi_ratio + log_gamma_ratio; +} diff --git a/sysdeps/ieee754/flt-32/lgamma_productf.c b/sysdeps/ieee754/flt-32/lgamma_productf.c new file mode 100644 index 0000000000..1cc8931700 --- /dev/null +++ b/sysdeps/ieee754/flt-32/lgamma_productf.c @@ -0,0 +1 @@ +/* Not needed. */ diff --git a/sysdeps/ieee754/ldbl-128/e_lgammal_r.c b/sysdeps/ieee754/ldbl-128/e_lgammal_r.c index d8a5e5b9ec..abf0f15995 100644 --- a/sysdeps/ieee754/ldbl-128/e_lgammal_r.c +++ b/sysdeps/ieee754/ldbl-128/e_lgammal_r.c @@ -781,6 +781,8 @@ __ieee754_lgammal_r (long double x, int *signgamp) if (x < 0.0L) { + if (x < -2.0L && x > (LDBL_MANT_DIG == 106 ? -48.0L : -50.0L)) + return __lgamma_negl (x, signgamp); q = -x; p = __floorl (q); if (p == q) diff --git a/sysdeps/ieee754/ldbl-128/lgamma_negl.c b/sysdeps/ieee754/ldbl-128/lgamma_negl.c new file mode 100644 index 0000000000..d68b93676e --- /dev/null +++ b/sysdeps/ieee754/ldbl-128/lgamma_negl.c @@ -0,0 +1,551 @@ +/* lgammal expanding around zeros. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <float.h> +#include <math.h> +#include <math_private.h> + +static const long double lgamma_zeros[][2] = + { + { -0x2.74ff92c01f0d82abec9f315f1a08p+0L, 0xe.d3ccb7fb2658634a2b9f6b2ba81p-116L }, + { -0x2.bf6821437b20197995a4b4641eaep+0L, -0xb.f4b00b4829f961e428533e6ad048p-116L }, + { -0x3.24c1b793cb35efb8be699ad3d9bap+0L, -0x6.5454cb7fac60e3f16d9d7840c2ep-116L }, + { -0x3.f48e2a8f85fca170d4561291236cp+0L, -0xc.320a4887d1cb4c711828a75d5758p-116L }, + { -0x4.0a139e16656030c39f0b0de18114p+0L, 0x1.53e84029416e1242006b2b3d1cfp-112L }, + { -0x4.fdd5de9bbabf3510d0aa40769884p+0L, -0x1.01d7d78125286f78d1e501f14966p-112L }, + { -0x5.021a95fc2db6432a4c56e595394cp+0L, -0x1.ecc6af0430d4fe5746fa7233356fp-112L }, + { -0x5.ffa4bd647d0357dd4ed62cbd31ecp+0L, -0x1.f8e3f8e5deba2d67dbd70dd96ce1p-112L }, + { -0x6.005ac9625f233b607c2d96d16384p+0L, -0x1.cb86ac569340cf1e5f24df7aab7bp-112L }, + { -0x6.fff2fddae1bbff3d626b65c23fd4p+0L, 0x1.e0bfcff5c457ebcf4d3ad9674167p-112L }, + { -0x7.000cff7b7f87adf4482dcdb98784p+0L, 0x1.54d99e35a74d6407b80292df199fp-112L }, + { -0x7.fffe5fe05673c3ca9e82b522b0ccp+0L, 0x1.62d177c832e0eb42c2faffd1b145p-112L }, + { -0x8.0001a01459fc9f60cb3cec1cec88p+0L, 0x2.8998835ac7277f7bcef67c47f188p-112L }, + { -0x8.ffffd1c425e80ffc864e95749258p+0L, -0x1.e7e20210e7f81cf781b44e9d2b02p-112L }, + { -0x9.00002e3bb47d86d6d843fedc352p+0L, 0x2.14852f613a16291751d2ab751f7ep-112L }, + { -0x9.fffffb606bdfdcd062ae77a50548p+0L, 0x3.962d1490cc2e8f031c7007eaa1ap-116L }, + { -0xa.0000049f93bb9927b45d95e1544p+0L, -0x1.e03086db9146a9287bd4f2172d5ap-112L }, + { -0xa.ffffff9466e9f1b36dacd2adbd18p+0L, -0xd.05a4e458062f3f95345a4d9c9b6p-116L }, + { -0xb.0000006b9915315d965a6ffea41p+0L, 0x1.b415c6fff233e7b7fdc3a094246fp-112L }, + { -0xb.fffffff7089387387de41acc3d4p+0L, 0x3.687427c6373bd74a10306e10a28ep-112L }, + { -0xc.00000008f76c7731567c0f0250fp+0L, -0x3.87920df5675833859190eb128ef6p-112L }, + { -0xc.ffffffff4f6dcf617f97a5ffc758p+0L, 0x2.ab72d76f32eaee2d1a42ed515d3ap-116L }, + { -0xd.00000000b092309c06683dd1b9p+0L, -0x3.e3700857a15c19ac5a611de9688ap-112L }, + { -0xd.fffffffff36345ab9e184a3e09dp+0L, -0x1.176dc48e47f62d917973dd44e553p-112L }, + { -0xe.000000000c9cba545e94e75ec57p+0L, -0x1.8f753e2501e757a17cf2ecbeeb89p-112L }, + { -0xe.ffffffffff28c060c6604ef3037p+0L, -0x1.f89d37357c9e3dc17c6c6e63becap-112L }, + { -0xf.0000000000d73f9f399bd0e420f8p+0L, -0x5.e9ee31b0b890744fc0e3fbc01048p-116L }, + { -0xf.fffffffffff28c060c6621f512e8p+0L, 0xd.1b2eec9d960bd9adc5be5f5fa5p-116L }, + { -0x1.000000000000d73f9f399da1424cp+4L, 0x6.c46e0e88305d2800f0e414c506a8p-116L }, + { -0x1.0ffffffffffff3569c47e7a93e1cp+4L, -0x4.6a08a2e008a998ebabb8087efa2cp-112L }, + { -0x1.1000000000000ca963b818568887p+4L, -0x6.ca5a3a64ec15db0a95caf2c9ffb4p-112L }, + { -0x1.1fffffffffffff4bec3ce234132dp+4L, -0x8.b2b726187c841cb92cd5221e444p-116L }, + { -0x1.20000000000000b413c31dcbeca5p+4L, 0x3.c4d005344b6cd0e7231120294abcp-112L }, + { -0x1.2ffffffffffffff685b25cbf5f54p+4L, -0x5.ced932e38485f7dd296b8fa41448p-112L }, + { -0x1.30000000000000097a4da340a0acp+4L, 0x7.e484e0e0ffe38d406ebebe112f88p-112L }, + { -0x1.3fffffffffffffff86af516ff7f7p+4L, -0x6.bd67e720d57854502b7db75e1718p-112L }, + { -0x1.40000000000000007950ae900809p+4L, 0x6.bec33375cac025d9c073168c5d9p-112L }, + { -0x1.4ffffffffffffffffa391c4248c3p+4L, 0x5.c63022b62b5484ba346524db607p-112L }, + { -0x1.500000000000000005c6e3bdb73dp+4L, -0x5.c62f55ed5322b2685c5e9a51e6a8p-112L }, + { -0x1.5fffffffffffffffffbcc71a492p+4L, -0x1.eb5aeb96c74d7ad25e060528fb5p-112L }, + { -0x1.6000000000000000004338e5b6ep+4L, 0x1.eb5aec04b2f2eb663e4e3d8a018cp-112L }, + { -0x1.6ffffffffffffffffffd13c97d9dp+4L, -0x3.8fcc4d08d6fe5aa56ab04307ce7ep-112L }, + { -0x1.70000000000000000002ec368263p+4L, 0x3.8fcc4d090cee2f5d0b69a99c353cp-112L }, + { -0x1.7fffffffffffffffffffe0d30fe7p+4L, 0x7.2f577cca4b4c8cb1dc14001ac5ecp-112L }, + { -0x1.800000000000000000001f2cf019p+4L, -0x7.2f577cca4b3442e35f0040b3b9e8p-112L }, + { -0x1.8ffffffffffffffffffffec0c332p+4L, -0x2.e9a0572b1bb5b95f346a92d67a6p-112L }, + { -0x1.90000000000000000000013f3ccep+4L, 0x2.e9a0572b1bb5c371ddb3561705ap-112L }, + { -0x1.9ffffffffffffffffffffff3b8bdp+4L, -0x1.cad8d32e386fd783e97296d63dcbp-116L }, + { -0x1.a0000000000000000000000c4743p+4L, 0x1.cad8d32e386fd7c1ab8c1fe34c0ep-116L }, + { -0x1.afffffffffffffffffffffff8b95p+4L, -0x3.8f48cc5737d5979c39db806c5406p-112L }, + { -0x1.b00000000000000000000000746bp+4L, 0x3.8f48cc5737d5979c3b3a6bda06f6p-112L }, + { -0x1.bffffffffffffffffffffffffbd8p+4L, 0x6.2898d42174dcf171470d8c8c6028p-112L }, + { -0x1.c000000000000000000000000428p+4L, -0x6.2898d42174dcf171470d18ba412cp-112L }, + { -0x1.cfffffffffffffffffffffffffdbp+4L, -0x4.c0ce9794ea50a839e311320bde94p-112L }, + { -0x1.d000000000000000000000000025p+4L, 0x4.c0ce9794ea50a839e311322f7cf8p-112L }, + { -0x1.dfffffffffffffffffffffffffffp+4L, 0x3.932c5047d60e60caded4c298a174p-112L }, + { -0x1.e000000000000000000000000001p+4L, -0x3.932c5047d60e60caded4c298973ap-112L }, + { -0x1.fp+4L, 0xa.1a6973c1fade2170f7237d35fe3p-116L }, + { -0x1.fp+4L, -0xa.1a6973c1fade2170f7237d35fe08p-116L }, + { -0x2p+4L, 0x5.0d34b9e0fd6f10b87b91be9aff1p-120L }, + { -0x2p+4L, -0x5.0d34b9e0fd6f10b87b91be9aff0cp-120L }, + { -0x2.1p+4L, 0x2.73024a9ba1aa36a7059bff52e844p-124L }, + { -0x2.1p+4L, -0x2.73024a9ba1aa36a7059bff52e844p-124L }, + { -0x2.2p+4L, 0x1.2710231c0fd7a13f8a2b4af9d6b7p-128L }, + { -0x2.2p+4L, -0x1.2710231c0fd7a13f8a2b4af9d6b7p-128L }, + { -0x2.3p+4L, 0x8.6e2ce38b6c8f9419e3fad3f0312p-136L }, + { -0x2.3p+4L, -0x8.6e2ce38b6c8f9419e3fad3f0312p-136L }, + { -0x2.4p+4L, 0x3.bf30652185952560d71a254e4eb8p-140L }, + { -0x2.4p+4L, -0x3.bf30652185952560d71a254e4eb8p-140L }, + { -0x2.5p+4L, 0x1.9ec8d1c94e85af4c78b15c3d89d3p-144L }, + { -0x2.5p+4L, -0x1.9ec8d1c94e85af4c78b15c3d89d3p-144L }, + { -0x2.6p+4L, 0xa.ea565ce061d57489e9b85276274p-152L }, + { -0x2.6p+4L, -0xa.ea565ce061d57489e9b85276274p-152L }, + { -0x2.7p+4L, 0x4.7a6512692eb37804111dabad30ecp-156L }, + { -0x2.7p+4L, -0x4.7a6512692eb37804111dabad30ecp-156L }, + { -0x2.8p+4L, 0x1.ca8ed42a12ae3001a07244abad2bp-160L }, + { -0x2.8p+4L, -0x1.ca8ed42a12ae3001a07244abad2bp-160L }, + { -0x2.9p+4L, 0xb.2f30e1ce812063f12e7e8d8d96e8p-168L }, + { -0x2.9p+4L, -0xb.2f30e1ce812063f12e7e8d8d96e8p-168L }, + { -0x2.ap+4L, 0x4.42bd49d4c37a0db136489772e428p-172L }, + { -0x2.ap+4L, -0x4.42bd49d4c37a0db136489772e428p-172L }, + { -0x2.bp+4L, 0x1.95db45257e5122dcbae56def372p-176L }, + { -0x2.bp+4L, -0x1.95db45257e5122dcbae56def372p-176L }, + { -0x2.cp+4L, 0x9.3958d81ff63527ecf993f3fb6f48p-184L }, + { -0x2.cp+4L, -0x9.3958d81ff63527ecf993f3fb6f48p-184L }, + { -0x2.dp+4L, 0x3.47970e4440c8f1c058bd238c9958p-188L }, + { -0x2.dp+4L, -0x3.47970e4440c8f1c058bd238c9958p-188L }, + { -0x2.ep+4L, 0x1.240804f65951062ca46e4f25c608p-192L }, + { -0x2.ep+4L, -0x1.240804f65951062ca46e4f25c608p-192L }, + { -0x2.fp+4L, 0x6.36a382849fae6de2d15362d8a394p-200L }, + { -0x2.fp+4L, -0x6.36a382849fae6de2d15362d8a394p-200L }, + { -0x3p+4L, 0x2.123680d6dfe4cf4b9b1bcb9d8bdcp-204L }, + { -0x3p+4L, -0x2.123680d6dfe4cf4b9b1bcb9d8bdcp-204L }, + { -0x3.1p+4L, 0xa.d21786ff5842eca51fea0870919p-212L }, + { -0x3.1p+4L, -0xa.d21786ff5842eca51fea0870919p-212L }, + { -0x3.2p+4L, 0x3.766dedc259af040be140a68a6c04p-216L }, + }; + +static const long double e_hi = 0x2.b7e151628aed2a6abf7158809cf4p+0L; +static const long double e_lo = 0xf.3c762e7160f38b4da56a784d9048p-116L; + + +/* Coefficients B_2k / 2k(2k-1) of x^-(2k-1) in Stirling's + approximation to lgamma function. */ + +static const long double lgamma_coeff[] = + { + 0x1.5555555555555555555555555555p-4L, + -0xb.60b60b60b60b60b60b60b60b60b8p-12L, + 0x3.4034034034034034034034034034p-12L, + -0x2.7027027027027027027027027028p-12L, + 0x3.72a3c5631fe46ae1d4e700dca8f2p-12L, + -0x7.daac36664f1f207daac36664f1f4p-12L, + 0x1.a41a41a41a41a41a41a41a41a41ap-8L, + -0x7.90a1b2c3d4e5f708192a3b4c5d7p-8L, + 0x2.dfd2c703c0cfff430edfd2c703cp-4L, + -0x1.6476701181f39edbdb9ce625987dp+0L, + 0xd.672219167002d3a7a9c886459cp+0L, + -0x9.cd9292e6660d55b3f712eb9e07c8p+4L, + 0x8.911a740da740da740da740da741p+8L, + -0x8.d0cc570e255bf59ff6eec24b49p+12L, + 0xa.8d1044d3708d1c219ee4fdc446ap+16L, + -0xe.8844d8a169abbc406169abbc406p+20L, + 0x1.6d29a0f6433b79890cede62433b8p+28L, + -0x2.88a233b3c8cddaba9809357125d8p+32L, + 0x5.0dde6f27500939a85c40939a85c4p+36L, + -0xb.4005bde03d4642a243581714af68p+40L, + 0x1.bc8cd6f8f1f755c78753cdb5d5c9p+48L, + -0x4.bbebb143bb94de5a0284fa7ec424p+52L, + 0xe.2e1337f5af0bed90b6b0a352d4fp+56L, + -0x2.e78250162b62405ad3e4bfe61b38p+64L, + 0xa.5f7eef9e71ac7c80326ab4cc8bfp+68L, + -0x2.83be0395e550213369924971b21ap+76L, + 0xa.8ebfe48da17dd999790760b0cep+80L, + }; + +#define NCOEFF (sizeof (lgamma_coeff) / sizeof (lgamma_coeff[0])) + +/* Polynomial approximations to (|gamma(x)|-1)(x-n)/(x-x0), where n is + the integer end-point of the half-integer interval containing x and + x0 is the zero of lgamma in that half-integer interval. Each + polynomial is expressed in terms of x-xm, where xm is the midpoint + of the interval for which the polynomial applies. */ + +static const long double poly_coeff[] = + { + /* Interval [-2.125, -2] (polynomial degree 23). */ + -0x1.0b71c5c54d42eb6c17f30b7aa8f5p+0L, + -0xc.73a1dc05f34951602554c6d7506p-4L, + -0x1.ec841408528b51473e6c425ee5ffp-4L, + -0xe.37c9da26fc3c9a3c1844c8c7f1cp-4L, + -0x1.03cd87c519305703b021fa33f827p-4L, + -0xe.ae9ada65e09aa7f1c75216128f58p-4L, + 0x9.b11855a4864b5731cf85736015a8p-8L, + -0xe.f28c133e697a95c28607c9701dep-4L, + 0x2.6ec14a1c586a72a7cc33ee569d6ap-4L, + -0xf.57cab973e14464a262fc24723c38p-4L, + 0x4.5b0fc25f16e52997b2886bbae808p-4L, + -0xf.f50e59f1a9b56e76e988dac9ccf8p-4L, + 0x6.5f5eae15e9a93369e1d85146c6fcp-4L, + -0x1.0d2422daac459e33e0994325ed23p+0L, + 0x8.82000a0e7401fb1117a0e6606928p-4L, + -0x1.1f492f178a3f1b19f58a2ca68e55p+0L, + 0xa.cb545f949899a04c160b19389abp-4L, + -0x1.36165a1b155ba3db3d1b77caf498p+0L, + 0xd.44c5d5576f74302e5cf79e183eep-4L, + -0x1.51f22e0cdd33d3d481e326c02f3ep+0L, + 0xf.f73a349c08244ac389c007779bfp-4L, + -0x1.73317bf626156ba716747c4ca866p+0L, + 0x1.379c3c97b9bc71e1c1c4802dd657p+0L, + -0x1.a72a351c54f902d483052000f5dfp+0L, + /* Interval [-2.25, -2.125] (polynomial degree 24). */ + -0xf.2930890d7d675a80c36afb0fd5e8p-4L, + -0xc.a5cfde054eab5c6770daeca577f8p-4L, + 0x3.9c9e0fdebb07cdf89c61d41c9238p-4L, + -0x1.02a5ad35605fcf4af65a6dbacb84p+0L, + 0x9.6e9b1185bb48be9de1918e00a2e8p-4L, + -0x1.4d8332f3cfbfa116fd611e9ce90dp+0L, + 0x1.1c0c8cb4d9f4b1d490e1a41fae4dp+0L, + -0x1.c9a6f5ae9130cd0299e293a42714p+0L, + 0x1.d7e9307fd58a2ea997f29573a112p+0L, + -0x2.921cb3473d96178ca2a11d2a8d46p+0L, + 0x2.e8d59113b6f3409ff8db226e9988p+0L, + -0x3.cbab931625a1ae2b26756817f264p+0L, + 0x4.7d9f0f05d5296d18663ca003912p+0L, + -0x5.ade9cba12a14ea485667b7135bbp+0L, + 0x6.dc983a5da74fb48e767b7fec0a3p+0L, + -0x8.8d9ed454ae31d9e138dd8ee0d1a8p+0L, + 0xa.6fa099d4e7c202e0c0fd6ed8492p+0L, + -0xc.ebc552a8090a0f0115e92d4ebbc8p+0L, + 0xf.d695e4772c0d829b53fba9ca5568p+0L, + -0x1.38c32ae38e5e9eb79b2a4c5570a9p+4L, + 0x1.8035145646cfab49306d0999a51bp+4L, + -0x1.d930adbb03dd342a4c2a8c4e1af6p+4L, + 0x2.45c2edb1b4943ddb3686cd9c6524p+4L, + -0x2.e818ebbfafe2f916fa21abf7756p+4L, + 0x3.9804ce51d0fb9a430a711fd7307p+4L, + /* Interval [-2.375, -2.25] (polynomial degree 25). */ + -0xd.7d28d505d6181218a25f31d5e45p-4L, + -0xe.69649a3040985140cdf946829fap-4L, + 0xb.0d74a2827d053a8d44595012484p-4L, + -0x1.924b0922853617cac181afbc08ddp+0L, + 0x1.d49b12bccf0a568582e2d3c410f3p+0L, + -0x3.0898bb7d8c4093e636279c791244p+0L, + 0x4.207a6cac711cb53868e8a5057eep+0L, + -0x6.39ee63ea4fb1dcab0c9144bf3ddcp+0L, + 0x8.e2e2556a797b649bf3f53bd26718p+0L, + -0xd.0e83ac82552ef12af508589e7a8p+0L, + 0x1.2e4525e0ce6670563c6484a82b05p+4L, + -0x1.b8e350d6a8f2b222fa390a57c23dp+4L, + 0x2.805cd69b919087d8a80295892c2cp+4L, + -0x3.a42585424a1b7e64c71743ab014p+4L, + 0x5.4b4f409f98de49f7bfb03c05f984p+4L, + -0x7.b3c5827fbe934bc820d6832fb9fcp+4L, + 0xb.33b7b90cc96c425526e0d0866e7p+4L, + -0x1.04b77047ac4f59ee3775ca10df0dp+8L, + 0x1.7b366f5e94a34f41386eac086313p+8L, + -0x2.2797338429385c9849ca6355bfc2p+8L, + 0x3.225273cf92a27c9aac1b35511256p+8L, + -0x4.8f078aa48afe6cb3a4e89690f898p+8L, + 0x6.9f311d7b6654fc1d0b5195141d04p+8L, + -0x9.a0c297b6b4621619ca9bacc48ed8p+8L, + 0xe.ce1f06b6f90d92138232a76e4cap+8L, + -0x1.5b0e6806fa064daf011613e43b17p+12L, + /* Interval [-2.5, -2.375] (polynomial degree 27). */ + -0xb.74ea1bcfff94b2c01afba9daa7d8p-4L, + -0x1.2a82bd590c37538cab143308de4dp+0L, + 0x1.88020f828b966fec66b8649fd6fcp+0L, + -0x3.32279f040eb694970e9db24863dcp+0L, + 0x5.57ac82517767e68a721005853864p+0L, + -0x9.c2aedcfe22833de43834a0a6cc4p+0L, + 0x1.12c132f1f5577f99e1a0ed3538e1p+4L, + -0x1.ea94e26628a3de3597f7bb55a948p+4L, + 0x3.66b4ac4fa582f58b59f96b2f7c7p+4L, + -0x6.0cf746a9cf4cba8c39afcc73fc84p+4L, + 0xa.c102ef2c20d75a342197df7fedf8p+4L, + -0x1.31ebff06e8f14626782df58db3b6p+8L, + 0x2.1fd6f0c0e710994e059b9dbdb1fep+8L, + -0x3.c6d76040407f447f8b5074f07706p+8L, + 0x6.b6d18e0d8feb4c2ef5af6a40ed18p+8L, + -0xb.efaf542c529f91e34217f24ae6a8p+8L, + 0x1.53852d873210e7070f5d9eb2296p+12L, + -0x2.5b977c0ddc6d540717173ac29fc8p+12L, + 0x4.310d452ae05100eff1e02343a724p+12L, + -0x7.73a5d8f20c4f986a7dd1912b2968p+12L, + 0xd.3f5ea2484f3fca15eab1f4d1a218p+12L, + -0x1.78d18aac156d1d93a2ffe7e08d3fp+16L, + 0x2.9df49ca75e5b567f5ea3e47106cp+16L, + -0x4.a7149af8961a08aa7c3233b5bb94p+16L, + 0x8.3db10ffa742c707c25197d989798p+16L, + -0xe.a26d6dd023cadd02041a049ec368p+16L, + 0x1.c825d90514e7c57c7fa5316f947cp+20L, + -0x3.34bb81e5a0952df8ca1abdc6684cp+20L, + /* Interval [-2.625, -2.5] (polynomial degree 28). */ + -0x3.d10108c27ebafad533c20eac32bp-4L, + 0x1.cd557caff7d2b2085f41dbec5106p+0L, + 0x3.819b4856d399520dad9776ea2cacp+0L, + 0x6.8505cbad03dc34c5e42e8b12eb78p+0L, + 0xb.c1b2e653a9e38f82b399c94e7f08p+0L, + 0x1.50a53a38f148138105124df65419p+4L, + 0x2.57ae00cbe5232cbeeed34d89727ap+4L, + 0x4.2b156301b8604db85a601544bfp+4L, + 0x7.6989ed23ca3ca7579b3462592b5cp+4L, + 0xd.2dd2976557939517f831f5552cc8p+4L, + 0x1.76e1c3430eb860969bce40cd494p+8L, + 0x2.9a77bf5488742466db3a2c7c1ec6p+8L, + 0x4.a0d62ed7266e8eb36f725a8ebcep+8L, + 0x8.3a6184dd3021067df2f8b91e99c8p+8L, + 0xe.a0ade1538245bf55d39d7e436b1p+8L, + 0x1.a01359fae8617b5826dd74428e9p+12L, + 0x2.e3b0a32caae77251169acaca1ad4p+12L, + 0x5.2301257c81589f62b38fb5993ee8p+12L, + 0x9.21c9275db253d4e719b73b18cb9p+12L, + 0x1.03c104bc96141cda3f3fa4b112bcp+16L, + 0x1.cdc8ed65119196a08b0c78f1445p+16L, + 0x3.34f31d2eaacf34382cdb0073572ap+16L, + 0x5.b37628cadf12bf0000907d0ef294p+16L, + 0xa.22d8b332c0b1e6a616f425dfe5ap+16L, + 0x1.205b01444804c3ff922cd78b4c42p+20L, + 0x1.fe8f0cea9d1e0ff25be2470b4318p+20L, + 0x3.8872aebeb368399aee02b39340aep+20L, + 0x6.ebd560d351e84e26a4381f5b293cp+20L, + 0xc.c3644d094b0dae2fbcbf682cd428p+20L, + /* Interval [-2.75, -2.625] (polynomial degree 26). */ + -0x6.b5d252a56e8a75458a27ed1c2dd4p-4L, + 0x1.28d60383da3ac721aed3c5794da9p+0L, + 0x1.db6513ada8a66ea77d87d9a8827bp+0L, + 0x2.e217118f9d348a27f7506a707e6ep+0L, + 0x4.450112c5cbf725a0fb9802396c9p+0L, + 0x6.4af99151eae7810a75df2a0303c4p+0L, + 0x9.2db598b4a97a7f69aeef32aec758p+0L, + 0xd.62bef9c22471f5ee47ea1b9c0b5p+0L, + 0x1.379f294e412bd62328326d4222f9p+4L, + 0x1.c5827349d8865f1e8825c37c31c6p+4L, + 0x2.93a7e7a75b7568cc8cbe8c016c12p+4L, + 0x3.bf9bb882afe57edb383d41879d3ap+4L, + 0x5.73c737828cee095c43a5566731c8p+4L, + 0x7.ee4653493a7f81e0442062b3823cp+4L, + 0xb.891c6b83fc8b55bd973b5d962d6p+4L, + 0x1.0c775d7de3bf9b246c0208e0207ep+8L, + 0x1.867ee43ec4bd4f4fd56abc05110ap+8L, + 0x2.37fe9ba6695821e9822d8c8af0a6p+8L, + 0x3.3a2c667e37c942f182cd3223a936p+8L, + 0x4.b1b500eb59f3f782c7ccec88754p+8L, + 0x6.d3efd3b65b3d0d8488d30b79fa4cp+8L, + 0x9.ee8224e65bed5ced8b75eaec609p+8L, + 0xe.72416e510cca77d53fc615c1f3dp+8L, + 0x1.4fb538b0a2dfe567a8904b7e0445p+12L, + 0x1.e7f56a9266cf525a5b8cf4cb76cep+12L, + 0x2.f0365c983f68c597ee49d099cce8p+12L, + 0x4.53aa229e1b9f5b5e59625265951p+12L, + /* Interval [-2.875, -2.75] (polynomial degree 24). */ + -0x8.a41b1e4f36ff88dc820815607d68p-4L, + 0xc.da87d3b69dc0f2f9c6f368b8ca1p-4L, + 0x1.1474ad5c36158a7bea04fd2f98c6p+0L, + 0x1.761ecb90c555df6555b7dba955b6p+0L, + 0x1.d279bff9ae291caf6c4b4bcb3202p+0L, + 0x2.4e5d00559a6e2b9b5d7fe1f6689cp+0L, + 0x2.d57545a75cee8743ae2b17bc8d24p+0L, + 0x3.8514eee3aac88b89bec2307021bap+0L, + 0x4.5235e3b6e1891ffeb87fed9f8a24p+0L, + 0x5.562acdb10eef3c9a773b3e27a864p+0L, + 0x6.8ec8965c76efe03c26bff60b1194p+0L, + 0x8.15251aca144877af32658399f9b8p+0L, + 0x9.f08d56aba174d844138af782c0f8p+0L, + 0xc.3dbbeda2679e8a1346ccc3f6da88p+0L, + 0xf.0f5bfd5eacc26db308ffa0556fa8p+0L, + 0x1.28a6ccd84476fbc713d6bab49ac9p+4L, + 0x1.6d0a3ae2a3b1c8ff400641a3a21fp+4L, + 0x1.c15701b28637f87acfb6a91d33b5p+4L, + 0x2.28fbe0eccf472089b017651ca55ep+4L, + 0x2.a8a453004f6e8ffaacd1603bc3dp+4L, + 0x3.45ae4d9e1e7cd1a5dba0e4ec7f6cp+4L, + 0x4.065fbfacb7fad3e473cb577a61e8p+4L, + 0x4.f3d1473020927acac1944734a39p+4L, + 0x6.54bb091245815a36fb74e314dd18p+4L, + 0x7.d7f445129f7fb6c055e582d3f6ep+4L, + /* Interval [-3, -2.875] (polynomial degree 23). */ + -0xa.046d667e468f3e44dcae1afcc648p-4L, + 0x9.70b88dcc006c214d8d996fdf5ccp-4L, + 0xa.a8a39421c86d3ff24931a0929fp-4L, + 0xd.2f4d1363f324da2b357c8b6ec94p-4L, + 0xd.ca9aa1a3a5c00de11bf60499a97p-4L, + 0xf.cf09c31eeb52a45dfa7ebe3778dp-4L, + 0x1.04b133a39ed8a09691205660468bp+0L, + 0x1.22b547a06edda944fcb12fd9b5ecp+0L, + 0x1.2c57fce7db86a91df09602d344b3p+0L, + 0x1.4aade4894708f84795212fe257eep+0L, + 0x1.579c8b7b67ec4afed5b28c8bf787p+0L, + 0x1.776820e7fc80ae5284239733078ap+0L, + 0x1.883ab28c7301fde4ca6b8ec26ec8p+0L, + 0x1.aa2ef6e1ae52eb42c9ee83b206e3p+0L, + 0x1.bf4ad50f0a9a9311300cf0c51ee7p+0L, + 0x1.e40206e0e96b1da463814dde0d09p+0L, + 0x1.fdcbcffef3a21b29719c2bd9feb1p+0L, + 0x2.25e2e8948939c4d42cf108fae4bep+0L, + 0x2.44ce14d2b59c1c0e6bf2cfa81018p+0L, + 0x2.70ee80bbd0387162be4861c43622p+0L, + 0x2.954b64d2c2ebf3489b949c74476p+0L, + 0x2.c616e133a811c1c9446105208656p+0L, + 0x3.05a69dfe1a9ba1079f90fcf26bd4p+0L, + 0x3.410d2ad16a0506de29736e6aafdap+0L, + }; + +static const size_t poly_deg[] = + { + 23, + 24, + 25, + 27, + 28, + 26, + 24, + 23, + }; + +static const size_t poly_end[] = + { + 23, + 48, + 74, + 102, + 131, + 158, + 183, + 207, + }; + +/* Compute sin (pi * X) for -0.25 <= X <= 0.5. */ + +static long double +lg_sinpi (long double x) +{ + if (x <= 0.25L) + return __sinl (M_PIl * x); + else + return __cosl (M_PIl * (0.5L - x)); +} + +/* Compute cos (pi * X) for -0.25 <= X <= 0.5. */ + +static long double +lg_cospi (long double x) +{ + if (x <= 0.25L) + return __cosl (M_PIl * x); + else + return __sinl (M_PIl * (0.5L - x)); +} + +/* Compute cot (pi * X) for -0.25 <= X <= 0.5. */ + +static long double +lg_cotpi (long double x) +{ + return lg_cospi (x) / lg_sinpi (x); +} + +/* Compute lgamma of a negative argument -50 < X < -2, setting + *SIGNGAMP accordingly. */ + +long double +__lgamma_negl (long double x, int *signgamp) +{ + /* Determine the half-integer region X lies in, handle exact + integers and determine the sign of the result. */ + int i = __floorl (-2 * x); + if ((i & 1) == 0 && i == -2 * x) + return 1.0L / 0.0L; + long double xn = ((i & 1) == 0 ? -i / 2 : (-i - 1) / 2); + i -= 4; + *signgamp = ((i & 2) == 0 ? -1 : 1); + + SET_RESTORE_ROUNDL (FE_TONEAREST); + + /* Expand around the zero X0 = X0_HI + X0_LO. */ + long double x0_hi = lgamma_zeros[i][0], x0_lo = lgamma_zeros[i][1]; + long double xdiff = x - x0_hi - x0_lo; + + /* For arguments in the range -3 to -2, use polynomial + approximations to an adjusted version of the gamma function. */ + if (i < 2) + { + int j = __floorl (-8 * x) - 16; + long double xm = (-33 - 2 * j) * 0.0625L; + long double x_adj = x - xm; + size_t deg = poly_deg[j]; + size_t end = poly_end[j]; + long double g = poly_coeff[end]; + for (size_t j = 1; j <= deg; j++) + g = g * x_adj + poly_coeff[end - j]; + return __log1pl (g * xdiff / (x - xn)); + } + + /* The result we want is log (sinpi (X0) / sinpi (X)) + + log (gamma (1 - X0) / gamma (1 - X)). */ + long double x_idiff = fabsl (xn - x), x0_idiff = fabsl (xn - x0_hi - x0_lo); + long double log_sinpi_ratio; + if (x0_idiff < x_idiff * 0.5L) + /* Use log not log1p to avoid inaccuracy from log1p of arguments + close to -1. */ + log_sinpi_ratio = __ieee754_logl (lg_sinpi (x0_idiff) + / lg_sinpi (x_idiff)); + else + { + /* Use log1p not log to avoid inaccuracy from log of arguments + close to 1. X0DIFF2 has positive sign if X0 is further from + XN than X is from XN, negative sign otherwise. */ + long double x0diff2 = ((i & 1) == 0 ? xdiff : -xdiff) * 0.5L; + long double sx0d2 = lg_sinpi (x0diff2); + long double cx0d2 = lg_cospi (x0diff2); + log_sinpi_ratio = __log1pl (2 * sx0d2 + * (-sx0d2 + cx0d2 * lg_cotpi (x_idiff))); + } + + long double log_gamma_ratio; + long double y0 = 1 - x0_hi; + long double y0_eps = -x0_hi + (1 - y0) - x0_lo; + long double y = 1 - x; + long double y_eps = -x + (1 - y); + /* We now wish to compute LOG_GAMMA_RATIO + = log (gamma (Y0 + Y0_EPS) / gamma (Y + Y_EPS)). XDIFF + accurately approximates the difference Y0 + Y0_EPS - Y - + Y_EPS. Use Stirling's approximation. First, we may need to + adjust into the range where Stirling's approximation is + sufficiently accurate. */ + long double log_gamma_adj = 0; + if (i < 20) + { + int n_up = (21 - i) / 2; + long double ny0, ny0_eps, ny, ny_eps; + ny0 = y0 + n_up; + ny0_eps = y0 - (ny0 - n_up) + y0_eps; + y0 = ny0; + y0_eps = ny0_eps; + ny = y + n_up; + ny_eps = y - (ny - n_up) + y_eps; + y = ny; + y_eps = ny_eps; + long double prodm1 = __lgamma_productl (xdiff, y - n_up, y_eps, n_up); + log_gamma_adj = -__log1pl (prodm1); + } + long double log_gamma_high + = (xdiff * __log1pl ((y0 - e_hi - e_lo + y0_eps) / e_hi) + + (y - 0.5L + y_eps) * __log1pl (xdiff / y) + log_gamma_adj); + /* Compute the sum of (B_2k / 2k(2k-1))(Y0^-(2k-1) - Y^-(2k-1)). */ + long double y0r = 1 / y0, yr = 1 / y; + long double y0r2 = y0r * y0r, yr2 = yr * yr; + long double rdiff = -xdiff / (y * y0); + long double bterm[NCOEFF]; + long double dlast = rdiff, elast = rdiff * yr * (yr + y0r); + bterm[0] = dlast * lgamma_coeff[0]; + for (size_t j = 1; j < NCOEFF; j++) + { + long double dnext = dlast * y0r2 + elast; + long double enext = elast * yr2; + bterm[j] = dnext * lgamma_coeff[j]; + dlast = dnext; + elast = enext; + } + long double log_gamma_low = 0; + for (size_t j = 0; j < NCOEFF; j++) + log_gamma_low += bterm[NCOEFF - 1 - j]; + log_gamma_ratio = log_gamma_high + log_gamma_low; + + return log_sinpi_ratio + log_gamma_ratio; +} diff --git a/sysdeps/ieee754/ldbl-128/lgamma_productl.c b/sysdeps/ieee754/ldbl-128/lgamma_productl.c new file mode 100644 index 0000000000..cf0c778d93 --- /dev/null +++ b/sysdeps/ieee754/ldbl-128/lgamma_productl.c @@ -0,0 +1,82 @@ +/* Compute a product of 1 + (T/X), 1 + (T/(X+1)), .... + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_private.h> +#include <float.h> + +/* Calculate X * Y exactly and store the result in *HI + *LO. It is + given that the values are small enough that no overflow occurs and + large enough (or zero) that no underflow occurs. */ + +static void +mul_split (long double *hi, long double *lo, long double x, long double y) +{ +#ifdef __FP_FAST_FMAL + /* Fast built-in fused multiply-add. */ + *hi = x * y; + *lo = __builtin_fmal (x, y, -*hi); +#elif defined FP_FAST_FMAL + /* Fast library fused multiply-add, compiler before GCC 4.6. */ + *hi = x * y; + *lo = __fmal (x, y, -*hi); +#else + /* Apply Dekker's algorithm. */ + *hi = x * y; +# define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1) + long double x1 = x * C; + long double y1 = y * C; +# undef C + x1 = (x - x1) + x1; + y1 = (y - y1) + y1; + long double x2 = x - x1; + long double y2 = y - y1; + *lo = (((x1 * y1 - *hi) + x1 * y2) + x2 * y1) + x2 * y2; +#endif +} + +/* Compute the product of 1 + (T / (X + X_EPS)), 1 + (T / (X + X_EPS + + 1)), ..., 1 + (T / (X + X_EPS + N - 1)), minus 1. X is such that + all the values X + 1, ..., X + N - 1 are exactly representable, and + X_EPS / X is small enough that factors quadratic in it can be + neglected. */ + +long double +__lgamma_productl (long double t, long double x, long double x_eps, int n) +{ + long double ret = 0, ret_eps = 0; + for (int i = 0; i < n; i++) + { + long double xi = x + i; + long double quot = t / xi; + long double mhi, mlo; + mul_split (&mhi, &mlo, quot, xi); + long double quot_lo = (t - mhi - mlo) / xi - t * x_eps / (xi * xi); + /* We want (1 + RET + RET_EPS) * (1 + QUOT + QUOT_LO) - 1. */ + long double rhi, rlo; + mul_split (&rhi, &rlo, ret, quot); + long double rpq = ret + quot; + long double rpq_eps = (ret - rpq) + quot; + long double nret = rpq + rhi; + long double nret_eps = (rpq - nret) + rhi; + ret_eps += (rpq_eps + nret_eps + rlo + ret_eps * quot + + quot_lo + quot_lo * (ret + ret_eps)); + ret = nret; + } + return ret + ret_eps; +} diff --git a/sysdeps/ieee754/ldbl-128ibm/lgamma_negl.c b/sysdeps/ieee754/ldbl-128ibm/lgamma_negl.c new file mode 100644 index 0000000000..cd076ecf6a --- /dev/null +++ b/sysdeps/ieee754/ldbl-128ibm/lgamma_negl.c @@ -0,0 +1,532 @@ +/* lgammal expanding around zeros. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <float.h> +#include <math.h> +#include <math_private.h> + +static const long double lgamma_zeros[][2] = + { + { -0x2.74ff92c01f0d82abec9f315f1ap+0L, -0x7.12c334804d9a79cb5d46094d46p-112L }, + { -0x2.bf6821437b20197995a4b4641fp+0L, 0x5.140b4ff4b7d6069e1bd7acc196p-108L }, + { -0x3.24c1b793cb35efb8be699ad3dap+0L, 0x4.59abab3480539f1c0e926287cp-108L }, + { -0x3.f48e2a8f85fca170d456129123p+0L, -0x6.cc320a4887d1cb4c711828a75ep-108L }, + { -0x4.0a139e16656030c39f0b0de182p+0L, 0xe.d53e84029416e1242006b2b3dp-108L }, + { -0x4.fdd5de9bbabf3510d0aa407698p+0L, -0x8.501d7d78125286f78d1e501f14p-108L }, + { -0x5.021a95fc2db6432a4c56e5953ap+0L, 0xb.2133950fbcf2b01a8b9058dcccp-108L }, + { -0x5.ffa4bd647d0357dd4ed62cbd32p+0L, 0x1.2071c071a2145d2982428f2269p-108L }, + { -0x6.005ac9625f233b607c2d96d164p+0L, 0x7.a347953a96cbf30e1a0db20856p-108L }, + { -0x6.fff2fddae1bbff3d626b65c24p+0L, 0x2.de0bfcff5c457ebcf4d3ad9674p-108L }, + { -0x7.000cff7b7f87adf4482dcdb988p+0L, 0x7.d54d99e35a74d6407b80292df2p-108L }, + { -0x7.fffe5fe05673c3ca9e82b522bp+0L, -0xc.a9d2e8837cd1f14bd3d05002e4p-108L }, + { -0x8.0001a01459fc9f60cb3cec1cecp+0L, -0x8.576677ca538d88084310983b8p-108L }, + { -0x8.ffffd1c425e80ffc864e957494p+0L, 0x1.a6181dfdef1807e3087e4bb163p-104L }, + { -0x9.00002e3bb47d86d6d843fedc34p+0L, -0x1.1deb7ad09ec5e9d6e8ae2d548bp-104L }, + { -0x9.fffffb606bdfdcd062ae77a504p+0L, -0x1.47c69d2eb6f33d170fce38ff818p-104L }, + { -0xa.0000049f93bb9927b45d95e154p+0L, -0x4.1e03086db9146a9287bd4f2172p-108L }, + { -0xa.ffffff9466e9f1b36dacd2adbcp+0L, -0x1.18d05a4e458062f3f95345a4dap-104L }, + { -0xb.0000006b9915315d965a6ffea4p+0L, -0xe.4bea39000dcc1848023c5f6bdcp-112L }, + { -0xb.fffffff7089387387de41acc3cp+0L, -0x1.3c978bd839c8c428b5efcf91ef8p-104L }, + { -0xc.00000008f76c7731567c0f025p+0L, -0xf.387920df5675833859190eb128p-108L }, + { -0xc.ffffffff4f6dcf617f97a5ffc8p+0L, 0xa.82ab72d76f32eaee2d1a42ed5p-108L }, + { -0xd.00000000b092309c06683dd1b8p+0L, -0x1.03e3700857a15c19ac5a611de98p-104L }, + { -0xd.fffffffff36345ab9e184a3e08p+0L, -0x1.d1176dc48e47f62d917973dd45p-104L }, + { -0xe.000000000c9cba545e94e75ec4p+0L, -0x1.718f753e2501e757a17cf2ecbfp-104L }, + { -0xe.ffffffffff28c060c6604ef304p+0L, 0x8.e0762c8ca8361c23e8393919c4p-108L }, + { -0xf.0000000000d73f9f399bd0e42p+0L, -0xf.85e9ee31b0b890744fc0e3fbcp-108L }, + { -0xf.fffffffffff28c060c6621f514p+0L, 0x1.18d1b2eec9d960bd9adc5be5f6p-104L }, + { -0x1.000000000000d73f9f399da1428p+4L, 0x3.406c46e0e88305d2800f0e414cp-104L }, + { -0x1.0ffffffffffff3569c47e7a93ep+4L, -0x1.c46a08a2e008a998ebabb8087fp-104L }, + { -0x1.1000000000000ca963b81856888p+4L, -0x7.6ca5a3a64ec15db0a95caf2cap-108L }, + { -0x1.1fffffffffffff4bec3ce23413p+4L, -0x2.d08b2b726187c841cb92cd5222p-104L }, + { -0x1.20000000000000b413c31dcbec8p+4L, -0x2.4c3b2ffacbb4932f18dceedfd7p-104L }, + { -0x1.2ffffffffffffff685b25cbf5f8p+4L, 0x2.ba3126cd1c7b7a0822d694705cp-104L }, + { -0x1.30000000000000097a4da340a08p+4L, -0x2.b81b7b1f1f001c72bf914141efp-104L }, + { -0x1.3fffffffffffffff86af516ff8p+4L, 0x8.9429818df2a87abafd48248a2p-108L }, + { -0x1.40000000000000007950ae9008p+4L, -0x8.9413ccc8a353fda263f8ce973cp-108L }, + { -0x1.4ffffffffffffffffa391c4249p+4L, 0x3.d5c63022b62b5484ba346524dbp-104L }, + { -0x1.500000000000000005c6e3bdb7p+4L, -0x3.d5c62f55ed5322b2685c5e9a52p-104L }, + { -0x1.5fffffffffffffffffbcc71a49p+4L, -0x2.01eb5aeb96c74d7ad25e060529p-104L }, + { -0x1.6000000000000000004338e5b7p+4L, 0x2.01eb5aec04b2f2eb663e4e3d8ap-104L }, + { -0x1.6ffffffffffffffffffd13c97d8p+4L, -0x1.d38fcc4d08d6fe5aa56ab04308p-104L }, + { -0x1.70000000000000000002ec36828p+4L, 0x1.d38fcc4d090cee2f5d0b69a99cp-104L }, + { -0x1.7fffffffffffffffffffe0d31p+4L, 0x1.972f577cca4b4c8cb1dc14001bp-104L }, + { -0x1.800000000000000000001f2cfp+4L, -0x1.972f577cca4b3442e35f0040b38p-104L }, + { -0x1.8ffffffffffffffffffffec0c3p+4L, -0x3.22e9a0572b1bb5b95f346a92d6p-104L }, + { -0x1.90000000000000000000013f3dp+4L, 0x3.22e9a0572b1bb5c371ddb35617p-104L }, + { -0x1.9ffffffffffffffffffffff3b88p+4L, -0x3.d01cad8d32e386fd783e97296dp-104L }, + { -0x1.a0000000000000000000000c478p+4L, 0x3.d01cad8d32e386fd7c1ab8c1fep-104L }, + { -0x1.afffffffffffffffffffffff8b8p+4L, -0x1.538f48cc5737d5979c39db806c8p-104L }, + { -0x1.b00000000000000000000000748p+4L, 0x1.538f48cc5737d5979c3b3a6bdap-104L }, + { -0x1.bffffffffffffffffffffffffcp+4L, 0x2.862898d42174dcf171470d8c8cp-104L }, + { -0x1.c0000000000000000000000004p+4L, -0x2.862898d42174dcf171470d18bap-104L }, + { -0x1.dp+4L, 0x2.4b3f31686b15af57c61ceecdf4p-104L }, + { -0x1.dp+4L, -0x2.4b3f31686b15af57c61ceecdd1p-104L }, + { -0x1.ep+4L, 0x1.3932c5047d60e60caded4c298ap-108L }, + { -0x1.ep+4L, -0x1.3932c5047d60e60caded4c29898p-108L }, + { -0x1.fp+4L, 0xa.1a6973c1fade2170f7237d36p-116L }, + { -0x1.fp+4L, -0xa.1a6973c1fade2170f7237d36p-116L }, + { -0x2p+4L, 0x5.0d34b9e0fd6f10b87b91be9bp-120L }, + { -0x2p+4L, -0x5.0d34b9e0fd6f10b87b91be9bp-120L }, + { -0x2.1p+4L, 0x2.73024a9ba1aa36a7059bff52e8p-124L }, + { -0x2.1p+4L, -0x2.73024a9ba1aa36a7059bff52e8p-124L }, + { -0x2.2p+4L, 0x1.2710231c0fd7a13f8a2b4af9d68p-128L }, + { -0x2.2p+4L, -0x1.2710231c0fd7a13f8a2b4af9d68p-128L }, + { -0x2.3p+4L, 0x8.6e2ce38b6c8f9419e3fad3f03p-136L }, + { -0x2.3p+4L, -0x8.6e2ce38b6c8f9419e3fad3f03p-136L }, + { -0x2.4p+4L, 0x3.bf30652185952560d71a254e4fp-140L }, + { -0x2.4p+4L, -0x3.bf30652185952560d71a254e4fp-140L }, + { -0x2.5p+4L, 0x1.9ec8d1c94e85af4c78b15c3d8ap-144L }, + { -0x2.5p+4L, -0x1.9ec8d1c94e85af4c78b15c3d8ap-144L }, + { -0x2.6p+4L, 0xa.ea565ce061d57489e9b8527628p-152L }, + { -0x2.6p+4L, -0xa.ea565ce061d57489e9b8527628p-152L }, + { -0x2.7p+4L, 0x4.7a6512692eb37804111dabad3p-156L }, + { -0x2.7p+4L, -0x4.7a6512692eb37804111dabad3p-156L }, + { -0x2.8p+4L, 0x1.ca8ed42a12ae3001a07244abadp-160L }, + { -0x2.8p+4L, -0x1.ca8ed42a12ae3001a07244abadp-160L }, + { -0x2.9p+4L, 0xb.2f30e1ce812063f12e7e8d8d98p-168L }, + { -0x2.9p+4L, -0xb.2f30e1ce812063f12e7e8d8d98p-168L }, + { -0x2.ap+4L, 0x4.42bd49d4c37a0db136489772e4p-172L }, + { -0x2.ap+4L, -0x4.42bd49d4c37a0db136489772e4p-172L }, + { -0x2.bp+4L, 0x1.95db45257e5122dcbae56def37p-176L }, + { -0x2.bp+4L, -0x1.95db45257e5122dcbae56def37p-176L }, + { -0x2.cp+4L, 0x9.3958d81ff63527ecf993f3fb7p-184L }, + { -0x2.cp+4L, -0x9.3958d81ff63527ecf993f3fb7p-184L }, + { -0x2.dp+4L, 0x3.47970e4440c8f1c058bd238c99p-188L }, + { -0x2.dp+4L, -0x3.47970e4440c8f1c058bd238c99p-188L }, + { -0x2.ep+4L, 0x1.240804f65951062ca46e4f25c6p-192L }, + { -0x2.ep+4L, -0x1.240804f65951062ca46e4f25c6p-192L }, + { -0x2.fp+4L, 0x6.36a382849fae6de2d15362d8a4p-200L }, + { -0x2.fp+4L, -0x6.36a382849fae6de2d15362d8a4p-200L }, + { -0x3p+4L, 0x2.123680d6dfe4cf4b9b1bcb9d8cp-204L }, + }; + +static const long double e_hi = 0x2.b7e151628aed2a6abf7158809dp+0L; +static const long double e_lo = -0xb.0c389d18e9f0c74b25a9587b28p-112L; + +/* Coefficients B_2k / 2k(2k-1) of x^-(2k-1) in Stirling's + approximation to lgamma function. */ + +static const long double lgamma_coeff[] = + { + 0x1.555555555555555555555555558p-4L, + -0xb.60b60b60b60b60b60b60b60b6p-12L, + 0x3.4034034034034034034034034p-12L, + -0x2.7027027027027027027027027p-12L, + 0x3.72a3c5631fe46ae1d4e700dca9p-12L, + -0x7.daac36664f1f207daac36664f2p-12L, + 0x1.a41a41a41a41a41a41a41a41a4p-8L, + -0x7.90a1b2c3d4e5f708192a3b4c5ep-8L, + 0x2.dfd2c703c0cfff430edfd2c704p-4L, + -0x1.6476701181f39edbdb9ce625988p+0L, + 0xd.672219167002d3a7a9c886459cp+0L, + -0x9.cd9292e6660d55b3f712eb9e08p+4L, + 0x8.911a740da740da740da740da74p+8L, + -0x8.d0cc570e255bf59ff6eec24b48p+12L, + 0xa.8d1044d3708d1c219ee4fdc448p+16L, + -0xe.8844d8a169abbc406169abbc4p+20L, + 0x1.6d29a0f6433b79890cede624338p+28L, + -0x2.88a233b3c8cddaba9809357126p+32L, + 0x5.0dde6f27500939a85c40939a86p+36L, + -0xb.4005bde03d4642a243581714bp+40L, + 0x1.bc8cd6f8f1f755c78753cdb5d6p+48L, + -0x4.bbebb143bb94de5a0284fa7ec4p+52L, + 0xe.2e1337f5af0bed90b6b0a352d4p+56L, + -0x2.e78250162b62405ad3e4bfe61bp+64L, + 0xa.5f7eef9e71ac7c80326ab4cc8cp+68L, + -0x2.83be0395e550213369924971b2p+76L, + }; + +#define NCOEFF (sizeof (lgamma_coeff) / sizeof (lgamma_coeff[0])) + +/* Polynomial approximations to (|gamma(x)|-1)(x-n)/(x-x0), where n is + the integer end-point of the half-integer interval containing x and + x0 is the zero of lgamma in that half-integer interval. Each + polynomial is expressed in terms of x-xm, where xm is the midpoint + of the interval for which the polynomial applies. */ + +static const long double poly_coeff[] = + { + /* Interval [-2.125, -2] (polynomial degree 21). */ + -0x1.0b71c5c54d42eb6c17f30b7aa9p+0L, + -0xc.73a1dc05f34951602554c6d76cp-4L, + -0x1.ec841408528b51473e6c42f1c58p-4L, + -0xe.37c9da26fc3c9a3c1844c04b84p-4L, + -0x1.03cd87c519305703b00b046ce4p-4L, + -0xe.ae9ada65e09aa7f1c817c91048p-4L, + 0x9.b11855a4864b571b6a4f571c88p-8L, + -0xe.f28c133e697a95ba2dabb97584p-4L, + 0x2.6ec14a1c586a7ddb6c4be90fe1p-4L, + -0xf.57cab973e14496f0900851c0d4p-4L, + 0x4.5b0fc25f16b0df37175495c70cp-4L, + -0xf.f50e59f1a8fb8c402091e3cd3cp-4L, + 0x6.5f5eae1681d1e50e575c3d4d36p-4L, + -0x1.0d2422dac7ea8a52db6bf0d14fp+0L, + 0x8.820008f221eae5a36e15913bacp-4L, + -0x1.1f492eec53b9481ea23a7e944ep+0L, + 0xa.cb55b4d662945e8cf1f81ee5b4p-4L, + -0x1.3616863983e131d7935700ccd48p+0L, + 0xd.43c783ebab66074d18709d5cap-4L, + -0x1.51d5dbc56bc85976871c6e51f78p+0L, + 0x1.06253af656eb6b2ed998387aabp+0L, + -0x1.7d910a0aadc63d7a1ef7690dbb8p+0L, + /* Interval [-2.25, -2.125] (polynomial degree 22). */ + -0xf.2930890d7d675a80c36afb0fd4p-4L, + -0xc.a5cfde054eab5c6770daeca684p-4L, + 0x3.9c9e0fdebb07cdf89c61d434adp-4L, + -0x1.02a5ad35605fcf4af65a67fe8a8p+0L, + 0x9.6e9b1185bb48be9de18d8bbeb8p-4L, + -0x1.4d8332f3cfbfa116fdf648372cp+0L, + 0x1.1c0c8cb4d9f4b1d495142b53ebp+0L, + -0x1.c9a6f5ae9130ccfb9b7e39136f8p+0L, + 0x1.d7e9307fd58a2e85209d0e83eap+0L, + -0x2.921cb3473d96462f22c171712fp+0L, + 0x2.e8d59113b6f3fc1ed3b556b62cp+0L, + -0x3.cbab931624e3b6cf299cea1213p+0L, + 0x4.7d9f0f05d2c4cf91e41ea1f048p+0L, + -0x5.ade9cba31affa276fe516135eep+0L, + 0x6.dc983a62cf6ddc935ae3c5b9ap+0L, + -0x8.8d9ed100b2a7813f82cbd83e3cp+0L, + 0xa.6fa0926892835a9a29c9b8db8p+0L, + -0xc.ebc90aff4ffe319d70bef0d61p+0L, + 0xf.d69cf50ab226bacece014c0b44p+0L, + -0x1.389964ac7cfef4578eec028e5c8p+4L, + 0x1.7ff0d2090164e25901f97cab3bp+4L, + -0x1.e9e6d282da6bd004619d073071p+4L, + 0x2.5d719ab6ad4be8b5c32b0fba2ap+4L, + /* Interval [-2.375, -2.25] (polynomial degree 24). */ + -0xd.7d28d505d6181218a25f31d5e4p-4L, + -0xe.69649a3040985140cdf946827cp-4L, + 0xb.0d74a2827d053a8d4459500f88p-4L, + -0x1.924b0922853617cac181b097e48p+0L, + 0x1.d49b12bccf0a568582e2dbf8ep+0L, + -0x3.0898bb7d8c4093e6360d26bbc5p+0L, + 0x4.207a6cac711cb538684f74619ep+0L, + -0x6.39ee63ea4fb1dcac86ab337e3cp+0L, + 0x8.e2e2556a797b64a1b9328a3978p+0L, + -0xd.0e83ac82552ee5596df1706ff4p+0L, + 0x1.2e4525e0ce666e48fac68ddcdep+4L, + -0x1.b8e350d6a8f6597ed2eb3c2eff8p+4L, + 0x2.805cd69b9197ee0089dd1b1c46p+4L, + -0x3.a42585423e4d00db075f2d687ep+4L, + 0x5.4b4f409f874e2a7dcd8aa4a62ap+4L, + -0x7.b3c5829962ca1b95535db9cc4ep+4L, + 0xb.33b7b928986ec6b219e2e15a98p+4L, + -0x1.04b76dec4115106bb16316d9cd8p+8L, + 0x1.7b366d8d46f179d5c5302d6534p+8L, + -0x2.2799846ddc54813d40da622b99p+8L, + 0x3.2253a862c1078a3ccabac65bebp+8L, + -0x4.8d92cebc90a4a29816f4952f4ep+8L, + 0x6.9ebb8f9d72c66c80c4f4492e7ap+8L, + -0xa.2850a483f9ba0e43f5848b5cd8p+8L, + 0xe.e1b6bdce83b27944edab8c428p+8L, + /* Interval [-2.5, -2.375] (polynomial degree 25). */ + -0xb.74ea1bcfff94b2c01afba9daa8p-4L, + -0x1.2a82bd590c37538cab143308e3p+0L, + 0x1.88020f828b966fec66b8648d16p+0L, + -0x3.32279f040eb694970e9db0308bp+0L, + 0x5.57ac82517767e68a72142041b4p+0L, + -0x9.c2aedcfe22833de438786dc658p+0L, + 0x1.12c132f1f5577f99dbfb7ecb408p+4L, + -0x1.ea94e26628a3de3557dc349db8p+4L, + 0x3.66b4ac4fa582f5cbe7e19d10c6p+4L, + -0x6.0cf746a9cf4cbcb0004cb01f66p+4L, + 0xa.c102ef2c20d5a313cbfd37f5b8p+4L, + -0x1.31ebff06e8f08f58d1c35eacfdp+8L, + 0x2.1fd6f0c0e788660ba1f1573722p+8L, + -0x3.c6d760404305e75356a86a11d6p+8L, + 0x6.b6d18e0c31a2ba4d5b5ac78676p+8L, + -0xb.efaf5426343e6b41a823ed6c44p+8L, + 0x1.53852db2fe01305b9f336d132d8p+12L, + -0x2.5b977cb2b568382e71ca93a36bp+12L, + 0x4.310d090a6119c7d85a2786a616p+12L, + -0x7.73a518387ef1d4d04917dfb25cp+12L, + 0xd.3f965798601aabd24bdaa6e68cp+12L, + -0x1.78db20b0b166480c93cf0031198p+16L, + 0x2.9be0068b65cf13bd1cf71f0eccp+16L, + -0x4.a221230466b9cd51d5b811d6b6p+16L, + 0x8.f6f8c13e2b52aa3e30a4ce6898p+16L, + -0x1.02145337ff16b44fa7c2adf7f28p+20L, + /* Interval [-2.625, -2.5] (polynomial degree 26). */ + -0x3.d10108c27ebafad533c20eac33p-4L, + 0x1.cd557caff7d2b2085f41dbec538p+0L, + 0x3.819b4856d399520dad9776ebb9p+0L, + 0x6.8505cbad03dc34c5e42e89c4b4p+0L, + 0xb.c1b2e653a9e38f82b3997134a8p+0L, + 0x1.50a53a38f1481381051544750ep+4L, + 0x2.57ae00cbe5232cbeef4e94eb2cp+4L, + 0x4.2b156301b8604db82856d5767p+4L, + 0x7.6989ed23ca3ca751fc9c32eb88p+4L, + 0xd.2dd29765579396f3a456772c44p+4L, + 0x1.76e1c3430eb8630991d1aa8a248p+8L, + 0x2.9a77bf548873743fe65d025f56p+8L, + 0x4.a0d62ed7266389753842d7be74p+8L, + 0x8.3a6184dd32d31ec73fc6f2d37cp+8L, + 0xe.a0ade153a3bf0247db49e11ae8p+8L, + 0x1.a01359fa74d4eaf8858bbc35f68p+12L, + 0x2.e3b0a32845cbc135bae4a5216cp+12L, + 0x5.23012653815fe88456170a7dc6p+12L, + 0x9.21c92dcde748ec199bc9c65738p+12L, + 0x1.03c0f3621b4c67d2d86e5e813d8p+16L, + 0x1.cdc884edcc9f5404f2708551cb8p+16L, + 0x3.35025f0b1624d1ffc86688bf03p+16L, + 0x5.b3bd9562ebf2409c5ce99929ep+16L, + 0xa.1a229b1986d9f89cb80abccfdp+16L, + 0x1.1e69136ebd520146d51837f3308p+20L, + 0x2.2d2738c72449db2524171b9271p+20L, + 0x4.036e80cc6621b836f94f426834p+20L, + /* Interval [-2.75, -2.625] (polynomial degree 24). */ + -0x6.b5d252a56e8a75458a27ed1c2ep-4L, + 0x1.28d60383da3ac721aed3c57949p+0L, + 0x1.db6513ada8a66ea77d87d9a796p+0L, + 0x2.e217118f9d348a27f7506c4b4fp+0L, + 0x4.450112c5cbf725a0fb982fc44cp+0L, + 0x6.4af99151eae7810a75a5fceac8p+0L, + 0x9.2db598b4a97a7f69ab7be31128p+0L, + 0xd.62bef9c22471f5f17955733c6p+0L, + 0x1.379f294e412bd6255506135f4a8p+4L, + 0x1.c5827349d8865d858d4f85f3c38p+4L, + 0x2.93a7e7a75b755bbea1785a1349p+4L, + 0x3.bf9bb882afed66a08b22ed7a45p+4L, + 0x5.73c737828d2044aca95fdef33ep+4L, + 0x7.ee46534920f1c81574db260f0ep+4L, + 0xb.891c6b837b513eaf1592fe78ccp+4L, + 0x1.0c775d815bf741526a3dd66ded8p+8L, + 0x1.867ee44cf11f26455a8924a56bp+8L, + 0x2.37fe968baa1018e55cae680f1dp+8L, + 0x3.3a2c557f686679eb5d8e960fd1p+8L, + 0x4.b1ba0539d4d80cc9174738b992p+8L, + 0x6.d3fd80155b6d2211956cb6bc5ap+8L, + 0x9.eb5a96b0ee3d9ca523f5fbc1fp+8L, + 0xe.6b37429c1acc7dc19ef312dda4p+8L, + 0x1.621132d6aa138b203a28e4792fp+12L, + 0x2.09610219270e2ce11a985d4d36p+12L, + /* Interval [-2.875, -2.75] (polynomial degree 23). */ + -0x8.a41b1e4f36ff88dc820815607cp-4L, + 0xc.da87d3b69dc0f2f9c6f368b8c8p-4L, + 0x1.1474ad5c36158a7bea04fd30b28p+0L, + 0x1.761ecb90c555df6555b7dbb9ce8p+0L, + 0x1.d279bff9ae291caf6c4b17497f8p+0L, + 0x2.4e5d00559a6e2b9b5d7e35b575p+0L, + 0x2.d57545a75cee8743b1ff6e22b8p+0L, + 0x3.8514eee3aac88b89d2d4ddef4ep+0L, + 0x4.5235e3b6e1891fd9c975383318p+0L, + 0x5.562acdb10eef3c14a780490e3cp+0L, + 0x6.8ec8965c76f0b261bc41b5e532p+0L, + 0x8.15251aca144a98a1e1c0981388p+0L, + 0x9.f08d56ab9e7eee9515a457214cp+0L, + 0xc.3dbbeda2620d5be4fe8621ce6p+0L, + 0xf.0f5bfd65b3feb6d745a2cdbf9cp+0L, + 0x1.28a6ccd8dd27fb90fcaa31d37dp+4L, + 0x1.6d0a3a3091c3d64cfd1a3c5769p+4L, + 0x1.c1570107e02d5ab0b8bea6d6c98p+4L, + 0x2.28fc9b295b583fa469de7acceap+4L, + 0x2.a8a4cac0217026bbdbce34f4adp+4L, + 0x3.4532c98bce75262ac0ede53edep+4L, + 0x4.062fd9ba18e00e55c25a4f0688p+4L, + 0x5.22e00e6d9846a3451fad5587f8p+4L, + 0x6.5d0f7ce92a0bf928d4a30e92c6p+4L, + /* Interval [-3, -2.875] (polynomial degree 22). */ + -0xa.046d667e468f3e44dcae1afcc8p-4L, + 0x9.70b88dcc006c214d8d996fdf7p-4L, + 0xa.a8a39421c86d3ff24931a093c4p-4L, + 0xd.2f4d1363f324da2b357c850124p-4L, + 0xd.ca9aa1a3a5c00de11bf5d7047p-4L, + 0xf.cf09c31eeb52a45dfb25e50ebcp-4L, + 0x1.04b133a39ed8a096914cc78812p+0L, + 0x1.22b547a06edda9447f516a2ee7p+0L, + 0x1.2c57fce7db86a91c8d0f12077b8p+0L, + 0x1.4aade4894708fb8b78365e9bf88p+0L, + 0x1.579c8b7b67ec5179ecc4e9c7dp+0L, + 0x1.776820e7fc7361c50e7ef40a88p+0L, + 0x1.883ab28c72ef238ada6c480ab18p+0L, + 0x1.aa2ef6e1d11b9fcea06a1dcab1p+0L, + 0x1.bf4ad50f2dd2aeb02395ea08648p+0L, + 0x1.e40206a5477615838e02279dfc8p+0L, + 0x1.fdcbcfd4b0777fb173b85d5b398p+0L, + 0x2.25e32b3b3c89e833029169a17bp+0L, + 0x2.44ce344ff0bda6570fe3d0a76dp+0L, + 0x2.70bfba6fa079faf2dbf31d2216p+0L, + 0x2.953e22a97725cc179ad21024fap+0L, + 0x2.d8ccc51524659a499eee0f267p+0L, + 0x3.080fbb09c14936c2171c8a51bcp+0L, + }; + +static const size_t poly_deg[] = + { + 21, + 22, + 24, + 25, + 26, + 24, + 23, + 22, + }; + +static const size_t poly_end[] = + { + 21, + 44, + 69, + 95, + 122, + 147, + 171, + 194, + }; + +/* Compute sin (pi * X) for -0.25 <= X <= 0.5. */ + +static long double +lg_sinpi (long double x) +{ + if (x <= 0.25L) + return __sinl (M_PIl * x); + else + return __cosl (M_PIl * (0.5L - x)); +} + +/* Compute cos (pi * X) for -0.25 <= X <= 0.5. */ + +static long double +lg_cospi (long double x) +{ + if (x <= 0.25L) + return __cosl (M_PIl * x); + else + return __sinl (M_PIl * (0.5L - x)); +} + +/* Compute cot (pi * X) for -0.25 <= X <= 0.5. */ + +static long double +lg_cotpi (long double x) +{ + return lg_cospi (x) / lg_sinpi (x); +} + +/* Compute lgamma of a negative argument -48 < X < -2, setting + *SIGNGAMP accordingly. */ + +long double +__lgamma_negl (long double x, int *signgamp) +{ + /* Determine the half-integer region X lies in, handle exact + integers and determine the sign of the result. */ + int i = __floorl (-2 * x); + if ((i & 1) == 0 && i == -2 * x) + return 1.0L / 0.0L; + long double xn = ((i & 1) == 0 ? -i / 2 : (-i - 1) / 2); + i -= 4; + *signgamp = ((i & 2) == 0 ? -1 : 1); + + SET_RESTORE_ROUNDL (FE_TONEAREST); + + /* Expand around the zero X0 = X0_HI + X0_LO. */ + long double x0_hi = lgamma_zeros[i][0], x0_lo = lgamma_zeros[i][1]; + long double xdiff = x - x0_hi - x0_lo; + + /* For arguments in the range -3 to -2, use polynomial + approximations to an adjusted version of the gamma function. */ + if (i < 2) + { + int j = __floorl (-8 * x) - 16; + long double xm = (-33 - 2 * j) * 0.0625L; + long double x_adj = x - xm; + size_t deg = poly_deg[j]; + size_t end = poly_end[j]; + long double g = poly_coeff[end]; + for (size_t j = 1; j <= deg; j++) + g = g * x_adj + poly_coeff[end - j]; + return __log1pl (g * xdiff / (x - xn)); + } + + /* The result we want is log (sinpi (X0) / sinpi (X)) + + log (gamma (1 - X0) / gamma (1 - X)). */ + long double x_idiff = fabsl (xn - x), x0_idiff = fabsl (xn - x0_hi - x0_lo); + long double log_sinpi_ratio; + if (x0_idiff < x_idiff * 0.5L) + /* Use log not log1p to avoid inaccuracy from log1p of arguments + close to -1. */ + log_sinpi_ratio = __ieee754_logl (lg_sinpi (x0_idiff) + / lg_sinpi (x_idiff)); + else + { + /* Use log1p not log to avoid inaccuracy from log of arguments + close to 1. X0DIFF2 has positive sign if X0 is further from + XN than X is from XN, negative sign otherwise. */ + long double x0diff2 = ((i & 1) == 0 ? xdiff : -xdiff) * 0.5L; + long double sx0d2 = lg_sinpi (x0diff2); + long double cx0d2 = lg_cospi (x0diff2); + log_sinpi_ratio = __log1pl (2 * sx0d2 + * (-sx0d2 + cx0d2 * lg_cotpi (x_idiff))); + } + + long double log_gamma_ratio; + long double y0 = 1 - x0_hi; + long double y0_eps = -x0_hi + (1 - y0) - x0_lo; + long double y = 1 - x; + long double y_eps = -x + (1 - y); + /* We now wish to compute LOG_GAMMA_RATIO + = log (gamma (Y0 + Y0_EPS) / gamma (Y + Y_EPS)). XDIFF + accurately approximates the difference Y0 + Y0_EPS - Y - + Y_EPS. Use Stirling's approximation. First, we may need to + adjust into the range where Stirling's approximation is + sufficiently accurate. */ + long double log_gamma_adj = 0; + if (i < 18) + { + int n_up = (19 - i) / 2; + long double ny0, ny0_eps, ny, ny_eps; + ny0 = y0 + n_up; + ny0_eps = y0 - (ny0 - n_up) + y0_eps; + y0 = ny0; + y0_eps = ny0_eps; + ny = y + n_up; + ny_eps = y - (ny - n_up) + y_eps; + y = ny; + y_eps = ny_eps; + long double prodm1 = __lgamma_productl (xdiff, y - n_up, y_eps, n_up); + log_gamma_adj = -__log1pl (prodm1); + } + long double log_gamma_high + = (xdiff * __log1pl ((y0 - e_hi - e_lo + y0_eps) / e_hi) + + (y - 0.5L + y_eps) * __log1pl (xdiff / y) + log_gamma_adj); + /* Compute the sum of (B_2k / 2k(2k-1))(Y0^-(2k-1) - Y^-(2k-1)). */ + long double y0r = 1 / y0, yr = 1 / y; + long double y0r2 = y0r * y0r, yr2 = yr * yr; + long double rdiff = -xdiff / (y * y0); + long double bterm[NCOEFF]; + long double dlast = rdiff, elast = rdiff * yr * (yr + y0r); + bterm[0] = dlast * lgamma_coeff[0]; + for (size_t j = 1; j < NCOEFF; j++) + { + long double dnext = dlast * y0r2 + elast; + long double enext = elast * yr2; + bterm[j] = dnext * lgamma_coeff[j]; + dlast = dnext; + elast = enext; + } + long double log_gamma_low = 0; + for (size_t j = 0; j < NCOEFF; j++) + log_gamma_low += bterm[NCOEFF - 1 - j]; + log_gamma_ratio = log_gamma_high + log_gamma_low; + + return log_sinpi_ratio + log_gamma_ratio; +} diff --git a/sysdeps/ieee754/ldbl-128ibm/lgamma_productl.c b/sysdeps/ieee754/ldbl-128ibm/lgamma_productl.c new file mode 100644 index 0000000000..9d9c43abe2 --- /dev/null +++ b/sysdeps/ieee754/ldbl-128ibm/lgamma_productl.c @@ -0,0 +1,38 @@ +/* Compute a product of 1 + (T/X), 1 + (T/(X+1)), .... + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_private.h> +#include <float.h> + +/* Compute the product of 1 + (T / (X + X_EPS)), 1 + (T / (X + X_EPS + + 1)), ..., 1 + (T / (X + X_EPS + N - 1)), minus 1. X is such that + all the values X + 1, ..., X + N - 1 are exactly representable, and + X_EPS / X is small enough that factors quadratic in it can be + neglected. */ + +long double +__lgamma_productl (long double t, long double x, long double x_eps, int n) +{ + long double x_full = x + x_eps; + long double ret = 0; + for (int i = 0; i < n; i++) + /* FIXME: no extra precision used. */ + ret += (t / (x_full + i)) * (1 + ret); + return ret; +} diff --git a/sysdeps/ieee754/ldbl-96/e_lgammal_r.c b/sysdeps/ieee754/ldbl-96/e_lgammal_r.c index 0cc35f9252..a80002b48a 100644 --- a/sysdeps/ieee754/ldbl-96/e_lgammal_r.c +++ b/sysdeps/ieee754/ldbl-96/e_lgammal_r.c @@ -306,6 +306,8 @@ __ieee754_lgammal_r (long double x, int *signgamp) } if (se & 0x8000) { + if (x < -2.0L && x > -33.0L) + return __lgamma_negl (x, signgamp); t = sin_pi (x); if (t == zero) return one / fabsl (t); /* -integer */ diff --git a/sysdeps/ieee754/ldbl-96/lgamma_negl.c b/sysdeps/ieee754/ldbl-96/lgamma_negl.c new file mode 100644 index 0000000000..28535a8c3d --- /dev/null +++ b/sysdeps/ieee754/ldbl-96/lgamma_negl.c @@ -0,0 +1,418 @@ +/* lgammal expanding around zeros. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <float.h> +#include <math.h> +#include <math_private.h> + +static const long double lgamma_zeros[][2] = + { + { -0x2.74ff92c01f0d82acp+0L, 0x1.360cea0e5f8ed3ccp-68L }, + { -0x2.bf6821437b201978p+0L, -0x1.95a4b4641eaebf4cp-64L }, + { -0x3.24c1b793cb35efb8p+0L, -0xb.e699ad3d9ba6545p-68L }, + { -0x3.f48e2a8f85fca17p+0L, -0xd.4561291236cc321p-68L }, + { -0x4.0a139e16656030cp+0L, -0x3.9f0b0de18112ac18p-64L }, + { -0x4.fdd5de9bbabf351p+0L, -0xd.0aa4076988501d8p-68L }, + { -0x5.021a95fc2db64328p+0L, -0x2.4c56e595394decc8p-64L }, + { -0x5.ffa4bd647d0357ep+0L, 0x2.b129d342ce12071cp-64L }, + { -0x6.005ac9625f233b6p+0L, -0x7.c2d96d16385cb868p-68L }, + { -0x6.fff2fddae1bbff4p+0L, 0x2.9d949a3dc02de0cp-64L }, + { -0x7.000cff7b7f87adf8p+0L, 0x3.b7d23246787d54d8p-64L }, + { -0x7.fffe5fe05673c3c8p+0L, -0x2.9e82b522b0ca9d3p-64L }, + { -0x8.0001a01459fc9f6p+0L, -0xc.b3cec1cec857667p-68L }, + { -0x8.ffffd1c425e81p+0L, 0x3.79b16a8b6da6181cp-64L }, + { -0x9.00002e3bb47d86dp+0L, -0x6.d843fedc351deb78p-64L }, + { -0x9.fffffb606bdfdcdp+0L, -0x6.2ae77a50547c69dp-68L }, + { -0xa.0000049f93bb992p+0L, -0x7.b45d95e15441e03p-64L }, + { -0xa.ffffff9466e9f1bp+0L, -0x3.6dacd2adbd18d05cp-64L }, + { -0xb.0000006b9915316p+0L, 0x2.69a590015bf1b414p-64L }, + { -0xb.fffffff70893874p+0L, 0x7.821be533c2c36878p-64L }, + { -0xc.00000008f76c773p+0L, -0x1.567c0f0250f38792p-64L }, + { -0xc.ffffffff4f6dcf6p+0L, -0x1.7f97a5ffc757d548p-64L }, + { -0xd.00000000b09230ap+0L, 0x3.f997c22e46fc1c9p-64L }, + { -0xd.fffffffff36345bp+0L, 0x4.61e7b5c1f62ee89p-64L }, + { -0xe.000000000c9cba5p+0L, -0x4.5e94e75ec5718f78p-64L }, + { -0xe.ffffffffff28c06p+0L, -0xc.6604ef30371f89dp-68L }, + { -0xf.0000000000d73fap+0L, 0xc.6642f1bdf07a161p-68L }, + { -0xf.fffffffffff28cp+0L, -0x6.0c6621f512e72e5p-64L }, + { -0x1.000000000000d74p+4L, 0x6.0c6625ebdb406c48p-64L }, + { -0x1.0ffffffffffff356p+4L, -0x9.c47e7a93e1c46a1p-64L }, + { -0x1.1000000000000caap+4L, 0x9.c47e7a97778935ap-64L }, + { -0x1.1fffffffffffff4cp+4L, 0x1.3c31dcbecd2f74d4p-64L }, + { -0x1.20000000000000b4p+4L, -0x1.3c31dcbeca4c3b3p-64L }, + { -0x1.2ffffffffffffff6p+4L, -0x8.5b25cbf5f545ceep-64L }, + { -0x1.300000000000000ap+4L, 0x8.5b25cbf5f547e48p-64L }, + { -0x1.4p+4L, 0x7.950ae90080894298p-64L }, + { -0x1.4p+4L, -0x7.950ae9008089414p-64L }, + { -0x1.5p+4L, 0x5.c6e3bdb73d5c63p-68L }, + { -0x1.5p+4L, -0x5.c6e3bdb73d5c62f8p-68L }, + { -0x1.6p+4L, 0x4.338e5b6dfe14a518p-72L }, + { -0x1.6p+4L, -0x4.338e5b6dfe14a51p-72L }, + { -0x1.7p+4L, 0x2.ec368262c7033b3p-76L }, + { -0x1.7p+4L, -0x2.ec368262c7033b3p-76L }, + { -0x1.8p+4L, 0x1.f2cf01972f577ccap-80L }, + { -0x1.8p+4L, -0x1.f2cf01972f577ccap-80L }, + { -0x1.9p+4L, 0x1.3f3ccdd165fa8d4ep-84L }, + { -0x1.9p+4L, -0x1.3f3ccdd165fa8d4ep-84L }, + { -0x1.ap+4L, 0xc.4742fe35272cd1cp-92L }, + { -0x1.ap+4L, -0xc.4742fe35272cd1cp-92L }, + { -0x1.bp+4L, 0x7.46ac70b733a8c828p-96L }, + { -0x1.bp+4L, -0x7.46ac70b733a8c828p-96L }, + { -0x1.cp+4L, 0x4.2862898d42174ddp-100L }, + { -0x1.cp+4L, -0x4.2862898d42174ddp-100L }, + { -0x1.dp+4L, 0x2.4b3f31686b15af58p-104L }, + { -0x1.dp+4L, -0x2.4b3f31686b15af58p-104L }, + { -0x1.ep+4L, 0x1.3932c5047d60e60cp-108L }, + { -0x1.ep+4L, -0x1.3932c5047d60e60cp-108L }, + { -0x1.fp+4L, 0xa.1a6973c1fade217p-116L }, + { -0x1.fp+4L, -0xa.1a6973c1fade217p-116L }, + { -0x2p+4L, 0x5.0d34b9e0fd6f10b8p-120L }, + { -0x2p+4L, -0x5.0d34b9e0fd6f10b8p-120L }, + { -0x2.1p+4L, 0x2.73024a9ba1aa36a8p-124L }, + }; + +static const long double e_hi = 0x2.b7e151628aed2a6cp+0L; +static const long double e_lo = -0x1.408ea77f630b0c38p-64L; + +/* Coefficients B_2k / 2k(2k-1) of x^-(2k-1) in Stirling's + approximation to lgamma function. */ + +static const long double lgamma_coeff[] = + { + 0x1.5555555555555556p-4L, + -0xb.60b60b60b60b60bp-12L, + 0x3.4034034034034034p-12L, + -0x2.7027027027027028p-12L, + 0x3.72a3c5631fe46aep-12L, + -0x7.daac36664f1f208p-12L, + 0x1.a41a41a41a41a41ap-8L, + -0x7.90a1b2c3d4e5f708p-8L, + 0x2.dfd2c703c0cfff44p-4L, + -0x1.6476701181f39edcp+0L, + 0xd.672219167002d3ap+0L, + -0x9.cd9292e6660d55bp+4L, + 0x8.911a740da740da7p+8L, + -0x8.d0cc570e255bf5ap+12L, + 0xa.8d1044d3708d1c2p+16L, + -0xe.8844d8a169abbc4p+20L, + }; + +#define NCOEFF (sizeof (lgamma_coeff) / sizeof (lgamma_coeff[0])) + +/* Polynomial approximations to (|gamma(x)|-1)(x-n)/(x-x0), where n is + the integer end-point of the half-integer interval containing x and + x0 is the zero of lgamma in that half-integer interval. Each + polynomial is expressed in terms of x-xm, where xm is the midpoint + of the interval for which the polynomial applies. */ + +static const long double poly_coeff[] = + { + /* Interval [-2.125, -2] (polynomial degree 13). */ + -0x1.0b71c5c54d42eb6cp+0L, + -0xc.73a1dc05f349517p-4L, + -0x1.ec841408528b6baep-4L, + -0xe.37c9da26fc3b492p-4L, + -0x1.03cd87c5178991ap-4L, + -0xe.ae9ada65ece2f39p-4L, + 0x9.b1185505edac18dp-8L, + -0xe.f28c130b54d3cb2p-4L, + 0x2.6ec1666cf44a63bp-4L, + -0xf.57cb2774193bbd5p-4L, + 0x4.5ae64671a41b1c4p-4L, + -0xf.f48ea8b5bd3a7cep-4L, + 0x6.7d73788a8d30ef58p-4L, + -0x1.11e0e4b506bd272ep+0L, + /* Interval [-2.25, -2.125] (polynomial degree 13). */ + -0xf.2930890d7d675a8p-4L, + -0xc.a5cfde054eab5cdp-4L, + 0x3.9c9e0fdebb0676e4p-4L, + -0x1.02a5ad35605f0d8cp+0L, + 0x9.6e9b1185d0b92edp-4L, + -0x1.4d8332f3d6a3959p+0L, + 0x1.1c0c8cacd0ced3eap+0L, + -0x1.c9a6f592a67b1628p+0L, + 0x1.d7e9476f96aa4bd6p+0L, + -0x2.921cedb488bb3318p+0L, + 0x2.e8b3fd6ca193e4c8p+0L, + -0x3.cb69d9d6628e4a2p+0L, + 0x4.95f12c73b558638p+0L, + -0x5.d392d0b97c02ab6p+0L, + /* Interval [-2.375, -2.25] (polynomial degree 14). */ + -0xd.7d28d505d618122p-4L, + -0xe.69649a304098532p-4L, + 0xb.0d74a2827d055c5p-4L, + -0x1.924b09228531c00ep+0L, + 0x1.d49b12bccee4f888p+0L, + -0x3.0898bb7dbb21e458p+0L, + 0x4.207a6cad6fa10a2p+0L, + -0x6.39ee630b46093ad8p+0L, + 0x8.e2e25211a3fb5ccp+0L, + -0xd.0e85ccd8e79c08p+0L, + 0x1.2e45882bc17f9e16p+4L, + -0x1.b8b6e841815ff314p+4L, + 0x2.7ff8bf7504fa04dcp+4L, + -0x3.c192e9c903352974p+4L, + 0x5.8040b75f4ef07f98p+4L, + /* Interval [-2.5, -2.375] (polynomial degree 15). */ + -0xb.74ea1bcfff94b2cp-4L, + -0x1.2a82bd590c375384p+0L, + 0x1.88020f828b968634p+0L, + -0x3.32279f040eb80fa4p+0L, + 0x5.57ac825175943188p+0L, + -0x9.c2aedcfe10f129ep+0L, + 0x1.12c132f2df02881ep+4L, + -0x1.ea94e26c0b6ffa6p+4L, + 0x3.66b4a8bb0290013p+4L, + -0x6.0cf735e01f5990bp+4L, + 0xa.c10a8db7ae99343p+4L, + -0x1.31edb212b315feeap+8L, + 0x2.1f478592298b3ebp+8L, + -0x3.c546da5957ace6ccp+8L, + 0x7.0e3d2a02579ba4bp+8L, + -0xc.b1ea961c39302f8p+8L, + /* Interval [-2.625, -2.5] (polynomial degree 16). */ + -0x3.d10108c27ebafad4p-4L, + 0x1.cd557caff7d2b202p+0L, + 0x3.819b4856d3995034p+0L, + 0x6.8505cbad03dd3bd8p+0L, + 0xb.c1b2e653aa0b924p+0L, + 0x1.50a53a38f05f72d6p+4L, + 0x2.57ae00cbd06efb34p+4L, + 0x4.2b1563077a577e9p+4L, + 0x7.6989ed790138a7f8p+4L, + 0xd.2dd28417b4f8406p+4L, + 0x1.76e1b71f0710803ap+8L, + 0x2.9a7a096254ac032p+8L, + 0x4.a0e6109e2a039788p+8L, + 0x8.37ea17a93c877b2p+8L, + 0xe.9506a641143612bp+8L, + 0x1.b680ed4ea386d52p+12L, + 0x3.28a2130c8de0ae84p+12L, + /* Interval [-2.75, -2.625] (polynomial degree 15). */ + -0x6.b5d252a56e8a7548p-4L, + 0x1.28d60383da3ac72p+0L, + 0x1.db6513ada8a6703ap+0L, + 0x2.e217118f9d34aa7cp+0L, + 0x4.450112c5cbd6256p+0L, + 0x6.4af99151e972f92p+0L, + 0x9.2db598b5b183cd6p+0L, + 0xd.62bef9c9adcff6ap+0L, + 0x1.379f290d743d9774p+4L, + 0x1.c58271ff823caa26p+4L, + 0x2.93a871b87a06e73p+4L, + 0x3.bf9db66103d7ec98p+4L, + 0x5.73247c111fbf197p+4L, + 0x7.ec8b9973ba27d008p+4L, + 0xb.eca5f9619b39c03p+4L, + 0x1.18f2e46411c78b1cp+8L, + /* Interval [-2.875, -2.75] (polynomial degree 14). */ + -0x8.a41b1e4f36ff88ep-4L, + 0xc.da87d3b69dc0f34p-4L, + 0x1.1474ad5c36158ad2p+0L, + 0x1.761ecb90c5553996p+0L, + 0x1.d279bff9ae234f8p+0L, + 0x2.4e5d0055a16c5414p+0L, + 0x2.d57545a783902f8cp+0L, + 0x3.8514eec263aa9f98p+0L, + 0x4.5235e338245f6fe8p+0L, + 0x5.562b1ef200b256c8p+0L, + 0x6.8ec9782b93bd565p+0L, + 0x8.14baf4836483508p+0L, + 0x9.efaf35dc712ea79p+0L, + 0xc.8431f6a226507a9p+0L, + 0xf.80358289a768401p+0L, + /* Interval [-3, -2.875] (polynomial degree 13). */ + -0xa.046d667e468f3e4p-4L, + 0x9.70b88dcc006c216p-4L, + 0xa.a8a39421c86ce9p-4L, + 0xd.2f4d1363f321e89p-4L, + 0xd.ca9aa1a3ab2f438p-4L, + 0xf.cf09c31f05d02cbp-4L, + 0x1.04b133a195686a38p+0L, + 0x1.22b54799d0072024p+0L, + 0x1.2c5802b869a36ae8p+0L, + 0x1.4aadf23055d7105ep+0L, + 0x1.5794078dd45c55d6p+0L, + 0x1.7759069da18bcf0ap+0L, + 0x1.8e672cefa4623f34p+0L, + 0x1.b2acfa32c17145e6p+0L, + }; + +static const size_t poly_deg[] = + { + 13, + 13, + 14, + 15, + 16, + 15, + 14, + 13, + }; + +static const size_t poly_end[] = + { + 13, + 27, + 42, + 58, + 75, + 91, + 106, + 120, + }; + +/* Compute sin (pi * X) for -0.25 <= X <= 0.5. */ + +static long double +lg_sinpi (long double x) +{ + if (x <= 0.25L) + return __sinl (M_PIl * x); + else + return __cosl (M_PIl * (0.5L - x)); +} + +/* Compute cos (pi * X) for -0.25 <= X <= 0.5. */ + +static long double +lg_cospi (long double x) +{ + if (x <= 0.25L) + return __cosl (M_PIl * x); + else + return __sinl (M_PIl * (0.5L - x)); +} + +/* Compute cot (pi * X) for -0.25 <= X <= 0.5. */ + +static long double +lg_cotpi (long double x) +{ + return lg_cospi (x) / lg_sinpi (x); +} + +/* Compute lgamma of a negative argument -33 < X < -2, setting + *SIGNGAMP accordingly. */ + +long double +__lgamma_negl (long double x, int *signgamp) +{ + /* Determine the half-integer region X lies in, handle exact + integers and determine the sign of the result. */ + int i = __floorl (-2 * x); + if ((i & 1) == 0 && i == -2 * x) + return 1.0L / 0.0L; + long double xn = ((i & 1) == 0 ? -i / 2 : (-i - 1) / 2); + i -= 4; + *signgamp = ((i & 2) == 0 ? -1 : 1); + + SET_RESTORE_ROUNDL (FE_TONEAREST); + + /* Expand around the zero X0 = X0_HI + X0_LO. */ + long double x0_hi = lgamma_zeros[i][0], x0_lo = lgamma_zeros[i][1]; + long double xdiff = x - x0_hi - x0_lo; + + /* For arguments in the range -3 to -2, use polynomial + approximations to an adjusted version of the gamma function. */ + if (i < 2) + { + int j = __floorl (-8 * x) - 16; + long double xm = (-33 - 2 * j) * 0.0625L; + long double x_adj = x - xm; + size_t deg = poly_deg[j]; + size_t end = poly_end[j]; + long double g = poly_coeff[end]; + for (size_t j = 1; j <= deg; j++) + g = g * x_adj + poly_coeff[end - j]; + return __log1pl (g * xdiff / (x - xn)); + } + + /* The result we want is log (sinpi (X0) / sinpi (X)) + + log (gamma (1 - X0) / gamma (1 - X)). */ + long double x_idiff = fabsl (xn - x), x0_idiff = fabsl (xn - x0_hi - x0_lo); + long double log_sinpi_ratio; + if (x0_idiff < x_idiff * 0.5L) + /* Use log not log1p to avoid inaccuracy from log1p of arguments + close to -1. */ + log_sinpi_ratio = __ieee754_logl (lg_sinpi (x0_idiff) + / lg_sinpi (x_idiff)); + else + { + /* Use log1p not log to avoid inaccuracy from log of arguments + close to 1. X0DIFF2 has positive sign if X0 is further from + XN than X is from XN, negative sign otherwise. */ + long double x0diff2 = ((i & 1) == 0 ? xdiff : -xdiff) * 0.5L; + long double sx0d2 = lg_sinpi (x0diff2); + long double cx0d2 = lg_cospi (x0diff2); + log_sinpi_ratio = __log1pl (2 * sx0d2 + * (-sx0d2 + cx0d2 * lg_cotpi (x_idiff))); + } + + long double log_gamma_ratio; + long double y0 = 1 - x0_hi; + long double y0_eps = -x0_hi + (1 - y0) - x0_lo; + long double y = 1 - x; + long double y_eps = -x + (1 - y); + /* We now wish to compute LOG_GAMMA_RATIO + = log (gamma (Y0 + Y0_EPS) / gamma (Y + Y_EPS)). XDIFF + accurately approximates the difference Y0 + Y0_EPS - Y - + Y_EPS. Use Stirling's approximation. First, we may need to + adjust into the range where Stirling's approximation is + sufficiently accurate. */ + long double log_gamma_adj = 0; + if (i < 8) + { + int n_up = (9 - i) / 2; + long double ny0, ny0_eps, ny, ny_eps; + ny0 = y0 + n_up; + ny0_eps = y0 - (ny0 - n_up) + y0_eps; + y0 = ny0; + y0_eps = ny0_eps; + ny = y + n_up; + ny_eps = y - (ny - n_up) + y_eps; + y = ny; + y_eps = ny_eps; + long double prodm1 = __lgamma_productl (xdiff, y - n_up, y_eps, n_up); + log_gamma_adj = -__log1pl (prodm1); + } + long double log_gamma_high + = (xdiff * __log1pl ((y0 - e_hi - e_lo + y0_eps) / e_hi) + + (y - 0.5L + y_eps) * __log1pl (xdiff / y) + log_gamma_adj); + /* Compute the sum of (B_2k / 2k(2k-1))(Y0^-(2k-1) - Y^-(2k-1)). */ + long double y0r = 1 / y0, yr = 1 / y; + long double y0r2 = y0r * y0r, yr2 = yr * yr; + long double rdiff = -xdiff / (y * y0); + long double bterm[NCOEFF]; + long double dlast = rdiff, elast = rdiff * yr * (yr + y0r); + bterm[0] = dlast * lgamma_coeff[0]; + for (size_t j = 1; j < NCOEFF; j++) + { + long double dnext = dlast * y0r2 + elast; + long double enext = elast * yr2; + bterm[j] = dnext * lgamma_coeff[j]; + dlast = dnext; + elast = enext; + } + long double log_gamma_low = 0; + for (size_t j = 0; j < NCOEFF; j++) + log_gamma_low += bterm[NCOEFF - 1 - j]; + log_gamma_ratio = log_gamma_high + log_gamma_low; + + return log_sinpi_ratio + log_gamma_ratio; +} diff --git a/sysdeps/ieee754/ldbl-96/lgamma_product.c b/sysdeps/ieee754/ldbl-96/lgamma_product.c new file mode 100644 index 0000000000..ada1526910 --- /dev/null +++ b/sysdeps/ieee754/ldbl-96/lgamma_product.c @@ -0,0 +1,37 @@ +/* Compute a product of 1 + (T/X), 1 + (T/(X+1)), .... + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_private.h> +#include <float.h> + +/* Compute the product of 1 + (T / (X + X_EPS)), 1 + (T / (X + X_EPS + + 1)), ..., 1 + (T / (X + X_EPS + N - 1)), minus 1. X is such that + all the values X + 1, ..., X + N - 1 are exactly representable, and + X_EPS / X is small enough that factors quadratic in it can be + neglected. */ + +double +__lgamma_product (double t, double x, double x_eps, int n) +{ + long double x_full = (long double) x + (long double) x_eps; + long double ret = 0; + for (int i = 0; i < n; i++) + ret += (t / (x_full + i)) * (1 + ret); + return ret; +} diff --git a/sysdeps/ieee754/ldbl-96/lgamma_productl.c b/sysdeps/ieee754/ldbl-96/lgamma_productl.c new file mode 100644 index 0000000000..cf0c778d93 --- /dev/null +++ b/sysdeps/ieee754/ldbl-96/lgamma_productl.c @@ -0,0 +1,82 @@ +/* Compute a product of 1 + (T/X), 1 + (T/(X+1)), .... + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_private.h> +#include <float.h> + +/* Calculate X * Y exactly and store the result in *HI + *LO. It is + given that the values are small enough that no overflow occurs and + large enough (or zero) that no underflow occurs. */ + +static void +mul_split (long double *hi, long double *lo, long double x, long double y) +{ +#ifdef __FP_FAST_FMAL + /* Fast built-in fused multiply-add. */ + *hi = x * y; + *lo = __builtin_fmal (x, y, -*hi); +#elif defined FP_FAST_FMAL + /* Fast library fused multiply-add, compiler before GCC 4.6. */ + *hi = x * y; + *lo = __fmal (x, y, -*hi); +#else + /* Apply Dekker's algorithm. */ + *hi = x * y; +# define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1) + long double x1 = x * C; + long double y1 = y * C; +# undef C + x1 = (x - x1) + x1; + y1 = (y - y1) + y1; + long double x2 = x - x1; + long double y2 = y - y1; + *lo = (((x1 * y1 - *hi) + x1 * y2) + x2 * y1) + x2 * y2; +#endif +} + +/* Compute the product of 1 + (T / (X + X_EPS)), 1 + (T / (X + X_EPS + + 1)), ..., 1 + (T / (X + X_EPS + N - 1)), minus 1. X is such that + all the values X + 1, ..., X + N - 1 are exactly representable, and + X_EPS / X is small enough that factors quadratic in it can be + neglected. */ + +long double +__lgamma_productl (long double t, long double x, long double x_eps, int n) +{ + long double ret = 0, ret_eps = 0; + for (int i = 0; i < n; i++) + { + long double xi = x + i; + long double quot = t / xi; + long double mhi, mlo; + mul_split (&mhi, &mlo, quot, xi); + long double quot_lo = (t - mhi - mlo) / xi - t * x_eps / (xi * xi); + /* We want (1 + RET + RET_EPS) * (1 + QUOT + QUOT_LO) - 1. */ + long double rhi, rlo; + mul_split (&rhi, &rlo, ret, quot); + long double rpq = ret + quot; + long double rpq_eps = (ret - rpq) + quot; + long double nret = rpq + rhi; + long double nret_eps = (rpq - nret) + rhi; + ret_eps += (rpq_eps + nret_eps + rlo + ret_eps * quot + + quot_lo + quot_lo * (ret + ret_eps)); + ret = nret; + } + return ret + ret_eps; +} diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps index 19d263e481..b4a65d04f0 100644 --- a/sysdeps/x86_64/fpu/libm-test-ulps +++ b/sysdeps/x86_64/fpu/libm-test-ulps @@ -1642,36 +1642,36 @@ ildouble: 4 ldouble: 4 Function: "gamma": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: "gamma_downward": -double: 4 +double: 3 float: 3 -idouble: 4 +idouble: 3 ifloat: 3 -ildouble: 6 -ldouble: 6 +ildouble: 3 +ldouble: 3 -Function: "gamma_towardzero": -double: 4 -float: 3 -idouble: 4 -ifloat: 3 -ildouble: 6 -ldouble: 6 +Function: "gamma_downward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 7 +ldouble: 7 -Function: "gamma_upward": -double: 4 +Function: "gamma_towardzero": +double: 5 float: 4 -idouble: 4 +idouble: 5 ifloat: 4 -ildouble: 4 -ldouble: 4 +ildouble: 7 +ldouble: 7 + +Function: "gamma_upward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 5 +ldouble: 5 Function: "hypot": double: 1 @@ -1794,36 +1794,36 @@ ildouble: 5 ldouble: 5 Function: "lgamma": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: "lgamma_downward": -double: 4 +double: 3 float: 3 -idouble: 4 +idouble: 3 ifloat: 3 -ildouble: 6 -ldouble: 6 +ildouble: 3 +ldouble: 3 -Function: "lgamma_towardzero": -double: 4 -float: 3 -idouble: 4 -ifloat: 3 -ildouble: 6 -ldouble: 6 +Function: "lgamma_downward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 7 +ldouble: 7 -Function: "lgamma_upward": -double: 4 +Function: "lgamma_towardzero": +double: 5 float: 4 -idouble: 4 +idouble: 5 ifloat: 4 -ildouble: 4 -ldouble: 4 +ildouble: 7 +ldouble: 7 + +Function: "lgamma_upward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 5 +ldouble: 5 Function: "log": float: 1 |