diff options
Diffstat (limited to 'sysdeps')
227 files changed, 3415 insertions, 5131 deletions
diff --git a/sysdeps/aarch64/fpu/Makefile b/sysdeps/aarch64/fpu/Makefile index 234a6c457c..be8541f649 100644 --- a/sysdeps/aarch64/fpu/Makefile +++ b/sysdeps/aarch64/fpu/Makefile @@ -41,8 +41,6 @@ libmvec-support = $(addsuffix f_advsimd,$(float-advsimd-funcs)) \ v_log10_data \ erf_data \ erff_data \ - sv_erf_data \ - sv_erff_data \ v_exp_tail_data \ erfc_data \ erfcf_data \ diff --git a/sysdeps/aarch64/fpu/Versions b/sysdeps/aarch64/fpu/Versions index cc15ce2d1e..015211f5f4 100644 --- a/sysdeps/aarch64/fpu/Versions +++ b/sysdeps/aarch64/fpu/Versions @@ -135,4 +135,11 @@ libmvec { _ZGVsMxv_tanh; _ZGVsMxv_tanhf; } + GLIBC_2.41 { + _ZGVnN2v_logp1; + _ZGVnN2v_logp1f; + _ZGVnN4v_logp1f; + _ZGVsMxv_logp1; + _ZGVsMxv_logp1f; + } } diff --git a/sysdeps/aarch64/fpu/acoshf_advsimd.c b/sysdeps/aarch64/fpu/acoshf_advsimd.c index 8916dcbf40..004474acf9 100644 --- a/sysdeps/aarch64/fpu/acoshf_advsimd.c +++ b/sysdeps/aarch64/fpu/acoshf_advsimd.c @@ -25,35 +25,32 @@ const static struct data { struct v_log1pf_data log1pf_consts; uint32x4_t one; - uint16x4_t thresh; -} data = { - .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE, - .one = V4 (0x3f800000), - .thresh = V4 (0x2000) /* top(asuint(SquareLim) - asuint(1)). */ -}; +} data = { .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE, .one = V4 (0x3f800000) }; + +#define Thresh vdup_n_u16 (0x2000) /* top(asuint(SquareLim) - asuint(1)). */ static float32x4_t NOINLINE VPCS_ATTR special_case (float32x4_t x, float32x4_t y, uint16x4_t special, - const struct v_log1pf_data d) + const struct v_log1pf_data *d) { return v_call_f32 (acoshf, x, log1pf_inline (y, d), vmovl_u16 (special)); } /* Vector approximation for single-precision acosh, based on log1p. Maximum error depends on WANT_SIMD_EXCEPT. With SIMD fp exceptions enabled, it - is 2.78 ULP: - __v_acoshf(0x1.07887p+0) got 0x1.ef9e9cp-3 - want 0x1.ef9ea2p-3. + is 3.00 ULP: + _ZGVnN4v_acoshf(0x1.01df3ap+0) got 0x1.ef0a82p-4 + want 0x1.ef0a7cp-4. 
With exceptions disabled, we can compute u with a shorter dependency chain, - which gives maximum error of 3.07 ULP: - __v_acoshf(0x1.01f83ep+0) got 0x1.fbc7fap-4 - want 0x1.fbc7f4p-4. */ + which gives maximum error of 3.22 ULP: + _ZGVnN4v_acoshf(0x1.007ef2p+0) got 0x1.fdcdccp-5 + want 0x1.fdcdd2p-5. */ VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (acosh) (float32x4_t x) { const struct data *d = ptr_barrier (&data); uint32x4_t ix = vreinterpretq_u32_f32 (x); - uint16x4_t special = vcge_u16 (vsubhn_u32 (ix, d->one), d->thresh); + uint16x4_t special = vcge_u16 (vsubhn_u32 (ix, d->one), Thresh); #if WANT_SIMD_EXCEPT /* Mask special lanes with 1 to side-step spurious invalid or overflow. Use @@ -64,15 +61,16 @@ VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (acosh) (float32x4_t x) float32x4_t xm1 = v_zerofy_f32 (vsubq_f32 (x, v_f32 (1)), p); float32x4_t u = vfmaq_f32 (vaddq_f32 (xm1, xm1), xm1, xm1); #else - float32x4_t xm1 = vsubq_f32 (x, v_f32 (1)); - float32x4_t u = vmulq_f32 (xm1, vaddq_f32 (x, v_f32 (1.0f))); + float32x4_t xm1 = vsubq_f32 (x, vreinterpretq_f32_u32 (d->one)); + float32x4_t u + = vmulq_f32 (xm1, vaddq_f32 (x, vreinterpretq_f32_u32 (d->one))); #endif float32x4_t y = vaddq_f32 (xm1, vsqrtq_f32 (u)); if (__glibc_unlikely (v_any_u16h (special))) - return special_case (x, y, special, d->log1pf_consts); - return log1pf_inline (y, d->log1pf_consts); + return special_case (x, y, special, &d->log1pf_consts); + return log1pf_inline (y, &d->log1pf_consts); } libmvec_hidden_def (V_NAME_F1 (acosh)) HALF_WIDTH_ALIAS_F1 (acosh) diff --git a/sysdeps/aarch64/fpu/advsimd_f32_protos.h b/sysdeps/aarch64/fpu/advsimd_f32_protos.h index 097d403ffe..5909bb4ce9 100644 --- a/sysdeps/aarch64/fpu/advsimd_f32_protos.h +++ b/sysdeps/aarch64/fpu/advsimd_f32_protos.h @@ -36,6 +36,7 @@ libmvec_hidden_proto (V_NAME_F2(hypot)); libmvec_hidden_proto (V_NAME_F1(log10)); libmvec_hidden_proto (V_NAME_F1(log1p)); libmvec_hidden_proto (V_NAME_F1(log2)); +libmvec_hidden_proto (V_NAME_F1(logp1)); 
libmvec_hidden_proto (V_NAME_F1(log)); libmvec_hidden_proto (V_NAME_F2(pow)); libmvec_hidden_proto (V_NAME_F1(sin)); diff --git a/sysdeps/aarch64/fpu/asinhf_advsimd.c b/sysdeps/aarch64/fpu/asinhf_advsimd.c index 09fd8a6143..eb789b91b6 100644 --- a/sysdeps/aarch64/fpu/asinhf_advsimd.c +++ b/sysdeps/aarch64/fpu/asinhf_advsimd.c @@ -20,16 +20,16 @@ #include "v_math.h" #include "v_log1pf_inline.h" -#define SignMask v_u32 (0x80000000) - const static struct data { struct v_log1pf_data log1pf_consts; + float32x4_t one; uint32x4_t big_bound; #if WANT_SIMD_EXCEPT uint32x4_t tiny_bound; #endif } data = { + .one = V4 (1), .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE, .big_bound = V4 (0x5f800000), /* asuint(0x1p64). */ #if WANT_SIMD_EXCEPT @@ -38,20 +38,27 @@ const static struct data }; static float32x4_t NOINLINE VPCS_ATTR -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) +special_case (float32x4_t x, uint32x4_t sign, float32x4_t y, + uint32x4_t special, const struct data *d) { - return v_call_f32 (asinhf, x, y, special); + return v_call_f32 ( + asinhf, x, + vreinterpretq_f32_u32 (veorq_u32 ( + sign, vreinterpretq_u32_f32 (log1pf_inline (y, &d->log1pf_consts)))), + special); } /* Single-precision implementation of vector asinh(x), using vector log1p. - Worst-case error is 2.66 ULP, at roughly +/-0.25: - __v_asinhf(0x1.01b04p-2) got 0x1.fe163ep-3 want 0x1.fe1638p-3. */ + Worst-case error is 2.59 ULP: + _ZGVnN4v_asinhf(0x1.d86124p-3) got 0x1.d449bep-3 + want 0x1.d449c4p-3. 
*/ VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (asinh) (float32x4_t x) { const struct data *dat = ptr_barrier (&data); - uint32x4_t iax = vbicq_u32 (vreinterpretq_u32_f32 (x), SignMask); - float32x4_t ax = vreinterpretq_f32_u32 (iax); + float32x4_t ax = vabsq_f32 (x); + uint32x4_t iax = vreinterpretq_u32_f32 (ax); uint32x4_t special = vcgeq_u32 (iax, dat->big_bound); + uint32x4_t sign = veorq_u32 (vreinterpretq_u32_f32 (x), iax); float32x4_t special_arg = x; #if WANT_SIMD_EXCEPT @@ -68,13 +75,13 @@ VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (asinh) (float32x4_t x) /* asinh(x) = log(x + sqrt(x * x + 1)). For positive x, asinh(x) = log1p(x + x * x / (1 + sqrt(x * x + 1))). */ float32x4_t d - = vaddq_f32 (v_f32 (1), vsqrtq_f32 (vfmaq_f32 (v_f32 (1), x, x))); - float32x4_t y = log1pf_inline ( - vaddq_f32 (ax, vdivq_f32 (vmulq_f32 (ax, ax), d)), dat->log1pf_consts); + = vaddq_f32 (v_f32 (1), vsqrtq_f32 (vfmaq_f32 (dat->one, ax, ax))); + float32x4_t y = vaddq_f32 (ax, vdivq_f32 (vmulq_f32 (ax, ax), d)); if (__glibc_unlikely (v_any_u32 (special))) - return special_case (special_arg, vbslq_f32 (SignMask, x, y), special); - return vbslq_f32 (SignMask, x, y); + return special_case (special_arg, sign, y, special, dat); + return vreinterpretq_f32_u32 (veorq_u32 ( + sign, vreinterpretq_u32_f32 (log1pf_inline (y, &dat->log1pf_consts)))); } libmvec_hidden_def (V_NAME_F1 (asinh)) HALF_WIDTH_ALIAS_F1 (asinh) diff --git a/sysdeps/aarch64/fpu/atanhf_advsimd.c b/sysdeps/aarch64/fpu/atanhf_advsimd.c index ae488f7b54..818b6c92ad 100644 --- a/sysdeps/aarch64/fpu/atanhf_advsimd.c +++ b/sysdeps/aarch64/fpu/atanhf_advsimd.c @@ -40,15 +40,17 @@ const static struct data #define Half v_u32 (0x3f000000) static float32x4_t NOINLINE VPCS_ATTR -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) +special_case (float32x4_t x, float32x4_t halfsign, float32x4_t y, + uint32x4_t special) { - return v_call_f32 (atanhf, x, y, special); + return v_call_f32 (atanhf, vbslq_f32 (AbsMask, x, halfsign), 
+ vmulq_f32 (halfsign, y), special); } /* Approximation for vector single-precision atanh(x) using modified log1p. - The maximum error is 3.08 ULP: - __v_atanhf(0x1.ff215p-5) got 0x1.ffcb7cp-5 - want 0x1.ffcb82p-5. */ + The maximum error is 2.93 ULP: + _ZGVnN4v_atanhf(0x1.f43d7p-5) got 0x1.f4dcfep-5 + want 0x1.f4dcf8p-5. */ VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (atanh) (float32x4_t x) { const struct data *d = ptr_barrier (&data); @@ -68,11 +70,19 @@ VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (atanh) (float32x4_t x) uint32x4_t special = vcgeq_u32 (iax, d->one); #endif - float32x4_t y = vdivq_f32 (vaddq_f32 (ax, ax), vsubq_f32 (v_f32 (1), ax)); - y = log1pf_inline (y, d->log1pf_consts); + float32x4_t y = vdivq_f32 (vaddq_f32 (ax, ax), + vsubq_f32 (vreinterpretq_f32_u32 (d->one), ax)); + y = log1pf_inline (y, &d->log1pf_consts); + /* If exceptions not required, pass ax to special-case for shorter dependency + chain. If exceptions are required ax will have been zerofied, so have to + pass x. 
*/ if (__glibc_unlikely (v_any_u32 (special))) - return special_case (x, vmulq_f32 (halfsign, y), special); +#if WANT_SIMD_EXCEPT + return special_case (x, halfsign, y, special); +#else + return special_case (ax, halfsign, y, special); +#endif return vmulq_f32 (halfsign, y); } libmvec_hidden_def (V_NAME_F1 (atanh)) diff --git a/sysdeps/aarch64/fpu/bits/math-vector.h b/sysdeps/aarch64/fpu/bits/math-vector.h index 7484150131..f295fe185d 100644 --- a/sysdeps/aarch64/fpu/bits/math-vector.h +++ b/sysdeps/aarch64/fpu/bits/math-vector.h @@ -113,6 +113,10 @@ # define __DECL_SIMD_log2 __DECL_SIMD_aarch64 # undef __DECL_SIMD_log2f # define __DECL_SIMD_log2f __DECL_SIMD_aarch64 +# undef __DECL_SIMD_logp1 +# define __DECL_SIMD_logp1 __DECL_SIMD_aarch64 +# undef __DECL_SIMD_logp1f +# define __DECL_SIMD_logp1f __DECL_SIMD_aarch64 # undef __DECL_SIMD_pow # define __DECL_SIMD_pow __DECL_SIMD_aarch64 # undef __DECL_SIMD_powf @@ -180,6 +184,7 @@ __vpcs __f32x4_t _ZGVnN4v_logf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_log10f (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_log1pf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_log2f (__f32x4_t); +__vpcs __f32x4_t _ZGVnN4v_logp1f (__f32x4_t); __vpcs __f32x4_t _ZGVnN4vv_powf (__f32x4_t, __f32x4_t); __vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_sinhf (__f32x4_t); @@ -207,6 +212,7 @@ __vpcs __f64x2_t _ZGVnN2v_log (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_log10 (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_log1p (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_log2 (__f64x2_t); +__vpcs __f64x2_t _ZGVnN2v_logp1 (__f64x2_t); __vpcs __f64x2_t _ZGVnN2vv_pow (__f64x2_t, __f64x2_t); __vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_sinh (__f64x2_t); @@ -239,6 +245,7 @@ __sv_f32_t _ZGVsMxv_logf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_log10f (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_log1pf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_log2f (__sv_f32_t, __sv_bool_t); +__sv_f32_t _ZGVsMxv_logp1f (__sv_f32_t, __sv_bool_t); __sv_f32_t 
_ZGVsMxvv_powf (__sv_f32_t, __sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_sinf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_sinhf (__sv_f32_t, __sv_bool_t); @@ -266,6 +273,7 @@ __sv_f64_t _ZGVsMxv_log (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_log10 (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_log1p (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_log2 (__sv_f64_t, __sv_bool_t); +__sv_f64_t _ZGVsMxv_logp1 (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxvv_pow (__sv_f64_t, __sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_sin (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_sinh (__sv_f64_t, __sv_bool_t); diff --git a/sysdeps/aarch64/fpu/cos_advsimd.c b/sysdeps/aarch64/fpu/cos_advsimd.c index 3924c9ce44..11a89b1530 100644 --- a/sysdeps/aarch64/fpu/cos_advsimd.c +++ b/sysdeps/aarch64/fpu/cos_advsimd.c @@ -22,7 +22,7 @@ static const struct data { float64x2_t poly[7]; - float64x2_t range_val, shift, inv_pi, half_pi, pi_1, pi_2, pi_3; + float64x2_t range_val, inv_pi, pi_1, pi_2, pi_3; } data = { /* Worst-case error is 3.3 ulp in [-pi/2, pi/2]. */ .poly = { V2 (-0x1.555555555547bp-3), V2 (0x1.1111111108a4dp-7), @@ -30,11 +30,9 @@ static const struct data V2 (-0x1.ae633919987c6p-26), V2 (0x1.60e277ae07cecp-33), V2 (-0x1.9e9540300a1p-41) }, .inv_pi = V2 (0x1.45f306dc9c883p-2), - .half_pi = V2 (0x1.921fb54442d18p+0), .pi_1 = V2 (0x1.921fb54442d18p+1), .pi_2 = V2 (0x1.1a62633145c06p-53), .pi_3 = V2 (0x1.c1cd129024e09p-106), - .shift = V2 (0x1.8p52), .range_val = V2 (0x1p23) }; @@ -68,10 +66,9 @@ float64x2_t VPCS_ATTR V_NAME_D1 (cos) (float64x2_t x) #endif /* n = rint((|x|+pi/2)/pi) - 0.5. 
*/ - n = vfmaq_f64 (d->shift, d->inv_pi, vaddq_f64 (r, d->half_pi)); - odd = vshlq_n_u64 (vreinterpretq_u64_f64 (n), 63); - n = vsubq_f64 (n, d->shift); - n = vsubq_f64 (n, v_f64 (0.5)); + n = vrndaq_f64 (vfmaq_f64 (v_f64 (0.5), r, d->inv_pi)); + odd = vshlq_n_u64 (vreinterpretq_u64_s64 (vcvtq_s64_f64 (n)), 63); + n = vsubq_f64 (n, v_f64 (0.5f)); /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ r = vfmsq_f64 (r, d->pi_1, n); diff --git a/sysdeps/aarch64/fpu/cosf_advsimd.c b/sysdeps/aarch64/fpu/cosf_advsimd.c index d0c285b03a..85a1b37373 100644 --- a/sysdeps/aarch64/fpu/cosf_advsimd.c +++ b/sysdeps/aarch64/fpu/cosf_advsimd.c @@ -22,7 +22,7 @@ static const struct data { float32x4_t poly[4]; - float32x4_t range_val, inv_pi, half_pi, shift, pi_1, pi_2, pi_3; + float32x4_t range_val, inv_pi, pi_1, pi_2, pi_3; } data = { /* 1.886 ulp error. */ .poly = { V4 (-0x1.555548p-3f), V4 (0x1.110df4p-7f), V4 (-0x1.9f42eap-13f), @@ -33,8 +33,6 @@ static const struct data .pi_3 = V4 (-0x1.ee59dap-49f), .inv_pi = V4 (0x1.45f306p-2f), - .shift = V4 (0x1.8p+23f), - .half_pi = V4 (0x1.921fb6p0f), .range_val = V4 (0x1p20f) }; @@ -69,9 +67,8 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (cos) (float32x4_t x) #endif /* n = rint((|x|+pi/2)/pi) - 0.5. */ - n = vfmaq_f32 (d->shift, d->inv_pi, vaddq_f32 (r, d->half_pi)); - odd = vshlq_n_u32 (vreinterpretq_u32_f32 (n), 31); - n = vsubq_f32 (n, d->shift); + n = vrndaq_f32 (vfmaq_f32 (v_f32 (0.5), r, d->inv_pi)); + odd = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 31); n = vsubq_f32 (n, v_f32 (0.5f)); /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). 
*/ diff --git a/sysdeps/aarch64/fpu/coshf_sve.c b/sysdeps/aarch64/fpu/coshf_sve.c index e5d8a299c6..7ad6efa0fc 100644 --- a/sysdeps/aarch64/fpu/coshf_sve.c +++ b/sysdeps/aarch64/fpu/coshf_sve.c @@ -23,37 +23,42 @@ static const struct data { struct sv_expf_data expf_consts; - uint32_t special_bound; + float special_bound; } data = { .expf_consts = SV_EXPF_DATA, /* 0x1.5a92d8p+6: expf overflows above this, so have to use special case. */ - .special_bound = 0x42ad496c, + .special_bound = 0x1.5a92d8p+6, }; static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t pg) +special_case (svfloat32_t x, svfloat32_t half_e, svfloat32_t half_over_e, + svbool_t pg) { - return sv_call_f32 (coshf, x, y, pg); + return sv_call_f32 (coshf, x, svadd_x (svptrue_b32 (), half_e, half_over_e), + pg); } /* Single-precision vector cosh, using vector expf. - Maximum error is 1.89 ULP: - _ZGVsMxv_coshf (-0x1.65898cp+6) got 0x1.f00aep+127 - want 0x1.f00adcp+127. */ + Maximum error is 2.77 ULP: + _ZGVsMxv_coshf(-0x1.5b38f4p+1) got 0x1.e45946p+2 + want 0x1.e4594cp+2. */ svfloat32_t SV_NAME_F1 (cosh) (svfloat32_t x, svbool_t pg) { const struct data *d = ptr_barrier (&data); - svfloat32_t ax = svabs_x (pg, x); - svbool_t special = svcmpge (pg, svreinterpret_u32 (ax), d->special_bound); + svbool_t special = svacge (pg, x, d->special_bound); - /* Calculate cosh by exp(x) / 2 + exp(-x) / 2. */ - svfloat32_t t = expf_inline (ax, pg, &d->expf_consts); - svfloat32_t half_t = svmul_x (pg, t, 0.5); - svfloat32_t half_over_t = svdivr_x (pg, t, 0.5); + /* Calculate cosh by exp(x) / 2 + exp(-x) / 2. + Note that x is passed to exp here, rather than |x|. This is to avoid using + destructive unary ABS for better register usage. However it means the + routine is not exactly symmetrical, as the exp helper is slightly less + accurate in the negative range. 
*/ + svfloat32_t e = expf_inline (x, pg, &d->expf_consts); + svfloat32_t half_e = svmul_x (svptrue_b32 (), e, 0.5); + svfloat32_t half_over_e = svdivr_x (pg, e, 0.5); if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svadd_x (pg, half_t, half_over_t), special); + return special_case (x, half_e, half_over_e, special); - return svadd_x (pg, half_t, half_over_t); + return svadd_x (svptrue_b32 (), half_e, half_over_e); } diff --git a/sysdeps/aarch64/fpu/erf_advsimd.c b/sysdeps/aarch64/fpu/erf_advsimd.c index 19cbb7d0f4..a48092e838 100644 --- a/sysdeps/aarch64/fpu/erf_advsimd.c +++ b/sysdeps/aarch64/fpu/erf_advsimd.c @@ -22,19 +22,21 @@ static const struct data { float64x2_t third; - float64x2_t tenth, two_over_five, two_over_fifteen; - float64x2_t two_over_nine, two_over_fortyfive; + float64x2_t tenth, two_over_five, two_over_nine; + double two_over_fifteen, two_over_fortyfive; float64x2_t max, shift; + uint64x2_t max_idx; #if WANT_SIMD_EXCEPT float64x2_t tiny_bound, huge_bound, scale_minus_one; #endif } data = { + .max_idx = V2 (768), .third = V2 (0x1.5555555555556p-2), /* used to compute 2/3 and 1/6 too. */ - .two_over_fifteen = V2 (0x1.1111111111111p-3), + .two_over_fifteen = 0x1.1111111111111p-3, .tenth = V2 (-0x1.999999999999ap-4), .two_over_five = V2 (-0x1.999999999999ap-2), .two_over_nine = V2 (-0x1.c71c71c71c71cp-3), - .two_over_fortyfive = V2 (0x1.6c16c16c16c17p-5), + .two_over_fortyfive = 0x1.6c16c16c16c17p-5, .max = V2 (5.9921875), /* 6 - 1/128. 
*/ .shift = V2 (0x1p45), #if WANT_SIMD_EXCEPT @@ -56,8 +58,8 @@ static inline struct entry lookup (uint64x2_t i) { struct entry e; - float64x2_t e1 = vld1q_f64 (&__erf_data.tab[vgetq_lane_u64 (i, 0)].erf), - e2 = vld1q_f64 (&__erf_data.tab[vgetq_lane_u64 (i, 1)].erf); + float64x2_t e1 = vld1q_f64 (&__v_erf_data.tab[vgetq_lane_u64 (i, 0)].erf), + e2 = vld1q_f64 (&__v_erf_data.tab[vgetq_lane_u64 (i, 1)].erf); e.erf = vuzp1q_f64 (e1, e2); e.scale = vuzp2q_f64 (e1, e2); return e; @@ -87,8 +89,8 @@ float64x2_t VPCS_ATTR V_NAME_D1 (erf) (float64x2_t x) float64x2_t a = vabsq_f64 (x); /* Reciprocal conditions that do not catch NaNs so they can be used in BSLs to return expected results. */ - uint64x2_t a_le_max = vcleq_f64 (a, dat->max); - uint64x2_t a_gt_max = vcgtq_f64 (a, dat->max); + uint64x2_t a_le_max = vcaleq_f64 (x, dat->max); + uint64x2_t a_gt_max = vcagtq_f64 (x, dat->max); #if WANT_SIMD_EXCEPT /* |x| huge or tiny. */ @@ -115,7 +117,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (erf) (float64x2_t x) segfault. */ uint64x2_t i = vsubq_u64 (vreinterpretq_u64_f64 (z), vreinterpretq_u64_f64 (shift)); - i = vbslq_u64 (a_le_max, i, v_u64 (768)); + i = vbslq_u64 (a_le_max, i, dat->max_idx); struct entry e = lookup (i); float64x2_t r = vsubq_f64 (z, shift); @@ -125,14 +127,19 @@ float64x2_t VPCS_ATTR V_NAME_D1 (erf) (float64x2_t x) float64x2_t d2 = vmulq_f64 (d, d); float64x2_t r2 = vmulq_f64 (r, r); + float64x2_t two_over_fifteen_and_fortyfive + = vld1q_f64 (&dat->two_over_fifteen); + /* poly (d, r) = 1 + p1(r) * d + p2(r) * d^2 + ... + p5(r) * d^5. 
*/ float64x2_t p1 = r; float64x2_t p2 = vfmsq_f64 (dat->third, r2, vaddq_f64 (dat->third, dat->third)); float64x2_t p3 = vmulq_f64 (r, vfmaq_f64 (v_f64 (-0.5), r2, dat->third)); - float64x2_t p4 = vfmaq_f64 (dat->two_over_five, r2, dat->two_over_fifteen); + float64x2_t p4 = vfmaq_laneq_f64 (dat->two_over_five, r2, + two_over_fifteen_and_fortyfive, 0); p4 = vfmsq_f64 (dat->tenth, r2, p4); - float64x2_t p5 = vfmaq_f64 (dat->two_over_nine, r2, dat->two_over_fortyfive); + float64x2_t p5 = vfmaq_laneq_f64 (dat->two_over_nine, r2, + two_over_fifteen_and_fortyfive, 1); p5 = vmulq_f64 (r, vfmaq_f64 (vmulq_f64 (v_f64 (0.5), dat->third), r2, p5)); float64x2_t p34 = vfmaq_f64 (p3, d, p4); diff --git a/sysdeps/aarch64/fpu/erf_data.c b/sysdeps/aarch64/fpu/erf_data.c index 6d2dcd235c..ea01fad7ca 100644 --- a/sysdeps/aarch64/fpu/erf_data.c +++ b/sysdeps/aarch64/fpu/erf_data.c @@ -19,14 +19,14 @@ #include "vecmath_config.h" -/* Lookup table used in erf. +/* Lookup table used in vector erf. For each possible rounded input r (multiples of 1/128), between r = 0.0 and r = 6.0 (769 values): - - the first entry __erff_data.tab.erf contains the values of erf(r), - - the second entry __erff_data.tab.scale contains the values of + - the first entry __v_erff_data.tab.erf contains the values of erf(r), + - the second entry __v_erff_data.tab.scale contains the values of 2/sqrt(pi)*exp(-r^2). Note that indices 0 and 1 are never hit by the algorithm, since lookup is performed only for x >= 1/64-1/512. 
*/ -const struct erf_data __erf_data = { +const struct v_erf_data __v_erf_data = { .tab = { { 0x0.0000000000000p+0, 0x1.20dd750429b6dp+0 }, { 0x1.20dbf3deb1340p-7, 0x1.20d8f1975c85dp+0 }, { 0x1.20d77083f17a0p-6, 0x1.20cb67bd452c7p+0 }, diff --git a/sysdeps/aarch64/fpu/erf_sve.c b/sysdeps/aarch64/fpu/erf_sve.c index 7d51417406..671d55a02b 100644 --- a/sysdeps/aarch64/fpu/erf_sve.c +++ b/sysdeps/aarch64/fpu/erf_sve.c @@ -67,14 +67,16 @@ svfloat64_t SV_NAME_D1 (erf) (svfloat64_t x, const svbool_t pg) svfloat64_t a = svabs_x (pg, x); svfloat64_t shift = sv_f64 (dat->shift); svfloat64_t z = svadd_x (pg, a, shift); - svuint64_t i - = svsub_x (pg, svreinterpret_u64 (z), svreinterpret_u64 (shift)); + svuint64_t i = svand_x (pg, svreinterpret_u64 (z), 0xfff); + i = svadd_x (pg, i, i); /* Lookup without shortcut for small values but with predicate to avoid segfault for large values and NaNs. */ svfloat64_t r = svsub_x (pg, z, shift); - svfloat64_t erfr = svld1_gather_index (a_lt_max, __sv_erf_data.erf, i); - svfloat64_t scale = svld1_gather_index (a_lt_max, __sv_erf_data.scale, i); + svfloat64_t erfr + = svld1_gather_index (a_lt_max, &__v_erf_data.tab[0].erf, i); + svfloat64_t scale + = svld1_gather_index (a_lt_max, &__v_erf_data.tab[0].scale, i); /* erf(x) ~ erf(r) + scale * d * poly (r, d). */ svfloat64_t d = svsub_x (pg, a, r); diff --git a/sysdeps/aarch64/fpu/erfc_advsimd.c b/sysdeps/aarch64/fpu/erfc_advsimd.c index f1b3bfe830..d05eac61a2 100644 --- a/sysdeps/aarch64/fpu/erfc_advsimd.c +++ b/sysdeps/aarch64/fpu/erfc_advsimd.c @@ -24,8 +24,8 @@ static const struct data { uint64x2_t offset, table_scale; float64x2_t max, shift; - float64x2_t p20, p40, p41, p42; - float64x2_t p51, p52; + float64x2_t p20, p40, p41, p51; + double p42, p52; double qr5[2], qr6[2], qr7[2], qr8[2], qr9[2]; #if WANT_SIMD_EXCEPT float64x2_t uflow_bound; @@ -41,9 +41,9 @@ static const struct data .p20 = V2 (0x1.5555555555555p-2), /* 1/3, used to compute 2/3 and 1/6. 
*/ .p40 = V2 (-0x1.999999999999ap-4), /* 1/10. */ .p41 = V2 (-0x1.999999999999ap-2), /* 2/5. */ - .p42 = V2 (0x1.1111111111111p-3), /* 2/15. */ + .p42 = 0x1.1111111111111p-3, /* 2/15. */ .p51 = V2 (-0x1.c71c71c71c71cp-3), /* 2/9. */ - .p52 = V2 (0x1.6c16c16c16c17p-5), /* 2/45. */ + .p52 = 0x1.6c16c16c16c17p-5, /* 2/45. */ /* Qi = (i+1) / i, Ri = -2 * i / ((i+1)*(i+2)), for i = 5, ..., 9. */ .qr5 = { 0x1.3333333333333p0, -0x1.e79e79e79e79ep-3 }, .qr6 = { 0x1.2aaaaaaaaaaabp0, -0x1.b6db6db6db6dbp-3 }, @@ -69,9 +69,9 @@ lookup (uint64x2_t i) { struct entry e; float64x2_t e1 - = vld1q_f64 (&__erfc_data.tab[vgetq_lane_u64 (i, 0) - Off].erfc); + = vld1q_f64 (&__v_erfc_data.tab[vgetq_lane_u64 (i, 0) - Off].erfc); float64x2_t e2 - = vld1q_f64 (&__erfc_data.tab[vgetq_lane_u64 (i, 1) - Off].erfc); + = vld1q_f64 (&__v_erfc_data.tab[vgetq_lane_u64 (i, 1) - Off].erfc); e.erfc = vuzp1q_f64 (e1, e2); e.scale = vuzp2q_f64 (e1, e2); return e; @@ -157,9 +157,10 @@ float64x2_t V_NAME_D1 (erfc) (float64x2_t x) float64x2_t p1 = r; float64x2_t p2 = vfmsq_f64 (dat->p20, r2, vaddq_f64 (dat->p20, dat->p20)); float64x2_t p3 = vmulq_f64 (r, vfmaq_f64 (v_f64 (-0.5), r2, dat->p20)); - float64x2_t p4 = vfmaq_f64 (dat->p41, r2, dat->p42); + float64x2_t p42_p52 = vld1q_f64 (&dat->p42); + float64x2_t p4 = vfmaq_laneq_f64 (dat->p41, r2, p42_p52, 0); p4 = vfmsq_f64 (dat->p40, r2, p4); - float64x2_t p5 = vfmaq_f64 (dat->p51, r2, dat->p52); + float64x2_t p5 = vfmaq_laneq_f64 (dat->p51, r2, p42_p52, 1); p5 = vmulq_f64 (r, vfmaq_f64 (vmulq_f64 (v_f64 (0.5), dat->p20), r2, p5)); /* Compute p_i using recurrence relation: p_{i+2} = (p_i + r * Q_{i+1} * p_{i+1}) * R_{i+1}. */ diff --git a/sysdeps/aarch64/fpu/erfc_data.c b/sysdeps/aarch64/fpu/erfc_data.c index 76a94e4681..8dc6a8c42c 100644 --- a/sysdeps/aarch64/fpu/erfc_data.c +++ b/sysdeps/aarch64/fpu/erfc_data.c @@ -19,14 +19,14 @@ #include "vecmath_config.h" -/* Lookup table used in erfc. +/* Lookup table used in vector erfc. 
For each possible rounded input r (multiples of 1/128), between r = 0.0 and r = ~27.0 (3488 values): - - the first entry __erfc_data.tab.erfc contains the values of erfc(r), - - the second entry __erfc_data.tab.scale contains the values of + - the first entry __v_erfc_data.tab.erfc contains the values of erfc(r), + - the second entry __v_erfc_data.tab.scale contains the values of 2/sqrt(pi)*exp(-r^2). Both values may go into subnormal range, therefore they are scaled by a large enough value 2^128 (fits in 8bit). */ -const struct erfc_data __erfc_data = { +const struct v_erfc_data __v_erfc_data = { .tab = { { 0x1p128, 0x1.20dd750429b6dp128 }, { 0x1.fb7c9030853b3p127, 0x1.20d8f1975c85dp128 }, { 0x1.f6f9447be0743p127, 0x1.20cb67bd452c7p128 }, diff --git a/sysdeps/aarch64/fpu/erfc_sve.c b/sysdeps/aarch64/fpu/erfc_sve.c index c17d3e4484..703926ee41 100644 --- a/sysdeps/aarch64/fpu/erfc_sve.c +++ b/sysdeps/aarch64/fpu/erfc_sve.c @@ -104,7 +104,7 @@ svfloat64_t SV_NAME_D1 (erfc) (svfloat64_t x, const svbool_t pg) /* Lookup erfc(r) and 2/sqrt(pi)*exp(-r^2) in tables. 
*/ i = svadd_x (pg, i, i); - const float64_t *p = &__erfc_data.tab[0].erfc - 2 * dat->off_arr; + const float64_t *p = &__v_erfc_data.tab[0].erfc - 2 * dat->off_arr; svfloat64_t erfcr = svld1_gather_index (pg, p, i); svfloat64_t scale = svld1_gather_index (pg, p + 1, i); diff --git a/sysdeps/aarch64/fpu/erfcf_advsimd.c b/sysdeps/aarch64/fpu/erfcf_advsimd.c index ca5bc3ab33..59b0b0d64b 100644 --- a/sysdeps/aarch64/fpu/erfcf_advsimd.c +++ b/sysdeps/aarch64/fpu/erfcf_advsimd.c @@ -62,13 +62,13 @@ lookup (uint32x4_t i) { struct entry e; float32x2_t t0 - = vld1_f32 (&__erfcf_data.tab[vgetq_lane_u32 (i, 0) - Off].erfc); + = vld1_f32 (&__v_erfcf_data.tab[vgetq_lane_u32 (i, 0) - Off].erfc); float32x2_t t1 - = vld1_f32 (&__erfcf_data.tab[vgetq_lane_u32 (i, 1) - Off].erfc); + = vld1_f32 (&__v_erfcf_data.tab[vgetq_lane_u32 (i, 1) - Off].erfc); float32x2_t t2 - = vld1_f32 (&__erfcf_data.tab[vgetq_lane_u32 (i, 2) - Off].erfc); + = vld1_f32 (&__v_erfcf_data.tab[vgetq_lane_u32 (i, 2) - Off].erfc); float32x2_t t3 - = vld1_f32 (&__erfcf_data.tab[vgetq_lane_u32 (i, 3) - Off].erfc); + = vld1_f32 (&__v_erfcf_data.tab[vgetq_lane_u32 (i, 3) - Off].erfc); float32x4_t e1 = vcombine_f32 (t0, t1); float32x4_t e2 = vcombine_f32 (t2, t3); e.erfc = vuzp1q_f32 (e1, e2); diff --git a/sysdeps/aarch64/fpu/erfcf_data.c b/sysdeps/aarch64/fpu/erfcf_data.c index 77fb889a78..d45087bbb9 100644 --- a/sysdeps/aarch64/fpu/erfcf_data.c +++ b/sysdeps/aarch64/fpu/erfcf_data.c @@ -19,14 +19,14 @@ #include "vecmath_config.h" -/* Lookup table used in erfcf. +/* Lookup table used in vector erfcf. For each possible rounded input r (multiples of 1/64), between r = 0.0 and r = 10.0625 (645 values): - - the first entry __erfcf_data.tab.erfc contains the values of erfc(r), - - the second entry __erfcf_data.tab.scale contains the values of + - the first entry __v_erfcf_data.tab.erfc contains the values of erfc(r), + - the second entry __v_erfcf_data.tab.scale contains the values of 2/sqrt(pi)*exp(-r^2). 
Both values may go into subnormal range, therefore they are scaled by a large enough value 2^47 (fits in 8 bits). */ -const struct erfcf_data __erfcf_data = { +const struct v_erfcf_data __v_erfcf_data = { .tab = { { 0x1p47, 0x1.20dd76p47 }, { 0x1.f6f944p46, 0x1.20cb68p47 }, { 0x1.edf3aap46, 0x1.209546p47 }, diff --git a/sysdeps/aarch64/fpu/erfcf_sve.c b/sysdeps/aarch64/fpu/erfcf_sve.c index 48d1677eb4..ecacb933ac 100644 --- a/sysdeps/aarch64/fpu/erfcf_sve.c +++ b/sysdeps/aarch64/fpu/erfcf_sve.c @@ -77,7 +77,7 @@ svfloat32_t SV_NAME_F1 (erfc) (svfloat32_t x, const svbool_t pg) /* Lookup erfc(r) and 2/sqrt(pi)*exp(-r^2) in tables. */ i = svmul_x (pg, i, 2); - const float32_t *p = &__erfcf_data.tab[0].erfc - 2 * dat->off_arr; + const float32_t *p = &__v_erfcf_data.tab[0].erfc - 2 * dat->off_arr; svfloat32_t erfcr = svld1_gather_index (pg, p, i); svfloat32_t scale = svld1_gather_index (pg, p + 1, i); diff --git a/sysdeps/aarch64/fpu/erff_advsimd.c b/sysdeps/aarch64/fpu/erff_advsimd.c index f2fe6ff236..db39e789b6 100644 --- a/sysdeps/aarch64/fpu/erff_advsimd.c +++ b/sysdeps/aarch64/fpu/erff_advsimd.c @@ -47,10 +47,10 @@ static inline struct entry lookup (uint32x4_t i) { struct entry e; - float32x2_t t0 = vld1_f32 (&__erff_data.tab[vgetq_lane_u32 (i, 0)].erf); - float32x2_t t1 = vld1_f32 (&__erff_data.tab[vgetq_lane_u32 (i, 1)].erf); - float32x2_t t2 = vld1_f32 (&__erff_data.tab[vgetq_lane_u32 (i, 2)].erf); - float32x2_t t3 = vld1_f32 (&__erff_data.tab[vgetq_lane_u32 (i, 3)].erf); + float32x2_t t0 = vld1_f32 (&__v_erff_data.tab[vgetq_lane_u32 (i, 0)].erf); + float32x2_t t1 = vld1_f32 (&__v_erff_data.tab[vgetq_lane_u32 (i, 1)].erf); + float32x2_t t2 = vld1_f32 (&__v_erff_data.tab[vgetq_lane_u32 (i, 2)].erf); + float32x2_t t3 = vld1_f32 (&__v_erff_data.tab[vgetq_lane_u32 (i, 3)].erf); float32x4_t e1 = vcombine_f32 (t0, t1); float32x4_t e2 = vcombine_f32 (t2, t3); e.erf = vuzp1q_f32 (e1, e2); diff --git a/sysdeps/aarch64/fpu/erff_data.c b/sysdeps/aarch64/fpu/erff_data.c 
index 9a32940915..da38aed205 100644 --- a/sysdeps/aarch64/fpu/erff_data.c +++ b/sysdeps/aarch64/fpu/erff_data.c @@ -19,14 +19,14 @@ #include "vecmath_config.h" -/* Lookup table used in erff. +/* Lookup table used in vector erff. For each possible rounded input r (multiples of 1/128), between r = 0.0 and r = 4.0 (513 values): - - the first entry __erff_data.tab.erf contains the values of erf(r), - - the second entry __erff_data.tab.scale contains the values of + - the first entry __v_erff_data.tab.erf contains the values of erf(r), + - the second entry __v_erff_data.tab.scale contains the values of 2/sqrt(pi)*exp(-r^2). Note that indices 0 and 1 are never hit by the algorithm, since lookup is performed only for x >= 1/64-1/512. */ -const struct erff_data __erff_data = { +const struct v_erff_data __v_erff_data = { .tab = { { 0x0.000000p+0, 0x1.20dd76p+0 }, { 0x1.20dbf4p-7, 0x1.20d8f2p+0 }, { 0x1.20d770p-6, 0x1.20cb68p+0 }, diff --git a/sysdeps/aarch64/fpu/erff_sve.c b/sysdeps/aarch64/fpu/erff_sve.c index 38f00db9be..0e382eb09a 100644 --- a/sysdeps/aarch64/fpu/erff_sve.c +++ b/sysdeps/aarch64/fpu/erff_sve.c @@ -62,18 +62,17 @@ svfloat32_t SV_NAME_F1 (erf) (svfloat32_t x, const svbool_t pg) svfloat32_t shift = sv_f32 (dat->shift); svfloat32_t z = svadd_x (pg, a, shift); - svuint32_t i - = svsub_x (pg, svreinterpret_u32 (z), svreinterpret_u32 (shift)); - - /* Saturate lookup index. */ - i = svsel (a_ge_max, sv_u32 (512), i); + svuint32_t i = svand_x (pg, svreinterpret_u32 (z), 0xfff); + i = svadd_x (pg, i, i); /* r and erf(r) set to 0 for |x| below min. */ svfloat32_t r = svsub_z (a_gt_min, z, shift); - svfloat32_t erfr = svld1_gather_index (a_gt_min, __sv_erff_data.erf, i); + svfloat32_t erfr + = svld1_gather_index (a_gt_min, &__v_erff_data.tab[0].erf, i); /* scale set to 2/sqrt(pi) for |x| below min. 
*/ - svfloat32_t scale = svld1_gather_index (a_gt_min, __sv_erff_data.scale, i); + svfloat32_t scale + = svld1_gather_index (a_gt_min, &__v_erff_data.tab[0].scale, i); scale = svsel (a_gt_min, scale, sv_f32 (dat->scale)); /* erf(x) ~ erf(r) + scale * d * (1 - r * d + 1/3 * d^2). */ diff --git a/sysdeps/aarch64/fpu/exp10f_sve.c b/sysdeps/aarch64/fpu/exp10f_sve.c index e09b2f3b27..8aa3fa9c43 100644 --- a/sysdeps/aarch64/fpu/exp10f_sve.c +++ b/sysdeps/aarch64/fpu/exp10f_sve.c @@ -18,74 +18,83 @@ <https://www.gnu.org/licenses/>. */ #include "sv_math.h" -#include "poly_sve_f32.h" -/* For x < -SpecialBound, the result is subnormal and not handled correctly by +/* For x < -Thres, the result is subnormal and not handled correctly by FEXPA. */ -#define SpecialBound 37.9 +#define Thres 37.9 static const struct data { - float poly[5]; - float shift, log10_2, log2_10_hi, log2_10_lo, special_bound; + float log2_10_lo, c0, c2, c4; + float c1, c3, log10_2; + float shift, log2_10_hi, thres; } data = { /* Coefficients generated using Remez algorithm with minimisation of relative error. rel error: 0x1.89dafa3p-24 abs error: 0x1.167d55p-23 in [-log10(2)/2, log10(2)/2] maxerr: 0.52 +0.5 ulp. */ - .poly = { 0x1.26bb16p+1f, 0x1.5350d2p+1f, 0x1.04744ap+1f, 0x1.2d8176p+0f, - 0x1.12b41ap-1f }, + .c0 = 0x1.26bb16p+1f, + .c1 = 0x1.5350d2p+1f, + .c2 = 0x1.04744ap+1f, + .c3 = 0x1.2d8176p+0f, + .c4 = 0x1.12b41ap-1f, /* 1.5*2^17 + 127, a shift value suitable for FEXPA. */ - .shift = 0x1.903f8p17f, + .shift = 0x1.803f8p17f, .log10_2 = 0x1.a934fp+1, .log2_10_hi = 0x1.344136p-2, .log2_10_lo = -0x1.ec10cp-27, - .special_bound = SpecialBound, + .thres = Thres, }; -static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) +static inline svfloat32_t +sv_exp10f_inline (svfloat32_t x, const svbool_t pg, const struct data *d) { - return sv_call_f32 (exp10f, x, y, special); -} - -/* Single-precision SVE exp10f routine. Implements the same algorithm - as AdvSIMD exp10f. 
- Worst case error is 1.02 ULPs. - _ZGVsMxv_exp10f(-0x1.040488p-4) got 0x1.ba5f9ep-1 - want 0x1.ba5f9cp-1. */ -svfloat32_t SV_NAME_F1 (exp10) (svfloat32_t x, const svbool_t pg) -{ - const struct data *d = ptr_barrier (&data); /* exp10(x) = 2^(n/N) * 10^r = 2^n * (1 + poly (r)), with poly(r) in [1/sqrt(2), sqrt(2)] and x = r + n * log10(2) / N, with r in [-log10(2)/2N, log10(2)/2N]. */ - /* Load some constants in quad-word chunks to minimise memory access (last - lane is wasted). */ - svfloat32_t log10_2_and_inv = svld1rq (svptrue_b32 (), &d->log10_2); + svfloat32_t lane_consts = svld1rq (svptrue_b32 (), &d->log2_10_lo); /* n = round(x/(log10(2)/N)). */ svfloat32_t shift = sv_f32 (d->shift); - svfloat32_t z = svmla_lane (shift, x, log10_2_and_inv, 0); - svfloat32_t n = svsub_x (pg, z, shift); + svfloat32_t z = svmad_x (pg, sv_f32 (d->log10_2), x, shift); + svfloat32_t n = svsub_x (svptrue_b32 (), z, shift); /* r = x - n*log10(2)/N. */ - svfloat32_t r = svmls_lane (x, n, log10_2_and_inv, 1); - r = svmls_lane (r, n, log10_2_and_inv, 2); + svfloat32_t r = svmsb_x (pg, sv_f32 (d->log2_10_hi), n, x); + r = svmls_lane (r, n, lane_consts, 0); - svbool_t special = svacgt (pg, x, d->special_bound); svfloat32_t scale = svexpa (svreinterpret_u32 (z)); /* Polynomial evaluation: poly(r) ~ exp10(r)-1. 
*/ - svfloat32_t r2 = svmul_x (pg, r, r); - svfloat32_t poly - = svmla_x (pg, svmul_x (pg, r, d->poly[0]), - sv_pairwise_poly_3_f32_x (pg, r, r2, d->poly + 1), r2); - - if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svmla_x (pg, scale, scale, poly), special); + svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, lane_consts, 2); + svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, lane_consts, 3); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); + svfloat32_t p14 = svmla_x (pg, p12, p34, r2); + svfloat32_t p0 = svmul_lane (r, lane_consts, 1); + svfloat32_t poly = svmla_x (pg, p0, r2, p14); return svmla_x (pg, scale, scale, poly); } + +static svfloat32_t NOINLINE +special_case (svfloat32_t x, svbool_t special, const struct data *d) +{ + return sv_call_f32 (exp10f, x, sv_exp10f_inline (x, svptrue_b32 (), d), + special); +} + +/* Single-precision SVE exp10f routine. Implements the same algorithm + as AdvSIMD exp10f. + Worst case error is 1.02 ULPs. + _ZGVsMxv_exp10f(-0x1.040488p-4) got 0x1.ba5f9ep-1 + want 0x1.ba5f9cp-1. */ +svfloat32_t SV_NAME_F1 (exp10) (svfloat32_t x, const svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + svbool_t special = svacgt (pg, x, d->thres); + if (__glibc_unlikely (svptest_any (special, special))) + return special_case (x, special, d); + return sv_exp10f_inline (x, pg, d); +} diff --git a/sysdeps/aarch64/fpu/exp2f_sve.c b/sysdeps/aarch64/fpu/exp2f_sve.c index 8a686e3e05..c6216bed9e 100644 --- a/sysdeps/aarch64/fpu/exp2f_sve.c +++ b/sysdeps/aarch64/fpu/exp2f_sve.c @@ -24,54 +24,64 @@ static const struct data { - float poly[5]; + float c0, c2, c4, c1, c3; float shift, thres; } data = { - /* Coefficients copied from the polynomial in AdvSIMD variant, reversed for - compatibility with polynomial helpers. */ - .poly = { 0x1.62e422p-1f, 0x1.ebf9bcp-3f, 0x1.c6bd32p-5f, 0x1.3ce9e4p-7f, - 0x1.59977ap-10f }, + /* Coefficients copied from the polynomial in AdvSIMD variant. 
*/ + .c0 = 0x1.62e422p-1f, + .c1 = 0x1.ebf9bcp-3f, + .c2 = 0x1.c6bd32p-5f, + .c3 = 0x1.3ce9e4p-7f, + .c4 = 0x1.59977ap-10f, /* 1.5*2^17 + 127. */ - .shift = 0x1.903f8p17f, + .shift = 0x1.803f8p17f, /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled correctly by FEXPA. */ .thres = Thres, }; -static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) -{ - return sv_call_f32 (exp2f, x, y, special); -} - -/* Single-precision SVE exp2f routine. Implements the same algorithm - as AdvSIMD exp2f. - Worst case error is 1.04 ULPs. - SV_NAME_F1 (exp2)(0x1.943b9p-1) got 0x1.ba7eb2p+0 - want 0x1.ba7ebp+0. */ -svfloat32_t SV_NAME_F1 (exp2) (svfloat32_t x, const svbool_t pg) +static inline svfloat32_t +sv_exp2f_inline (svfloat32_t x, const svbool_t pg, const struct data *d) { - const struct data *d = ptr_barrier (&data); /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] x = n + r, with r in [-1/2, 1/2]. */ - svfloat32_t shift = sv_f32 (d->shift); - svfloat32_t z = svadd_x (pg, x, shift); - svfloat32_t n = svsub_x (pg, z, shift); - svfloat32_t r = svsub_x (pg, x, n); + svfloat32_t z = svadd_x (svptrue_b32 (), x, d->shift); + svfloat32_t n = svsub_x (svptrue_b32 (), z, d->shift); + svfloat32_t r = svsub_x (svptrue_b32 (), x, n); - svbool_t special = svacgt (pg, x, d->thres); svfloat32_t scale = svexpa (svreinterpret_u32 (z)); /* Polynomial evaluation: poly(r) ~ exp2(r)-1. Evaluate polynomial use hybrid scheme - offset ESTRIN by 1 for coefficients 1 to 4, and apply most significant coefficient directly. 
*/ - svfloat32_t r2 = svmul_x (pg, r, r); - svfloat32_t p14 = sv_pairwise_poly_3_f32_x (pg, r, r2, d->poly + 1); - svfloat32_t p0 = svmul_x (pg, r, d->poly[0]); + svfloat32_t even_coeffs = svld1rq (svptrue_b32 (), &d->c0); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); + svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, even_coeffs, 1); + svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, even_coeffs, 2); + svfloat32_t p14 = svmla_x (pg, p12, r2, p34); + svfloat32_t p0 = svmul_lane (r, even_coeffs, 0); svfloat32_t poly = svmla_x (pg, p0, r2, p14); - if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svmla_x (pg, scale, scale, poly), special); - return svmla_x (pg, scale, scale, poly); } + +static svfloat32_t NOINLINE +special_case (svfloat32_t x, svbool_t special, const struct data *d) +{ + return sv_call_f32 (exp2f, x, sv_exp2f_inline (x, svptrue_b32 (), d), + special); +} + +/* Single-precision SVE exp2f routine. Implements the same algorithm + as AdvSIMD exp2f. + Worst case error is 1.04 ULPs. + _ZGVsMxv_exp2f(-0x1.af994ap-3) got 0x1.ba6a66p-1 + want 0x1.ba6a64p-1. */ +svfloat32_t SV_NAME_F1 (exp2) (svfloat32_t x, const svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + svbool_t special = svacgt (pg, x, d->thres); + if (__glibc_unlikely (svptest_any (special, special))) + return special_case (x, special, d); + return sv_exp2f_inline (x, pg, d); +} diff --git a/sysdeps/aarch64/fpu/expf_advsimd.c b/sysdeps/aarch64/fpu/expf_advsimd.c index 99d2e647aa..5c9cb72620 100644 --- a/sysdeps/aarch64/fpu/expf_advsimd.c +++ b/sysdeps/aarch64/fpu/expf_advsimd.c @@ -22,7 +22,7 @@ static const struct data { float32x4_t poly[5]; - float32x4_t shift, inv_ln2, ln2_hi, ln2_lo; + float32x4_t inv_ln2, ln2_hi, ln2_lo; uint32x4_t exponent_bias; #if !WANT_SIMD_EXCEPT float32x4_t special_bound, scale_thresh; @@ -31,7 +31,6 @@ static const struct data /* maxerr: 1.45358 +0.5 ulp. 
*/ .poly = { V4 (0x1.0e4020p-7f), V4 (0x1.573e2ep-5f), V4 (0x1.555e66p-3f), V4 (0x1.fffdb6p-2f), V4 (0x1.ffffecp-1f) }, - .shift = V4 (0x1.8p23f), .inv_ln2 = V4 (0x1.715476p+0f), .ln2_hi = V4 (0x1.62e4p-1f), .ln2_lo = V4 (0x1.7f7d1cp-20f), @@ -85,7 +84,7 @@ special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1, float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - float32x4_t n, r, r2, scale, p, q, poly, z; + float32x4_t n, r, r2, scale, p, q, poly; uint32x4_t cmp, e; #if WANT_SIMD_EXCEPT @@ -104,11 +103,10 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp) (float32x4_t x) /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] x = ln2*n + r, with r in [-ln2/2, ln2/2]. */ - z = vfmaq_f32 (d->shift, x, d->inv_ln2); - n = vsubq_f32 (z, d->shift); + n = vrndaq_f32 (vmulq_f32 (x, d->inv_ln2)); r = vfmsq_f32 (x, n, d->ln2_hi); r = vfmsq_f32 (r, n, d->ln2_lo); - e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23); + e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 23); scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias)); #if !WANT_SIMD_EXCEPT diff --git a/sysdeps/aarch64/fpu/expf_sve.c b/sysdeps/aarch64/fpu/expf_sve.c index 3ba79bc4f1..da93e01b87 100644 --- a/sysdeps/aarch64/fpu/expf_sve.c +++ b/sysdeps/aarch64/fpu/expf_sve.c @@ -18,33 +18,25 @@ <https://www.gnu.org/licenses/>. */ #include "sv_math.h" +#include "sv_expf_inline.h" + +/* Roughly 87.3. For x < -Thres, the result is subnormal and not handled + correctly by FEXPA. */ +#define Thres 0x1.5d5e2ap+6f static const struct data { - float poly[5]; - float inv_ln2, ln2_hi, ln2_lo, shift, thres; + struct sv_expf_data d; + float thres; } data = { - /* Coefficients copied from the polynomial in AdvSIMD variant, reversed for - compatibility with polynomial helpers. 
*/ - .poly = { 0x1.ffffecp-1f, 0x1.fffdb6p-2f, 0x1.555e66p-3f, 0x1.573e2ep-5f, - 0x1.0e4020p-7f }, - .inv_ln2 = 0x1.715476p+0f, - .ln2_hi = 0x1.62e4p-1f, - .ln2_lo = 0x1.7f7d1cp-20f, - /* 1.5*2^17 + 127. */ - .shift = 0x1.903f8p17f, - /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled - correctly by FEXPA. */ - .thres = 0x1.5d5e2ap+6f, + .d = SV_EXPF_DATA, + .thres = Thres, }; -#define C(i) sv_f32 (d->poly[i]) -#define ExponentBias 0x3f800000 - static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) +special_case (svfloat32_t x, svbool_t special, const struct sv_expf_data *d) { - return sv_call_f32 (expf, x, y, special); + return sv_call_f32 (expf, x, expf_inline (x, svptrue_b32 (), d), special); } /* Optimised single-precision SVE exp function. @@ -54,36 +46,8 @@ special_case (svfloat32_t x, svfloat32_t y, svbool_t special) svfloat32_t SV_NAME_F1 (exp) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); - - /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] - x = ln2*n + r, with r in [-ln2/2, ln2/2]. */ - - /* Load some constants in quad-word chunks to minimise memory access (last - lane is wasted). */ - svfloat32_t invln2_and_ln2 = svld1rq (svptrue_b32 (), &d->inv_ln2); - - /* n = round(x/(ln2/N)). */ - svfloat32_t z = svmla_lane (sv_f32 (d->shift), x, invln2_and_ln2, 0); - svfloat32_t n = svsub_x (pg, z, d->shift); - - /* r = x - n*ln2/N. */ - svfloat32_t r = svmls_lane (x, n, invln2_and_ln2, 1); - r = svmls_lane (r, n, invln2_and_ln2, 2); - - /* scale = 2^(n/N). */ svbool_t is_special_case = svacgt (pg, x, d->thres); - svfloat32_t scale = svexpa (svreinterpret_u32 (z)); - - /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6. 
*/ - svfloat32_t p12 = svmla_x (pg, C (1), C (2), r); - svfloat32_t p34 = svmla_x (pg, C (3), C (4), r); - svfloat32_t r2 = svmul_x (pg, r, r); - svfloat32_t p14 = svmla_x (pg, p12, p34, r2); - svfloat32_t p0 = svmul_x (pg, r, C (0)); - svfloat32_t poly = svmla_x (pg, p0, r2, p14); - if (__glibc_unlikely (svptest_any (pg, is_special_case))) - return special_case (x, svmla_x (pg, scale, scale, poly), is_special_case); - - return svmla_x (pg, scale, scale, poly); + return special_case (x, is_special_case, &d->d); + return expf_inline (x, pg, &d->d); } diff --git a/sysdeps/aarch64/fpu/expm1f_advsimd.c b/sysdeps/aarch64/fpu/expm1f_advsimd.c index a0616ec754..8303ca296e 100644 --- a/sysdeps/aarch64/fpu/expm1f_advsimd.c +++ b/sysdeps/aarch64/fpu/expm1f_advsimd.c @@ -18,27 +18,18 @@ <https://www.gnu.org/licenses/>. */ #include "v_math.h" -#include "poly_advsimd_f32.h" +#include "v_expm1f_inline.h" static const struct data { - float32x4_t poly[5]; - float invln2_and_ln2[4]; - float32x4_t shift; - int32x4_t exponent_bias; + struct v_expm1f_data d; #if WANT_SIMD_EXCEPT uint32x4_t thresh; #else float32x4_t oflow_bound; #endif } data = { - /* Generated using fpminimax with degree=5 in [-log(2)/2, log(2)/2]. */ - .poly = { V4 (0x1.fffffep-2), V4 (0x1.5554aep-3), V4 (0x1.555736p-5), - V4 (0x1.12287cp-7), V4 (0x1.6b55a2p-10) }, - /* Stores constants: invln2, ln2_hi, ln2_lo, 0. */ - .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 }, - .shift = V4 (0x1.8p23f), - .exponent_bias = V4 (0x3f800000), + .d = V_EXPM1F_DATA, #if !WANT_SIMD_EXCEPT /* Value above which expm1f(x) should overflow. 
Absolute value of the underflow bound is greater than this, so it catches both cases - there is @@ -55,67 +46,38 @@ static const struct data #define TinyBound v_u32 (0x34000000 << 1) static float32x4_t VPCS_ATTR NOINLINE -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) +special_case (float32x4_t x, uint32x4_t special, const struct data *d) { - return v_call_f32 (expm1f, x, y, special); + return v_call_f32 ( + expm1f, x, expm1f_inline (v_zerofy_f32 (x, special), &d->d), special); } /* Single-precision vector exp(x) - 1 function. - The maximum error is 1.51 ULP: - _ZGVnN4v_expm1f (0x1.8baa96p-2) got 0x1.e2fb9p-2 - want 0x1.e2fb94p-2. */ + The maximum error is 1.62 ULP: + _ZGVnN4v_expm1f(0x1.85f83p-2) got 0x1.da9f4p-2 + want 0x1.da9f44p-2. */ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (expm1) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - uint32x4_t ix = vreinterpretq_u32_f32 (x); #if WANT_SIMD_EXCEPT + uint32x4_t ix = vreinterpretq_u32_f32 (x); /* If fp exceptions are to be triggered correctly, fall back to scalar for |x| < 2^-23, |x| > oflow_bound, Inf & NaN. Add ix to itself for shift-left by 1, and compare with thresh which was left-shifted offline - this is effectively an absolute compare. */ uint32x4_t special = vcgeq_u32 (vsubq_u32 (vaddq_u32 (ix, ix), TinyBound), d->thresh); - if (__glibc_unlikely (v_any_u32 (special))) - x = v_zerofy_f32 (x, special); #else /* Handles very large values (+ve and -ve), +/-NaN, +/-Inf. */ uint32x4_t special = vcagtq_f32 (x, d->oflow_bound); #endif - /* Reduce argument to smaller range: - Let i = round(x / ln2) - and f = x - i * ln2, then f is in [-ln2/2, ln2/2]. - exp(x) - 1 = 2^i * (expm1(f) + 1) - 1 - where 2^i is exact because i is an integer. 
*/ - float32x4_t invln2_and_ln2 = vld1q_f32 (d->invln2_and_ln2); - float32x4_t j - = vsubq_f32 (vfmaq_laneq_f32 (d->shift, x, invln2_and_ln2, 0), d->shift); - int32x4_t i = vcvtq_s32_f32 (j); - float32x4_t f = vfmsq_laneq_f32 (x, j, invln2_and_ln2, 1); - f = vfmsq_laneq_f32 (f, j, invln2_and_ln2, 2); - - /* Approximate expm1(f) using polynomial. - Taylor expansion for expm1(x) has the form: - x + ax^2 + bx^3 + cx^4 .... - So we calculate the polynomial P(f) = a + bf + cf^2 + ... - and assemble the approximation expm1(f) ~= f + f^2 * P(f). */ - float32x4_t p = v_horner_4_f32 (f, d->poly); - p = vfmaq_f32 (f, vmulq_f32 (f, f), p); - - /* Assemble the result. - expm1(x) ~= 2^i * (p + 1) - 1 - Let t = 2^i. */ - int32x4_t u = vaddq_s32 (vshlq_n_s32 (i, 23), d->exponent_bias); - float32x4_t t = vreinterpretq_f32_s32 (u); - if (__glibc_unlikely (v_any_u32 (special))) - return special_case (vreinterpretq_f32_u32 (ix), - vfmaq_f32 (vsubq_f32 (t, v_f32 (1.0f)), p, t), - special); + return special_case (x, special, d); /* expm1(x) ~= p * t + (t - 1). */ - return vfmaq_f32 (vsubq_f32 (t, v_f32 (1.0f)), p, t); + return expm1f_inline (x, &d->d); } libmvec_hidden_def (V_NAME_F1 (expm1)) HALF_WIDTH_ALIAS_F1 (expm1) diff --git a/sysdeps/aarch64/fpu/log10f_advsimd.c b/sysdeps/aarch64/fpu/log10f_advsimd.c index 9347422a77..82228b599a 100644 --- a/sysdeps/aarch64/fpu/log10f_advsimd.c +++ b/sysdeps/aarch64/fpu/log10f_advsimd.c @@ -22,11 +22,11 @@ static const struct data { - uint32x4_t min_norm; + uint32x4_t off, offset_lower_bound; uint16x8_t special_bound; + uint32x4_t mantissa_mask; float32x4_t poly[8]; float32x4_t inv_ln10, ln2; - uint32x4_t off, mantissa_mask; } data = { /* Use order 9 for log10(1+x), i.e. order 8 for log10(1+x)/x, with x in [-1/3, 1/3] (offset=2/3). Max. relative error: 0x1.068ee468p-25. 
*/ @@ -35,18 +35,22 @@ static const struct data V4 (-0x1.0fc92cp-4f), V4 (0x1.f5f76ap-5f) }, .ln2 = V4 (0x1.62e43p-1f), .inv_ln10 = V4 (0x1.bcb7b2p-2f), - .min_norm = V4 (0x00800000), - .special_bound = V8 (0x7f00), /* asuint32(inf) - min_norm. */ + /* Lower bound is the smallest positive normal float 0x00800000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x00800000 - offset (which wraps around). */ + .offset_lower_bound = V4 (0x00800000 - 0x3f2aaaab), + .special_bound = V8 (0x7f00), /* top16(asuint32(inf) - 0x00800000). */ .off = V4 (0x3f2aaaab), /* 0.666667. */ .mantissa_mask = V4 (0x007fffff), }; static float32x4_t VPCS_ATTR NOINLINE -special_case (float32x4_t x, float32x4_t y, float32x4_t p, float32x4_t r2, - uint16x4_t cmp) +special_case (float32x4_t y, uint32x4_t u_off, float32x4_t p, float32x4_t r2, + uint16x4_t cmp, const struct data *d) { /* Fall back to scalar code. */ - return v_call_f32 (log10f, x, vfmaq_f32 (y, p, r2), vmovl_u16 (cmp)); + return v_call_f32 (log10f, vreinterpretq_f32_u32 (vaddq_u32 (u_off, d->off)), + vfmaq_f32 (y, p, r2), vmovl_u16 (cmp)); } /* Fast implementation of AdvSIMD log10f, @@ -58,15 +62,21 @@ special_case (float32x4_t x, float32x4_t y, float32x4_t p, float32x4_t r2, float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log10) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - uint32x4_t u = vreinterpretq_u32_f32 (x); - uint16x4_t special = vcge_u16 (vsubhn_u32 (u, d->min_norm), - vget_low_u16 (d->special_bound)); + + /* To avoid having to mov x out of the way, keep u after offset has been + applied, and recover x by adding the offset back in the special-case + handler. */ + uint32x4_t u_off = vreinterpretq_u32_f32 (x); /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - u = vsubq_u32 (u, d->off); + u_off = vsubq_u32 (u_off, d->off); float32x4_t n = vcvtq_f32_s32 ( - vshrq_n_s32 (vreinterpretq_s32_u32 (u), 23)); /* signextend. 
*/ - u = vaddq_u32 (vandq_u32 (u, d->mantissa_mask), d->off); + vshrq_n_s32 (vreinterpretq_s32_u32 (u_off), 23)); /* signextend. */ + + uint16x4_t special = vcge_u16 (vsubhn_u32 (u_off, d->offset_lower_bound), + vget_low_u16 (d->special_bound)); + + uint32x4_t u = vaddq_u32 (vandq_u32 (u_off, d->mantissa_mask), d->off); float32x4_t r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f)); /* y = log10(1+r) + n * log10(2). */ @@ -77,7 +87,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log10) (float32x4_t x) y = vmulq_f32 (y, d->inv_ln10); if (__glibc_unlikely (v_any_u16h (special))) - return special_case (x, y, poly, r2, special); + return special_case (y, u_off, poly, r2, special, d); return vfmaq_f32 (y, poly, r2); } libmvec_hidden_def (V_NAME_F1 (log10)) diff --git a/sysdeps/aarch64/fpu/log10f_sve.c b/sysdeps/aarch64/fpu/log10f_sve.c index bdbb49cd32..7913679f67 100644 --- a/sysdeps/aarch64/fpu/log10f_sve.c +++ b/sysdeps/aarch64/fpu/log10f_sve.c @@ -24,6 +24,7 @@ static const struct data float poly_0246[4]; float poly_1357[4]; float ln2, inv_ln10; + uint32_t off, lower; } data = { .poly_1357 = { /* Coefficients copied from the AdvSIMD routine, then rearranged so that coeffs @@ -35,18 +36,23 @@ static const struct data -0x1.0fc92cp-4f }, .ln2 = 0x1.62e43p-1f, .inv_ln10 = 0x1.bcb7b2p-2f, + .off = 0x3f2aaaab, + /* Lower bound is the smallest positive normal float 0x00800000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x00800000 - offset (which wraps around). */ + .lower = 0x00800000 - 0x3f2aaaab }; -#define Min 0x00800000 -#define Max 0x7f800000 -#define Thres 0x7f000000 /* Max - Min. */ -#define Offset 0x3f2aaaab /* 0.666667. */ +#define Thres 0x7f000000 /* asuint32(inf) - 0x00800000. 
*/ #define MantissaMask 0x007fffff static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) +special_case (svuint32_t u_off, svfloat32_t p, svfloat32_t r2, svfloat32_t y, + svbool_t cmp) { - return sv_call_f32 (log10f, x, y, special); + return sv_call_f32 ( + log10f, svreinterpret_f32 (svadd_x (svptrue_b32 (), u_off, data.off)), + svmla_x (svptrue_b32 (), p, r2, y), cmp); } /* Optimised implementation of SVE log10f using the same algorithm and @@ -57,23 +63,25 @@ special_case (svfloat32_t x, svfloat32_t y, svbool_t special) svfloat32_t SV_NAME_F1 (log10) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); - svuint32_t ix = svreinterpret_u32 (x); - svbool_t special = svcmpge (pg, svsub_x (pg, ix, Min), Thres); + + svuint32_t u_off = svreinterpret_u32 (x); + + u_off = svsub_x (pg, u_off, d->off); + svbool_t special = svcmpge (pg, svsub_x (pg, u_off, d->lower), Thres); /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - ix = svsub_x (pg, ix, Offset); svfloat32_t n = svcvt_f32_x ( - pg, svasr_x (pg, svreinterpret_s32 (ix), 23)); /* signextend. */ - ix = svand_x (pg, ix, MantissaMask); - ix = svadd_x (pg, ix, Offset); + pg, svasr_x (pg, svreinterpret_s32 (u_off), 23)); /* signextend. */ + svuint32_t ix = svand_x (pg, u_off, MantissaMask); + ix = svadd_x (pg, ix, d->off); svfloat32_t r = svsub_x (pg, svreinterpret_f32 (ix), 1.0f); /* y = log10(1+r) + n*log10(2) log10(1+r) ~ r * InvLn(10) + P(r) where P(r) is a polynomial. Use order 9 for log10(1+x), i.e. order 8 for log10(1+x)/x, with x in [-1/3, 1/3] (offset=2/3). 
*/ - svfloat32_t r2 = svmul_x (pg, r, r); - svfloat32_t r4 = svmul_x (pg, r2, r2); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); + svfloat32_t r4 = svmul_x (svptrue_b32 (), r2, r2); svfloat32_t p_1357 = svld1rq (svptrue_b32 (), &d->poly_1357[0]); svfloat32_t q_01 = svmla_lane (sv_f32 (d->poly_0246[0]), r, p_1357, 0); svfloat32_t q_23 = svmla_lane (sv_f32 (d->poly_0246[1]), r, p_1357, 1); @@ -88,7 +96,6 @@ svfloat32_t SV_NAME_F1 (log10) (svfloat32_t x, const svbool_t pg) hi = svmul_x (pg, hi, d->inv_ln10); if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svmla_x (svnot_z (pg, special), hi, r2, y), - special); - return svmla_x (pg, hi, r2, y); + return special_case (u_off, hi, r2, y, special); + return svmla_x (svptrue_b32 (), hi, r2, y); } diff --git a/sysdeps/aarch64/fpu/log1p_advsimd.c b/sysdeps/aarch64/fpu/log1p_advsimd.c index ffc418fc9c..114064c696 100644 --- a/sysdeps/aarch64/fpu/log1p_advsimd.c +++ b/sysdeps/aarch64/fpu/log1p_advsimd.c @@ -127,3 +127,5 @@ VPCS_ATTR float64x2_t V_NAME_D1 (log1p) (float64x2_t x) return vfmaq_f64 (y, f2, p); } + +strong_alias (V_NAME_D1 (log1p), V_NAME_D1 (logp1)) diff --git a/sysdeps/aarch64/fpu/log1p_sve.c b/sysdeps/aarch64/fpu/log1p_sve.c index 04f7e5720e..b21cfb2c90 100644 --- a/sysdeps/aarch64/fpu/log1p_sve.c +++ b/sysdeps/aarch64/fpu/log1p_sve.c @@ -116,3 +116,5 @@ svfloat64_t SV_NAME_D1 (log1p) (svfloat64_t x, svbool_t pg) return y; } + +strong_alias (SV_NAME_D1 (log1p), SV_NAME_D1 (logp1)) diff --git a/sysdeps/aarch64/fpu/log1pf_advsimd.c b/sysdeps/aarch64/fpu/log1pf_advsimd.c index dc15334a85..00006fc703 100644 --- a/sysdeps/aarch64/fpu/log1pf_advsimd.c +++ b/sysdeps/aarch64/fpu/log1pf_advsimd.c @@ -18,113 +18,81 @@ <https://www.gnu.org/licenses/>. 
*/ #include "v_math.h" -#include "poly_advsimd_f32.h" +#include "v_log1pf_inline.h" + +#if WANT_SIMD_EXCEPT const static struct data { - float32x4_t poly[8], ln2; - uint32x4_t tiny_bound, minus_one, four, thresh; - int32x4_t three_quarters; + uint32x4_t minus_one, thresh; + struct v_log1pf_data d; } data = { - .poly = { /* Generated using FPMinimax in [-0.25, 0.5]. First two coefficients - (1, -0.5) are not stored as they can be generated more - efficiently. */ - V4 (0x1.5555aap-2f), V4 (-0x1.000038p-2f), V4 (0x1.99675cp-3f), - V4 (-0x1.54ef78p-3f), V4 (0x1.28a1f4p-3f), V4 (-0x1.0da91p-3f), - V4 (0x1.abcb6p-4f), V4 (-0x1.6f0d5ep-5f) }, - .ln2 = V4 (0x1.62e43p-1f), - .tiny_bound = V4 (0x34000000), /* asuint32(0x1p-23). ulp=0.5 at 0x1p-23. */ - .thresh = V4 (0x4b800000), /* asuint32(INFINITY) - tiny_bound. */ + .d = V_LOG1PF_CONSTANTS_TABLE, + .thresh = V4 (0x4b800000), /* asuint32(INFINITY) - TinyBound. */ .minus_one = V4 (0xbf800000), - .four = V4 (0x40800000), - .three_quarters = V4 (0x3f400000) }; -static inline float32x4_t -eval_poly (float32x4_t m, const float32x4_t *p) -{ - /* Approximate log(1+m) on [-0.25, 0.5] using split Estrin scheme. */ - float32x4_t p_12 = vfmaq_f32 (v_f32 (-0.5), m, p[0]); - float32x4_t p_34 = vfmaq_f32 (p[1], m, p[2]); - float32x4_t p_56 = vfmaq_f32 (p[3], m, p[4]); - float32x4_t p_78 = vfmaq_f32 (p[5], m, p[6]); - - float32x4_t m2 = vmulq_f32 (m, m); - float32x4_t p_02 = vfmaq_f32 (m, m2, p_12); - float32x4_t p_36 = vfmaq_f32 (p_34, m2, p_56); - float32x4_t p_79 = vfmaq_f32 (p_78, m2, p[7]); - - float32x4_t m4 = vmulq_f32 (m2, m2); - float32x4_t p_06 = vfmaq_f32 (p_02, m4, p_36); - return vfmaq_f32 (p_06, m4, vmulq_f32 (m4, p_79)); -} +/* asuint32(0x1p-23). ulp=0.5 at 0x1p-23. 
*/ +# define TinyBound v_u32 (0x34000000) static float32x4_t NOINLINE VPCS_ATTR -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) +special_case (float32x4_t x, uint32x4_t cmp, const struct data *d) { - return v_call_f32 (log1pf, x, y, special); + /* Side-step special lanes so fenv exceptions are not triggered + inadvertently. */ + float32x4_t x_nospecial = v_zerofy_f32 (x, cmp); + return v_call_f32 (log1pf, x, log1pf_inline (x_nospecial, &d->d), cmp); } -/* Vector log1pf approximation using polynomial on reduced interval. Accuracy - is roughly 2.02 ULP: - log1pf(0x1.21e13ap-2) got 0x1.fe8028p-3 want 0x1.fe802cp-3. */ +/* Vector log1pf approximation using polynomial on reduced interval. Worst-case + error is 1.69 ULP: + _ZGVnN4v_log1pf(0x1.04418ap-2) got 0x1.cfcbd8p-3 + want 0x1.cfcbdcp-3. */ VPCS_ATTR float32x4_t V_NAME_F1 (log1p) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - uint32x4_t ix = vreinterpretq_u32_f32 (x); uint32x4_t ia = vreinterpretq_u32_f32 (vabsq_f32 (x)); + uint32x4_t special_cases - = vorrq_u32 (vcgeq_u32 (vsubq_u32 (ia, d->tiny_bound), d->thresh), + = vorrq_u32 (vcgeq_u32 (vsubq_u32 (ia, TinyBound), d->thresh), vcgeq_u32 (ix, d->minus_one)); - float32x4_t special_arg = x; -#if WANT_SIMD_EXCEPT if (__glibc_unlikely (v_any_u32 (special_cases))) - /* Side-step special lanes so fenv exceptions are not triggered - inadvertently. */ - x = v_zerofy_f32 (x, special_cases); -#endif + return special_case (x, special_cases, d); - /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m - is in [-0.25, 0.5]): - log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2). - - We approximate log1p(m) with a polynomial, then scale by - k*log(2). Instead of doing this directly, we use an intermediate - scale factor s = 4*k*log(2) to ensure the scale is representable - as a normalised fp32 number. 
*/ + return log1pf_inline (x, &d->d); +} - float32x4_t m = vaddq_f32 (x, v_f32 (1.0f)); +#else - /* Choose k to scale x to the range [-1/4, 1/2]. */ - int32x4_t k - = vandq_s32 (vsubq_s32 (vreinterpretq_s32_f32 (m), d->three_quarters), - v_s32 (0xff800000)); - uint32x4_t ku = vreinterpretq_u32_s32 (k); +const static struct v_log1pf_data data = V_LOG1PF_CONSTANTS_TABLE; - /* Scale x by exponent manipulation. */ - float32x4_t m_scale - = vreinterpretq_f32_u32 (vsubq_u32 (vreinterpretq_u32_f32 (x), ku)); +static float32x4_t NOINLINE VPCS_ATTR +special_case (float32x4_t x, uint32x4_t cmp) +{ + return v_call_f32 (log1pf, x, log1pf_inline (x, ptr_barrier (&data)), cmp); +} - /* Scale up to ensure that the scale factor is representable as normalised - fp32 number, and scale m down accordingly. */ - float32x4_t s = vreinterpretq_f32_u32 (vsubq_u32 (d->four, ku)); - m_scale = vaddq_f32 (m_scale, vfmaq_f32 (v_f32 (-1.0f), v_f32 (0.25f), s)); +/* Vector log1pf approximation using polynomial on reduced interval. Worst-case + error is 1.63 ULP: + _ZGVnN4v_log1pf(0x1.216d12p-2) got 0x1.fdcb12p-3 + want 0x1.fdcb16p-3. */ +VPCS_ATTR float32x4_t V_NAME_F1 (log1p) (float32x4_t x) +{ + uint32x4_t special_cases = vornq_u32 (vcleq_f32 (x, v_f32 (-1)), + vcaleq_f32 (x, v_f32 (0x1p127f))); - /* Evaluate polynomial on the reduced interval. */ - float32x4_t p = eval_poly (m_scale, d->poly); + if (__glibc_unlikely (v_any_u32 (special_cases))) + return special_case (x, special_cases); - /* The scale factor to be applied back at the end - by multiplying float(k) - by 2^-23 we get the unbiased exponent of k. */ - float32x4_t scale_back = vcvtq_f32_s32 (vshrq_n_s32 (k, 23)); + return log1pf_inline (x, ptr_barrier (&data)); +} - /* Apply the scaling back. 
*/ - float32x4_t y = vfmaq_f32 (p, scale_back, d->ln2); +#endif - if (__glibc_unlikely (v_any_u32 (special_cases))) - return special_case (special_arg, y, special_cases); - return y; -} libmvec_hidden_def (V_NAME_F1 (log1p)) HALF_WIDTH_ALIAS_F1 (log1p) +strong_alias (V_NAME_F1 (log1p), V_NAME_F1 (logp1)) +libmvec_hidden_def (V_NAME_F1 (logp1)) +HALF_WIDTH_ALIAS_F1 (logp1) diff --git a/sysdeps/aarch64/fpu/log1pf_sve.c b/sysdeps/aarch64/fpu/log1pf_sve.c index f645cc997e..5256d5e94c 100644 --- a/sysdeps/aarch64/fpu/log1pf_sve.c +++ b/sysdeps/aarch64/fpu/log1pf_sve.c @@ -98,3 +98,5 @@ svfloat32_t SV_NAME_F1 (log1p) (svfloat32_t x, svbool_t pg) return y; } + +strong_alias (SV_NAME_F1 (log1p), SV_NAME_F1 (logp1)) diff --git a/sysdeps/aarch64/fpu/log2f_advsimd.c b/sysdeps/aarch64/fpu/log2f_advsimd.c index db21836749..84effe4fe9 100644 --- a/sysdeps/aarch64/fpu/log2f_advsimd.c +++ b/sysdeps/aarch64/fpu/log2f_advsimd.c @@ -22,9 +22,9 @@ static const struct data { - uint32x4_t min_norm; + uint32x4_t off, offset_lower_bound; uint16x8_t special_bound; - uint32x4_t off, mantissa_mask; + uint32x4_t mantissa_mask; float32x4_t poly[9]; } data = { /* Coefficients generated using Remez algorithm approximate @@ -34,18 +34,22 @@ static const struct data V4 (-0x1.715458p-1f), V4 (0x1.ec701cp-2f), V4 (-0x1.7171a4p-2f), V4 (0x1.27a0b8p-2f), V4 (-0x1.e5143ep-3f), V4 (0x1.9d8ecap-3f), V4 (-0x1.c675bp-3f), V4 (0x1.9e495p-3f) }, - .min_norm = V4 (0x00800000), - .special_bound = V8 (0x7f00), /* asuint32(inf) - min_norm. */ + /* Lower bound is the smallest positive normal float 0x00800000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x00800000 - offset (which wraps around). */ + .offset_lower_bound = V4 (0x00800000 - 0x3f2aaaab), + .special_bound = V8 (0x7f00), /* top16(asuint32(inf) - 0x00800000). */ .off = V4 (0x3f2aaaab), /* 0.666667. 
*/ .mantissa_mask = V4 (0x007fffff), }; static float32x4_t VPCS_ATTR NOINLINE -special_case (float32x4_t x, float32x4_t n, float32x4_t p, float32x4_t r, - uint16x4_t cmp) +special_case (float32x4_t n, uint32x4_t u_off, float32x4_t p, float32x4_t r, + uint16x4_t cmp, const struct data *d) { /* Fall back to scalar code. */ - return v_call_f32 (log2f, x, vfmaq_f32 (n, p, r), vmovl_u16 (cmp)); + return v_call_f32 (log2f, vreinterpretq_f32_u32 (vaddq_u32 (u_off, d->off)), + vfmaq_f32 (n, p, r), vmovl_u16 (cmp)); } /* Fast implementation for single precision AdvSIMD log2, @@ -56,15 +60,21 @@ special_case (float32x4_t x, float32x4_t n, float32x4_t p, float32x4_t r, float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log2) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - uint32x4_t u = vreinterpretq_u32_f32 (x); - uint16x4_t special = vcge_u16 (vsubhn_u32 (u, d->min_norm), - vget_low_u16 (d->special_bound)); + + /* To avoid having to mov x out of the way, keep u after offset has been + applied, and recover x by adding the offset back in the special-case + handler. */ + uint32x4_t u_off = vreinterpretq_u32_f32 (x); /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - u = vsubq_u32 (u, d->off); + u_off = vsubq_u32 (u_off, d->off); float32x4_t n = vcvtq_f32_s32 ( - vshrq_n_s32 (vreinterpretq_s32_u32 (u), 23)); /* signextend. */ - u = vaddq_u32 (vandq_u32 (u, d->mantissa_mask), d->off); + vshrq_n_s32 (vreinterpretq_s32_u32 (u_off), 23)); /* signextend. */ + + uint16x4_t special = vcge_u16 (vsubhn_u32 (u_off, d->offset_lower_bound), + vget_low_u16 (d->special_bound)); + + uint32x4_t u = vaddq_u32 (vandq_u32 (u_off, d->mantissa_mask), d->off); float32x4_t r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f)); /* y = log2(1+r) + n. 
*/ @@ -72,7 +82,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log2) (float32x4_t x) float32x4_t p = v_pw_horner_8_f32 (r, r2, d->poly); if (__glibc_unlikely (v_any_u16h (special))) - return special_case (x, n, p, r, special); + return special_case (n, u_off, p, r, special, d); return vfmaq_f32 (n, p, r); } libmvec_hidden_def (V_NAME_F1 (log2)) diff --git a/sysdeps/aarch64/fpu/log2f_sve.c b/sysdeps/aarch64/fpu/log2f_sve.c index 5031c42483..939d89bfb9 100644 --- a/sysdeps/aarch64/fpu/log2f_sve.c +++ b/sysdeps/aarch64/fpu/log2f_sve.c @@ -23,6 +23,7 @@ static const struct data { float poly_02468[5]; float poly_1357[4]; + uint32_t off, lower; } data = { .poly_1357 = { /* Coefficients copied from the AdvSIMD routine, then rearranged so that coeffs @@ -32,18 +33,23 @@ static const struct data }, .poly_02468 = { 0x1.715476p0f, 0x1.ec701cp-2f, 0x1.27a0b8p-2f, 0x1.9d8ecap-3f, 0x1.9e495p-3f }, + .off = 0x3f2aaaab, + /* Lower bound is the smallest positive normal float 0x00800000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x00800000 - offset (which wraps around). */ + .lower = 0x00800000 - 0x3f2aaaab }; -#define Min (0x00800000) -#define Max (0x7f800000) -#define Thres (0x7f000000) /* Max - Min. */ +#define Thresh (0x7f000000) /* asuint32(inf) - 0x00800000. */ #define MantissaMask (0x007fffff) -#define Off (0x3f2aaaab) /* 0.666667. 
*/ static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp) +special_case (svuint32_t u_off, svfloat32_t p, svfloat32_t r2, svfloat32_t y, + svbool_t cmp) { - return sv_call_f32 (log2f, x, y, cmp); + return sv_call_f32 ( + log2f, svreinterpret_f32 (svadd_x (svptrue_b32 (), u_off, data.off)), + svmla_x (svptrue_b32 (), p, r2, y), cmp); } /* Optimised implementation of SVE log2f, using the same algorithm @@ -55,19 +61,20 @@ svfloat32_t SV_NAME_F1 (log2) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); - svuint32_t u = svreinterpret_u32 (x); - svbool_t special = svcmpge (pg, svsub_x (pg, u, Min), Thres); + svuint32_t u_off = svreinterpret_u32 (x); + + u_off = svsub_x (pg, u_off, d->off); + svbool_t special = svcmpge (pg, svsub_x (pg, u_off, d->lower), Thresh); /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - u = svsub_x (pg, u, Off); svfloat32_t n = svcvt_f32_x ( - pg, svasr_x (pg, svreinterpret_s32 (u), 23)); /* Sign-extend. */ - u = svand_x (pg, u, MantissaMask); - u = svadd_x (pg, u, Off); + pg, svasr_x (pg, svreinterpret_s32 (u_off), 23)); /* Sign-extend. */ + svuint32_t u = svand_x (pg, u_off, MantissaMask); + u = svadd_x (pg, u, d->off); svfloat32_t r = svsub_x (pg, svreinterpret_f32 (u), 1.0f); /* y = log2(1+r) + n. */ - svfloat32_t r2 = svmul_x (pg, r, r); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); /* Evaluate polynomial using pairwise Horner scheme. 
*/ svfloat32_t p_1357 = svld1rq (svptrue_b32 (), &d->poly_1357[0]); @@ -81,6 +88,6 @@ svfloat32_t SV_NAME_F1 (log2) (svfloat32_t x, const svbool_t pg) y = svmla_x (pg, q_01, r2, y); if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svmla_x (svnot_z (pg, special), n, r, y), special); - return svmla_x (pg, n, r, y); + return special_case (u_off, n, r, y, special); + return svmla_x (svptrue_b32 (), n, r, y); } diff --git a/sysdeps/aarch64/fpu/logf_advsimd.c b/sysdeps/aarch64/fpu/logf_advsimd.c index 3c0d0fcdc7..c20dbfd6c0 100644 --- a/sysdeps/aarch64/fpu/logf_advsimd.c +++ b/sysdeps/aarch64/fpu/logf_advsimd.c @@ -21,20 +21,22 @@ static const struct data { - uint32x4_t min_norm; + uint32x4_t off, offset_lower_bound; uint16x8_t special_bound; + uint32x4_t mantissa_mask; float32x4_t poly[7]; - float32x4_t ln2, tiny_bound; - uint32x4_t off, mantissa_mask; + float32x4_t ln2; } data = { /* 3.34 ulp error. */ .poly = { V4 (-0x1.3e737cp-3f), V4 (0x1.5a9aa2p-3f), V4 (-0x1.4f9934p-3f), V4 (0x1.961348p-3f), V4 (-0x1.00187cp-2f), V4 (0x1.555d7cp-2f), V4 (-0x1.ffffc8p-2f) }, .ln2 = V4 (0x1.62e43p-1f), - .tiny_bound = V4 (0x1p-126), - .min_norm = V4 (0x00800000), - .special_bound = V8 (0x7f00), /* asuint32(inf) - min_norm. */ + /* Lower bound is the smallest positive normal float 0x00800000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x0080000 - offset (which wraps around). */ + .offset_lower_bound = V4 (0x00800000 - 0x3f2aaaab), + .special_bound = V8 (0x7f00), /* top16(asuint32(inf) - 0x00800000). */ .off = V4 (0x3f2aaaab), /* 0.666667. 
*/ .mantissa_mask = V4 (0x007fffff) }; @@ -42,32 +44,37 @@ static const struct data #define P(i) d->poly[7 - i] static float32x4_t VPCS_ATTR NOINLINE -special_case (float32x4_t x, float32x4_t y, float32x4_t r2, float32x4_t p, - uint16x4_t cmp) +special_case (float32x4_t p, uint32x4_t u_off, float32x4_t y, float32x4_t r2, + uint16x4_t cmp, const struct data *d) { /* Fall back to scalar code. */ - return v_call_f32 (logf, x, vfmaq_f32 (p, y, r2), vmovl_u16 (cmp)); + return v_call_f32 (logf, vreinterpretq_f32_u32 (vaddq_u32 (u_off, d->off)), + vfmaq_f32 (p, y, r2), vmovl_u16 (cmp)); } float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log) (float32x4_t x) { const struct data *d = ptr_barrier (&data); float32x4_t n, p, q, r, r2, y; - uint32x4_t u; + uint32x4_t u, u_off; uint16x4_t cmp; - u = vreinterpretq_u32_f32 (x); - cmp = vcge_u16 (vsubhn_u32 (u, d->min_norm), - vget_low_u16 (d->special_bound)); + /* To avoid having to mov x out of the way, keep u after offset has been + applied, and recover x by adding the offset back in the special-case + handler. */ + u_off = vreinterpretq_u32_f32 (x); /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - u = vsubq_u32 (u, d->off); + u_off = vsubq_u32 (u_off, d->off); n = vcvtq_f32_s32 ( - vshrq_n_s32 (vreinterpretq_s32_u32 (u), 23)); /* signextend. */ - u = vandq_u32 (u, d->mantissa_mask); + vshrq_n_s32 (vreinterpretq_s32_u32 (u_off), 23)); /* signextend. */ + u = vandq_u32 (u_off, d->mantissa_mask); u = vaddq_u32 (u, d->off); r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f)); + cmp = vcge_u16 (vsubhn_u32 (u_off, d->offset_lower_bound), + vget_low_u16 (d->special_bound)); + /* y = log(1+r) + n*ln2. */ r2 = vmulq_f32 (r, r); /* n*ln2 + r + r2*(P1 + r*P2 + r2*(P3 + r*P4 + r2*(P5 + r*P6 + r2*P7))). 
*/ @@ -80,7 +87,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log) (float32x4_t x) p = vfmaq_f32 (r, d->ln2, n); if (__glibc_unlikely (v_any_u16h (cmp))) - return special_case (x, y, r2, p, cmp); + return special_case (p, u_off, y, r2, cmp, d); return vfmaq_f32 (p, y, r2); } libmvec_hidden_def (V_NAME_F1 (log)) diff --git a/sysdeps/aarch64/fpu/logf_sve.c b/sysdeps/aarch64/fpu/logf_sve.c index d64e810cfe..5b9324678d 100644 --- a/sysdeps/aarch64/fpu/logf_sve.c +++ b/sysdeps/aarch64/fpu/logf_sve.c @@ -24,6 +24,7 @@ static const struct data float poly_0135[4]; float poly_246[3]; float ln2; + uint32_t off, lower; } data = { .poly_0135 = { /* Coefficients copied from the AdvSIMD routine in math/, then rearranged so @@ -32,19 +33,24 @@ static const struct data -0x1.3e737cp-3f, 0x1.5a9aa2p-3f, 0x1.961348p-3f, 0x1.555d7cp-2f }, .poly_246 = { -0x1.4f9934p-3f, -0x1.00187cp-2f, -0x1.ffffc8p-2f }, - .ln2 = 0x1.62e43p-1f + .ln2 = 0x1.62e43p-1f, + .off = 0x3f2aaaab, + /* Lower bound is the smallest positive normal float 0x00800000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x0080000 - offset (which wraps around). */ + .lower = 0x00800000 - 0x3f2aaaab }; -#define Min (0x00800000) -#define Max (0x7f800000) -#define Thresh (0x7f000000) /* Max - Min. */ +#define Thresh (0x7f000000) /* asuint32(inf) - 0x00800000. */ #define Mask (0x007fffff) -#define Off (0x3f2aaaab) /* 0.666667. 
*/ static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp) +special_case (svuint32_t u_off, svfloat32_t p, svfloat32_t r2, svfloat32_t y, + svbool_t cmp) { - return sv_call_f32 (logf, x, y, cmp); + return sv_call_f32 ( + logf, svreinterpret_f32 (svadd_x (svptrue_b32 (), u_off, data.off)), + svmla_x (svptrue_b32 (), p, r2, y), cmp); } /* Optimised implementation of SVE logf, using the same algorithm and @@ -55,19 +61,21 @@ svfloat32_t SV_NAME_F1 (log) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); - svuint32_t u = svreinterpret_u32 (x); - svbool_t cmp = svcmpge (pg, svsub_x (pg, u, Min), Thresh); + svuint32_t u_off = svreinterpret_u32 (x); + + u_off = svsub_x (pg, u_off, d->off); + svbool_t cmp = svcmpge (pg, svsub_x (pg, u_off, d->lower), Thresh); /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - u = svsub_x (pg, u, Off); svfloat32_t n = svcvt_f32_x ( - pg, svasr_x (pg, svreinterpret_s32 (u), 23)); /* Sign-extend. */ - u = svand_x (pg, u, Mask); - u = svadd_x (pg, u, Off); + pg, svasr_x (pg, svreinterpret_s32 (u_off), 23)); /* Sign-extend. */ + + svuint32_t u = svand_x (pg, u_off, Mask); + u = svadd_x (pg, u, d->off); svfloat32_t r = svsub_x (pg, svreinterpret_f32 (u), 1.0f); /* y = log(1+r) + n*ln2. */ - svfloat32_t r2 = svmul_x (pg, r, r); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); /* n*ln2 + r + r2*(P6 + r*P5 + r2*(P4 + r*P3 + r2*(P2 + r*P1 + r2*P0))). 
*/ svfloat32_t p_0135 = svld1rq (svptrue_b32 (), &d->poly_0135[0]); svfloat32_t p = svmla_lane (sv_f32 (d->poly_246[0]), r, p_0135, 1); @@ -80,6 +88,6 @@ svfloat32_t SV_NAME_F1 (log) (svfloat32_t x, const svbool_t pg) p = svmla_x (pg, r, n, d->ln2); if (__glibc_unlikely (svptest_any (pg, cmp))) - return special_case (x, svmla_x (svnot_z (pg, cmp), p, r2, y), cmp); + return special_case (u_off, p, r2, y, cmp); return svmla_x (pg, p, r2, y); } diff --git a/sysdeps/aarch64/fpu/sin_advsimd.c b/sysdeps/aarch64/fpu/sin_advsimd.c index a0d9d3b819..718125cbad 100644 --- a/sysdeps/aarch64/fpu/sin_advsimd.c +++ b/sysdeps/aarch64/fpu/sin_advsimd.c @@ -22,7 +22,7 @@ static const struct data { float64x2_t poly[7]; - float64x2_t range_val, inv_pi, shift, pi_1, pi_2, pi_3; + float64x2_t range_val, inv_pi, pi_1, pi_2, pi_3; } data = { .poly = { V2 (-0x1.555555555547bp-3), V2 (0x1.1111111108a4dp-7), V2 (-0x1.a01a019936f27p-13), V2 (0x1.71de37a97d93ep-19), @@ -34,12 +34,13 @@ static const struct data .pi_1 = V2 (0x1.921fb54442d18p+1), .pi_2 = V2 (0x1.1a62633145c06p-53), .pi_3 = V2 (0x1.c1cd129024e09p-106), - .shift = V2 (0x1.8p52), }; #if WANT_SIMD_EXCEPT -# define TinyBound v_u64 (0x3000000000000000) /* asuint64 (0x1p-255). */ -# define Thresh v_u64 (0x1160000000000000) /* RangeVal - TinyBound. */ +/* asuint64(0x1p-253)), below which multiply by inv_pi underflows. */ +# define TinyBound v_u64 (0x3020000000000000) +/* RangeVal - TinyBound. */ +# define Thresh v_u64 (0x1160000000000000) #endif #define C(i) d->poly[i] @@ -72,16 +73,15 @@ float64x2_t VPCS_ATTR V_NAME_D1 (sin) (float64x2_t x) fenv). These lanes will be fixed by special-case handler later. */ uint64x2_t ir = vreinterpretq_u64_f64 (vabsq_f64 (x)); cmp = vcgeq_u64 (vsubq_u64 (ir, TinyBound), Thresh); - r = vbslq_f64 (cmp, vreinterpretq_f64_u64 (cmp), x); + r = vreinterpretq_f64_u64 (vbicq_u64 (vreinterpretq_u64_f64 (x), cmp)); #else r = x; cmp = vcageq_f64 (x, d->range_val); #endif /* n = rint(|x|/pi). 
*/ - n = vfmaq_f64 (d->shift, d->inv_pi, r); - odd = vshlq_n_u64 (vreinterpretq_u64_f64 (n), 63); - n = vsubq_f64 (n, d->shift); + n = vrndaq_f64 (vmulq_f64 (r, d->inv_pi)); + odd = vshlq_n_u64 (vreinterpretq_u64_s64 (vcvtq_s64_f64 (n)), 63); /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ r = vfmsq_f64 (r, d->pi_1, n); diff --git a/sysdeps/aarch64/fpu/sinf_advsimd.c b/sysdeps/aarch64/fpu/sinf_advsimd.c index 375dfc3331..6ee9a23d5b 100644 --- a/sysdeps/aarch64/fpu/sinf_advsimd.c +++ b/sysdeps/aarch64/fpu/sinf_advsimd.c @@ -22,7 +22,7 @@ static const struct data { float32x4_t poly[4]; - float32x4_t range_val, inv_pi, shift, pi_1, pi_2, pi_3; + float32x4_t range_val, inv_pi, pi_1, pi_2, pi_3; } data = { /* 1.886 ulp error. */ .poly = { V4 (-0x1.555548p-3f), V4 (0x1.110df4p-7f), V4 (-0x1.9f42eap-13f), @@ -33,13 +33,14 @@ static const struct data .pi_3 = V4 (-0x1.ee59dap-49f), .inv_pi = V4 (0x1.45f306p-2f), - .shift = V4 (0x1.8p+23f), .range_val = V4 (0x1p20f) }; #if WANT_SIMD_EXCEPT -# define TinyBound v_u32 (0x21000000) /* asuint32(0x1p-61f). */ -# define Thresh v_u32 (0x28800000) /* RangeVal - TinyBound. */ +/* asuint32(0x1p-59f), below which multiply by inv_pi underflows. */ +# define TinyBound v_u32 (0x22000000) +/* RangeVal - TinyBound. */ +# define Thresh v_u32 (0x27800000) #endif #define C(i) d->poly[i] @@ -64,23 +65,22 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sin) (float32x4_t x) /* If fenv exceptions are to be triggered correctly, set any special lanes to 1 (which is neutral w.r.t. fenv). These lanes will be fixed by special-case handler later. */ - r = vbslq_f32 (cmp, vreinterpretq_f32_u32 (cmp), x); + r = vreinterpretq_f32_u32 (vbicq_u32 (vreinterpretq_u32_f32 (x), cmp)); #else r = x; cmp = vcageq_f32 (x, d->range_val); #endif - /* n = rint(|x|/pi) */ - n = vfmaq_f32 (d->shift, d->inv_pi, r); - odd = vshlq_n_u32 (vreinterpretq_u32_f32 (n), 31); - n = vsubq_f32 (n, d->shift); + /* n = rint(|x|/pi). 
*/ + n = vrndaq_f32 (vmulq_f32 (r, d->inv_pi)); + odd = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 31); - /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2) */ + /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ r = vfmsq_f32 (r, d->pi_1, n); r = vfmsq_f32 (r, d->pi_2, n); r = vfmsq_f32 (r, d->pi_3, n); - /* y = sin(r) */ + /* y = sin(r). */ r2 = vmulq_f32 (r, r); y = vfmaq_f32 (C (2), C (3), r2); y = vfmaq_f32 (C (1), y, r2); diff --git a/sysdeps/aarch64/fpu/sinhf_advsimd.c b/sysdeps/aarch64/fpu/sinhf_advsimd.c index 6bb7482dc2..c6ed7598e7 100644 --- a/sysdeps/aarch64/fpu/sinhf_advsimd.c +++ b/sysdeps/aarch64/fpu/sinhf_advsimd.c @@ -23,15 +23,13 @@ static const struct data { struct v_expm1f_data expm1f_consts; - uint32x4_t halff; #if WANT_SIMD_EXCEPT uint32x4_t tiny_bound, thresh; #else - uint32x4_t oflow_bound; + float32x4_t oflow_bound; #endif } data = { .expm1f_consts = V_EXPM1F_DATA, - .halff = V4 (0x3f000000), #if WANT_SIMD_EXCEPT /* 0x1.6a09e8p-32, below which expm1f underflows. */ .tiny_bound = V4 (0x2fb504f4), @@ -39,14 +37,15 @@ static const struct data .thresh = V4 (0x12fbbbb3), #else /* 0x1.61814ep+6, above which expm1f helper overflows. */ - .oflow_bound = V4 (0x42b0c0a7), + .oflow_bound = V4 (0x1.61814ep+6), #endif }; static float32x4_t NOINLINE VPCS_ATTR -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) +special_case (float32x4_t x, float32x4_t t, float32x4_t halfsign, + uint32x4_t special) { - return v_call_f32 (sinhf, x, y, special); + return v_call_f32 (sinhf, x, vmulq_f32 (t, halfsign), special); } /* Approximation for vector single-precision sinh(x) using expm1. 
@@ -60,15 +59,15 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sinh) (float32x4_t x) uint32x4_t ix = vreinterpretq_u32_f32 (x); float32x4_t ax = vabsq_f32 (x); - uint32x4_t iax = vreinterpretq_u32_f32 (ax); - uint32x4_t sign = veorq_u32 (ix, iax); - float32x4_t halfsign = vreinterpretq_f32_u32 (vorrq_u32 (sign, d->halff)); + float32x4_t halfsign = vreinterpretq_f32_u32 ( + vbslq_u32 (v_u32 (0x80000000), ix, vreinterpretq_u32_f32 (v_f32 (0.5)))); #if WANT_SIMD_EXCEPT - uint32x4_t special = vcgeq_u32 (vsubq_u32 (iax, d->tiny_bound), d->thresh); + uint32x4_t special = vcgeq_u32 ( + vsubq_u32 (vreinterpretq_u32_f32 (ax), d->tiny_bound), d->thresh); ax = v_zerofy_f32 (ax, special); #else - uint32x4_t special = vcgeq_u32 (iax, d->oflow_bound); + uint32x4_t special = vcageq_f32 (x, d->oflow_bound); #endif /* Up to the point that expm1f overflows, we can use it to calculate sinhf @@ -80,7 +79,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sinh) (float32x4_t x) /* Fall back to the scalar variant for any lanes that should trigger an exception. */ if (__glibc_unlikely (v_any_u32 (special))) - return special_case (x, vmulq_f32 (t, halfsign), special); + return special_case (x, t, halfsign, special); return vmulq_f32 (t, halfsign); } diff --git a/sysdeps/aarch64/fpu/sv_erf_data.c b/sysdeps/aarch64/fpu/sv_erf_data.c deleted file mode 100644 index a53878f893..0000000000 --- a/sysdeps/aarch64/fpu/sv_erf_data.c +++ /dev/null @@ -1,1570 +0,0 @@ -/* Table for SVE erf approximation - - Copyright (C) 2024 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. */ - -#include "vecmath_config.h" - -/* Lookup table used in vector erf. - For each possible rounded input r (multiples of 1/128), between - r = 0.0 and r = 6.0 (769 values): - - the first entry __erf_data.tab.erf contains the values of erf(r), - - the second entry __erf_data.tab.scale contains the values of - 2/sqrt(pi)*exp(-r^2). Note that indices 0 and 1 are never hit by the - algorithm, since lookup is performed only for x >= 1/64-1/512. */ -const struct sv_erf_data __sv_erf_data = { - .erf = { 0x0.0000000000000p+0, - 0x1.20dbf3deb1340p-7, - 0x1.20d77083f17a0p-6, - 0x1.b137e0cf584dcp-6, - 0x1.20c5645dd2538p-5, - 0x1.68e5d3bbc9526p-5, - 0x1.b0fafef135745p-5, - 0x1.f902a77bd3821p-5, - 0x1.207d480e90658p-4, - 0x1.44703e87e8593p-4, - 0x1.68591a1e83b5dp-4, - 0x1.8c36beb8a8d23p-4, - 0x1.b0081148a873ap-4, - 0x1.d3cbf7e70a4b3p-4, - 0x1.f78159ec8bb50p-4, - 0x1.0d939005f65e5p-3, - 0x1.1f5e1a35c3b89p-3, - 0x1.311fc15f56d14p-3, - 0x1.42d7fc2f64959p-3, - 0x1.548642321d7c6p-3, - 0x1.662a0bdf7a89fp-3, - 0x1.77c2d2a765f9ep-3, - 0x1.895010fdbdbfdp-3, - 0x1.9ad142662e14dp-3, - 0x1.ac45e37fe2526p-3, - 0x1.bdad72110a648p-3, - 0x1.cf076d1233237p-3, - 0x1.e05354b96ff36p-3, - 0x1.f190aa85540e2p-3, - 0x1.015f78a3dcf3dp-2, - 0x1.09eed6982b948p-2, - 0x1.127631eb8de32p-2, - 0x1.1af54e232d609p-2, - 0x1.236bef825d9a2p-2, - 0x1.2bd9db0f7827fp-2, - 0x1.343ed6989b7d9p-2, - 0x1.3c9aa8b84bedap-2, - 0x1.44ed18d9f6462p-2, - 0x1.4d35ef3e5372ep-2, - 0x1.5574f4ffac98ep-2, - 0x1.5da9f415ff23fp-2, - 0x1.65d4b75b00471p-2, - 0x1.6df50a8dff772p-2, - 0x1.760aba57a76bfp-2, - 0x1.7e15944d9d3e4p-2, - 
0x1.861566f5fd3c0p-2, - 0x1.8e0a01cab516bp-2, - 0x1.95f3353cbb146p-2, - 0x1.9dd0d2b721f39p-2, - 0x1.a5a2aca209394p-2, - 0x1.ad68966569a87p-2, - 0x1.b522646bbda68p-2, - 0x1.bccfec24855b8p-2, - 0x1.c4710406a65fcp-2, - 0x1.cc058392a6d2dp-2, - 0x1.d38d4354c3bd0p-2, - 0x1.db081ce6e2a48p-2, - 0x1.e275eaf25e458p-2, - 0x1.e9d68931ae650p-2, - 0x1.f129d471eabb1p-2, - 0x1.f86faa9428f9dp-2, - 0x1.ffa7ea8eb5fd0p-2, - 0x1.03693a371519cp-1, - 0x1.06f794ab2cae7p-1, - 0x1.0a7ef5c18edd2p-1, - 0x1.0dff4f247f6c6p-1, - 0x1.1178930ada115p-1, - 0x1.14eab43841b55p-1, - 0x1.1855a5fd3dd50p-1, - 0x1.1bb95c3746199p-1, - 0x1.1f15cb50bc4dep-1, - 0x1.226ae840d4d70p-1, - 0x1.25b8a88b6dd7fp-1, - 0x1.28ff0240d52cdp-1, - 0x1.2c3debfd7d6c1p-1, - 0x1.2f755ce9a21f4p-1, - 0x1.32a54cb8db67bp-1, - 0x1.35cdb3a9a144dp-1, - 0x1.38ee8a84beb71p-1, - 0x1.3c07ca9cb4f9ep-1, - 0x1.3f196dcd0f135p-1, - 0x1.42236e79a5fa6p-1, - 0x1.4525c78dd5966p-1, - 0x1.4820747ba2dc2p-1, - 0x1.4b13713ad3513p-1, - 0x1.4dfeba47f63ccp-1, - 0x1.50e24ca35fd2cp-1, - 0x1.53be25d016a4fp-1, - 0x1.569243d2b3a9bp-1, - 0x1.595ea53035283p-1, - 0x1.5c2348ecc4dc3p-1, - 0x1.5ee02e8a71a53p-1, - 0x1.61955607dd15dp-1, - 0x1.6442bfdedd397p-1, - 0x1.66e86d0312e82p-1, - 0x1.69865ee075011p-1, - 0x1.6c1c9759d0e5fp-1, - 0x1.6eab18c74091bp-1, - 0x1.7131e5f496a5ap-1, - 0x1.73b1021fc0cb8p-1, - 0x1.762870f720c6fp-1, - 0x1.78983697dc96fp-1, - 0x1.7b00578c26037p-1, - 0x1.7d60d8c979f7bp-1, - 0x1.7fb9bfaed8078p-1, - 0x1.820b1202f27fbp-1, - 0x1.8454d5f25760dp-1, - 0x1.8697120d92a4ap-1, - 0x1.88d1cd474a2e0p-1, - 0x1.8b050ef253c37p-1, - 0x1.8d30debfc572ep-1, - 0x1.8f5544bd00c04p-1, - 0x1.91724951b8fc6p-1, - 0x1.9387f53df5238p-1, - 0x1.959651980da31p-1, - 0x1.979d67caa6631p-1, - 0x1.999d4192a5715p-1, - 0x1.9b95e8fd26abap-1, - 0x1.9d8768656cc42p-1, - 0x1.9f71ca72cffb6p-1, - 0x1.a1551a16aaeafp-1, - 0x1.a331628a45b92p-1, - 0x1.a506af4cc00f4p-1, - 0x1.a6d50c20fa293p-1, - 0x1.a89c850b7d54dp-1, - 0x1.aa5d265064366p-1, - 0x1.ac16fc7143263p-1, - 0x1.adca142b10f98p-1, - 
0x1.af767a741088bp-1, - 0x1.b11c3c79bb424p-1, - 0x1.b2bb679ead19cp-1, - 0x1.b4540978921eep-1, - 0x1.b5e62fce16095p-1, - 0x1.b771e894d602ep-1, - 0x1.b8f741ef54f83p-1, - 0x1.ba764a2af2b78p-1, - 0x1.bbef0fbde6221p-1, - 0x1.bd61a1453ab44p-1, - 0x1.bece0d82d1a5cp-1, - 0x1.c034635b66e23p-1, - 0x1.c194b1d49a184p-1, - 0x1.c2ef0812fc1bdp-1, - 0x1.c443755820d64p-1, - 0x1.c5920900b5fd1p-1, - 0x1.c6dad2829ec62p-1, - 0x1.c81de16b14cefp-1, - 0x1.c95b455cce69dp-1, - 0x1.ca930e0e2a825p-1, - 0x1.cbc54b476248dp-1, - 0x1.ccf20ce0c0d27p-1, - 0x1.ce1962c0e0d8bp-1, - 0x1.cf3b5cdaf0c39p-1, - 0x1.d0580b2cfd249p-1, - 0x1.d16f7dbe41ca0p-1, - 0x1.d281c49d818d0p-1, - 0x1.d38eefdf64fddp-1, - 0x1.d4970f9ce00d9p-1, - 0x1.d59a33f19ed42p-1, - 0x1.d6986cfa798e7p-1, - 0x1.d791cad3eff01p-1, - 0x1.d8865d98abe01p-1, - 0x1.d97635600bb89p-1, - 0x1.da61623cb41e0p-1, - 0x1.db47f43b2980dp-1, - 0x1.dc29fb60715afp-1, - 0x1.dd0787a8bb39dp-1, - 0x1.dde0a90611a0dp-1, - 0x1.deb56f5f12d28p-1, - 0x1.df85ea8db188ep-1, - 0x1.e0522a5dfda73p-1, - 0x1.e11a3e8cf4eb8p-1, - 0x1.e1de36c75ba58p-1, - 0x1.e29e22a89d766p-1, - 0x1.e35a11b9b61cep-1, - 0x1.e4121370224ccp-1, - 0x1.e4c6372cd8927p-1, - 0x1.e5768c3b4a3fcp-1, - 0x1.e62321d06c5e0p-1, - 0x1.e6cc0709c8a0dp-1, - 0x1.e7714aec96534p-1, - 0x1.e812fc64db369p-1, - 0x1.e8b12a44944a8p-1, - 0x1.e94be342e6743p-1, - 0x1.e9e335fb56f87p-1, - 0x1.ea7730ed0bbb9p-1, - 0x1.eb07e27a133aap-1, - 0x1.eb9558e6b42cep-1, - 0x1.ec1fa258c4beap-1, - 0x1.eca6ccd709544p-1, - 0x1.ed2ae6489ac1ep-1, - 0x1.edabfc7453e63p-1, - 0x1.ee2a1d004692cp-1, - 0x1.eea5557137ae0p-1, - 0x1.ef1db32a2277cp-1, - 0x1.ef93436bc2daap-1, - 0x1.f006135426b26p-1, - 0x1.f0762fde45ee6p-1, - 0x1.f0e3a5e1a1788p-1, - 0x1.f14e8211e8c55p-1, - 0x1.f1b6d0fea5f4dp-1, - 0x1.f21c9f12f0677p-1, - 0x1.f27ff89525acfp-1, - 0x1.f2e0e9a6a8b09p-1, - 0x1.f33f7e43a706bp-1, - 0x1.f39bc242e43e6p-1, - 0x1.f3f5c1558b19ep-1, - 0x1.f44d870704911p-1, - 0x1.f4a31ebcd47dfp-1, - 0x1.f4f693b67bd77p-1, - 0x1.f547f10d60597p-1, - 0x1.f59741b4b97cfp-1, - 
0x1.f5e4907982a07p-1, - 0x1.f62fe80272419p-1, - 0x1.f67952cff6282p-1, - 0x1.f6c0db3c34641p-1, - 0x1.f7068b7b10fd9p-1, - 0x1.f74a6d9a38383p-1, - 0x1.f78c8b812d498p-1, - 0x1.f7cceef15d631p-1, - 0x1.f80ba18636f07p-1, - 0x1.f848acb544e95p-1, - 0x1.f88419ce4e184p-1, - 0x1.f8bdf1fb78370p-1, - 0x1.f8f63e416ebffp-1, - 0x1.f92d077f8d56dp-1, - 0x1.f96256700da8ep-1, - 0x1.f99633a838a57p-1, - 0x1.f9c8a7989af0dp-1, - 0x1.f9f9ba8d3c733p-1, - 0x1.fa2974addae45p-1, - 0x1.fa57ddfe27376p-1, - 0x1.fa84fe5e05c8dp-1, - 0x1.fab0dd89d1309p-1, - 0x1.fadb831a9f9c3p-1, - 0x1.fb04f6868a944p-1, - 0x1.fb2d3f20f9101p-1, - 0x1.fb54641aebbc9p-1, - 0x1.fb7a6c834b5a2p-1, - 0x1.fb9f5f4739170p-1, - 0x1.fbc3433260ca5p-1, - 0x1.fbe61eef4cf6ap-1, - 0x1.fc07f907bc794p-1, - 0x1.fc28d7e4f9cd0p-1, - 0x1.fc48c1d033c7ap-1, - 0x1.fc67bcf2d7b8fp-1, - 0x1.fc85cf56ecd38p-1, - 0x1.fca2fee770c79p-1, - 0x1.fcbf5170b578bp-1, - 0x1.fcdacca0bfb73p-1, - 0x1.fcf57607a6e7cp-1, - 0x1.fd0f5317f582fp-1, - 0x1.fd2869270a56fp-1, - 0x1.fd40bd6d7a785p-1, - 0x1.fd58550773cb5p-1, - 0x1.fd6f34f52013ap-1, - 0x1.fd85621b0876dp-1, - 0x1.fd9ae142795e3p-1, - 0x1.fdafb719e6a69p-1, - 0x1.fdc3e835500b3p-1, - 0x1.fdd7790ea5bc0p-1, - 0x1.fdea6e062d0c9p-1, - 0x1.fdfccb62e52d3p-1, - 0x1.fe0e9552ebdd6p-1, - 0x1.fe1fcfebe2083p-1, - 0x1.fe307f2b503d0p-1, - 0x1.fe40a6f70af4bp-1, - 0x1.fe504b1d9696cp-1, - 0x1.fe5f6f568b301p-1, - 0x1.fe6e1742f7cf6p-1, - 0x1.fe7c466dc57a1p-1, - 0x1.fe8a004c19ae6p-1, - 0x1.fe97483db8670p-1, - 0x1.fea4218d6594ap-1, - 0x1.feb08f7146046p-1, - 0x1.febc950b3fa75p-1, - 0x1.fec835695932ep-1, - 0x1.fed37386190fbp-1, - 0x1.fede5248e38f4p-1, - 0x1.fee8d486585eep-1, - 0x1.fef2fd00af31ap-1, - 0x1.fefcce6813974p-1, - 0x1.ff064b5afffbep-1, - 0x1.ff0f766697c76p-1, - 0x1.ff18520700971p-1, - 0x1.ff20e0a7ba8c2p-1, - 0x1.ff2924a3f7a83p-1, - 0x1.ff312046f2339p-1, - 0x1.ff38d5cc4227fp-1, - 0x1.ff404760319b4p-1, - 0x1.ff47772010262p-1, - 0x1.ff4e671a85425p-1, - 0x1.ff55194fe19dfp-1, - 0x1.ff5b8fb26f5f6p-1, - 0x1.ff61cc26c1578p-1, - 
0x1.ff67d08401202p-1, - 0x1.ff6d9e943c231p-1, - 0x1.ff733814af88cp-1, - 0x1.ff789eb6130c9p-1, - 0x1.ff7dd41ce2b4dp-1, - 0x1.ff82d9e1a76d8p-1, - 0x1.ff87b1913e853p-1, - 0x1.ff8c5cad200a5p-1, - 0x1.ff90dcaba4096p-1, - 0x1.ff9532f846ab0p-1, - 0x1.ff9960f3eb327p-1, - 0x1.ff9d67f51ddbap-1, - 0x1.ffa14948549a7p-1, - 0x1.ffa506302ebaep-1, - 0x1.ffa89fe5b3625p-1, - 0x1.ffac17988ef4bp-1, - 0x1.ffaf6e6f4f5c0p-1, - 0x1.ffb2a5879f35ep-1, - 0x1.ffb5bdf67fe6fp-1, - 0x1.ffb8b8c88295fp-1, - 0x1.ffbb970200110p-1, - 0x1.ffbe599f4f9d9p-1, - 0x1.ffc10194fcb64p-1, - 0x1.ffc38fcffbb7cp-1, - 0x1.ffc60535dd7f5p-1, - 0x1.ffc862a501fd7p-1, - 0x1.ffcaa8f4c9beap-1, - 0x1.ffccd8f5c66d1p-1, - 0x1.ffcef371ea4d7p-1, - 0x1.ffd0f92cb6ba7p-1, - 0x1.ffd2eae369a07p-1, - 0x1.ffd4c94d29fdbp-1, - 0x1.ffd6951b33686p-1, - 0x1.ffd84ef9009eep-1, - 0x1.ffd9f78c7524ap-1, - 0x1.ffdb8f7605ee7p-1, - 0x1.ffdd1750e1220p-1, - 0x1.ffde8fb314ebfp-1, - 0x1.ffdff92db56e5p-1, - 0x1.ffe1544d01ccbp-1, - 0x1.ffe2a1988857cp-1, - 0x1.ffe3e19349dc7p-1, - 0x1.ffe514bbdc197p-1, - 0x1.ffe63b8c8b5f7p-1, - 0x1.ffe7567b7b5e1p-1, - 0x1.ffe865fac722bp-1, - 0x1.ffe96a78a04a9p-1, - 0x1.ffea645f6d6dap-1, - 0x1.ffeb5415e7c44p-1, - 0x1.ffec39ff380b9p-1, - 0x1.ffed167b12ac2p-1, - 0x1.ffede9e5d3262p-1, - 0x1.ffeeb49896c6dp-1, - 0x1.ffef76e956a9fp-1, - 0x1.fff0312b010b5p-1, - 0x1.fff0e3ad91ec2p-1, - 0x1.fff18ebe2b0e1p-1, - 0x1.fff232a72b48ep-1, - 0x1.fff2cfb0453d9p-1, - 0x1.fff3661e9569dp-1, - 0x1.fff3f634b79f9p-1, - 0x1.fff48032dbe40p-1, - 0x1.fff50456dab8cp-1, - 0x1.fff582dc48d30p-1, - 0x1.fff5fbfc8a439p-1, - 0x1.fff66feee5129p-1, - 0x1.fff6dee89352ep-1, - 0x1.fff7491cd4af6p-1, - 0x1.fff7aebcff755p-1, - 0x1.fff80ff8911fdp-1, - 0x1.fff86cfd3e657p-1, - 0x1.fff8c5f702ccfp-1, - 0x1.fff91b102fca8p-1, - 0x1.fff96c717b695p-1, - 0x1.fff9ba420e834p-1, - 0x1.fffa04a7928b1p-1, - 0x1.fffa4bc63ee9ap-1, - 0x1.fffa8fc0e5f33p-1, - 0x1.fffad0b901755p-1, - 0x1.fffb0ecebee1bp-1, - 0x1.fffb4a210b172p-1, - 0x1.fffb82cd9dcbfp-1, - 0x1.fffbb8f1049c6p-1, - 
0x1.fffbeca6adbe9p-1, - 0x1.fffc1e08f25f5p-1, - 0x1.fffc4d3120aa1p-1, - 0x1.fffc7a37857d2p-1, - 0x1.fffca53375ce3p-1, - 0x1.fffcce3b57bffp-1, - 0x1.fffcf564ab6b7p-1, - 0x1.fffd1ac4135f9p-1, - 0x1.fffd3e6d5cd87p-1, - 0x1.fffd607387b07p-1, - 0x1.fffd80e8ce0dap-1, - 0x1.fffd9fdeabccep-1, - 0x1.fffdbd65e5ad0p-1, - 0x1.fffdd98e903b2p-1, - 0x1.fffdf46816833p-1, - 0x1.fffe0e0140857p-1, - 0x1.fffe26683972ap-1, - 0x1.fffe3daa95b18p-1, - 0x1.fffe53d558ae9p-1, - 0x1.fffe68f4fa777p-1, - 0x1.fffe7d156d244p-1, - 0x1.fffe904222101p-1, - 0x1.fffea2860ee1ep-1, - 0x1.fffeb3ebb267bp-1, - 0x1.fffec47d19457p-1, - 0x1.fffed443e2787p-1, - 0x1.fffee34943b15p-1, - 0x1.fffef1960d85dp-1, - 0x1.fffeff32af7afp-1, - 0x1.ffff0c273bea2p-1, - 0x1.ffff187b6bc0ep-1, - 0x1.ffff2436a21dcp-1, - 0x1.ffff2f5fefcaap-1, - 0x1.ffff39fe16963p-1, - 0x1.ffff44178c8d2p-1, - 0x1.ffff4db27f146p-1, - 0x1.ffff56d4d5e5ep-1, - 0x1.ffff5f8435efcp-1, - 0x1.ffff67c604180p-1, - 0x1.ffff6f9f67e55p-1, - 0x1.ffff77154e0d6p-1, - 0x1.ffff7e2c6aea2p-1, - 0x1.ffff84e93cd75p-1, - 0x1.ffff8b500e77cp-1, - 0x1.ffff9164f8e46p-1, - 0x1.ffff972be5c59p-1, - 0x1.ffff9ca891572p-1, - 0x1.ffffa1de8c582p-1, - 0x1.ffffa6d13de73p-1, - 0x1.ffffab83e54b8p-1, - 0x1.ffffaff99bac4p-1, - 0x1.ffffb43555b5fp-1, - 0x1.ffffb839e52f3p-1, - 0x1.ffffbc09fa7cdp-1, - 0x1.ffffbfa82616bp-1, - 0x1.ffffc316d9ed0p-1, - 0x1.ffffc6586abf6p-1, - 0x1.ffffc96f1165ep-1, - 0x1.ffffcc5cec0c1p-1, - 0x1.ffffcf23ff5fcp-1, - 0x1.ffffd1c637b2bp-1, - 0x1.ffffd4456a10dp-1, - 0x1.ffffd6a3554a1p-1, - 0x1.ffffd8e1a2f22p-1, - 0x1.ffffdb01e8546p-1, - 0x1.ffffdd05a75eap-1, - 0x1.ffffdeee4f810p-1, - 0x1.ffffe0bd3e852p-1, - 0x1.ffffe273c15b7p-1, - 0x1.ffffe41314e06p-1, - 0x1.ffffe59c6698bp-1, - 0x1.ffffe710d565ep-1, - 0x1.ffffe8717232dp-1, - 0x1.ffffe9bf4098cp-1, - 0x1.ffffeafb377d5p-1, - 0x1.ffffec2641a9ep-1, - 0x1.ffffed413e5b7p-1, - 0x1.ffffee4d01cd6p-1, - 0x1.ffffef4a55bd4p-1, - 0x1.fffff039f9e8fp-1, - 0x1.fffff11ca4876p-1, - 0x1.fffff1f302bc1p-1, - 0x1.fffff2bdb904dp-1, - 
0x1.fffff37d63a36p-1, - 0x1.fffff43297019p-1, - 0x1.fffff4dde0118p-1, - 0x1.fffff57fc4a95p-1, - 0x1.fffff618c3da6p-1, - 0x1.fffff6a956450p-1, - 0x1.fffff731ee681p-1, - 0x1.fffff7b2f8ed6p-1, - 0x1.fffff82cdcf1bp-1, - 0x1.fffff89ffc4aap-1, - 0x1.fffff90cb3c81p-1, - 0x1.fffff9735b73bp-1, - 0x1.fffff9d446cccp-1, - 0x1.fffffa2fc5015p-1, - 0x1.fffffa8621251p-1, - 0x1.fffffad7a2652p-1, - 0x1.fffffb248c39dp-1, - 0x1.fffffb6d1e95dp-1, - 0x1.fffffbb196132p-1, - 0x1.fffffbf22c1e2p-1, - 0x1.fffffc2f171e3p-1, - 0x1.fffffc688a9cfp-1, - 0x1.fffffc9eb76acp-1, - 0x1.fffffcd1cbc28p-1, - 0x1.fffffd01f36afp-1, - 0x1.fffffd2f57d68p-1, - 0x1.fffffd5a2041fp-1, - 0x1.fffffd8271d12p-1, - 0x1.fffffda86faa9p-1, - 0x1.fffffdcc3b117p-1, - 0x1.fffffdedf37edp-1, - 0x1.fffffe0db6b91p-1, - 0x1.fffffe2ba0ea5p-1, - 0x1.fffffe47ccb60p-1, - 0x1.fffffe62534d4p-1, - 0x1.fffffe7b4c81ep-1, - 0x1.fffffe92ced93p-1, - 0x1.fffffea8ef9cfp-1, - 0x1.fffffebdc2ec6p-1, - 0x1.fffffed15bcbap-1, - 0x1.fffffee3cc32cp-1, - 0x1.fffffef5251c2p-1, - 0x1.ffffff0576917p-1, - 0x1.ffffff14cfb92p-1, - 0x1.ffffff233ee1dp-1, - 0x1.ffffff30d18e8p-1, - 0x1.ffffff3d9480fp-1, - 0x1.ffffff4993c46p-1, - 0x1.ffffff54dab72p-1, - 0x1.ffffff5f74141p-1, - 0x1.ffffff6969fb8p-1, - 0x1.ffffff72c5fb6p-1, - 0x1.ffffff7b91176p-1, - 0x1.ffffff83d3d07p-1, - 0x1.ffffff8b962bep-1, - 0x1.ffffff92dfba2p-1, - 0x1.ffffff99b79d2p-1, - 0x1.ffffffa0248e8p-1, - 0x1.ffffffa62ce54p-1, - 0x1.ffffffabd69b4p-1, - 0x1.ffffffb127525p-1, - 0x1.ffffffb624592p-1, - 0x1.ffffffbad2affp-1, - 0x1.ffffffbf370cdp-1, - 0x1.ffffffc355dfdp-1, - 0x1.ffffffc733572p-1, - 0x1.ffffffcad3626p-1, - 0x1.ffffffce39b67p-1, - 0x1.ffffffd169d0cp-1, - 0x1.ffffffd466fa5p-1, - 0x1.ffffffd7344aap-1, - 0x1.ffffffd9d4aabp-1, - 0x1.ffffffdc4ad7ap-1, - 0x1.ffffffde9964ep-1, - 0x1.ffffffe0c2bf0p-1, - 0x1.ffffffe2c92dbp-1, - 0x1.ffffffe4aed5ep-1, - 0x1.ffffffe675bbdp-1, - 0x1.ffffffe81fc4ep-1, - 0x1.ffffffe9aeb97p-1, - 0x1.ffffffeb24467p-1, - 0x1.ffffffec81ff2p-1, - 0x1.ffffffedc95e7p-1, - 
0x1.ffffffeefbc85p-1, - 0x1.fffffff01a8b6p-1, - 0x1.fffffff126e1ep-1, - 0x1.fffffff221f30p-1, - 0x1.fffffff30cd3fp-1, - 0x1.fffffff3e8892p-1, - 0x1.fffffff4b606fp-1, - 0x1.fffffff57632dp-1, - 0x1.fffffff629e44p-1, - 0x1.fffffff6d1e56p-1, - 0x1.fffffff76ef3fp-1, - 0x1.fffffff801c1fp-1, - 0x1.fffffff88af67p-1, - 0x1.fffffff90b2e3p-1, - 0x1.fffffff982fc1p-1, - 0x1.fffffff9f2e9fp-1, - 0x1.fffffffa5b790p-1, - 0x1.fffffffabd229p-1, - 0x1.fffffffb18582p-1, - 0x1.fffffffb6d844p-1, - 0x1.fffffffbbd0aap-1, - 0x1.fffffffc0748fp-1, - 0x1.fffffffc4c96cp-1, - 0x1.fffffffc8d462p-1, - 0x1.fffffffcc9a41p-1, - 0x1.fffffffd01f89p-1, - 0x1.fffffffd36871p-1, - 0x1.fffffffd678edp-1, - 0x1.fffffffd954aep-1, - 0x1.fffffffdbff2ap-1, - 0x1.fffffffde7ba0p-1, - 0x1.fffffffe0cd16p-1, - 0x1.fffffffe2f664p-1, - 0x1.fffffffe4fa30p-1, - 0x1.fffffffe6daf7p-1, - 0x1.fffffffe89b0cp-1, - 0x1.fffffffea3c9ap-1, - 0x1.fffffffebc1a9p-1, - 0x1.fffffffed2c21p-1, - 0x1.fffffffee7dc8p-1, - 0x1.fffffffefb847p-1, - 0x1.ffffffff0dd2bp-1, - 0x1.ffffffff1ede9p-1, - 0x1.ffffffff2ebdap-1, - 0x1.ffffffff3d843p-1, - 0x1.ffffffff4b453p-1, - 0x1.ffffffff58126p-1, - 0x1.ffffffff63fc3p-1, - 0x1.ffffffff6f121p-1, - 0x1.ffffffff79626p-1, - 0x1.ffffffff82fabp-1, - 0x1.ffffffff8be77p-1, - 0x1.ffffffff94346p-1, - 0x1.ffffffff9bec8p-1, - 0x1.ffffffffa319fp-1, - 0x1.ffffffffa9c63p-1, - 0x1.ffffffffaffa4p-1, - 0x1.ffffffffb5be5p-1, - 0x1.ffffffffbb1a2p-1, - 0x1.ffffffffc014ep-1, - 0x1.ffffffffc4b56p-1, - 0x1.ffffffffc901cp-1, - 0x1.ffffffffccfffp-1, - 0x1.ffffffffd0b56p-1, - 0x1.ffffffffd4271p-1, - 0x1.ffffffffd759dp-1, - 0x1.ffffffffda520p-1, - 0x1.ffffffffdd13cp-1, - 0x1.ffffffffdfa2dp-1, - 0x1.ffffffffe202dp-1, - 0x1.ffffffffe4371p-1, - 0x1.ffffffffe642ap-1, - 0x1.ffffffffe8286p-1, - 0x1.ffffffffe9eb0p-1, - 0x1.ffffffffeb8d0p-1, - 0x1.ffffffffed10ap-1, - 0x1.ffffffffee782p-1, - 0x1.ffffffffefc57p-1, - 0x1.fffffffff0fa7p-1, - 0x1.fffffffff218fp-1, - 0x1.fffffffff3227p-1, - 0x1.fffffffff4188p-1, - 0x1.fffffffff4fc9p-1, - 
0x1.fffffffff5cfdp-1, - 0x1.fffffffff6939p-1, - 0x1.fffffffff748ep-1, - 0x1.fffffffff7f0dp-1, - 0x1.fffffffff88c5p-1, - 0x1.fffffffff91c6p-1, - 0x1.fffffffff9a1bp-1, - 0x1.fffffffffa1d2p-1, - 0x1.fffffffffa8f6p-1, - 0x1.fffffffffaf92p-1, - 0x1.fffffffffb5b0p-1, - 0x1.fffffffffbb58p-1, - 0x1.fffffffffc095p-1, - 0x1.fffffffffc56dp-1, - 0x1.fffffffffc9e8p-1, - 0x1.fffffffffce0dp-1, - 0x1.fffffffffd1e1p-1, - 0x1.fffffffffd56cp-1, - 0x1.fffffffffd8b3p-1, - 0x1.fffffffffdbbap-1, - 0x1.fffffffffde86p-1, - 0x1.fffffffffe11dp-1, - 0x1.fffffffffe380p-1, - 0x1.fffffffffe5b6p-1, - 0x1.fffffffffe7c0p-1, - 0x1.fffffffffe9a2p-1, - 0x1.fffffffffeb60p-1, - 0x1.fffffffffecfbp-1, - 0x1.fffffffffee77p-1, - 0x1.fffffffffefd6p-1, - 0x1.ffffffffff11ap-1, - 0x1.ffffffffff245p-1, - 0x1.ffffffffff359p-1, - 0x1.ffffffffff457p-1, - 0x1.ffffffffff542p-1, - 0x1.ffffffffff61bp-1, - 0x1.ffffffffff6e3p-1, - 0x1.ffffffffff79bp-1, - 0x1.ffffffffff845p-1, - 0x1.ffffffffff8e2p-1, - 0x1.ffffffffff973p-1, - 0x1.ffffffffff9f8p-1, - 0x1.ffffffffffa73p-1, - 0x1.ffffffffffae4p-1, - 0x1.ffffffffffb4cp-1, - 0x1.ffffffffffbadp-1, - 0x1.ffffffffffc05p-1, - 0x1.ffffffffffc57p-1, - 0x1.ffffffffffca2p-1, - 0x1.ffffffffffce7p-1, - 0x1.ffffffffffd27p-1, - 0x1.ffffffffffd62p-1, - 0x1.ffffffffffd98p-1, - 0x1.ffffffffffdcap-1, - 0x1.ffffffffffdf8p-1, - 0x1.ffffffffffe22p-1, - 0x1.ffffffffffe49p-1, - 0x1.ffffffffffe6cp-1, - 0x1.ffffffffffe8dp-1, - 0x1.ffffffffffeabp-1, - 0x1.ffffffffffec7p-1, - 0x1.ffffffffffee1p-1, - 0x1.ffffffffffef8p-1, - 0x1.fffffffffff0ep-1, - 0x1.fffffffffff22p-1, - 0x1.fffffffffff34p-1, - 0x1.fffffffffff45p-1, - 0x1.fffffffffff54p-1, - 0x1.fffffffffff62p-1, - 0x1.fffffffffff6fp-1, - 0x1.fffffffffff7bp-1, - 0x1.fffffffffff86p-1, - 0x1.fffffffffff90p-1, - 0x1.fffffffffff9ap-1, - 0x1.fffffffffffa2p-1, - 0x1.fffffffffffaap-1, - 0x1.fffffffffffb1p-1, - 0x1.fffffffffffb8p-1, - 0x1.fffffffffffbep-1, - 0x1.fffffffffffc3p-1, - 0x1.fffffffffffc8p-1, - 0x1.fffffffffffcdp-1, - 0x1.fffffffffffd1p-1, - 
0x1.fffffffffffd5p-1, - 0x1.fffffffffffd9p-1, - 0x1.fffffffffffdcp-1, - 0x1.fffffffffffdfp-1, - 0x1.fffffffffffe2p-1, - 0x1.fffffffffffe4p-1, - 0x1.fffffffffffe7p-1, - 0x1.fffffffffffe9p-1, - 0x1.fffffffffffebp-1, - 0x1.fffffffffffedp-1, - 0x1.fffffffffffeep-1, - 0x1.ffffffffffff0p-1, - 0x1.ffffffffffff1p-1, - 0x1.ffffffffffff3p-1, - 0x1.ffffffffffff4p-1, - 0x1.ffffffffffff5p-1, - 0x1.ffffffffffff6p-1, - 0x1.ffffffffffff7p-1, - 0x1.ffffffffffff7p-1, - 0x1.ffffffffffff8p-1, - 0x1.ffffffffffff9p-1, - 0x1.ffffffffffff9p-1, - 0x1.ffffffffffffap-1, - 0x1.ffffffffffffbp-1, - 0x1.ffffffffffffbp-1, - 0x1.ffffffffffffbp-1, - 0x1.ffffffffffffcp-1, - 0x1.ffffffffffffcp-1, - 0x1.ffffffffffffdp-1, - 0x1.ffffffffffffdp-1, - 0x1.ffffffffffffdp-1, - 0x1.ffffffffffffdp-1, - 0x1.ffffffffffffep-1, - 0x1.ffffffffffffep-1, - 0x1.ffffffffffffep-1, - 0x1.ffffffffffffep-1, - 0x1.ffffffffffffep-1, - 0x1.ffffffffffffep-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - }, - .scale = { 0x1.20dd750429b6dp+0, - 0x1.20d8f1975c85dp+0, - 0x1.20cb67bd452c7p+0, - 0x1.20b4d8bac36c1p+0, - 0x1.209546ad13ccfp+0, - 0x1.206cb4897b148p+0, - 0x1.203b261cd0052p+0, - 0x1.2000a00ae3804p+0, - 0x1.1fbd27cdc72d3p+0, - 0x1.1f70c3b4f2cc7p+0, - 0x1.1f1b7ae44867fp+0, - 0x1.1ebd5552f795bp+0, - 0x1.1e565bca400d4p+0, - 0x1.1de697e413d28p+0, - 0x1.1d6e14099944ap+0, - 0x1.1cecdb718d61cp+0, - 0x1.1c62fa1e869b6p+0, - 0x1.1bd07cdd189acp+0, - 0x1.1b357141d95d5p+0, - 0x1.1a91e5a748165p+0, - 0x1.19e5e92b964abp+0, - 0x1.19318bae53a04p+0, - 
0x1.1874ddcdfce24p+0, - 0x1.17aff0e56ec10p+0, - 0x1.16e2d7093cd8cp+0, - 0x1.160da304ed92fp+0, - 0x1.153068581b781p+0, - 0x1.144b3b337c90cp+0, - 0x1.135e3075d076bp+0, - 0x1.12695da8b5bdep+0, - 0x1.116cd8fd67618p+0, - 0x1.1068b94962e5ep+0, - 0x1.0f5d1602f7e41p+0, - 0x1.0e4a073dc1b91p+0, - 0x1.0d2fa5a70c168p+0, - 0x1.0c0e0a8223359p+0, - 0x1.0ae54fa490722p+0, - 0x1.09b58f724416bp+0, - 0x1.087ee4d9ad247p+0, - 0x1.07416b4fbfe7cp+0, - 0x1.05fd3ecbec297p+0, - 0x1.04b27bc403d30p+0, - 0x1.03613f2812dafp+0, - 0x1.0209a65e29545p+0, - 0x1.00abcf3e187a9p+0, - 0x1.fe8fb01a47307p-1, - 0x1.fbbbbef34b4b2p-1, - 0x1.f8dc092d58ff8p-1, - 0x1.f5f0cdaf15313p-1, - 0x1.f2fa4c16c0019p-1, - 0x1.eff8c4b1375dbp-1, - 0x1.ecec7870ebca7p-1, - 0x1.e9d5a8e4c934ep-1, - 0x1.e6b4982f158b9p-1, - 0x1.e38988fc46e72p-1, - 0x1.e054be79d3042p-1, - 0x1.dd167c4cf9d2ap-1, - 0x1.d9cf06898cdafp-1, - 0x1.d67ea1a8b5368p-1, - 0x1.d325927fb9d89p-1, - 0x1.cfc41e36c7df9p-1, - 0x1.cc5a8a3fbea40p-1, - 0x1.c8e91c4d01368p-1, - 0x1.c5701a484ef9dp-1, - 0x1.c1efca49a5011p-1, - 0x1.be68728e29d5dp-1, - 0x1.bada596f25436p-1, - 0x1.b745c55905bf8p-1, - 0x1.b3aafcc27502ep-1, - 0x1.b00a46237d5bep-1, - 0x1.ac63e7ecc1411p-1, - 0x1.a8b8287ec6a09p-1, - 0x1.a5074e2157620p-1, - 0x1.a1519efaf889ep-1, - 0x1.9d97610879642p-1, - 0x1.99d8da149c13fp-1, - 0x1.96164fafd8de3p-1, - 0x1.925007283d7aap-1, - 0x1.8e86458169af8p-1, - 0x1.8ab94f6caa71dp-1, - 0x1.86e9694134b9ep-1, - 0x1.8316d6f48133dp-1, - 0x1.7f41dc12c9e89p-1, - 0x1.7b6abbb7aaf19p-1, - 0x1.7791b886e7403p-1, - 0x1.73b714a552763p-1, - 0x1.6fdb11b1e0c34p-1, - 0x1.6bfdf0beddaf5p-1, - 0x1.681ff24b4ab04p-1, - 0x1.6441563c665d4p-1, - 0x1.60625bd75d07bp-1, - 0x1.5c8341bb23767p-1, - 0x1.58a445da7c74cp-1, - 0x1.54c5a57629db0p-1, - 0x1.50e79d1749ac9p-1, - 0x1.4d0a6889dfd9fp-1, - 0x1.492e42d78d2c5p-1, - 0x1.4553664273d24p-1, - 0x1.417a0c4049fd0p-1, - 0x1.3da26d759aef5p-1, - 0x1.39ccc1b136d5ap-1, - 0x1.35f93fe7d1b3dp-1, - 0x1.32281e2fd1a92p-1, - 0x1.2e5991bd4cbfcp-1, - 0x1.2a8dcede3673bp-1, - 
0x1.26c508f6bd0ffp-1, - 0x1.22ff727dd6f7bp-1, - 0x1.1f3d3cf9ffe5ap-1, - 0x1.1b7e98fe26217p-1, - 0x1.17c3b626c7a11p-1, - 0x1.140cc3173f007p-1, - 0x1.1059ed7740313p-1, - 0x1.0cab61f084b93p-1, - 0x1.09014c2ca74dap-1, - 0x1.055bd6d32e8d7p-1, - 0x1.01bb2b87c6968p-1, - 0x1.fc3ee5d1524b0p-2, - 0x1.f511a91a67d2ap-2, - 0x1.edeeee0959518p-2, - 0x1.e6d6ffaa65a25p-2, - 0x1.dfca26f5bbf88p-2, - 0x1.d8c8aace11e63p-2, - 0x1.d1d2cfff91594p-2, - 0x1.cae8d93f1d7b6p-2, - 0x1.c40b0729ed547p-2, - 0x1.bd3998457afdap-2, - 0x1.b674c8ffc6283p-2, - 0x1.afbcd3afe8ab6p-2, - 0x1.a911f096fbc26p-2, - 0x1.a27455e14c93cp-2, - 0x1.9be437a7de946p-2, - 0x1.9561c7f23a47bp-2, - 0x1.8eed36b886d93p-2, - 0x1.8886b1e5ecfd1p-2, - 0x1.822e655b417e6p-2, - 0x1.7be47af1f5d89p-2, - 0x1.75a91a7f4d2edp-2, - 0x1.6f7c69d7d3ef8p-2, - 0x1.695e8cd31867ep-2, - 0x1.634fa54fa285fp-2, - 0x1.5d4fd33729015p-2, - 0x1.575f3483021c3p-2, - 0x1.517de540ce2a3p-2, - 0x1.4babff975a04cp-2, - 0x1.45e99bcbb7915p-2, - 0x1.4036d0468a7a2p-2, - 0x1.3a93b1998736cp-2, - 0x1.35005285227f1p-2, - 0x1.2f7cc3fe6f423p-2, - 0x1.2a09153529381p-2, - 0x1.24a55399ea239p-2, - 0x1.1f518ae487dc8p-2, - 0x1.1a0dc51a9934dp-2, - 0x1.14da0a961fd14p-2, - 0x1.0fb6620c550afp-2, - 0x1.0aa2d09497f2bp-2, - 0x1.059f59af7a906p-2, - 0x1.00abff4dec7a3p-2, - 0x1.f79183b101c5bp-3, - 0x1.edeb406d9c824p-3, - 0x1.e4652fadcb6b2p-3, - 0x1.daff4969c0b04p-3, - 0x1.d1b982c501370p-3, - 0x1.c893ce1dcbef7p-3, - 0x1.bf8e1b1ca2279p-3, - 0x1.b6a856c3ed54fp-3, - 0x1.ade26b7fbed95p-3, - 0x1.a53c4135a6526p-3, - 0x1.9cb5bd549b111p-3, - 0x1.944ec2e4f5630p-3, - 0x1.8c07329874652p-3, - 0x1.83deeada4d25ap-3, - 0x1.7bd5c7df3fe9cp-3, - 0x1.73eba3b5b07b7p-3, - 0x1.6c205655be71fp-3, - 0x1.6473b5b15a7a1p-3, - 0x1.5ce595c455b0ap-3, - 0x1.5575c8a468361p-3, - 0x1.4e241e912c305p-3, - 0x1.46f066040a832p-3, - 0x1.3fda6bc016994p-3, - 0x1.38e1fae1d6a9dp-3, - 0x1.3206dceef5f87p-3, - 0x1.2b48d9e5dea1cp-3, - 0x1.24a7b84d38971p-3, - 0x1.1e233d434b813p-3, - 0x1.17bb2c8d41535p-3, - 0x1.116f48a6476ccp-3, - 
0x1.0b3f52ce8c383p-3, - 0x1.052b0b1a174eap-3, - 0x1.fe6460fef4680p-4, - 0x1.f2a901ccafb37p-4, - 0x1.e723726b824a9p-4, - 0x1.dbd32ac4c99b0p-4, - 0x1.d0b7a0f921e7cp-4, - 0x1.c5d0497c09e74p-4, - 0x1.bb1c972f23e50p-4, - 0x1.b09bfb7d11a83p-4, - 0x1.a64de673e8837p-4, - 0x1.9c31c6df3b1b8p-4, - 0x1.92470a61b6965p-4, - 0x1.888d1d8e510a3p-4, - 0x1.7f036c0107294p-4, - 0x1.75a96077274bap-4, - 0x1.6c7e64e7281cbp-4, - 0x1.6381e2980956bp-4, - 0x1.5ab342383d177p-4, - 0x1.5211ebf41880bp-4, - 0x1.499d478bca735p-4, - 0x1.4154bc68d75c3p-4, - 0x1.3937b1b319259p-4, - 0x1.31458e6542847p-4, - 0x1.297db960e4f63p-4, - 0x1.21df9981f8e53p-4, - 0x1.1a6a95b1e786fp-4, - 0x1.131e14fa1625dp-4, - 0x1.0bf97e95f2a64p-4, - 0x1.04fc3a0481321p-4, - 0x1.fc4b5e32d6259p-5, - 0x1.eeea8c1b1db93p-5, - 0x1.e1d4cf1e2450ap-5, - 0x1.d508f9a1ea64ep-5, - 0x1.c885df3451a07p-5, - 0x1.bc4a54a84e834p-5, - 0x1.b055303221015p-5, - 0x1.a4a549829587ep-5, - 0x1.993979e14fffdp-5, - 0x1.8e109c4622913p-5, - 0x1.83298d717210ep-5, - 0x1.78832c03aa2b1p-5, - 0x1.6e1c5893c380bp-5, - 0x1.63f3f5c4de13bp-5, - 0x1.5a08e85af27e0p-5, - 0x1.505a174e9c929p-5, - 0x1.46e66be002240p-5, - 0x1.3dacd1a8d8ccdp-5, - 0x1.34ac36ad8dafep-5, - 0x1.2be38b6d92415p-5, - 0x1.2351c2f2d1449p-5, - 0x1.1af5d2e04f3f6p-5, - 0x1.12ceb37ff9bc3p-5, - 0x1.0adb5fcfa8c75p-5, - 0x1.031ad58d56279p-5, - 0x1.f7182a851bca2p-6, - 0x1.e85c449e377f2p-6, - 0x1.da0005e5f28dfp-6, - 0x1.cc0180af00a8bp-6, - 0x1.be5ecd2fcb5f9p-6, - 0x1.b1160991ff737p-6, - 0x1.a4255a00b9f03p-6, - 0x1.978ae8b55ce1bp-6, - 0x1.8b44e6031383ep-6, - 0x1.7f5188610ddc8p-6, - 0x1.73af0c737bb45p-6, - 0x1.685bb5134ef13p-6, - 0x1.5d55cb54cd53ap-6, - 0x1.529b9e8cf9a1ep-6, - 0x1.482b8455dc491p-6, - 0x1.3e03d891b37dep-6, - 0x1.3422fd6d12e2bp-6, - 0x1.2a875b5ffab56p-6, - 0x1.212f612dee7fbp-6, - 0x1.181983e5133ddp-6, - 0x1.0f443edc5ce49p-6, - 0x1.06ae13b0d3255p-6, - 0x1.fcab1483ea7fcp-7, - 0x1.ec72615a894c4p-7, - 0x1.dcaf3691fc448p-7, - 0x1.cd5ec93c12431p-7, - 0x1.be7e5ac24963bp-7, - 0x1.b00b38d6b3575p-7, - 
0x1.a202bd6372dcep-7, - 0x1.94624e78e0fafp-7, - 0x1.87275e3a6869dp-7, - 0x1.7a4f6aca256cbp-7, - 0x1.6dd7fe3358230p-7, - 0x1.61beae53b72b7p-7, - 0x1.56011cc3b036dp-7, - 0x1.4a9cf6bda3f4cp-7, - 0x1.3f8ff5042a88ep-7, - 0x1.34d7dbc76d7e5p-7, - 0x1.2a727a89a3f14p-7, - 0x1.205dac02bd6b9p-7, - 0x1.1697560347b25p-7, - 0x1.0d1d69569b82dp-7, - 0x1.03ede1a45bfeep-7, - 0x1.f60d8aa2a88f2p-8, - 0x1.e4cc4abf7d065p-8, - 0x1.d4143a9dfe965p-8, - 0x1.c3e1a5f5c077cp-8, - 0x1.b430ecf4a83a8p-8, - 0x1.a4fe83fb9db25p-8, - 0x1.9646f35a76623p-8, - 0x1.8806d70b2fc36p-8, - 0x1.7a3ade6c8b3e4p-8, - 0x1.6cdfcbfc1e263p-8, - 0x1.5ff2750fe7820p-8, - 0x1.536fc18f7ce5cp-8, - 0x1.4754abacdf1dcp-8, - 0x1.3b9e3f9d06e3fp-8, - 0x1.30499b503957fp-8, - 0x1.2553ee2a336bfp-8, - 0x1.1aba78ba3af89p-8, - 0x1.107a8c7323a6ep-8, - 0x1.06918b6355624p-8, - 0x1.f9f9cfd9c3035p-9, - 0x1.e77448fb66bb9p-9, - 0x1.d58da68fd1170p-9, - 0x1.c4412bf4b8f0bp-9, - 0x1.b38a3af2e55b4p-9, - 0x1.a3645330550ffp-9, - 0x1.93cb11a30d765p-9, - 0x1.84ba3004a50d0p-9, - 0x1.762d84469c18fp-9, - 0x1.6821000795a03p-9, - 0x1.5a90b00981d93p-9, - 0x1.4d78bba8ca5fdp-9, - 0x1.40d564548fad7p-9, - 0x1.34a305080681fp-9, - 0x1.28de11c5031ebp-9, - 0x1.1d83170fbf6fbp-9, - 0x1.128eb96be8798p-9, - 0x1.07fdb4dafea5fp-9, - 0x1.fb99b8b8279e1p-10, - 0x1.e7f232d9e2630p-10, - 0x1.d4fed7195d7e8p-10, - 0x1.c2b9cf7f893bfp-10, - 0x1.b11d702b3deb1p-10, - 0x1.a024365f771bdp-10, - 0x1.8fc8c794b03b5p-10, - 0x1.8005f08d6f1efp-10, - 0x1.70d6a46e07ddap-10, - 0x1.6235fbd7a4345p-10, - 0x1.541f340697987p-10, - 0x1.468dadf4080abp-10, - 0x1.397ced7af2b15p-10, - 0x1.2ce898809244ep-10, - 0x1.20cc76202c5fap-10, - 0x1.15246dda49d47p-10, - 0x1.09ec86c75d497p-10, - 0x1.fe41cd9bb4eeep-11, - 0x1.e97ba3b77f306p-11, - 0x1.d57f524723822p-11, - 0x1.c245d4b998479p-11, - 0x1.afc85e0f82e12p-11, - 0x1.9e005769dbc1dp-11, - 0x1.8ce75e9f6f8a0p-11, - 0x1.7c7744d9378f7p-11, - 0x1.6caa0d3582fe9p-11, - 0x1.5d79eb71e893bp-11, - 0x1.4ee1429bf7cc0p-11, - 0x1.40daa3c89f5b6p-11, - 0x1.3360ccd23db3ap-11, - 
0x1.266ea71d4f71ap-11, - 0x1.19ff4663ae9dfp-11, - 0x1.0e0de78654d1ep-11, - 0x1.0295ef6591848p-11, - 0x1.ef25d37f49fe1p-12, - 0x1.da01102b5f851p-12, - 0x1.c5b5412dcafadp-12, - 0x1.b23a5a23e4210p-12, - 0x1.9f8893d8fd1c1p-12, - 0x1.8d986a4187285p-12, - 0x1.7c629a822bc9ep-12, - 0x1.6be02102b3520p-12, - 0x1.5c0a378c90bcap-12, - 0x1.4cda5374ea275p-12, - 0x1.3e4a23d1f4702p-12, - 0x1.30538fbb77ecdp-12, - 0x1.22f0b496539bdp-12, - 0x1.161be46ad3b50p-12, - 0x1.09cfa445b00ffp-12, - 0x1.fc0d55470cf51p-13, - 0x1.e577bbcd49935p-13, - 0x1.cfd4a5adec5bfp-13, - 0x1.bb1a9657ce465p-13, - 0x1.a740684026555p-13, - 0x1.943d4a1d1ed39p-13, - 0x1.8208bc334a6a5p-13, - 0x1.709a8db59f25cp-13, - 0x1.5feada379d8b7p-13, - 0x1.4ff207314a102p-13, - 0x1.40a8c1949f75ep-13, - 0x1.3207fb7420eb9p-13, - 0x1.2408e9ba3327fp-13, - 0x1.16a501f0e42cap-13, - 0x1.09d5f819c9e29p-13, - 0x1.fb2b792b40a22p-14, - 0x1.e3bcf436a1a95p-14, - 0x1.cd55277c18d05p-14, - 0x1.b7e94604479dcp-14, - 0x1.a36eec00926ddp-14, - 0x1.8fdc1b2dcf7b9p-14, - 0x1.7d2737527c3f9p-14, - 0x1.6b4702d7d5849p-14, - 0x1.5a329b7d30748p-14, - 0x1.49e17724f4d41p-14, - 0x1.3a4b60ba9aa4dp-14, - 0x1.2b6875310f785p-14, - 0x1.1d312098e9dbap-14, - 0x1.0f9e1b4dd36dfp-14, - 0x1.02a8673a94691p-14, - 0x1.ec929a665b449p-15, - 0x1.d4f4b4c8e09edp-15, - 0x1.be6abbb10a5aap-15, - 0x1.a8e8cc1fadef6p-15, - 0x1.94637d5bacfdbp-15, - 0x1.80cfdc72220cfp-15, - 0x1.6e2367dc27f95p-15, - 0x1.5c540b4936fd2p-15, - 0x1.4b581b8d170fcp-15, - 0x1.3b2652b06c2b2p-15, - 0x1.2bb5cc22e5db6p-15, - 0x1.1cfe010e2052dp-15, - 0x1.0ef6c4c84a0fep-15, - 0x1.01984165a5f36p-15, - 0x1.e9b5e8d00ce76p-16, - 0x1.d16f5716c6c1ap-16, - 0x1.ba4f035d60e02p-16, - 0x1.a447b7b03f045p-16, - 0x1.8f4ccca7fc90dp-16, - 0x1.7b5223dac7336p-16, - 0x1.684c227fcacefp-16, - 0x1.562fac4329b48p-16, - 0x1.44f21e49054f2p-16, - 0x1.34894a5e24657p-16, - 0x1.24eb7254ccf83p-16, - 0x1.160f438c70913p-16, - 0x1.07ebd2a2d2844p-16, - 0x1.f4f12e9ab070ap-17, - 0x1.db5ad0b27805cp-17, - 0x1.c304efa2c6f4ep-17, - 0x1.abe09e9144b5ep-17, - 
0x1.95df988e76644p-17, - 0x1.80f439b4ee04bp-17, - 0x1.6d11788a69c64p-17, - 0x1.5a2adfa0b4bc4p-17, - 0x1.4834877429b8fp-17, - 0x1.37231085c7d9ap-17, - 0x1.26eb9daed6f7ep-17, - 0x1.1783ceac28910p-17, - 0x1.08e1badf0fcedp-17, - 0x1.f5f7d88472604p-18, - 0x1.db92b5212fb8dp-18, - 0x1.c282cd3957edap-18, - 0x1.aab7abace48dcp-18, - 0x1.94219bfcb4928p-18, - 0x1.7eb1a2075864dp-18, - 0x1.6a597219a93d9p-18, - 0x1.570b69502f313p-18, - 0x1.44ba864670882p-18, - 0x1.335a62115bce2p-18, - 0x1.22df298214423p-18, - 0x1.133d96ae7e0ddp-18, - 0x1.046aeabcfcdecp-18, - 0x1.ecb9cfe1d8642p-19, - 0x1.d21397ead99cbp-19, - 0x1.b8d094c86d374p-19, - 0x1.a0df0f0c626dcp-19, - 0x1.8a2e269750a39p-19, - 0x1.74adc8f4064d3p-19, - 0x1.604ea819f007cp-19, - 0x1.4d0231928c6f9p-19, - 0x1.3aba85fe22e1fp-19, - 0x1.296a70f414053p-19, - 0x1.1905613b3abf2p-19, - 0x1.097f6156f32c5p-19, - 0x1.f59a20caf6695p-20, - 0x1.d9c73698fb1dcp-20, - 0x1.bf716c6168baep-20, - 0x1.a6852c6b58392p-20, - 0x1.8eefd70594a88p-20, - 0x1.789fb715aae95p-20, - 0x1.6383f726a8e04p-20, - 0x1.4f8c96f26a26ap-20, - 0x1.3caa61607f920p-20, - 0x1.2acee2f5ecdb8p-20, - 0x1.19ec60b1242edp-20, - 0x1.09f5cf4dd2877p-20, - 0x1.f5bd95d8730d8p-21, - 0x1.d9371e2ff7c35p-21, - 0x1.be41de54d155ap-21, - 0x1.a4c89e08ef4f3p-21, - 0x1.8cb738399b12cp-21, - 0x1.75fa8dbc84becp-21, - 0x1.608078a70dcbcp-21, - 0x1.4c37c0394d094p-21, - 0x1.39100d5687bfep-21, - 0x1.26f9df8519bd6p-21, - 0x1.15e6827001f18p-21, - 0x1.05c803e4831c1p-21, - 0x1.ed22548cffd35p-22, - 0x1.d06ad6ecdf971p-22, - 0x1.b551c847fbc96p-22, - 0x1.9bc09f112b494p-22, - 0x1.83a1ff0aa239dp-22, - 0x1.6ce1aa3fd7bddp-22, - 0x1.576c72b514859p-22, - 0x1.43302cc4a0da8p-22, - 0x1.301ba221dc9bbp-22, - 0x1.1e1e857adc568p-22, - 0x1.0d2966b1746f7p-22, - 0x1.fa5b4f49cc6b2p-23, - 0x1.dc3ae30b55c16p-23, - 0x1.bfd7555a3bd68p-23, - 0x1.a517d9e61628ap-23, - 0x1.8be4f8f6c951fp-23, - 0x1.74287ded49339p-23, - 0x1.5dcd669f2cd34p-23, - 0x1.48bfd38302870p-23, - 0x1.34ecf8a3c124ap-23, - 0x1.22430f521cbcfp-23, - 0x1.10b1488aeb235p-23, - 
0x1.0027c00a263a6p-23, - 0x1.e12ee004efc37p-24, - 0x1.c3e44ae32b16bp-24, - 0x1.a854ea14102a8p-24, - 0x1.8e6761569f45dp-24, - 0x1.7603bac345f65p-24, - 0x1.5f1353cdad001p-24, - 0x1.4980cb3c80949p-24, - 0x1.3537f00b6ad4dp-24, - 0x1.2225b12bffc68p-24, - 0x1.10380e1adb7e9p-24, - 0x1.febc107d5efaap-25, - 0x1.df0f2a0ee6946p-25, - 0x1.c14b2188bcee4p-25, - 0x1.a553644f7f07dp-25, - 0x1.8b0cfce0579dfp-25, - 0x1.725e7c5dd20f7p-25, - 0x1.5b2fe547a1340p-25, - 0x1.456a974e92e93p-25, - 0x1.30f93c3699078p-25, - 0x1.1dc7b5b978cf8p-25, - 0x1.0bc30c5d52f15p-25, - 0x1.f5b2be65a0c7fp-26, - 0x1.d5f3a8dea7357p-26, - 0x1.b82915b03515bp-26, - 0x1.9c3517e789488p-26, - 0x1.81fb7df06136ep-26, - 0x1.6961b8d641d06p-26, - 0x1.524ec4d916caep-26, - 0x1.3cab1343d18d1p-26, - 0x1.2860757487a01p-26, - 0x1.155a09065d4f7p-26, - 0x1.0384250e4c9fcp-26, - 0x1.e59890b926c78p-27, - 0x1.c642116a8a9e3p-27, - 0x1.a8e405e651ab6p-27, - 0x1.8d5f98114f872p-27, - 0x1.7397c5a66e307p-27, - 0x1.5b71456c5a4c4p-27, - 0x1.44d26de513197p-27, - 0x1.2fa31d6371537p-27, - 0x1.1bcca373b7b43p-27, - 0x1.0939ab853339fp-27, - 0x1.efac5187b2863p-28, - 0x1.cf1e86235d0e6p-28, - 0x1.b0a68a2128babp-28, - 0x1.9423165bc4444p-28, - 0x1.7974e743dea3cp-28, - 0x1.607e9eacd1050p-28, - 0x1.4924a74dec728p-28, - 0x1.334d19e0c2160p-28, - 0x1.1edfa3c5f5ccap-28, - 0x1.0bc56f1b54701p-28, - 0x1.f3d2185e047d9p-29, - 0x1.d26cb87945e87p-29, - 0x1.b334fac4b9f99p-29, - 0x1.96076f7918d1cp-29, - 0x1.7ac2d72fc2c63p-29, - 0x1.614801550319ep-29, - 0x1.4979ac8b28926p-29, - 0x1.333c68e2d0548p-29, - 0x1.1e767bce37dd7p-29, - 0x1.0b0fc5b6d05a0p-29, - 0x1.f1e3523b41d7dp-30, - 0x1.d00de6608effep-30, - 0x1.b0778b7b3301ap-30, - 0x1.92fb04ec0f6cfp-30, - 0x1.77756ec9f78fap-30, - 0x1.5dc61922d5a06p-30, - 0x1.45ce65699ff6dp-30, - 0x1.2f71a5f159970p-30, - 0x1.1a94ff571654fp-30, - 0x1.071f4bbea09ecp-30, - 0x1.e9f1ff8ddd774p-31, - 0x1.c818223a202c7p-31, - 0x1.a887bd2b4404dp-31, - 0x1.8b1a336c5eb6bp-31, - 0x1.6fab63324088ap-31, - 0x1.56197e30205bap-31, - 0x1.3e44e45301b92p-31, - 
0x1.281000bfe4c3fp-31, - 0x1.135f28f2d50b4p-31, - 0x1.00187dded5975p-31, - 0x1.dc479de0ef001p-32, - 0x1.bad4fdad3caa1p-32, - 0x1.9baed3ed27ab8p-32, - 0x1.7ead9ce4285bbp-32, - 0x1.63ac6b4edc88ep-32, - 0x1.4a88be2a6390cp-32, - 0x1.332259185f1a0p-32, - 0x1.1d5b1f3793044p-32, - 0x1.0916f04b6e18bp-32, - 0x1.ec77101de6926p-33, - 0x1.c960bf23153e0p-33, - 0x1.a8bd20fc65ef7p-33, - 0x1.8a61745ec7d1dp-33, - 0x1.6e25d0e756261p-33, - 0x1.53e4f7d1666cbp-33, - 0x1.3b7c27a7ddb0ep-33, - 0x1.24caf2c32af14p-33, - 0x1.0fb3186804d0fp-33, - 0x1.f830c0bb41fd7p-34, - 0x1.d3c0f1a91c846p-34, - 0x1.b1e5acf351d87p-34, - 0x1.92712d259ce66p-34, - 0x1.7538c60a04476p-34, - 0x1.5a14b04b47879p-34, - 0x1.40dfd87456f4cp-34, - 0x1.2977b1172b9d5p-34, - 0x1.13bc07e891491p-34, - 0x1.ff1dbb4300811p-35, - 0x1.d9a880f306bd8p-35, - 0x1.b6e45220b55e0p-35, - 0x1.96a0b33f2c4dap-35, - 0x1.78b07e9e924acp-35, - 0x1.5ce9ab1670dd2p-35, - 0x1.4325167006bb0p-35, - 0x1.2b3e53538ff3fp-35, - 0x1.15137a7f44864p-35, - 0x1.0084ff125639dp-35, - 0x1.daeb0b7311ec7p-36, - 0x1.b7937d1c40c52p-36, - 0x1.96d082f59ab06p-36, - 0x1.7872d9fa10aadp-36, - 0x1.5c4e8e37bc7d0p-36, - 0x1.423ac0df49a40p-36, - 0x1.2a117230ad284p-36, - 0x1.13af4f04f9998p-36, - 0x1.fde703724e560p-37, - 0x1.d77f0c82e7641p-37, - 0x1.b3ee02611d7ddp-37, - 0x1.92ff33023d5bdp-37, - 0x1.7481a9e69f53fp-37, - 0x1.5847eda620959p-37, - 0x1.3e27c1fcc74bdp-37, - 0x1.25f9ee0b923dcp-37, - 0x1.0f9a0686531ffp-37, - 0x1.f5cc7718082afp-38, - 0x1.cf7e53d6a2ca5p-38, - 0x1.ac0f5f3229372p-38, - 0x1.8b498644847eap-38, - 0x1.6cfa9bcca59dcp-38, - 0x1.50f411d4fd2cdp-38, - 0x1.370ab8327af5ep-38, - 0x1.1f167f88c6b6ep-38, - 0x1.08f24085d4597p-38, - 0x1.e8f70e181d619p-39, - 0x1.c324c20e337dcp-39, - 0x1.a03261574b54ep-39, - 0x1.7fe903cdf5855p-39, - 0x1.6215c58da3450p-39, - 0x1.46897d4b69fc6p-39, - 0x1.2d1877d731b7bp-39, - 0x1.159a386b11517p-39, - 0x1.ffd27ae9393cep-40, - 0x1.d7c593130dd0bp-40, - 0x1.b2cd607c79bcfp-40, - 0x1.90ae4d3405651p-40, - 0x1.71312dd1759e2p-40, - 0x1.5422ef5d8949dp-40, - 
0x1.39544b0ecc957p-40, - 0x1.20997f73e73ddp-40, - 0x1.09ca0eaacd277p-40, - 0x1.e9810295890ecp-41, - 0x1.c2b45b5aa4a1dp-41, - 0x1.9eee068fa7596p-41, - 0x1.7df2b399c10a8p-41, - 0x1.5f8b87a31bd85p-41, - 0x1.4385c96e9a2d9p-41, - 0x1.29b2933ef4cbcp-41, - 0x1.11e68a6378f8ap-41, - 0x1.f7f338086a86bp-42, - 0x1.cf8d7d9ce040ap-42, - 0x1.aa577251ae484p-42, - 0x1.8811d739efb5ep-42, - 0x1.68823e52970bep-42, - 0x1.4b72ae68e8b4cp-42, - 0x1.30b14dbe876bcp-42, - 0x1.181012ef86610p-42, - 0x1.01647ba798744p-42, - 0x1.d90e917701675p-43, - 0x1.b2a87e86d0c8ap-43, - 0x1.8f53dcb377293p-43, - 0x1.6ed2f2515e933p-43, - 0x1.50ecc9ed47f19p-43, - 0x1.356cd5ce7799ep-43, - 0x1.1c229a587ab78p-43, - 0x1.04e15ecc7f3f6p-43, - 0x1.deffc7e6a6017p-44, - 0x1.b7b040832f310p-44, - 0x1.938e021f36d76p-44, - 0x1.7258610b3b233p-44, - 0x1.53d3bfc82a909p-44, - 0x1.37c92babdc2fdp-44, - 0x1.1e06010120f6ap-44, - 0x1.065b9616170d4p-44, - 0x1.e13dd96b3753ap-45, - 0x1.b950d32467392p-45, - 0x1.94a72263259a5p-45, - 0x1.72fd93e036cdcp-45, - 0x1.54164576929abp-45, - 0x1.37b83c521fe96p-45, - 0x1.1daf033182e96p-45, - 0x1.05ca50205d26ap-45, - 0x1.dfbb6235639fap-46, - 0x1.b7807e294781fp-46, - 0x1.9298add70a734p-46, - 0x1.70beaf9c7ffb6p-46, - 0x1.51b2cd6709222p-46, - 0x1.353a6cf7f7fffp-46, - 0x1.1b1fa8cbe84a7p-46, - 0x1.0330f0fd69921p-46, - 0x1.da81670f96f9bp-47, - 0x1.b24a16b4d09aap-47, - 0x1.8d6eeb6efdbd6p-47, - 0x1.6ba91ac734785p-47, - 0x1.4cb7966770ab5p-47, - 0x1.305e9721d0981p-47, - 0x1.1667311fff70ap-47, - 0x1.fd3de10d62855p-48, - 0x1.d1aefbcd48d0cp-48, - 0x1.a9cc93c25aca9p-48, - 0x1.85487ee3ea735p-48, - 0x1.63daf8b4b1e0cp-48, - 0x1.45421e69a6ca1p-48, - 0x1.294175802d99ap-48, - 0x1.0fa17bf41068fp-48, - 0x1.f05e82aae2bb9p-49, - 0x1.c578101b29058p-49, - 0x1.9e39dc5dd2f7cp-49, - 0x1.7a553a728bbf2p-49, - 0x1.5982008db1304p-49, - 0x1.3b7e00422e51bp-49, - 0x1.200c898d9ee3ep-49, - 0x1.06f5f7eb65a56p-49, - 0x1.e00e9148a1d25p-50, - 0x1.b623734024e92p-50, - 0x1.8fd4e01891bf8p-50, - 0x1.6cd44c7470d89p-50, - 0x1.4cd9c04158cd7p-50, - 
0x1.2fa34bf5c8344p-50, - 0x1.14f4890ff2461p-50, - 0x1.f92c49dfa4df5p-51, - 0x1.ccaaea71ab0dfp-51, - 0x1.a40829f001197p-51, - 0x1.7eef13b59e96cp-51, - 0x1.5d11e1a252bf5p-51, - 0x1.3e296303b2297p-51, - 0x1.21f47009f43cep-51, - 0x1.083768c5e4541p-51, - 0x1.e1777d831265ep-52, - 0x1.b69f10b0191b5p-52, - 0x1.8f8a3a05b5b52p-52, - 0x1.6be573c40c8e7p-52, - 0x1.4b645ba991fdbp-52, - 0x1.2dc119095729fp-52, - }, -}; diff --git a/sysdeps/aarch64/fpu/sv_erff_data.c b/sysdeps/aarch64/fpu/sv_erff_data.c deleted file mode 100644 index 6dcd72af69..0000000000 --- a/sysdeps/aarch64/fpu/sv_erff_data.c +++ /dev/null @@ -1,1058 +0,0 @@ -/* Table for SVE erff approximation - - Copyright (C) 2024 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. */ - -#include "vecmath_config.h" - -/* Lookup table used in SVE erff. - For each possible rounded input r (multiples of 1/128), between - r = 0.0 and r = 4.0 (513 values): - - __erff_data.erf contains the values of erf(r), - - __erff_data.scale contains the values of 2/sqrt(pi)*exp(-r^2). - Note that indices 0 and 1 are never hit by the algorithm, since lookup is - performed only for x >= 1/64-1/512. 
*/ -const struct sv_erff_data __sv_erff_data = { - .erf = { 0x0.000000p+0, - 0x1.20dbf4p-7, - 0x1.20d770p-6, - 0x1.b137e0p-6, - 0x1.20c564p-5, - 0x1.68e5d4p-5, - 0x1.b0fafep-5, - 0x1.f902a8p-5, - 0x1.207d48p-4, - 0x1.44703ep-4, - 0x1.68591ap-4, - 0x1.8c36bep-4, - 0x1.b00812p-4, - 0x1.d3cbf8p-4, - 0x1.f7815ap-4, - 0x1.0d9390p-3, - 0x1.1f5e1ap-3, - 0x1.311fc2p-3, - 0x1.42d7fcp-3, - 0x1.548642p-3, - 0x1.662a0cp-3, - 0x1.77c2d2p-3, - 0x1.895010p-3, - 0x1.9ad142p-3, - 0x1.ac45e4p-3, - 0x1.bdad72p-3, - 0x1.cf076ep-3, - 0x1.e05354p-3, - 0x1.f190aap-3, - 0x1.015f78p-2, - 0x1.09eed6p-2, - 0x1.127632p-2, - 0x1.1af54ep-2, - 0x1.236bf0p-2, - 0x1.2bd9dcp-2, - 0x1.343ed6p-2, - 0x1.3c9aa8p-2, - 0x1.44ed18p-2, - 0x1.4d35f0p-2, - 0x1.5574f4p-2, - 0x1.5da9f4p-2, - 0x1.65d4b8p-2, - 0x1.6df50ap-2, - 0x1.760abap-2, - 0x1.7e1594p-2, - 0x1.861566p-2, - 0x1.8e0a02p-2, - 0x1.95f336p-2, - 0x1.9dd0d2p-2, - 0x1.a5a2acp-2, - 0x1.ad6896p-2, - 0x1.b52264p-2, - 0x1.bccfecp-2, - 0x1.c47104p-2, - 0x1.cc0584p-2, - 0x1.d38d44p-2, - 0x1.db081cp-2, - 0x1.e275eap-2, - 0x1.e9d68ap-2, - 0x1.f129d4p-2, - 0x1.f86faap-2, - 0x1.ffa7eap-2, - 0x1.03693ap-1, - 0x1.06f794p-1, - 0x1.0a7ef6p-1, - 0x1.0dff50p-1, - 0x1.117894p-1, - 0x1.14eab4p-1, - 0x1.1855a6p-1, - 0x1.1bb95cp-1, - 0x1.1f15ccp-1, - 0x1.226ae8p-1, - 0x1.25b8a8p-1, - 0x1.28ff02p-1, - 0x1.2c3decp-1, - 0x1.2f755cp-1, - 0x1.32a54cp-1, - 0x1.35cdb4p-1, - 0x1.38ee8ap-1, - 0x1.3c07cap-1, - 0x1.3f196ep-1, - 0x1.42236ep-1, - 0x1.4525c8p-1, - 0x1.482074p-1, - 0x1.4b1372p-1, - 0x1.4dfebap-1, - 0x1.50e24cp-1, - 0x1.53be26p-1, - 0x1.569244p-1, - 0x1.595ea6p-1, - 0x1.5c2348p-1, - 0x1.5ee02ep-1, - 0x1.619556p-1, - 0x1.6442c0p-1, - 0x1.66e86ep-1, - 0x1.69865ep-1, - 0x1.6c1c98p-1, - 0x1.6eab18p-1, - 0x1.7131e6p-1, - 0x1.73b102p-1, - 0x1.762870p-1, - 0x1.789836p-1, - 0x1.7b0058p-1, - 0x1.7d60d8p-1, - 0x1.7fb9c0p-1, - 0x1.820b12p-1, - 0x1.8454d6p-1, - 0x1.869712p-1, - 0x1.88d1cep-1, - 0x1.8b050ep-1, - 0x1.8d30dep-1, - 0x1.8f5544p-1, - 0x1.91724ap-1, - 0x1.9387f6p-1, - 
0x1.959652p-1, - 0x1.979d68p-1, - 0x1.999d42p-1, - 0x1.9b95e8p-1, - 0x1.9d8768p-1, - 0x1.9f71cap-1, - 0x1.a1551ap-1, - 0x1.a33162p-1, - 0x1.a506b0p-1, - 0x1.a6d50cp-1, - 0x1.a89c86p-1, - 0x1.aa5d26p-1, - 0x1.ac16fcp-1, - 0x1.adca14p-1, - 0x1.af767ap-1, - 0x1.b11c3cp-1, - 0x1.b2bb68p-1, - 0x1.b4540ap-1, - 0x1.b5e630p-1, - 0x1.b771e8p-1, - 0x1.b8f742p-1, - 0x1.ba764ap-1, - 0x1.bbef10p-1, - 0x1.bd61a2p-1, - 0x1.bece0ep-1, - 0x1.c03464p-1, - 0x1.c194b2p-1, - 0x1.c2ef08p-1, - 0x1.c44376p-1, - 0x1.c5920ap-1, - 0x1.c6dad2p-1, - 0x1.c81de2p-1, - 0x1.c95b46p-1, - 0x1.ca930ep-1, - 0x1.cbc54cp-1, - 0x1.ccf20cp-1, - 0x1.ce1962p-1, - 0x1.cf3b5cp-1, - 0x1.d0580cp-1, - 0x1.d16f7ep-1, - 0x1.d281c4p-1, - 0x1.d38ef0p-1, - 0x1.d49710p-1, - 0x1.d59a34p-1, - 0x1.d6986cp-1, - 0x1.d791cap-1, - 0x1.d8865ep-1, - 0x1.d97636p-1, - 0x1.da6162p-1, - 0x1.db47f4p-1, - 0x1.dc29fcp-1, - 0x1.dd0788p-1, - 0x1.dde0aap-1, - 0x1.deb570p-1, - 0x1.df85eap-1, - 0x1.e0522ap-1, - 0x1.e11a3ep-1, - 0x1.e1de36p-1, - 0x1.e29e22p-1, - 0x1.e35a12p-1, - 0x1.e41214p-1, - 0x1.e4c638p-1, - 0x1.e5768cp-1, - 0x1.e62322p-1, - 0x1.e6cc08p-1, - 0x1.e7714ap-1, - 0x1.e812fcp-1, - 0x1.e8b12ap-1, - 0x1.e94be4p-1, - 0x1.e9e336p-1, - 0x1.ea7730p-1, - 0x1.eb07e2p-1, - 0x1.eb9558p-1, - 0x1.ec1fa2p-1, - 0x1.eca6ccp-1, - 0x1.ed2ae6p-1, - 0x1.edabfcp-1, - 0x1.ee2a1ep-1, - 0x1.eea556p-1, - 0x1.ef1db4p-1, - 0x1.ef9344p-1, - 0x1.f00614p-1, - 0x1.f07630p-1, - 0x1.f0e3a6p-1, - 0x1.f14e82p-1, - 0x1.f1b6d0p-1, - 0x1.f21ca0p-1, - 0x1.f27ff8p-1, - 0x1.f2e0eap-1, - 0x1.f33f7ep-1, - 0x1.f39bc2p-1, - 0x1.f3f5c2p-1, - 0x1.f44d88p-1, - 0x1.f4a31ep-1, - 0x1.f4f694p-1, - 0x1.f547f2p-1, - 0x1.f59742p-1, - 0x1.f5e490p-1, - 0x1.f62fe8p-1, - 0x1.f67952p-1, - 0x1.f6c0dcp-1, - 0x1.f7068cp-1, - 0x1.f74a6ep-1, - 0x1.f78c8cp-1, - 0x1.f7cceep-1, - 0x1.f80ba2p-1, - 0x1.f848acp-1, - 0x1.f8841ap-1, - 0x1.f8bdf2p-1, - 0x1.f8f63ep-1, - 0x1.f92d08p-1, - 0x1.f96256p-1, - 0x1.f99634p-1, - 0x1.f9c8a8p-1, - 0x1.f9f9bap-1, - 0x1.fa2974p-1, - 0x1.fa57dep-1, - 
0x1.fa84fep-1, - 0x1.fab0dep-1, - 0x1.fadb84p-1, - 0x1.fb04f6p-1, - 0x1.fb2d40p-1, - 0x1.fb5464p-1, - 0x1.fb7a6cp-1, - 0x1.fb9f60p-1, - 0x1.fbc344p-1, - 0x1.fbe61ep-1, - 0x1.fc07fap-1, - 0x1.fc28d8p-1, - 0x1.fc48c2p-1, - 0x1.fc67bcp-1, - 0x1.fc85d0p-1, - 0x1.fca2fep-1, - 0x1.fcbf52p-1, - 0x1.fcdaccp-1, - 0x1.fcf576p-1, - 0x1.fd0f54p-1, - 0x1.fd286ap-1, - 0x1.fd40bep-1, - 0x1.fd5856p-1, - 0x1.fd6f34p-1, - 0x1.fd8562p-1, - 0x1.fd9ae2p-1, - 0x1.fdafb8p-1, - 0x1.fdc3e8p-1, - 0x1.fdd77ap-1, - 0x1.fdea6ep-1, - 0x1.fdfcccp-1, - 0x1.fe0e96p-1, - 0x1.fe1fd0p-1, - 0x1.fe3080p-1, - 0x1.fe40a6p-1, - 0x1.fe504cp-1, - 0x1.fe5f70p-1, - 0x1.fe6e18p-1, - 0x1.fe7c46p-1, - 0x1.fe8a00p-1, - 0x1.fe9748p-1, - 0x1.fea422p-1, - 0x1.feb090p-1, - 0x1.febc96p-1, - 0x1.fec836p-1, - 0x1.fed374p-1, - 0x1.fede52p-1, - 0x1.fee8d4p-1, - 0x1.fef2fep-1, - 0x1.fefccep-1, - 0x1.ff064cp-1, - 0x1.ff0f76p-1, - 0x1.ff1852p-1, - 0x1.ff20e0p-1, - 0x1.ff2924p-1, - 0x1.ff3120p-1, - 0x1.ff38d6p-1, - 0x1.ff4048p-1, - 0x1.ff4778p-1, - 0x1.ff4e68p-1, - 0x1.ff551ap-1, - 0x1.ff5b90p-1, - 0x1.ff61ccp-1, - 0x1.ff67d0p-1, - 0x1.ff6d9ep-1, - 0x1.ff7338p-1, - 0x1.ff789ep-1, - 0x1.ff7dd4p-1, - 0x1.ff82dap-1, - 0x1.ff87b2p-1, - 0x1.ff8c5cp-1, - 0x1.ff90dcp-1, - 0x1.ff9532p-1, - 0x1.ff9960p-1, - 0x1.ff9d68p-1, - 0x1.ffa14ap-1, - 0x1.ffa506p-1, - 0x1.ffa8a0p-1, - 0x1.ffac18p-1, - 0x1.ffaf6ep-1, - 0x1.ffb2a6p-1, - 0x1.ffb5bep-1, - 0x1.ffb8b8p-1, - 0x1.ffbb98p-1, - 0x1.ffbe5ap-1, - 0x1.ffc102p-1, - 0x1.ffc390p-1, - 0x1.ffc606p-1, - 0x1.ffc862p-1, - 0x1.ffcaa8p-1, - 0x1.ffccd8p-1, - 0x1.ffcef4p-1, - 0x1.ffd0fap-1, - 0x1.ffd2eap-1, - 0x1.ffd4cap-1, - 0x1.ffd696p-1, - 0x1.ffd84ep-1, - 0x1.ffd9f8p-1, - 0x1.ffdb90p-1, - 0x1.ffdd18p-1, - 0x1.ffde90p-1, - 0x1.ffdffap-1, - 0x1.ffe154p-1, - 0x1.ffe2a2p-1, - 0x1.ffe3e2p-1, - 0x1.ffe514p-1, - 0x1.ffe63cp-1, - 0x1.ffe756p-1, - 0x1.ffe866p-1, - 0x1.ffe96ap-1, - 0x1.ffea64p-1, - 0x1.ffeb54p-1, - 0x1.ffec3ap-1, - 0x1.ffed16p-1, - 0x1.ffedeap-1, - 0x1.ffeeb4p-1, - 0x1.ffef76p-1, - 
0x1.fff032p-1, - 0x1.fff0e4p-1, - 0x1.fff18ep-1, - 0x1.fff232p-1, - 0x1.fff2d0p-1, - 0x1.fff366p-1, - 0x1.fff3f6p-1, - 0x1.fff480p-1, - 0x1.fff504p-1, - 0x1.fff582p-1, - 0x1.fff5fcp-1, - 0x1.fff670p-1, - 0x1.fff6dep-1, - 0x1.fff74ap-1, - 0x1.fff7aep-1, - 0x1.fff810p-1, - 0x1.fff86cp-1, - 0x1.fff8c6p-1, - 0x1.fff91cp-1, - 0x1.fff96cp-1, - 0x1.fff9bap-1, - 0x1.fffa04p-1, - 0x1.fffa4cp-1, - 0x1.fffa90p-1, - 0x1.fffad0p-1, - 0x1.fffb0ep-1, - 0x1.fffb4ap-1, - 0x1.fffb82p-1, - 0x1.fffbb8p-1, - 0x1.fffbecp-1, - 0x1.fffc1ep-1, - 0x1.fffc4ep-1, - 0x1.fffc7ap-1, - 0x1.fffca6p-1, - 0x1.fffccep-1, - 0x1.fffcf6p-1, - 0x1.fffd1ap-1, - 0x1.fffd3ep-1, - 0x1.fffd60p-1, - 0x1.fffd80p-1, - 0x1.fffda0p-1, - 0x1.fffdbep-1, - 0x1.fffddap-1, - 0x1.fffdf4p-1, - 0x1.fffe0ep-1, - 0x1.fffe26p-1, - 0x1.fffe3ep-1, - 0x1.fffe54p-1, - 0x1.fffe68p-1, - 0x1.fffe7ep-1, - 0x1.fffe90p-1, - 0x1.fffea2p-1, - 0x1.fffeb4p-1, - 0x1.fffec4p-1, - 0x1.fffed4p-1, - 0x1.fffee4p-1, - 0x1.fffef2p-1, - 0x1.ffff00p-1, - 0x1.ffff0cp-1, - 0x1.ffff18p-1, - 0x1.ffff24p-1, - 0x1.ffff30p-1, - 0x1.ffff3ap-1, - 0x1.ffff44p-1, - 0x1.ffff4ep-1, - 0x1.ffff56p-1, - 0x1.ffff60p-1, - 0x1.ffff68p-1, - 0x1.ffff70p-1, - 0x1.ffff78p-1, - 0x1.ffff7ep-1, - 0x1.ffff84p-1, - 0x1.ffff8cp-1, - 0x1.ffff92p-1, - 0x1.ffff98p-1, - 0x1.ffff9cp-1, - 0x1.ffffa2p-1, - 0x1.ffffa6p-1, - 0x1.ffffacp-1, - 0x1.ffffb0p-1, - 0x1.ffffb4p-1, - 0x1.ffffb8p-1, - 0x1.ffffbcp-1, - 0x1.ffffc0p-1, - 0x1.ffffc4p-1, - 0x1.ffffc6p-1, - 0x1.ffffcap-1, - 0x1.ffffccp-1, - 0x1.ffffd0p-1, - 0x1.ffffd2p-1, - 0x1.ffffd4p-1, - 0x1.ffffd6p-1, - 0x1.ffffd8p-1, - 0x1.ffffdcp-1, - 0x1.ffffdep-1, - 0x1.ffffdep-1, - 0x1.ffffe0p-1, - 0x1.ffffe2p-1, - 0x1.ffffe4p-1, - 0x1.ffffe6p-1, - 0x1.ffffe8p-1, - 0x1.ffffe8p-1, - 0x1.ffffeap-1, - 0x1.ffffeap-1, - 0x1.ffffecp-1, - 0x1.ffffeep-1, - 0x1.ffffeep-1, - 0x1.fffff0p-1, - 0x1.fffff0p-1, - 0x1.fffff2p-1, - 0x1.fffff2p-1, - 0x1.fffff2p-1, - 0x1.fffff4p-1, - 0x1.fffff4p-1, - 0x1.fffff4p-1, - 0x1.fffff6p-1, - 0x1.fffff6p-1, - 
0x1.fffff6p-1, - 0x1.fffff8p-1, - 0x1.fffff8p-1, - 0x1.fffff8p-1, - 0x1.fffff8p-1, - 0x1.fffffap-1, - 0x1.fffffap-1, - 0x1.fffffap-1, - 0x1.fffffap-1, - 0x1.fffffap-1, - 0x1.fffffap-1, - 0x1.fffffcp-1, - 0x1.fffffcp-1, - 0x1.fffffcp-1, - 0x1.fffffcp-1, - 0x1.fffffcp-1, - 0x1.fffffcp-1, - 0x1.fffffcp-1, - 0x1.fffffcp-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - }, - .scale = { 0x1.20dd76p+0, - 0x1.20d8f2p+0, - 0x1.20cb68p+0, - 0x1.20b4d8p+0, - 0x1.209546p+0, - 0x1.206cb4p+0, - 0x1.203b26p+0, - 0x1.2000a0p+0, - 0x1.1fbd28p+0, - 0x1.1f70c4p+0, - 0x1.1f1b7ap+0, - 0x1.1ebd56p+0, - 0x1.1e565cp+0, - 0x1.1de698p+0, - 0x1.1d6e14p+0, - 0x1.1cecdcp+0, - 0x1.1c62fap+0, - 0x1.1bd07cp+0, - 0x1.1b3572p+0, - 0x1.1a91e6p+0, - 0x1.19e5eap+0, - 0x1.19318cp+0, - 0x1.1874dep+0, - 0x1.17aff0p+0, - 0x1.16e2d8p+0, - 0x1.160da4p+0, - 0x1.153068p+0, - 0x1.144b3cp+0, - 0x1.135e30p+0, - 0x1.12695ep+0, - 0x1.116cd8p+0, - 0x1.1068bap+0, - 0x1.0f5d16p+0, - 0x1.0e4a08p+0, - 0x1.0d2fa6p+0, - 0x1.0c0e0ap+0, - 0x1.0ae550p+0, - 0x1.09b590p+0, - 0x1.087ee4p+0, - 0x1.07416cp+0, - 0x1.05fd3ep+0, - 0x1.04b27cp+0, - 0x1.036140p+0, - 0x1.0209a6p+0, - 0x1.00abd0p+0, - 0x1.fe8fb0p-1, - 0x1.fbbbbep-1, - 0x1.f8dc0ap-1, - 0x1.f5f0cep-1, - 0x1.f2fa4cp-1, - 0x1.eff8c4p-1, - 0x1.ecec78p-1, - 0x1.e9d5a8p-1, - 0x1.e6b498p-1, - 0x1.e38988p-1, - 0x1.e054bep-1, - 0x1.dd167cp-1, - 0x1.d9cf06p-1, - 0x1.d67ea2p-1, - 0x1.d32592p-1, - 0x1.cfc41ep-1, - 0x1.cc5a8ap-1, - 0x1.c8e91cp-1, - 0x1.c5701ap-1, - 0x1.c1efcap-1, - 0x1.be6872p-1, - 0x1.bada5ap-1, - 0x1.b745c6p-1, - 
0x1.b3aafcp-1, - 0x1.b00a46p-1, - 0x1.ac63e8p-1, - 0x1.a8b828p-1, - 0x1.a5074ep-1, - 0x1.a1519ep-1, - 0x1.9d9762p-1, - 0x1.99d8dap-1, - 0x1.961650p-1, - 0x1.925008p-1, - 0x1.8e8646p-1, - 0x1.8ab950p-1, - 0x1.86e96ap-1, - 0x1.8316d6p-1, - 0x1.7f41dcp-1, - 0x1.7b6abcp-1, - 0x1.7791b8p-1, - 0x1.73b714p-1, - 0x1.6fdb12p-1, - 0x1.6bfdf0p-1, - 0x1.681ff2p-1, - 0x1.644156p-1, - 0x1.60625cp-1, - 0x1.5c8342p-1, - 0x1.58a446p-1, - 0x1.54c5a6p-1, - 0x1.50e79ep-1, - 0x1.4d0a68p-1, - 0x1.492e42p-1, - 0x1.455366p-1, - 0x1.417a0cp-1, - 0x1.3da26ep-1, - 0x1.39ccc2p-1, - 0x1.35f940p-1, - 0x1.32281ep-1, - 0x1.2e5992p-1, - 0x1.2a8dcep-1, - 0x1.26c508p-1, - 0x1.22ff72p-1, - 0x1.1f3d3cp-1, - 0x1.1b7e98p-1, - 0x1.17c3b6p-1, - 0x1.140cc4p-1, - 0x1.1059eep-1, - 0x1.0cab62p-1, - 0x1.09014cp-1, - 0x1.055bd6p-1, - 0x1.01bb2cp-1, - 0x1.fc3ee6p-2, - 0x1.f511aap-2, - 0x1.edeeeep-2, - 0x1.e6d700p-2, - 0x1.dfca26p-2, - 0x1.d8c8aap-2, - 0x1.d1d2d0p-2, - 0x1.cae8dap-2, - 0x1.c40b08p-2, - 0x1.bd3998p-2, - 0x1.b674c8p-2, - 0x1.afbcd4p-2, - 0x1.a911f0p-2, - 0x1.a27456p-2, - 0x1.9be438p-2, - 0x1.9561c8p-2, - 0x1.8eed36p-2, - 0x1.8886b2p-2, - 0x1.822e66p-2, - 0x1.7be47ap-2, - 0x1.75a91ap-2, - 0x1.6f7c6ap-2, - 0x1.695e8cp-2, - 0x1.634fa6p-2, - 0x1.5d4fd4p-2, - 0x1.575f34p-2, - 0x1.517de6p-2, - 0x1.4bac00p-2, - 0x1.45e99cp-2, - 0x1.4036d0p-2, - 0x1.3a93b2p-2, - 0x1.350052p-2, - 0x1.2f7cc4p-2, - 0x1.2a0916p-2, - 0x1.24a554p-2, - 0x1.1f518ap-2, - 0x1.1a0dc6p-2, - 0x1.14da0ap-2, - 0x1.0fb662p-2, - 0x1.0aa2d0p-2, - 0x1.059f5ap-2, - 0x1.00ac00p-2, - 0x1.f79184p-3, - 0x1.edeb40p-3, - 0x1.e46530p-3, - 0x1.daff4ap-3, - 0x1.d1b982p-3, - 0x1.c893cep-3, - 0x1.bf8e1cp-3, - 0x1.b6a856p-3, - 0x1.ade26cp-3, - 0x1.a53c42p-3, - 0x1.9cb5bep-3, - 0x1.944ec2p-3, - 0x1.8c0732p-3, - 0x1.83deeap-3, - 0x1.7bd5c8p-3, - 0x1.73eba4p-3, - 0x1.6c2056p-3, - 0x1.6473b6p-3, - 0x1.5ce596p-3, - 0x1.5575c8p-3, - 0x1.4e241ep-3, - 0x1.46f066p-3, - 0x1.3fda6cp-3, - 0x1.38e1fap-3, - 0x1.3206dcp-3, - 0x1.2b48dap-3, - 0x1.24a7b8p-3, - 
0x1.1e233ep-3, - 0x1.17bb2cp-3, - 0x1.116f48p-3, - 0x1.0b3f52p-3, - 0x1.052b0cp-3, - 0x1.fe6460p-4, - 0x1.f2a902p-4, - 0x1.e72372p-4, - 0x1.dbd32ap-4, - 0x1.d0b7a0p-4, - 0x1.c5d04ap-4, - 0x1.bb1c98p-4, - 0x1.b09bfcp-4, - 0x1.a64de6p-4, - 0x1.9c31c6p-4, - 0x1.92470ap-4, - 0x1.888d1ep-4, - 0x1.7f036cp-4, - 0x1.75a960p-4, - 0x1.6c7e64p-4, - 0x1.6381e2p-4, - 0x1.5ab342p-4, - 0x1.5211ecp-4, - 0x1.499d48p-4, - 0x1.4154bcp-4, - 0x1.3937b2p-4, - 0x1.31458ep-4, - 0x1.297dbap-4, - 0x1.21df9ap-4, - 0x1.1a6a96p-4, - 0x1.131e14p-4, - 0x1.0bf97ep-4, - 0x1.04fc3ap-4, - 0x1.fc4b5ep-5, - 0x1.eeea8cp-5, - 0x1.e1d4d0p-5, - 0x1.d508fap-5, - 0x1.c885e0p-5, - 0x1.bc4a54p-5, - 0x1.b05530p-5, - 0x1.a4a54ap-5, - 0x1.99397ap-5, - 0x1.8e109cp-5, - 0x1.83298ep-5, - 0x1.78832cp-5, - 0x1.6e1c58p-5, - 0x1.63f3f6p-5, - 0x1.5a08e8p-5, - 0x1.505a18p-5, - 0x1.46e66cp-5, - 0x1.3dacd2p-5, - 0x1.34ac36p-5, - 0x1.2be38cp-5, - 0x1.2351c2p-5, - 0x1.1af5d2p-5, - 0x1.12ceb4p-5, - 0x1.0adb60p-5, - 0x1.031ad6p-5, - 0x1.f7182ap-6, - 0x1.e85c44p-6, - 0x1.da0006p-6, - 0x1.cc0180p-6, - 0x1.be5ecep-6, - 0x1.b1160ap-6, - 0x1.a4255ap-6, - 0x1.978ae8p-6, - 0x1.8b44e6p-6, - 0x1.7f5188p-6, - 0x1.73af0cp-6, - 0x1.685bb6p-6, - 0x1.5d55ccp-6, - 0x1.529b9ep-6, - 0x1.482b84p-6, - 0x1.3e03d8p-6, - 0x1.3422fep-6, - 0x1.2a875cp-6, - 0x1.212f62p-6, - 0x1.181984p-6, - 0x1.0f443ep-6, - 0x1.06ae14p-6, - 0x1.fcab14p-7, - 0x1.ec7262p-7, - 0x1.dcaf36p-7, - 0x1.cd5ecap-7, - 0x1.be7e5ap-7, - 0x1.b00b38p-7, - 0x1.a202bep-7, - 0x1.94624ep-7, - 0x1.87275ep-7, - 0x1.7a4f6ap-7, - 0x1.6dd7fep-7, - 0x1.61beaep-7, - 0x1.56011cp-7, - 0x1.4a9cf6p-7, - 0x1.3f8ff6p-7, - 0x1.34d7dcp-7, - 0x1.2a727ap-7, - 0x1.205dacp-7, - 0x1.169756p-7, - 0x1.0d1d6ap-7, - 0x1.03ede2p-7, - 0x1.f60d8ap-8, - 0x1.e4cc4ap-8, - 0x1.d4143ap-8, - 0x1.c3e1a6p-8, - 0x1.b430ecp-8, - 0x1.a4fe84p-8, - 0x1.9646f4p-8, - 0x1.8806d8p-8, - 0x1.7a3adep-8, - 0x1.6cdfccp-8, - 0x1.5ff276p-8, - 0x1.536fc2p-8, - 0x1.4754acp-8, - 0x1.3b9e40p-8, - 0x1.30499cp-8, - 0x1.2553eep-8, - 
0x1.1aba78p-8, - 0x1.107a8cp-8, - 0x1.06918cp-8, - 0x1.f9f9d0p-9, - 0x1.e77448p-9, - 0x1.d58da6p-9, - 0x1.c4412cp-9, - 0x1.b38a3ap-9, - 0x1.a36454p-9, - 0x1.93cb12p-9, - 0x1.84ba30p-9, - 0x1.762d84p-9, - 0x1.682100p-9, - 0x1.5a90b0p-9, - 0x1.4d78bcp-9, - 0x1.40d564p-9, - 0x1.34a306p-9, - 0x1.28de12p-9, - 0x1.1d8318p-9, - 0x1.128ebap-9, - 0x1.07fdb4p-9, - 0x1.fb99b8p-10, - 0x1.e7f232p-10, - 0x1.d4fed8p-10, - 0x1.c2b9d0p-10, - 0x1.b11d70p-10, - 0x1.a02436p-10, - 0x1.8fc8c8p-10, - 0x1.8005f0p-10, - 0x1.70d6a4p-10, - 0x1.6235fcp-10, - 0x1.541f34p-10, - 0x1.468daep-10, - 0x1.397ceep-10, - 0x1.2ce898p-10, - 0x1.20cc76p-10, - 0x1.15246ep-10, - 0x1.09ec86p-10, - 0x1.fe41cep-11, - 0x1.e97ba4p-11, - 0x1.d57f52p-11, - 0x1.c245d4p-11, - 0x1.afc85ep-11, - 0x1.9e0058p-11, - 0x1.8ce75ep-11, - 0x1.7c7744p-11, - 0x1.6caa0ep-11, - 0x1.5d79ecp-11, - 0x1.4ee142p-11, - 0x1.40daa4p-11, - 0x1.3360ccp-11, - 0x1.266ea8p-11, - 0x1.19ff46p-11, - 0x1.0e0de8p-11, - 0x1.0295f0p-11, - 0x1.ef25d4p-12, - 0x1.da0110p-12, - 0x1.c5b542p-12, - 0x1.b23a5ap-12, - 0x1.9f8894p-12, - 0x1.8d986ap-12, - 0x1.7c629ap-12, - 0x1.6be022p-12, - 0x1.5c0a38p-12, - 0x1.4cda54p-12, - 0x1.3e4a24p-12, - 0x1.305390p-12, - 0x1.22f0b4p-12, - 0x1.161be4p-12, - 0x1.09cfa4p-12, - 0x1.fc0d56p-13, - 0x1.e577bcp-13, - 0x1.cfd4a6p-13, - 0x1.bb1a96p-13, - 0x1.a74068p-13, - 0x1.943d4ap-13, - 0x1.8208bcp-13, - 0x1.709a8ep-13, - 0x1.5feadap-13, - 0x1.4ff208p-13, - 0x1.40a8c2p-13, - 0x1.3207fcp-13, - 0x1.2408eap-13, - 0x1.16a502p-13, - 0x1.09d5f8p-13, - 0x1.fb2b7ap-14, - 0x1.e3bcf4p-14, - 0x1.cd5528p-14, - 0x1.b7e946p-14, - 0x1.a36eecp-14, - 0x1.8fdc1cp-14, - 0x1.7d2738p-14, - 0x1.6b4702p-14, - 0x1.5a329cp-14, - 0x1.49e178p-14, - 0x1.3a4b60p-14, - 0x1.2b6876p-14, - 0x1.1d3120p-14, - 0x1.0f9e1cp-14, - 0x1.02a868p-14, - 0x1.ec929ap-15, - 0x1.d4f4b4p-15, - 0x1.be6abcp-15, - 0x1.a8e8ccp-15, - 0x1.94637ep-15, - 0x1.80cfdcp-15, - 0x1.6e2368p-15, - 0x1.5c540cp-15, - 0x1.4b581cp-15, - 0x1.3b2652p-15, - 0x1.2bb5ccp-15, - 0x1.1cfe02p-15, - 
0x1.0ef6c4p-15, - 0x1.019842p-15, - 0x1.e9b5e8p-16, - 0x1.d16f58p-16, - 0x1.ba4f04p-16, - 0x1.a447b8p-16, - 0x1.8f4cccp-16, - 0x1.7b5224p-16, - 0x1.684c22p-16, - 0x1.562facp-16, - 0x1.44f21ep-16, - 0x1.34894ap-16, - 0x1.24eb72p-16, - 0x1.160f44p-16, - 0x1.07ebd2p-16, - 0x1.f4f12ep-17, - 0x1.db5ad0p-17, - 0x1.c304f0p-17, - 0x1.abe09ep-17, - 0x1.95df98p-17, - 0x1.80f43ap-17, - 0x1.6d1178p-17, - 0x1.5a2ae0p-17, - 0x1.483488p-17, - 0x1.372310p-17, - 0x1.26eb9ep-17, - 0x1.1783cep-17, - 0x1.08e1bap-17, - 0x1.f5f7d8p-18, - 0x1.db92b6p-18, - 0x1.c282cep-18, - 0x1.aab7acp-18, - 0x1.94219cp-18, - 0x1.7eb1a2p-18, - 0x1.6a5972p-18, - 0x1.570b6ap-18, - 0x1.44ba86p-18, - 0x1.335a62p-18, - 0x1.22df2ap-18, - 0x1.133d96p-18, - 0x1.046aeap-18, - 0x1.ecb9d0p-19, - 0x1.d21398p-19, - 0x1.b8d094p-19, - 0x1.a0df10p-19, - 0x1.8a2e26p-19, - 0x1.74adc8p-19, - 0x1.604ea8p-19, - 0x1.4d0232p-19, - 0x1.3aba86p-19, - 0x1.296a70p-19, - 0x1.190562p-19, - 0x1.097f62p-19, - 0x1.f59a20p-20, - 0x1.d9c736p-20, - 0x1.bf716cp-20, - 0x1.a6852cp-20, - 0x1.8eefd8p-20, - 0x1.789fb8p-20, - 0x1.6383f8p-20, - 0x1.4f8c96p-20, - 0x1.3caa62p-20, - 0x1.2acee2p-20, - 0x1.19ec60p-20, - 0x1.09f5d0p-20, - 0x1.f5bd96p-21, - 0x1.d9371ep-21, - 0x1.be41dep-21, - 0x1.a4c89ep-21, - 0x1.8cb738p-21, - 0x1.75fa8ep-21, - 0x1.608078p-21, - 0x1.4c37c0p-21, - 0x1.39100ep-21, - 0x1.26f9e0p-21, - 0x1.15e682p-21, - 0x1.05c804p-21, - 0x1.ed2254p-22, - 0x1.d06ad6p-22, - 0x1.b551c8p-22, - 0x1.9bc0a0p-22, - 0x1.83a200p-22, - 0x1.6ce1aap-22, - 0x1.576c72p-22, - 0x1.43302cp-22, - 0x1.301ba2p-22, - 0x1.1e1e86p-22, - 0x1.0d2966p-22, - 0x1.fa5b50p-23, - 0x1.dc3ae4p-23, - 0x1.bfd756p-23, - 0x1.a517dap-23, - 0x1.8be4f8p-23, - 0x1.74287ep-23, - 0x1.5dcd66p-23, - 0x1.48bfd4p-23, - 0x1.34ecf8p-23, - 0x1.224310p-23, - 0x1.10b148p-23, - }, -}; diff --git a/sysdeps/aarch64/fpu/sv_expf_inline.h b/sysdeps/aarch64/fpu/sv_expf_inline.h index 23963b5f8e..6166df6553 100644 --- a/sysdeps/aarch64/fpu/sv_expf_inline.h +++ b/sysdeps/aarch64/fpu/sv_expf_inline.h 
@@ -24,19 +24,20 @@ struct sv_expf_data { - float poly[5]; - float inv_ln2, ln2_hi, ln2_lo, shift; + float c1, c3, inv_ln2; + float ln2_lo, c0, c2, c4; + float ln2_hi, shift; }; /* Coefficients copied from the polynomial in AdvSIMD variant, reversed for compatibility with polynomial helpers. Shift is 1.5*2^17 + 127. */ #define SV_EXPF_DATA \ { \ - .poly = { 0x1.ffffecp-1f, 0x1.fffdb6p-2f, 0x1.555e66p-3f, 0x1.573e2ep-5f, \ - 0x1.0e4020p-7f }, \ - \ - .inv_ln2 = 0x1.715476p+0f, .ln2_hi = 0x1.62e4p-1f, \ - .ln2_lo = 0x1.7f7d1cp-20f, .shift = 0x1.803f8p17f, \ + /* Coefficients copied from the polynomial in AdvSIMD variant. */ \ + .c0 = 0x1.ffffecp-1f, .c1 = 0x1.fffdb6p-2f, .c2 = 0x1.555e66p-3f, \ + .c3 = 0x1.573e2ep-5f, .c4 = 0x1.0e4020p-7f, .inv_ln2 = 0x1.715476p+0f, \ + .ln2_hi = 0x1.62e4p-1f, .ln2_lo = 0x1.7f7d1cp-20f, \ + .shift = 0x1.803f8p17f, \ } #define C(i) sv_f32 (d->poly[i]) @@ -47,26 +48,25 @@ expf_inline (svfloat32_t x, const svbool_t pg, const struct sv_expf_data *d) /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] x = ln2*n + r, with r in [-ln2/2, ln2/2]. */ - /* Load some constants in quad-word chunks to minimise memory access. */ - svfloat32_t c4_invln2_and_ln2 = svld1rq (svptrue_b32 (), &d->poly[4]); + svfloat32_t lane_consts = svld1rq (svptrue_b32 (), &d->ln2_lo); /* n = round(x/(ln2/N)). */ - svfloat32_t z = svmla_lane (sv_f32 (d->shift), x, c4_invln2_and_ln2, 1); + svfloat32_t z = svmad_x (pg, sv_f32 (d->inv_ln2), x, d->shift); svfloat32_t n = svsub_x (pg, z, d->shift); /* r = x - n*ln2/N. */ - svfloat32_t r = svmls_lane (x, n, c4_invln2_and_ln2, 2); - r = svmls_lane (r, n, c4_invln2_and_ln2, 3); + svfloat32_t r = svmsb_x (pg, sv_f32 (d->ln2_hi), n, x); + r = svmls_lane (r, n, lane_consts, 0); /* scale = 2^(n/N). */ - svfloat32_t scale = svexpa (svreinterpret_u32_f32 (z)); + svfloat32_t scale = svexpa (svreinterpret_u32 (z)); /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6. 
*/ - svfloat32_t p12 = svmla_x (pg, C (1), C (2), r); - svfloat32_t p34 = svmla_lane (C (3), r, c4_invln2_and_ln2, 0); - svfloat32_t r2 = svmul_f32_x (pg, r, r); + svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, lane_consts, 2); + svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, lane_consts, 3); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); svfloat32_t p14 = svmla_x (pg, p12, p34, r2); - svfloat32_t p0 = svmul_f32_x (pg, r, C (0)); + svfloat32_t p0 = svmul_lane (r, lane_consts, 1); svfloat32_t poly = svmla_x (pg, p0, r2, p14); return svmla_x (pg, scale, scale, poly); diff --git a/sysdeps/aarch64/fpu/tanhf_advsimd.c b/sysdeps/aarch64/fpu/tanhf_advsimd.c index 50defd6ef0..3ced9b7a41 100644 --- a/sysdeps/aarch64/fpu/tanhf_advsimd.c +++ b/sysdeps/aarch64/fpu/tanhf_advsimd.c @@ -28,13 +28,16 @@ static const struct data /* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for negative). */ .boring_bound = V4 (0x41102cb3), .large_bound = V4 (0x7f800000), - .onef = V4 (0x3f800000), }; static float32x4_t NOINLINE VPCS_ATTR -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) +special_case (float32x4_t x, uint32x4_t is_boring, float32x4_t boring, + float32x4_t q, uint32x4_t special) { - return v_call_f32 (tanhf, x, y, special); + return v_call_f32 ( + tanhf, x, + vbslq_f32 (is_boring, boring, vdivq_f32 (q, vaddq_f32 (q, v_f32 (2.0)))), + special); } /* Approximation for single-precision vector tanh(x), using a simplified @@ -50,7 +53,9 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (tanh) (float32x4_t x) uint32x4_t iax = vreinterpretq_u32_f32 (ax); uint32x4_t sign = veorq_u32 (ix, iax); uint32x4_t is_boring = vcgtq_u32 (iax, d->boring_bound); - float32x4_t boring = vreinterpretq_f32_u32 (vorrq_u32 (sign, d->onef)); + /* expm1 exponent bias is 1.0f reinterpreted to int. 
*/ + float32x4_t boring = vreinterpretq_f32_u32 (vorrq_u32 ( + sign, vreinterpretq_u32_s32 (d->expm1f_consts.exponent_bias))); #if WANT_SIMD_EXCEPT /* If fp exceptions are to be triggered properly, set all special and boring @@ -66,10 +71,12 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (tanh) (float32x4_t x) /* tanh(x) = (e^2x - 1) / (e^2x + 1). */ float32x4_t q = expm1f_inline (vmulq_n_f32 (x, 2), &d->expm1f_consts); - float32x4_t y = vdivq_f32 (q, vaddq_f32 (q, v_f32 (2.0))); + if (__glibc_unlikely (v_any_u32 (special))) - return special_case (vreinterpretq_f32_u32 (ix), - vbslq_f32 (is_boring, boring, y), special); + return special_case (vreinterpretq_f32_u32 (ix), is_boring, boring, q, + special); + + float32x4_t y = vdivq_f32 (q, vaddq_f32 (q, v_f32 (2.0))); return vbslq_f32 (is_boring, boring, y); } libmvec_hidden_def (V_NAME_F1 (tanh)) diff --git a/sysdeps/aarch64/fpu/v_expm1f_inline.h b/sysdeps/aarch64/fpu/v_expm1f_inline.h index 59b552da6b..1daedfdd51 100644 --- a/sysdeps/aarch64/fpu/v_expm1f_inline.h +++ b/sysdeps/aarch64/fpu/v_expm1f_inline.h @@ -21,48 +21,47 @@ #define AARCH64_FPU_V_EXPM1F_INLINE_H #include "v_math.h" -#include "poly_advsimd_f32.h" +#include "math_config.h" struct v_expm1f_data { - float32x4_t poly[5]; - float invln2_and_ln2[4]; - float32x4_t shift; + float32x4_t c0, c2; int32x4_t exponent_bias; + float c1, c3, inv_ln2, c4; + float ln2_hi, ln2_lo; }; /* Coefficients generated using fpminimax with degree=5 in [-log(2)/2, - log(2)/2]. Exponent bias is asuint(1.0f). - invln2_and_ln2 Stores constants: invln2, ln2_lo, ln2_hi, 0. */ + log(2)/2]. Exponent bias is asuint(1.0f). 
*/ #define V_EXPM1F_DATA \ { \ - .poly = { V4 (0x1.fffffep-2), V4 (0x1.5554aep-3), V4 (0x1.555736p-5), \ - V4 (0x1.12287cp-7), V4 (0x1.6b55a2p-10) }, \ - .shift = V4 (0x1.8p23f), .exponent_bias = V4 (0x3f800000), \ - .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 }, \ + .c0 = V4 (0x1.fffffep-2), .c1 = 0x1.5554aep-3, .c2 = V4 (0x1.555736p-5), \ + .c3 = 0x1.12287cp-7, .c4 = 0x1.6b55a2p-10, \ + .exponent_bias = V4 (0x3f800000), .inv_ln2 = 0x1.715476p+0f, \ + .ln2_hi = 0x1.62e4p-1f, .ln2_lo = 0x1.7f7d1cp-20f, \ } static inline float32x4_t expm1f_inline (float32x4_t x, const struct v_expm1f_data *d) { - /* Helper routine for calculating exp(x) - 1. - Copied from v_expm1f_1u6.c, with all special-case handling removed - the - calling routine should handle special values if required. */ + /* Helper routine for calculating exp(x) - 1. */ + + float32x2_t ln2 = vld1_f32 (&d->ln2_hi); + float32x4_t lane_consts = vld1q_f32 (&d->c1); /* Reduce argument: f in [-ln2/2, ln2/2], i is exact. */ - float32x4_t invln2_and_ln2 = vld1q_f32 (d->invln2_and_ln2); - float32x4_t j - = vsubq_f32 (vfmaq_laneq_f32 (d->shift, x, invln2_and_ln2, 0), d->shift); + float32x4_t j = vrndaq_f32 (vmulq_laneq_f32 (x, lane_consts, 2)); int32x4_t i = vcvtq_s32_f32 (j); - float32x4_t f = vfmsq_laneq_f32 (x, j, invln2_and_ln2, 1); - f = vfmsq_laneq_f32 (f, j, invln2_and_ln2, 2); + float32x4_t f = vfmsq_lane_f32 (x, j, ln2, 0); + f = vfmsq_lane_f32 (f, j, ln2, 1); - /* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f). - Uses Estrin scheme, where the main _ZGVnN4v_expm1f routine uses - Horner. */ + /* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f). 
*/ float32x4_t f2 = vmulq_f32 (f, f); float32x4_t f4 = vmulq_f32 (f2, f2); - float32x4_t p = v_estrin_4_f32 (f, f2, f4, d->poly); + float32x4_t p01 = vfmaq_laneq_f32 (d->c0, f, lane_consts, 0); + float32x4_t p23 = vfmaq_laneq_f32 (d->c2, f, lane_consts, 1); + float32x4_t p = vfmaq_f32 (p01, f2, p23); + p = vfmaq_laneq_f32 (p, f4, lane_consts, 3); p = vfmaq_f32 (f, f2, p); /* t = 2^i. */ diff --git a/sysdeps/aarch64/fpu/v_log1pf_inline.h b/sysdeps/aarch64/fpu/v_log1pf_inline.h index 643a6cdcfc..73e45a942e 100644 --- a/sysdeps/aarch64/fpu/v_log1pf_inline.h +++ b/sysdeps/aarch64/fpu/v_log1pf_inline.h @@ -25,54 +25,81 @@ struct v_log1pf_data { - float32x4_t poly[8], ln2; uint32x4_t four; int32x4_t three_quarters; + float c0, c3, c5, c7; + float32x4_t c4, c6, c1, c2, ln2; }; /* Polynomial generated using FPMinimax in [-0.25, 0.5]. First two coefficients (1, -0.5) are not stored as they can be generated more efficiently. */ #define V_LOG1PF_CONSTANTS_TABLE \ { \ - .poly \ - = { V4 (0x1.5555aap-2f), V4 (-0x1.000038p-2f), V4 (0x1.99675cp-3f), \ - V4 (-0x1.54ef78p-3f), V4 (0x1.28a1f4p-3f), V4 (-0x1.0da91p-3f), \ - V4 (0x1.abcb6p-4f), V4 (-0x1.6f0d5ep-5f) }, \ - .ln2 = V4 (0x1.62e43p-1f), .four = V4 (0x40800000), \ - .three_quarters = V4 (0x3f400000) \ + .c0 = 0x1.5555aap-2f, .c1 = V4 (-0x1.000038p-2f), \ + .c2 = V4 (0x1.99675cp-3f), .c3 = -0x1.54ef78p-3f, \ + .c4 = V4 (0x1.28a1f4p-3f), .c5 = -0x1.0da91p-3f, \ + .c6 = V4 (0x1.abcb6p-4f), .c7 = -0x1.6f0d5ep-5f, \ + .ln2 = V4 (0x1.62e43p-1f), .four = V4 (0x40800000), \ + .three_quarters = V4 (0x3f400000) \ } static inline float32x4_t -eval_poly (float32x4_t m, const float32x4_t *c) +eval_poly (float32x4_t m, const struct v_log1pf_data *d) { - /* Approximate log(1+m) on [-0.25, 0.5] using pairwise Horner (main routine - uses split Estrin, but this way reduces register pressure in the calling - routine). */ - float32x4_t q = vfmaq_f32 (v_f32 (-0.5), m, c[0]); + /* Approximate log(1+m) on [-0.25, 0.5] using pairwise Horner. 
*/ + float32x4_t c0357 = vld1q_f32 (&d->c0); + float32x4_t q = vfmaq_laneq_f32 (v_f32 (-0.5), m, c0357, 0); float32x4_t m2 = vmulq_f32 (m, m); - q = vfmaq_f32 (m, m2, q); - float32x4_t p = v_pw_horner_6_f32 (m, m2, c + 1); + float32x4_t p67 = vfmaq_laneq_f32 (d->c6, m, c0357, 3); + float32x4_t p45 = vfmaq_laneq_f32 (d->c4, m, c0357, 2); + float32x4_t p23 = vfmaq_laneq_f32 (d->c2, m, c0357, 1); + float32x4_t p = vfmaq_f32 (p45, m2, p67); + p = vfmaq_f32 (p23, m2, p); + p = vfmaq_f32 (d->c1, m, p); p = vmulq_f32 (m2, p); - return vfmaq_f32 (q, m2, p); + p = vfmaq_f32 (m, m2, p); + return vfmaq_f32 (p, m2, q); } static inline float32x4_t -log1pf_inline (float32x4_t x, const struct v_log1pf_data d) +log1pf_inline (float32x4_t x, const struct v_log1pf_data *d) { - /* Helper for calculating log(x + 1). Copied from log1pf_2u1.c, with no - special-case handling. See that file for details of the algorithm. */ + /* Helper for calculating log(x + 1). */ + + /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m + is in [-0.25, 0.5]): + log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2). + + We approximate log1p(m) with a polynomial, then scale by + k*log(2). Instead of doing this directly, we use an intermediate + scale factor s = 4*k*log(2) to ensure the scale is representable + as a normalised fp32 number. */ float32x4_t m = vaddq_f32 (x, v_f32 (1.0f)); + + /* Choose k to scale x to the range [-1/4, 1/2]. */ int32x4_t k - = vandq_s32 (vsubq_s32 (vreinterpretq_s32_f32 (m), d.three_quarters), + = vandq_s32 (vsubq_s32 (vreinterpretq_s32_f32 (m), d->three_quarters), v_s32 (0xff800000)); uint32x4_t ku = vreinterpretq_u32_s32 (k); - float32x4_t s = vreinterpretq_f32_u32 (vsubq_u32 (d.four, ku)); + + /* Scale up to ensure that the scale factor is representable as normalised + fp32 number, and scale m down accordingly. */ + float32x4_t s = vreinterpretq_f32_u32 (vsubq_u32 (d->four, ku)); + + /* Scale x by exponent manipulation. 
*/ float32x4_t m_scale = vreinterpretq_f32_u32 (vsubq_u32 (vreinterpretq_u32_f32 (x), ku)); m_scale = vaddq_f32 (m_scale, vfmaq_f32 (v_f32 (-1.0f), v_f32 (0.25f), s)); - float32x4_t p = eval_poly (m_scale, d.poly); + + /* Evaluate polynomial on the reduced interval. */ + float32x4_t p = eval_poly (m_scale, d); + + /* The scale factor to be applied back at the end - by multiplying float(k) + by 2^-23 we get the unbiased exponent of k. */ float32x4_t scale_back = vmulq_f32 (vcvtq_f32_s32 (k), v_f32 (0x1.0p-23f)); - return vfmaq_f32 (p, scale_back, d.ln2); + + /* Apply the scaling back. */ + return vfmaq_f32 (p, scale_back, d->ln2); } #endif diff --git a/sysdeps/aarch64/fpu/vecmath_config.h b/sysdeps/aarch64/fpu/vecmath_config.h index 7f0a8aa5f2..862eefaf8f 100644 --- a/sysdeps/aarch64/fpu/vecmath_config.h +++ b/sysdeps/aarch64/fpu/vecmath_config.h @@ -75,49 +75,37 @@ extern const struct v_log10_data } table[1 << V_LOG10_TABLE_BITS]; } __v_log10_data attribute_hidden; -extern const struct erff_data +extern const struct v_erff_data { struct { float erf, scale; } tab[513]; -} __erff_data attribute_hidden; +} __v_erff_data attribute_hidden; -extern const struct sv_erff_data -{ - float erf[513]; - float scale[513]; -} __sv_erff_data attribute_hidden; - -extern const struct erf_data +extern const struct v_erf_data { struct { double erf, scale; } tab[769]; -} __erf_data attribute_hidden; - -extern const struct sv_erf_data -{ - double erf[769]; - double scale[769]; -} __sv_erf_data attribute_hidden; +} __v_erf_data attribute_hidden; -extern const struct erfc_data +extern const struct v_erfc_data { struct { double erfc, scale; } tab[3488]; -} __erfc_data attribute_hidden; +} __v_erfc_data attribute_hidden; -extern const struct erfcf_data +extern const struct v_erfcf_data { struct { float erfc, scale; } tab[645]; -} __erfcf_data attribute_hidden; +} __v_erfcf_data attribute_hidden; /* Some data for AdvSIMD and SVE pow's internal exp and log. 
*/ #define V_POW_EXP_TABLE_BITS 8 diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps index 846fb2c29e..c523d45802 100644 --- a/sysdeps/aarch64/libm-test-ulps +++ b/sysdeps/aarch64/libm-test-ulps @@ -801,6 +801,7 @@ float: 1 ldouble: 1 Function: Imaginary part of "csin": +float: 1 ldouble: 1 Function: Real part of "csin_downward": @@ -1083,22 +1084,18 @@ ldouble: 3 Function: "exp10m1": double: 4 -float: 2 ldouble: 3 Function: "exp10m1_downward": double: 3 -float: 3 ldouble: 6 Function: "exp10m1_towardzero": double: 2 -float: 3 ldouble: 6 Function: "exp10m1_upward": double: 5 -float: 3 ldouble: 6 Function: "exp2": @@ -1131,22 +1128,18 @@ ldouble: 2 Function: "exp2m1": double: 2 -float: 2 ldouble: 2 Function: "exp2m1_downward": double: 3 -float: 3 ldouble: 3 Function: "exp2m1_towardzero": double: 3 -float: 2 ldouble: 4 Function: "exp2m1_upward": double: 3 -float: 3 ldouble: 5 Function: "exp_advsimd": @@ -1171,7 +1164,6 @@ float: 1 Function: "expm1": double: 1 -float: 1 ldouble: 2 Function: "expm1_advsimd": @@ -1180,7 +1172,6 @@ float: 1 Function: "expm1_downward": double: 1 -float: 1 ldouble: 2 Function: "expm1_sve": @@ -1189,12 +1180,10 @@ float: 1 Function: "expm1_towardzero": double: 1 -float: 2 ldouble: 4 Function: "expm1_upward": double: 1 -float: 1 ldouble: 3 Function: "gamma": @@ -1357,27 +1346,22 @@ ldouble: 1 Function: "log10p1": double: 2 -float: 2 ldouble: 3 Function: "log10p1_downward": double: 2 -float: 3 ldouble: 4 Function: "log10p1_towardzero": double: 3 -float: 2 ldouble: 3 Function: "log10p1_upward": double: 2 -float: 3 ldouble: 4 Function: "log1p": double: 1 -float: 1 ldouble: 3 Function: "log1p_advsimd": @@ -1386,7 +1370,6 @@ float: 1 Function: "log1p_downward": double: 1 -float: 2 ldouble: 3 Function: "log1p_sve": @@ -1395,12 +1378,10 @@ float: 1 Function: "log1p_towardzero": double: 2 -float: 2 ldouble: 3 Function: "log1p_upward": double: 2 -float: 2 ldouble: 2 Function: "log2": @@ -1433,22 +1414,18 @@ ldouble: 1 
Function: "log2p1": double: 2 -float: 2 ldouble: 3 Function: "log2p1_downward": double: 2 -float: 2 ldouble: 3 Function: "log2p1_towardzero": double: 2 -float: 2 ldouble: 2 Function: "log2p1_upward": double: 2 -float: 2 ldouble: 3 Function: "log_advsimd": @@ -1474,22 +1451,18 @@ ldouble: 1 Function: "logp1": double: 1 -float: 1 ldouble: 3 Function: "logp1_downward": double: 1 -float: 2 ldouble: 3 Function: "logp1_towardzero": double: 2 -float: 2 ldouble: 3 Function: "logp1_upward": double: 2 -float: 2 ldouble: 2 Function: "pow": @@ -1653,22 +1626,18 @@ ldouble: 3 Function: "tgamma": double: 9 -float: 8 ldouble: 4 Function: "tgamma_downward": double: 9 -float: 7 ldouble: 5 Function: "tgamma_towardzero": double: 9 -float: 7 ldouble: 5 Function: "tgamma_upward": double: 9 -float: 8 ldouble: 4 Function: "y0": diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S index 7ef77ee8c9..b76dde1557 100644 --- a/sysdeps/aarch64/memset.S +++ b/sysdeps/aarch64/memset.S @@ -1,4 +1,5 @@ -/* Copyright (C) 2012-2024 Free Software Foundation, Inc. +/* Generic optimized memset using SIMD. + Copyright (C) 2012-2024 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -17,7 +18,6 @@ <https://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include "memset-reg.h" #ifndef MEMSET # define MEMSET memset @@ -25,130 +25,131 @@ /* Assumptions: * - * ARMv8-a, AArch64, unaligned accesses + * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses. * */ -ENTRY (MEMSET) +#define dstin x0 +#define valw w1 +#define count x2 +#define dst x3 +#define dstend x4 +#define zva_val x5 +#define off x3 +#define dstend2 x5 +ENTRY (MEMSET) PTR_ARG (0) SIZE_ARG (2) dup v0.16B, valw + cmp count, 16 + b.lo L(set_small) + add dstend, dstin, count + cmp count, 64 + b.hs L(set_128) - cmp count, 96 - b.hi L(set_long) - cmp count, 16 - b.hs L(set_medium) - mov val, v0.D[0] + /* Set 16..63 bytes. 
*/ + mov off, 16 + and off, off, count, lsr 1 + sub dstend2, dstend, off + str q0, [dstin] + str q0, [dstin, off] + str q0, [dstend2, -16] + str q0, [dstend, -16] + ret + .p2align 4 /* Set 0..15 bytes. */ - tbz count, 3, 1f - str val, [dstin] - str val, [dstend, -8] - ret - nop -1: tbz count, 2, 2f - str valw, [dstin] - str valw, [dstend, -4] +L(set_small): + add dstend, dstin, count + cmp count, 4 + b.lo 2f + lsr off, count, 3 + sub dstend2, dstend, off, lsl 2 + str s0, [dstin] + str s0, [dstin, off, lsl 2] + str s0, [dstend2, -4] + str s0, [dstend, -4] ret + + /* Set 0..3 bytes. */ 2: cbz count, 3f + lsr off, count, 1 strb valw, [dstin] - tbz count, 1, 3f - strh valw, [dstend, -2] + strb valw, [dstin, off] + strb valw, [dstend, -1] 3: ret - /* Set 17..96 bytes. */ -L(set_medium): - str q0, [dstin] - tbnz count, 6, L(set96) - str q0, [dstend, -16] - tbz count, 5, 1f - str q0, [dstin, 16] - str q0, [dstend, -32] -1: ret - .p2align 4 - /* Set 64..96 bytes. Write 64 bytes from the start and - 32 bytes from the end. */ -L(set96): - str q0, [dstin, 16] +L(set_128): + bic dst, dstin, 15 + cmp count, 128 + b.hi L(set_long) + stp q0, q0, [dstin] stp q0, q0, [dstin, 32] + stp q0, q0, [dstend, -64] stp q0, q0, [dstend, -32] ret - .p2align 3 - nop + .p2align 4 L(set_long): - and valw, valw, 255 - bic dst, dstin, 15 str q0, [dstin] - cmp count, 256 - ccmp valw, 0, 0, cs - b.eq L(try_zva) -L(no_zva): - sub count, dstend, dst /* Count is 16 too large. */ - sub dst, dst, 16 /* Dst is biased by -32. */ - sub count, count, 64 + 16 /* Adjust count and bias for loop. */ -1: stp q0, q0, [dst, 32] - stp q0, q0, [dst, 64]! -L(tail64): - subs count, count, 64 - b.hi 1b -2: stp q0, q0, [dstend, -64] + str q0, [dst, 16] + tst valw, 255 + b.ne L(no_zva) +#ifndef ZVA64_ONLY + mrs zva_val, dczid_el0 + and zva_val, zva_val, 31 + cmp zva_val, 4 /* ZVA size is 64 bytes. 
*/ + b.ne L(zva_128) +#endif + stp q0, q0, [dst, 32] + bic dst, dstin, 63 + sub count, dstend, dst /* Count is now 64 too large. */ + sub count, count, 64 + 64 /* Adjust count and bias for loop. */ + + /* Write last bytes before ZVA loop. */ + stp q0, q0, [dstend, -64] stp q0, q0, [dstend, -32] + + .p2align 4 +L(zva64_loop): + add dst, dst, 64 + dc zva, dst + subs count, count, 64 + b.hi L(zva64_loop) ret -L(try_zva): -#ifndef ZVA64_ONLY .p2align 3 - mrs tmp1, dczid_el0 - tbnz tmp1w, 4, L(no_zva) - and tmp1w, tmp1w, 15 - cmp tmp1w, 4 /* ZVA size is 64 bytes. */ - b.ne L(zva_128) - nop -#endif - /* Write the first and last 64 byte aligned block using stp rather - than using DC ZVA. This is faster on some cores. - */ - .p2align 4 -L(zva_64): - str q0, [dst, 16] +L(no_zva): + sub count, dstend, dst /* Count is 32 too large. */ + sub count, count, 64 + 32 /* Adjust count and bias for loop. */ +L(no_zva_loop): stp q0, q0, [dst, 32] - bic dst, dst, 63 stp q0, q0, [dst, 64] - stp q0, q0, [dst, 96] - sub count, dstend, dst /* Count is now 128 too large. */ - sub count, count, 128+64+64 /* Adjust count and bias for loop. */ - add dst, dst, 128 -1: dc zva, dst add dst, dst, 64 subs count, count, 64 - b.hi 1b - stp q0, q0, [dst, 0] - stp q0, q0, [dst, 32] + b.hi L(no_zva_loop) stp q0, q0, [dstend, -64] stp q0, q0, [dstend, -32] ret #ifndef ZVA64_ONLY - .p2align 3 + .p2align 4 L(zva_128): - cmp tmp1w, 5 /* ZVA size is 128 bytes. */ - b.ne L(zva_other) + cmp zva_val, 5 /* ZVA size is 128 bytes. */ + b.ne L(no_zva) - str q0, [dst, 16] stp q0, q0, [dst, 32] stp q0, q0, [dst, 64] stp q0, q0, [dst, 96] bic dst, dst, 127 sub count, dstend, dst /* Count is now 128 too large. */ - sub count, count, 128+128 /* Adjust count and bias for loop. */ - add dst, dst, 128 -1: dc zva, dst - add dst, dst, 128 + sub count, count, 128 + 128 /* Adjust count and bias for loop. 
*/ +1: add dst, dst, 128 + dc zva, dst subs count, count, 128 b.hi 1b stp q0, q0, [dstend, -128] @@ -156,35 +157,6 @@ L(zva_128): stp q0, q0, [dstend, -64] stp q0, q0, [dstend, -32] ret - -L(zva_other): - mov tmp2w, 4 - lsl zva_lenw, tmp2w, tmp1w - add tmp1, zva_len, 64 /* Max alignment bytes written. */ - cmp count, tmp1 - blo L(no_zva) - - sub tmp2, zva_len, 1 - add tmp1, dst, zva_len - add dst, dst, 16 - subs count, tmp1, dst /* Actual alignment bytes to write. */ - bic tmp1, tmp1, tmp2 /* Aligned dc zva start address. */ - beq 2f -1: stp q0, q0, [dst], 64 - stp q0, q0, [dst, -32] - subs count, count, 64 - b.hi 1b -2: mov dst, tmp1 - sub count, dstend, tmp1 /* Remaining bytes to write. */ - subs count, count, zva_len - b.lo 4f -3: dc zva, dst - add dst, dst, zva_len - subs count, count, zva_len - b.hs 3b -4: add count, count, zva_len - sub dst, dst, 32 /* Bias dst for tail loop. */ - b L(tail64) #endif END (MEMSET) diff --git a/sysdeps/aarch64/multiarch/memset_a64fx.S b/sysdeps/aarch64/multiarch/memset_a64fx.S index 2e6d882fc9..f665b5a891 100644 --- a/sysdeps/aarch64/multiarch/memset_a64fx.S +++ b/sysdeps/aarch64/multiarch/memset_a64fx.S @@ -18,7 +18,6 @@ <https://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include <sysdeps/aarch64/memset-reg.h> /* Assumptions: * @@ -36,6 +35,14 @@ .arch armv8.2-a+sve +#define dstin x0 +#define valw w1 +#define count x2 +#define dst x3 +#define dstend x4 +#define tmp1 x5 +#define tmp2 x6 + .macro st1b_unroll first=0, last=7 st1b z0.b, p0, [dst, \first, mul vl] .if \last-\first diff --git a/sysdeps/aarch64/multiarch/memset_emag.S b/sysdeps/aarch64/multiarch/memset_emag.S index 6d714ed0e1..cf1b25f2ed 100644 --- a/sysdeps/aarch64/multiarch/memset_emag.S +++ b/sysdeps/aarch64/multiarch/memset_emag.S @@ -18,7 +18,6 @@ <https://www.gnu.org/licenses/>. 
*/ #include <sysdep.h> -#include "memset-reg.h" /* Assumptions: * @@ -26,6 +25,13 @@ * */ +#define dstin x0 +#define val x1 +#define valw w1 +#define count x2 +#define dst x3 +#define dstend x4 + ENTRY (__memset_emag) PTR_ARG (0) diff --git a/sysdeps/aarch64/multiarch/memset_kunpeng.S b/sysdeps/aarch64/multiarch/memset_kunpeng.S index 7b21550137..f815c20b03 100644 --- a/sysdeps/aarch64/multiarch/memset_kunpeng.S +++ b/sysdeps/aarch64/multiarch/memset_kunpeng.S @@ -18,7 +18,6 @@ <https://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include <sysdeps/aarch64/memset-reg.h> /* Assumptions: * @@ -26,6 +25,12 @@ * */ +#define dstin x0 +#define valw w1 +#define count x2 +#define dst x3 +#define dstend x4 + ENTRY (__memset_kunpeng) PTR_ARG (0) diff --git a/sysdeps/aarch64/multiarch/memset_oryon1.S b/sysdeps/aarch64/multiarch/memset_oryon1.S index b43a43b54e..6fa28a9bd0 100644 --- a/sysdeps/aarch64/multiarch/memset_oryon1.S +++ b/sysdeps/aarch64/multiarch/memset_oryon1.S @@ -19,12 +19,18 @@ <https://www.gnu.org/licenses/>. 
*/ #include <sysdep.h> -#include "memset-reg.h" /* Assumptions: ARMv8-a, AArch64, unaligned accesses */ +#define dstin x0 +#define val x1 +#define valw w1 +#define count x2 +#define dst x3 +#define dstend x4 + ENTRY (__memset_oryon1) PTR_ARG (0) diff --git a/sysdeps/alpha/fpu/libm-test-ulps b/sysdeps/alpha/fpu/libm-test-ulps index e28c2af683..212c52c8cc 100644 --- a/sysdeps/alpha/fpu/libm-test-ulps +++ b/sysdeps/alpha/fpu/libm-test-ulps @@ -1034,22 +1034,18 @@ float: 1 Function: "expm1": double: 1 -float: 1 ldouble: 2 Function: "expm1_downward": double: 1 -float: 1 ldouble: 2 Function: "expm1_towardzero": double: 1 -float: 2 ldouble: 4 Function: "expm1_upward": double: 1 -float: 1 ldouble: 3 Function: "gamma": @@ -1206,22 +1202,18 @@ ldouble: 3 Function: "log1p": double: 1 -float: 1 ldouble: 3 Function: "log1p_downward": double: 2 -float: 2 ldouble: 3 Function: "log1p_towardzero": double: 2 -float: 2 ldouble: 3 Function: "log1p_upward": double: 2 -float: 2 ldouble: 2 Function: "log2": @@ -1271,22 +1263,18 @@ ldouble: 1 Function: "logp1": double: 1 -float: 1 ldouble: 3 Function: "logp1_downward": double: 2 -float: 2 ldouble: 3 Function: "logp1_towardzero": double: 2 -float: 2 ldouble: 3 Function: "logp1_upward": double: 2 -float: 2 ldouble: 2 Function: "pow": @@ -1410,22 +1398,18 @@ ldouble: 3 Function: "tgamma": double: 9 -float: 8 ldouble: 4 Function: "tgamma_downward": double: 9 -float: 7 ldouble: 5 Function: "tgamma_towardzero": double: 9 -float: 7 ldouble: 5 Function: "tgamma_upward": double: 9 -float: 8 ldouble: 4 Function: "y0": diff --git a/sysdeps/arc/bits/endianness.h b/sysdeps/arc/bits/endianness.h index 8f17ca84b4..4a6e7d4c15 100644 --- a/sysdeps/arc/bits/endianness.h +++ b/sysdeps/arc/bits/endianness.h @@ -5,11 +5,7 @@ # error "Never use <bits/endian.h> directly; include <endian.h> instead." #endif -/* ARC has selectable endianness. 
*/ -#ifdef __BIG_ENDIAN__ -# define __BYTE_ORDER __BIG_ENDIAN -#else -# define __BYTE_ORDER __LITTLE_ENDIAN -#endif +/* Only little-endian ARC is supported. */ +#define __BYTE_ORDER __LITTLE_ENDIAN #endif /* bits/endianness.h */ diff --git a/sysdeps/arc/configure b/sysdeps/arc/configure index 08e4f23f89..b4192983bf 100644 --- a/sysdeps/arc/configure +++ b/sysdeps/arc/configure @@ -10,7 +10,7 @@ libc_cv_asm_line_sep='`' printf "%s\n" "#define ASM_LINE_SEP $libc_cv_asm_line_sep" >>confdefs.h -# For big endian ABI, generate a symbol for selecting right dynamic linker +# ARC big endian ABI is no longer supported. { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for egrep -e" >&5 @@ -175,13 +175,8 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_arc_be" >&5 printf "%s\n" "$libc_cv_arc_be" >&6; } if test $libc_cv_arc_be = yes; then - # For shlib-versions. - printf "%s\n" "#define HAVE_ARC_BE 1" >>confdefs.h - - config_vars="$config_vars -default-abi = arcbe" -else - config_vars="$config_vars -default-abi = arcle" + as_fn_error $? "big-endian not supported" "$LINENO" 5 fi +config_vars="$config_vars +default-abi = arcle" diff --git a/sysdeps/arc/configure.ac b/sysdeps/arc/configure.ac index 619da4e088..48dc6f5b9b 100644 --- a/sysdeps/arc/configure.ac +++ b/sysdeps/arc/configure.ac @@ -9,7 +9,7 @@ libc_cv_have_sdata_section=no libc_cv_asm_line_sep='`' AC_DEFINE_UNQUOTED(ASM_LINE_SEP, $libc_cv_asm_line_sep) -# For big endian ABI, generate a symbol for selecting right dynamic linker +# ARC big endian ABI is no longer supported. AC_CACHE_CHECK([for big endian], [libc_cv_arc_be], [AC_EGREP_CPP(yes,[#ifdef __BIG_ENDIAN__ @@ -17,9 +17,6 @@ AC_CACHE_CHECK([for big endian], #endif ], libc_cv_arc_be=yes, libc_cv_arc_be=no)]) if test $libc_cv_arc_be = yes; then - # For shlib-versions. 
- AC_DEFINE(HAVE_ARC_BE) - LIBC_CONFIG_VAR([default-abi], [arcbe]) -else - LIBC_CONFIG_VAR([default-abi], [arcle]) + AC_MSG_ERROR([big-endian not supported]) fi +LIBC_CONFIG_VAR([default-abi], [arcle]) diff --git a/sysdeps/arc/fpu/libm-test-ulps b/sysdeps/arc/fpu/libm-test-ulps index 7e2c2dff13..7812a11b5b 100644 --- a/sysdeps/arc/fpu/libm-test-ulps +++ b/sysdeps/arc/fpu/libm-test-ulps @@ -789,19 +789,15 @@ float: 1 Function: "exp10m1": double: 4 -float: 2 Function: "exp10m1_downward": double: 3 -float: 3 Function: "exp10m1_towardzero": double: 2 -float: 3 Function: "exp10m1_upward": double: 5 -float: 3 Function: "exp2": double: 1 @@ -819,19 +815,15 @@ float: 1 Function: "exp2m1": double: 2 -float: 2 Function: "exp2m1_downward": double: 3 -float: 3 Function: "exp2m1_towardzero": double: 3 -float: 2 Function: "exp2m1_upward": double: 3 -float: 3 Function: "exp_downward": double: 1 @@ -847,19 +839,15 @@ float: 2 Function: "expm1": double: 2 -float: 2 Function: "expm1_downward": double: 1 -float: 1 Function: "expm1_towardzero": double: 1 -float: 2 Function: "expm1_upward": double: 2 -float: 2 Function: "gamma": double: 7 @@ -977,35 +965,27 @@ float: 4 Function: "log10p1": double: 2 -float: 2 Function: "log10p1_downward": double: 2 -float: 3 Function: "log10p1_towardzero": double: 3 -float: 2 Function: "log10p1_upward": double: 2 -float: 3 Function: "log1p": double: 1 -float: 1 Function: "log1p_downward": double: 1 -float: 2 Function: "log1p_towardzero": double: 2 -float: 2 Function: "log1p_upward": double: 2 -float: 2 Function: "log2": double: 2 @@ -1021,19 +1001,15 @@ float: 1 Function: "log2p1": double: 2 -float: 2 Function: "log2p1_downward": double: 2 -float: 2 Function: "log2p1_towardzero": double: 2 -float: 2 Function: "log2p1_upward": double: 2 -float: 2 Function: "log_towardzero": double: 1 @@ -1045,19 +1021,15 @@ float: 1 Function: "logp1": double: 1 -float: 1 Function: "logp1_downward": double: 1 -float: 2 Function: "logp1_towardzero": double: 2 -float: 2 
Function: "logp1_upward": double: 2 -float: 2 Function: "pow": double: 1 @@ -1157,19 +1129,15 @@ float: 3 Function: "tgamma": double: 9 -float: 9 Function: "tgamma_downward": double: 9 -float: 9 Function: "tgamma_towardzero": double: 9 -float: 8 Function: "tgamma_upward": double: 9 -float: 9 Function: "y0": double: 3 diff --git a/sysdeps/arc/nofpu/libm-test-ulps b/sysdeps/arc/nofpu/libm-test-ulps index d9d6c76c3e..d0cfa46c3d 100644 --- a/sysdeps/arc/nofpu/libm-test-ulps +++ b/sysdeps/arc/nofpu/libm-test-ulps @@ -194,18 +194,15 @@ float: 1 Function: "exp10m1": double: 3 -float: 2 Function: "exp2": double: 1 Function: "exp2m1": double: 2 -float: 2 Function: "expm1": double: 1 -float: 1 Function: "gamma": double: 4 @@ -237,11 +234,9 @@ float: 2 Function: "log10p1": double: 1 -float: 2 Function: "log1p": double: 1 -float: 1 Function: "log2": double: 2 @@ -249,11 +244,9 @@ float: 1 Function: "log2p1": double: 1 -float: 2 Function: "logp1": double: 1 -float: 1 Function: "pow": double: 1 @@ -279,7 +272,6 @@ float: 2 Function: "tgamma": double: 9 -float: 8 Function: "y0": double: 3 diff --git a/sysdeps/arm/libm-test-ulps b/sysdeps/arm/libm-test-ulps index 100e9d1956..6cdd3d53d6 100644 --- a/sysdeps/arm/libm-test-ulps +++ b/sysdeps/arm/libm-test-ulps @@ -574,6 +574,9 @@ Function: Real part of "csin": double: 1 float: 1 +Function: Imaginary part of "csin": +float: 1 + Function: Real part of "csin_downward": double: 3 float: 3 @@ -779,19 +782,15 @@ float: 1 Function: "exp10m1": double: 3 -float: 2 Function: "exp10m1_downward": double: 3 -float: 3 Function: "exp10m1_towardzero": double: 2 -float: 3 Function: "exp10m1_upward": double: 4 -float: 4 Function: "exp2": double: 1 @@ -811,19 +810,15 @@ float: 1 Function: "exp2m1": double: 2 -float: 2 Function: "exp2m1_downward": double: 3 -float: 3 Function: "exp2m1_towardzero": double: 3 -float: 2 Function: "exp2m1_upward": double: 3 -float: 3 Function: "exp_downward": double: 1 @@ -839,19 +834,15 @@ float: 1 Function: "expm1": 
double: 1 -float: 1 Function: "expm1_downward": double: 1 -float: 1 Function: "expm1_towardzero": double: 1 -float: 2 Function: "expm1_upward": double: 1 -float: 1 Function: "gamma": double: 4 @@ -967,35 +958,27 @@ float: 2 Function: "log10p1": double: 1 -float: 2 Function: "log10p1_downward": double: 2 -float: 3 Function: "log10p1_towardzero": double: 3 -float: 2 Function: "log10p1_upward": double: 2 -float: 3 Function: "log1p": double: 1 -float: 1 Function: "log1p_downward": double: 2 -float: 2 Function: "log1p_towardzero": double: 2 -float: 2 Function: "log1p_upward": double: 2 -float: 2 Function: "log2": double: 2 @@ -1015,19 +998,15 @@ float: 3 Function: "log2p1": double: 1 -float: 2 Function: "log2p1_downward": double: 2 -float: 2 Function: "log2p1_towardzero": double: 2 -float: 2 Function: "log2p1_upward": double: 2 -float: 2 Function: "log_downward": float: 2 @@ -1041,19 +1020,15 @@ float: 2 Function: "logp1": double: 1 -float: 1 Function: "logp1_downward": double: 2 -float: 2 Function: "logp1_towardzero": double: 2 -float: 2 Function: "logp1_upward": double: 2 -float: 2 Function: "pow": double: 1 @@ -1152,19 +1127,15 @@ float: 3 Function: "tgamma": double: 9 -float: 8 Function: "tgamma_downward": double: 9 -float: 7 Function: "tgamma_towardzero": double: 9 -float: 7 Function: "tgamma_upward": double: 9 -float: 8 Function: "y0": double: 3 diff --git a/sysdeps/csky/fpu/libm-test-ulps b/sysdeps/csky/fpu/libm-test-ulps index fc634f89ca..a7b2bec17e 100644 --- a/sysdeps/csky/fpu/libm-test-ulps +++ b/sysdeps/csky/fpu/libm-test-ulps @@ -799,19 +799,15 @@ float: 1 Function: "expm1": double: 1 -float: 1 Function: "expm1_downward": double: 1 -float: 1 Function: "expm1_towardzero": double: 1 -float: 2 Function: "expm1_upward": double: 1 -float: 1 Function: "gamma": double: 4 @@ -923,19 +919,15 @@ float: 2 Function: "log1p": double: 1 -float: 1 Function: "log1p_downward": double: 2 -float: 2 Function: "log1p_towardzero": double: 2 -float: 2 Function: "log1p_upward": 
double: 2 -float: 2 Function: "log2": double: 2 @@ -952,19 +944,15 @@ double: 3 Function: "logp1": double: 1 -float: 1 Function: "logp1_downward": double: 2 -float: 2 Function: "logp1_towardzero": double: 2 -float: 2 Function: "logp1_upward": double: 2 -float: 2 Function: "pow": double: 1 @@ -1061,19 +1049,15 @@ float: 3 Function: "tgamma": double: 9 -float: 8 Function: "tgamma_downward": double: 8 -float: 7 Function: "tgamma_towardzero": double: 9 -float: 7 Function: "tgamma_upward": double: 9 -float: 8 Function: "y0": double: 3 diff --git a/sysdeps/csky/nofpu/libm-test-ulps b/sysdeps/csky/nofpu/libm-test-ulps index a1e28c8ee0..4e4451a5d2 100644 --- a/sysdeps/csky/nofpu/libm-test-ulps +++ b/sysdeps/csky/nofpu/libm-test-ulps @@ -797,19 +797,15 @@ double: 1 Function: "expm1": double: 1 -float: 1 Function: "expm1_downward": double: 1 -float: 1 Function: "expm1_towardzero": double: 1 -float: 2 Function: "expm1_upward": double: 1 -float: 1 Function: "gamma": double: 4 @@ -924,19 +920,15 @@ float: 2 Function: "log1p": double: 1 -float: 1 Function: "log1p_downward": double: 2 -float: 2 Function: "log1p_towardzero": double: 2 -float: 2 Function: "log1p_upward": double: 2 -float: 2 Function: "log2": double: 2 @@ -966,19 +958,15 @@ float: 2 Function: "logp1": double: 1 -float: 1 Function: "logp1_downward": double: 2 -float: 2 Function: "logp1_towardzero": double: 2 -float: 2 Function: "logp1_upward": double: 2 -float: 2 Function: "pow": double: 1 @@ -1092,19 +1080,15 @@ float: 3 Function: "tgamma": double: 9 -float: 8 Function: "tgamma_downward": double: 5 -float: 5 Function: "tgamma_towardzero": double: 5 -float: 4 Function: "tgamma_upward": double: 4 -float: 4 Function: "y0": double: 3 diff --git a/sysdeps/generic/internal-signals.h b/sysdeps/generic/internal-signals.h index 3db100be10..e031a96bac 100644 --- a/sysdeps/generic/internal-signals.h +++ b/sysdeps/generic/internal-signals.h @@ -20,6 +20,7 @@ # define __INTERNAL_SIGNALS_H #include <signal.h> +#include 
<internal-sigset.h> #include <sigsetops.h> #include <stdbool.h> #include <stddef.h> @@ -39,10 +40,32 @@ clear_internal_signals (sigset_t *set) { } -typedef sigset_t internal_sigset_t; - #define internal_sigemptyset(__s) __sigemptyset (__s) +#define internal_sigfillset(__s) __sigfillset (__s) #define internal_sigaddset(__s, __i) __sigaddset (__s, __i) #define internal_sigprocmask(__h, __s, __o) __sigprocmask (__h, __s, __o) +static inline void +internal_signal_block_all (internal_sigset_t *oset) +{ + internal_sigset_t set; + internal_sigfillset (&set); + internal_sigprocmask (SIG_BLOCK, &set, oset); +} + +static inline void +internal_signal_restore_set (const internal_sigset_t *set) +{ + internal_sigprocmask (SIG_SETMASK, set, NULL); +} + +static inline void +internal_signal_unblock_signal (int sig) +{ + internal_sigset_t set; + internal_sigemptyset (&set); + internal_sigaddset (&set, sig); + internal_sigprocmask (SIG_UNBLOCK, &set, NULL); +} + #endif /* __INTERNAL_SIGNALS_H */ diff --git a/sysdeps/aarch64/memset-reg.h b/sysdeps/generic/internal-sigset.h index 6c7f60b37e..80279ffc47 100644 --- a/sysdeps/aarch64/memset-reg.h +++ b/sysdeps/generic/internal-sigset.h @@ -1,5 +1,5 @@ -/* Register aliases for memset to be used across implementations. - Copyright (C) 2017-2024 Free Software Foundation, Inc. +/* Internal sigset_t definition. + Copyright (C) 2022-2023 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,15 +16,11 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. 
*/ -#define dstin x0 -#define val x1 -#define valw w1 -#define count x2 -#define dst x3 -#define dstend x4 -#define tmp1 x5 -#define tmp1w w5 -#define tmp2 x6 -#define tmp2w w6 -#define zva_len x7 -#define zva_lenw w7 +#ifndef _INTERNAL_SIGSET_H +#define _INTERNAL_SIGSET_H + +#include <signal.h> + +typedef sigset_t internal_sigset_t; + +#endif diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h index 259ce2e7d6..91447a5e77 100644 --- a/sysdeps/generic/ldsodefs.h +++ b/sysdeps/generic/ldsodefs.h @@ -1014,6 +1014,13 @@ extern void _dl_relocate_object (struct link_map *map, int reloc_mode, int consider_profiling) attribute_hidden; +/* Perform relocation, but do not apply RELRO. Does not check + L->relocated. Otherwise the same as _dl_relocate_object. */ +void _dl_relocate_object_no_relro (struct link_map *map, + struct r_scope_elem *scope[], + int reloc_mode, int consider_profiling) + attribute_hidden; + /* Protect PT_GNU_RELRO area. */ extern void _dl_protect_relro (struct link_map *map) attribute_hidden; diff --git a/sysdeps/hppa/fpu/libm-test-ulps b/sysdeps/hppa/fpu/libm-test-ulps index ea5101f6b6..021a2a482c 100644 --- a/sysdeps/hppa/fpu/libm-test-ulps +++ b/sysdeps/hppa/fpu/libm-test-ulps @@ -797,20 +797,16 @@ double: 2 float: 1 Function: "exp10m1": -double: 2 -float: 1 +double: 4 Function: "exp10m1_downward": -double: 1 -float: 1 +double: 3 Function: "exp10m1_towardzero": -double: 1 -float: 1 +double: 2 Function: "exp10m1_upward": -double: 3 -float: 1 +double: 5 Function: "exp2": double: 1 @@ -829,20 +825,16 @@ double: 1 float: 1 Function: "exp2m1": -double: 1 -float: 1 +double: 2 Function: "exp2m1_downward": -double: 2 -float: 1 +double: 3 Function: "exp2m1_towardzero": -double: 2 -float: 1 +double: 3 Function: "exp2m1_upward": -double: 1 -float: 1 +double: 3 Function: "exp_downward": double: 1 @@ -858,20 +850,16 @@ float: 1 Function: "expm1": double: 1 -float: 1 ldouble: 1 Function: "expm1_downward": double: 1 -float: 1 Function: 
"expm1_towardzero": double: 1 -float: 2 Function: "expm1_upward": double: 1 -float: 1 Function: "gamma": double: 4 @@ -992,36 +980,28 @@ double: 2 float: 2 Function: "log10p1": -double: 1 -float: 1 +double: 2 Function: "log10p1_downward": double: 2 -float: 1 Function: "log10p1_towardzero": -double: 2 -float: 2 +double: 3 Function: "log10p1_upward": double: 2 -float: 1 Function: "log1p": double: 1 -float: 1 Function: "log1p_downward": double: 2 -float: 2 Function: "log1p_towardzero": double: 2 -float: 2 Function: "log1p_upward": double: 2 -float: 2 Function: "log2": double: 2 @@ -1041,19 +1021,15 @@ float: 3 Function: "log2p1": double: 2 -float: 2 Function: "log2p1_downward": double: 2 -float: 2 Function: "log2p1_towardzero": double: 2 -float: 2 Function: "log2p1_upward": double: 2 -float: 2 Function: "log_downward": float: 2 @@ -1067,19 +1043,15 @@ float: 2 Function: "logp1": double: 1 -float: 1 Function: "logp1_downward": double: 2 -float: 2 Function: "logp1_towardzero": double: 2 -float: 2 Function: "logp1_upward": double: 2 -float: 2 Function: "pow": double: 1 @@ -1181,20 +1153,16 @@ float: 3 Function: "tgamma": double: 9 -float: 8 ldouble: 1 Function: "tgamma_downward": double: 9 -float: 7 Function: "tgamma_towardzero": double: 9 -float: 7 Function: "tgamma_upward": double: 9 -float: 8 Function: "y0": double: 3 diff --git a/sysdeps/htl/pthreadP.h b/sysdeps/htl/pthreadP.h index cf8a2efe86..b0c9ceb23a 100644 --- a/sysdeps/htl/pthreadP.h +++ b/sysdeps/htl/pthreadP.h @@ -92,6 +92,8 @@ int __pthread_attr_setstack (pthread_attr_t *__attr, void *__stackaddr, int __pthread_attr_getstack (const pthread_attr_t *, void **, size_t *); void __pthread_testcancel (void); +#define __pthread_raise_internal(__sig) raise (__sig) + libc_hidden_proto (__pthread_self) #if IS_IN (libpthread) diff --git a/sysdeps/i386/fpu/e_log10f.S b/sysdeps/i386/fpu/e_log10f.S deleted file mode 100644 index 47f82e1993..0000000000 --- a/sysdeps/i386/fpu/e_log10f.S +++ /dev/null @@ -1,66 +0,0 @@ -/* - 
* Public domain. - */ - -#include <machine/asm.h> -#include <libm-alias-finite.h> - - .section .rodata.cst8,"aM",@progbits,8 - - .p2align 3 - .type one,@object -one: .double 1.0 - ASM_SIZE_DIRECTIVE(one) - /* It is not important that this constant is precise. It is only - a value which is known to be on the safe side for using the - fyl2xp1 instruction. */ - .type limit,@object -limit: .double 0.29 - ASM_SIZE_DIRECTIVE(limit) - - -#ifdef PIC -# define MO(op) op##@GOTOFF(%edx) -#else -# define MO(op) op -#endif - - .text -ENTRY(__ieee754_log10f) - fldlg2 // log10(2) - flds 4(%esp) // x : log10(2) -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - fxam - fnstsw - fld %st // x : x : log10(2) - sahf - jc 3f // in case x is NaN or ±Inf -4: fsubl MO(one) // x-1 : x : log10(2) - fld %st // x-1 : x-1 : x : log10(2) - fabs // |x-1| : x-1 : x : log10(2) - fcompl MO(limit) // x-1 : x : log10(2) - fnstsw // x-1 : x : log10(2) - andb $0x45, %ah - jz 2f - fxam - fnstsw - andb $0x45, %ah - cmpb $0x40, %ah - jne 5f - fabs // log10(1) is +0 in all rounding modes. 
-5: fstp %st(1) // x-1 : log10(2) - fyl2xp1 // log10(x) - ret - -2: fstp %st(0) // x : log10(2) - fyl2x // log10(x) - ret - -3: jp 4b // in case x is ±Inf - fstp %st(1) - fstp %st(1) - ret -END (__ieee754_log10f) -libm_alias_finite (__ieee754_log10f, __log10f) diff --git a/sysdeps/i386/fpu/libm-test-ulps b/sysdeps/i386/fpu/libm-test-ulps index 7da13797ca..a77ded2648 100644 --- a/sysdeps/i386/fpu/libm-test-ulps +++ b/sysdeps/i386/fpu/libm-test-ulps @@ -1155,25 +1155,21 @@ ldouble: 2 Function: "exp10m1": double: 3 -float: 1 float128: 3 ldouble: 4 Function: "exp10m1_downward": double: 3 -float: 3 float128: 6 ldouble: 6 Function: "exp10m1_towardzero": double: 2 -float: 2 float128: 6 ldouble: 5 Function: "exp10m1_upward": double: 5 -float: 3 float128: 6 ldouble: 6 @@ -1198,25 +1194,21 @@ ldouble: 1 Function: "exp2m1": double: 2 -float: 1 float128: 2 ldouble: 3 Function: "exp2m1_downward": double: 3 -float: 3 float128: 3 ldouble: 6 Function: "exp2m1_towardzero": double: 3 -float: 2 float128: 4 ldouble: 5 Function: "exp2m1_upward": double: 3 -float: 3 float128: 5 ldouble: 6 @@ -1242,19 +1234,16 @@ ldouble: 3 Function: "expm1_downward": double: 1 -float: 1 float128: 2 ldouble: 4 Function: "expm1_towardzero": double: 1 -float: 1 float128: 4 ldouble: 4 Function: "expm1_upward": double: 1 -float: 1 float128: 3 ldouble: 4 @@ -1425,25 +1414,21 @@ ldouble: 1 Function: "log10p1": double: 1 -float: 2 float128: 3 ldouble: 4 Function: "log10p1_downward": double: 2 -float: 1 float128: 4 ldouble: 8 Function: "log10p1_towardzero": double: 2 -float: 1 float128: 3 ldouble: 8 Function: "log10p1_upward": double: 2 -float: 2 float128: 4 ldouble: 6 @@ -1454,19 +1439,16 @@ ldouble: 2 Function: "log1p_downward": double: 1 -float: 1 float128: 3 ldouble: 4 Function: "log1p_towardzero": double: 1 -float: 1 float128: 3 ldouble: 4 Function: "log1p_upward": double: 1 -float: 1 float128: 2 ldouble: 3 @@ -1495,25 +1477,21 @@ ldouble: 1 Function: "log2p1": double: 1 -float: 1 float128: 3 ldouble: 4 
Function: "log2p1_downward": double: 2 -float: 2 float128: 3 ldouble: 3 Function: "log2p1_towardzero": double: 2 -float: 2 float128: 2 ldouble: 3 Function: "log2p1_upward": -double: 1 -float: 2 +double: 2 float128: 3 ldouble: 5 @@ -1539,19 +1517,16 @@ ldouble: 2 Function: "logp1_downward": double: 1 -float: 1 float128: 3 ldouble: 4 Function: "logp1_towardzero": double: 1 -float: 1 float128: 3 ldouble: 4 Function: "logp1_upward": double: 1 -float: 1 float128: 2 ldouble: 3 @@ -1699,25 +1674,21 @@ ldouble: 4 Function: "tgamma": double: 9 -float: 8 float128: 4 ldouble: 5 Function: "tgamma_downward": double: 9 -float: 7 float128: 5 ldouble: 6 Function: "tgamma_towardzero": double: 9 -float: 7 float128: 5 ldouble: 6 Function: "tgamma_upward": double: 9 -float: 8 float128: 4 ldouble: 5 diff --git a/sysdeps/i386/fpu/s_expm1f.S b/sysdeps/i386/fpu/s_expm1f.S deleted file mode 100644 index a1cb5bf0ab..0000000000 --- a/sysdeps/i386/fpu/s_expm1f.S +++ /dev/null @@ -1,112 +0,0 @@ -/* ix87 specific implementation of exp(x)-1. - Copyright (C) 1996-2024 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. 
*/ - - /* Using: e^x - 1 = 2^(x * log2(e)) - 1 */ - -#include <sysdep.h> -#include <machine/asm.h> -#include <i386-math-asm.h> -#include <libm-alias-float.h> - - .section .rodata - - .align ALIGNARG(4) - .type minus1,@object -minus1: .double -1.0 - ASM_SIZE_DIRECTIVE(minus1) - .type one,@object -one: .double 1.0 - ASM_SIZE_DIRECTIVE(one) - .type l2e,@object -l2e: .quad 0xb8aa3b295c17f0bc /* 1.442695040888963407359924681002 */ - .short 0x3fff - ASM_SIZE_DIRECTIVE(l2e) - -DEFINE_FLT_MIN - -#ifdef PIC -#define MO(op) op##@GOTOFF(%edx) -#else -#define MO(op) op -#endif - - .text -ENTRY(__expm1f) - movzwl 4+2(%esp), %eax - xorb $0x80, %ah // invert sign bit (now 1 is "positive") - cmpl $0xc2b1, %eax // is num >= 88.5? - jae HIDDEN_JUMPTARGET (__expf) - - flds 4(%esp) // x - fxam // Is NaN, +-Inf or +-0? - xorb $0x80, %ah - cmpl $0xc190, %eax // is num <= -18.0? - fstsw %ax - movb $0x45, %ch - jb 4f - - // Below -18.0 (may be -NaN or -Inf). - andb %ah, %ch -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - cmpb $0x01, %ch - je 5f // If -NaN, jump. - jmp 2f // -large, possibly -Inf. - -4: // In range -18.0 to 88.5 (may be +-0 but not NaN or +-Inf). - andb %ah, %ch - cmpb $0x40, %ch - je 3f // If +-0, jump. -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - -5: fldt MO(l2e) // log2(e) : x - fmulp // log2(e)*x - fld %st // log2(e)*x : log2(e)*x - // Set round-to-nearest temporarily. 
- subl $8, %esp - cfi_adjust_cfa_offset (8) - fstcw 4(%esp) - movl $0xf3ff, %ecx - andl 4(%esp), %ecx - movl %ecx, (%esp) - fldcw (%esp) - frndint // int(log2(e)*x) : log2(e)*x - fldcw 4(%esp) - addl $8, %esp - cfi_adjust_cfa_offset (-8) - fsubr %st, %st(1) // int(log2(e)*x) : fract(log2(e)*x) - fxch // fract(log2(e)*x) : int(log2(e)*x) - f2xm1 // 2^fract(log2(e)*x)-1 : int(log2(e)*x) - fscale // 2^(log2(e)*x)-2^int(log2(e)*x) : int(log2(e)*x) - fxch // int(log2(e)*x) : 2^(log2(e)*x)-2^int(log2(e)*x) - fldl MO(one) // 1 : int(log2(e)*x) : 2^(log2(e)*x)-2^int(log2(e)*x) - fscale // 2^int(log2(e)*x) : int(log2(e)*x) : 2^(log2(e)*x)-2^int(log2(e)*x) - fsubrl MO(one) // 1-2^int(log2(e)*x) : int(log2(e)*x) : 2^(log2(e)*x)-2^int(log2(e)*x) - fstp %st(1) // 1-2^int(log2(e)*x) : 2^(log2(e)*x)-2^int(log2(e)*x) - fsubrp %st, %st(1) // 2^(log2(e)*x) - FLT_CHECK_FORCE_UFLOW - ret - -2: fstp %st - fldl MO(minus1) // Set result to -1.0. -3: ret -END(__expm1f) -libm_alias_float (__expm1, expm1) diff --git a/sysdeps/i386/fpu/s_log1pf.S b/sysdeps/i386/fpu/s_log1pf.S deleted file mode 100644 index 82a3f448db..0000000000 --- a/sysdeps/i386/fpu/s_log1pf.S +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Public domain. - */ - -#include <machine/asm.h> -#include <i386-math-asm.h> - -RCSID("$NetBSD: s_log1pf.S,v 1.4 1995/05/09 00:13:05 jtc Exp $") - - .section .rodata - - .align ALIGNARG(4) - /* The fyl2xp1 can only be used for values in - -1 + sqrt(2) / 2 <= x <= 1 - sqrt(2) / 2 - 0.29 is a safe value. - */ -limit: .float 0.29 -one: .float 1.0 - -DEFINE_FLT_MIN - -#ifdef PIC -# define MO(op) op##@GOTOFF(%edx) -#else -# define MO(op) op -#endif - -/* - * Use the fyl2xp1 function when the argument is in the range -0.29 to 0.29, - * otherwise fyl2x with the needed extra computation. 
- */ - .text -ENTRY(__log1pf) - fldln2 - - flds 4(%esp) - -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - - fxam - fnstsw - fld %st - sahf - jc 3f // in case x is NaN or ±Inf -4: fabs - fcomps MO(limit) - fnstsw - sahf - jc 2f - - fadds MO(one) - fyl2x - ret - -2: fyl2xp1 - FLT_CHECK_FORCE_UFLOW_NONNAN - ret - -3: jp 4b // in case x is ±Inf - fstp %st(1) - fstp %st(1) - ret - -END (__log1pf) diff --git a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps index 5c41bf36dc..a9cd01bf03 100644 --- a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps +++ b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps @@ -1158,25 +1158,21 @@ ldouble: 2 Function: "exp10m1": double: 3 -float: 1 float128: 3 ldouble: 4 Function: "exp10m1_downward": double: 3 -float: 3 float128: 6 ldouble: 6 Function: "exp10m1_towardzero": double: 2 -float: 2 float128: 6 ldouble: 5 Function: "exp10m1_upward": double: 5 -float: 3 float128: 6 ldouble: 6 @@ -1201,25 +1197,21 @@ ldouble: 1 Function: "exp2m1": double: 2 -float: 1 float128: 2 ldouble: 3 Function: "exp2m1_downward": double: 3 -float: 3 float128: 3 ldouble: 6 Function: "exp2m1_towardzero": double: 3 -float: 2 float128: 4 ldouble: 6 Function: "exp2m1_upward": double: 3 -float: 3 float128: 5 ldouble: 6 @@ -1245,19 +1237,16 @@ ldouble: 3 Function: "expm1_downward": double: 1 -float: 1 float128: 2 ldouble: 4 Function: "expm1_towardzero": double: 1 -float: 1 float128: 4 ldouble: 4 Function: "expm1_upward": double: 1 -float: 1 float128: 3 ldouble: 4 @@ -1428,25 +1417,21 @@ ldouble: 1 Function: "log10p1": double: 1 -float: 2 float128: 3 ldouble: 4 Function: "log10p1_downward": double: 2 -float: 1 float128: 4 ldouble: 8 Function: "log10p1_towardzero": double: 2 -float: 1 float128: 3 ldouble: 8 Function: "log10p1_upward": double: 2 -float: 2 float128: 4 ldouble: 6 @@ -1457,19 +1442,16 @@ ldouble: 2 Function: "log1p_downward": double: 1 -float: 1 float128: 3 ldouble: 4 Function: "log1p_towardzero": double: 1 -float: 1 
float128: 3 ldouble: 4 Function: "log1p_upward": double: 1 -float: 1 float128: 2 ldouble: 3 @@ -1498,25 +1480,21 @@ ldouble: 1 Function: "log2p1": double: 1 -float: 1 float128: 3 ldouble: 4 Function: "log2p1_downward": double: 2 -float: 2 float128: 3 ldouble: 4 Function: "log2p1_towardzero": double: 2 -float: 2 float128: 2 ldouble: 4 Function: "log2p1_upward": double: 1 -float: 2 float128: 3 ldouble: 5 @@ -1542,19 +1520,16 @@ ldouble: 2 Function: "logp1_downward": double: 1 -float: 1 float128: 3 ldouble: 4 Function: "logp1_towardzero": double: 1 -float: 1 float128: 3 ldouble: 4 Function: "logp1_upward": double: 1 -float: 1 float128: 2 ldouble: 3 @@ -1701,25 +1676,21 @@ ldouble: 4 Function: "tgamma": double: 9 -float: 8 float128: 4 ldouble: 5 Function: "tgamma_downward": double: 9 -float: 7 float128: 5 ldouble: 6 Function: "tgamma_towardzero": double: 9 -float: 7 float128: 5 ldouble: 6 Function: "tgamma_upward": double: 8 -float: 8 float128: 4 ldouble: 5 diff --git a/sysdeps/i386/i686/multiarch/s_fma-fma.c b/sysdeps/i386/i686/multiarch/s_fma-fma.c index 9d17f67a10..6c3fbc0290 100644 --- a/sysdeps/i386/i686/multiarch/s_fma-fma.c +++ b/sysdeps/i386/i686/multiarch/s_fma-fma.c @@ -9,7 +9,7 @@ The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public diff --git a/sysdeps/i386/i686/multiarch/s_fma.c b/sysdeps/i386/i686/multiarch/s_fma.c index 978345bda5..3ae111aac5 100644 --- a/sysdeps/i386/i686/multiarch/s_fma.c +++ b/sysdeps/i386/i686/multiarch/s_fma.c @@ -9,7 +9,7 @@ The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public diff --git a/sysdeps/i386/i686/multiarch/s_fmaf-fma.c b/sysdeps/i386/i686/multiarch/s_fmaf-fma.c index f049a60f57..aff92fa5b6 100644 --- a/sysdeps/i386/i686/multiarch/s_fmaf-fma.c +++ b/sysdeps/i386/i686/multiarch/s_fmaf-fma.c @@ -9,7 +9,7 @@ The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public diff --git a/sysdeps/i386/i686/multiarch/s_fmaf.c b/sysdeps/i386/i686/multiarch/s_fmaf.c index e92ebb19a7..c9be68257f 100644 --- a/sysdeps/i386/i686/multiarch/s_fmaf.c +++ b/sysdeps/i386/i686/multiarch/s_fmaf.c @@ -9,7 +9,7 @@ The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public diff --git a/sysdeps/ieee754/dbl-64/gamma_productf.c b/sysdeps/ieee754/dbl-64/gamma_productf.c index f3596eeae4..1cc8931700 100644 --- a/sysdeps/ieee754/dbl-64/gamma_productf.c +++ b/sysdeps/ieee754/dbl-64/gamma_productf.c @@ -1,44 +1 @@ -/* Compute a product of X, X+1, ..., with an error estimate. - Copyright (C) 2013-2024 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. */ - -#include <math.h> -#include <math-narrow-eval.h> -#include <math_private.h> -#include <float.h> - -/* Compute the product of X + X_EPS, X + X_EPS + 1, ..., X + X_EPS + N - - 1, in the form R * (1 + *EPS) where the return value R is an - approximation to the product and *EPS is set to indicate the - approximate error in the return value. X is such that all the - values X + 1, ..., X + N - 1 are exactly representable, and X_EPS / - X is small enough that factors quadratic in it can be - neglected. */ - -float -__gamma_productf (float x, float x_eps, int n, float *eps) -{ - double x_full = (double) x + (double) x_eps; - double ret = x_full; - for (int i = 1; i < n; i++) - ret *= x_full + i; - - float fret = math_narrow_eval ((float) ret); - *eps = (ret - fret) / fret; - - return fret; -} +/* Not needed. */ diff --git a/sysdeps/ieee754/flt-32/e_gammaf_r.c b/sysdeps/ieee754/flt-32/e_gammaf_r.c index a9730d61c1..6b1f95d50f 100644 --- a/sysdeps/ieee754/flt-32/e_gammaf_r.c +++ b/sysdeps/ieee754/flt-32/e_gammaf_r.c @@ -1,215 +1,176 @@ -/* Implementation of gamma function according to ISO C. - Copyright (C) 1997-2024 Free Software Foundation, Inc. - This file is part of the GNU C Library. +/* Implementation of the gamma function for binary32. 
- The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. +Copyright (c) 2023-2024 Alexei Sibidanov. - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. +The original version of this file was copied from the CORE-MATH +project (file src/binary32/tgamma/tgammaf.c, revision a48e352). - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. */ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ */ + +/* Changes with respect to the original CORE-MATH code: + - removed the dealing with errno + (this is done in the wrapper math/w_tgammaf_compat.c) + - usage of math_narrow_eval to deal with underflow/overflow + - deal with signgamp + */ #include <math.h> -#include <math-narrow-eval.h> -#include <math_private.h> -#include <fenv_private.h> -#include <math-underflow.h> #include <float.h> +#include <stdint.h> +#include <stddef.h> #include <libm-alias-finite.h> +#include <math-narrow-eval.h> +#include "math_config.h" -/* Coefficients B_2k / 2k(2k-1) of x^-(2k-1) inside exp in Stirling's - approximation to gamma function. */ +float +__ieee754_gammaf_r (float x, int *signgamp) +{ + /* The wrapper in math/w_tgamma_template.c expects *signgamp to be set to a + non-negative value if the returned value is gamma(x), and to a negative + value if it is -gamma(x). + Since the code here directly computes gamma(x), we set it to 1. + */ + if (signgamp != NULL) + *signgamp = 1; -static const float gamma_coeff[] = + /* List of exceptional cases. Each entry contains the 32-bit encoding u of x, + a binary32 approximation f of gamma(x), and a correction term df. 
*/ + static const struct { - 0x1.555556p-4f, - -0xb.60b61p-12f, - 0x3.403404p-12f, + uint32_t u; + float f, df; + } tb[] = { + { 0x27de86a9u, 0x1.268266p+47f, 0x1p22f }, /* x = 0x1.bd0d52p-48 */ + { 0x27e05475u, 0x1.242422p+47f, 0x1p22f }, /* x = 0x1.c0a8eap-48 */ + { 0xb63befb3u, -0x1.5cb6e4p+18f, 0x1p-7f }, /* x = -0x1.77df66p-19 */ + { 0x3c7bb570u, 0x1.021d9p+6f, 0x1p-19f }, /* x = 0x1.f76aep-7 */ + { 0x41e886d1u, 0x1.33136ap+98f, 0x1p73f }, /* x = 0x1.d10da2p+4 */ + { 0xc067d177u, 0x1.f6850cp-3f, 0x1p-28f }, /* x = -0x1.cfa2eep+1 */ + { 0xbd99da31u, -0x1.befe66p+3, -0x1p-22f }, /* x = -0x1.33b462p-4 */ + { 0xbf54c45au, -0x1.a6b4ecp+2, 0x1p-23f }, /* x = -0x1.a988b4p-1 */ + { 0x41ee77feu, 0x1.d3631cp+101, -0x1p-76f }, /* x = 0x1.dceffcp+4 */ + { 0x3f843a64u, 0x1.f6c638p-1, 0x1p-26f }, /* x = 0x1.0874c8p+0 */ }; -#define NCOEFF (sizeof (gamma_coeff) / sizeof (gamma_coeff[0])) - -/* Return gamma (X), for positive X less than 42, in the form R * - 2^(*EXP2_ADJ), where R is the return value and *EXP2_ADJ is set to - avoid overflow or underflow in intermediate calculations. */ - -static float -gammaf_positive (float x, int *exp2_adj) -{ - int local_signgam; - if (x < 0.5f) - { - *exp2_adj = 0; - return __ieee754_expf (__ieee754_lgammaf_r (x + 1, &local_signgam)) / x; - } - else if (x <= 1.5f) - { - *exp2_adj = 0; - return __ieee754_expf (__ieee754_lgammaf_r (x, &local_signgam)); - } - else if (x < 2.5f) - { - *exp2_adj = 0; - float x_adj = x - 1; - return (__ieee754_expf (__ieee754_lgammaf_r (x_adj, &local_signgam)) - * x_adj); - } - else - { - float eps = 0; - float x_eps = 0; - float x_adj = x; - float prod = 1; - if (x < 4.0f) - { - /* Adjust into the range for applying Stirling's - approximation. 
*/ - float n = ceilf (4.0f - x); - x_adj = math_narrow_eval (x + n); - x_eps = (x - (x_adj - n)); - prod = __gamma_productf (x_adj - n, x_eps, n, &eps); + uint32_t t = asuint (x); + uint32_t ax = t << 1; + if (__glibc_unlikely (ax >= (0xffu << 24))) + { /* x=NaN or +/-Inf */ + if (ax == (0xffu << 24)) + { /* x=+/-Inf */ + if (t >> 31) /* x=-Inf */ + return __math_invalidf (x); + return x; /* x=+Inf */ } - /* The result is now gamma (X_ADJ + X_EPS) / (PROD * (1 + EPS)). - Compute gamma (X_ADJ + X_EPS) using Stirling's approximation, - starting by computing pow (X_ADJ, X_ADJ) with a power of 2 - factored out. */ - float exp_adj = -eps; - float x_adj_int = roundf (x_adj); - float x_adj_frac = x_adj - x_adj_int; - int x_adj_log2; - float x_adj_mant = __frexpf (x_adj, &x_adj_log2); - if (x_adj_mant < M_SQRT1_2f) + return x + x; /* x=NaN, where x+x ensures the "Invalid operation" + exception is set if x is sNaN */ + } + double z = x; + if (__glibc_unlikely (ax < 0x6d000000u)) + { /* |x| < 0x1p-18 */ + volatile double d = (0x1.fa658c23b1578p-1 - 0x1.d0a118f324b63p-1 * z) + * z - 0x1.2788cfc6fb619p-1; + double f = 1.0 / z + d; + float r = f; + uint64_t rt = asuint64 (f); + if (((rt + 2) & 0xfffffff) < 4) { - x_adj_log2--; - x_adj_mant *= 2.0f; + for (unsigned i = 0; i < sizeof (tb) / sizeof (tb[0]); i++) + if (t == tb[i].u) + return tb[i].f + tb[i].df; } - *exp2_adj = x_adj_log2 * (int) x_adj_int; - float ret = (__ieee754_powf (x_adj_mant, x_adj) - * __ieee754_exp2f (x_adj_log2 * x_adj_frac) - * __ieee754_expf (-x_adj) - * sqrtf (2 * M_PIf / x_adj) - / prod); - exp_adj += x_eps * __ieee754_logf (x_adj); - float bsum = gamma_coeff[NCOEFF - 1]; - float x_adj2 = x_adj * x_adj; - for (size_t i = 1; i <= NCOEFF - 1; i++) - bsum = bsum / x_adj2 + gamma_coeff[NCOEFF - 1 - i]; - exp_adj += bsum / x_adj; - return ret + ret * __expm1f (exp_adj); + return r; } -} - -float -__ieee754_gammaf_r (float x, int *signgamp) -{ - int32_t hx; - float ret; - - GET_FLOAT_WORD (hx, x); - - if 
(__glibc_unlikely ((hx & 0x7fffffff) == 0)) + float fx = floorf (x); + if (__glibc_unlikely (x >= 0x1.18522p+5f)) { - /* Return value for x == 0 is Inf with divide by zero exception. */ - *signgamp = 0; - return 1.0 / x; + /* Overflow case. The original CORE-MATH code returns + 0x1p127f * 0x1p127f, but apparently some compilers replace this + by +Inf. */ + return math_narrow_eval (x * 0x1p127f); } - if (__builtin_expect (hx < 0, 0) - && (uint32_t) hx < 0xff800000 && rintf (x) == x) - { - /* Return value for integer x < 0 is NaN with invalid exception. */ - *signgamp = 0; - return (x - x) / (x - x); + /* compute k only after the overflow check, otherwise the case to integer + might overflow */ + int k = fx; + if (__glibc_unlikely (fx == x)) + { /* x is integer */ + if (x == 0.0f) + return 1.0f / x; + if (x < 0.0f) + return __math_invalidf (0.0f); + double t0 = 1, x0 = 1; + for (int i = 1; i < k; i++, x0 += 1.0) + t0 *= x0; + return t0; } - if (__glibc_unlikely (hx == 0xff800000)) - { - /* x == -Inf. According to ISO this is NaN. */ - *signgamp = 0; - return x - x; + if (__glibc_unlikely (x < -42.0f)) + { /* negative non-integer */ + /* For x < -42, x non-integer, |gamma(x)| < 2^-151. */ + static const float sgn[2] = { 0x1p-127f, -0x1p-127f }; + /* Underflows always happens */ + return math_narrow_eval (0x1p-127f * sgn[k & 1]); } - if (__glibc_unlikely ((hx & 0x7f800000) == 0x7f800000)) + /* The array c[] stores a degree-15 polynomial approximation for + gamma(x). */ + static const double c[] = { - /* Positive infinity (return positive infinity) or NaN (return - NaN). 
*/ - *signgamp = 0; - return x + x; - } + 0x1.c9a76be577123p+0, 0x1.8f2754ddcf90dp+0, 0x1.0d1191949419bp+0, + 0x1.e1f42cf0ae4a1p-2, 0x1.82b358a3ab638p-3, 0x1.e1f2b30cd907bp-5, + 0x1.240f6d4071bd8p-6, 0x1.1522c9f3cd012p-8, 0x1.1fd0051a0525bp-10, + 0x1.9808a8b96c37ep-13, 0x1.b3f78e01152b5p-15, 0x1.49c85a7e1fd04p-18, + 0x1.471ca49184475p-19, -0x1.368f0b7ed9e36p-23, 0x1.882222f9049efp-23, + -0x1.a69ed2042842cp-25 + }; - if (x >= 36.0f) - { - /* Overflow. */ - *signgamp = 0; - ret = math_narrow_eval (FLT_MAX * FLT_MAX); - return ret; - } - else + double m = z - 0x1.7p+1; + double i = roundeven (m); + double step = copysign (1.0, i); + double d = m - i, d2 = d * d, d4 = d2 * d2, d8 = d4 * d4; + double f = (c[0] + d * c[1]) + d2 * (c[2] + d * c[3]) + + d4 * ((c[4] + d * c[5]) + d2 * (c[6] + d * c[7])) + + d8 * ((c[8] + d * c[9]) + d2 * (c[10] + d * c[11]) + + d4 * ((c[12] + d * c[13]) + d2 * (c[14] + d * c[15]))); + int jm = fabs (i); + double w = 1; + if (jm) { - SET_RESTORE_ROUNDF (FE_TONEAREST); - if (x > 0.0f) + z -= 0.5 + step * 0.5; + w = z; + for (int j = jm - 1; j; j--) { - *signgamp = 0; - int exp2_adj; - float tret = gammaf_positive (x, &exp2_adj); - ret = __scalbnf (tret, exp2_adj); + z -= step; + w *= z; } - else if (x >= -FLT_EPSILON / 4.0f) - { - *signgamp = 0; - ret = 1.0f / x; - } - else - { - float tx = truncf (x); - *signgamp = (tx == 2.0f * truncf (tx / 2.0f)) ? -1 : 1; - if (x <= -42.0f) - /* Underflow. */ - ret = FLT_MIN * FLT_MIN; - else - { - float frac = tx - x; - if (frac > 0.5f) - frac = 1.0f - frac; - float sinpix = (frac <= 0.25f - ? 
__sinf (M_PIf * frac) - : __cosf (M_PIf * (0.5f - frac))); - int exp2_adj; - float tret = M_PIf / (-x * sinpix - * gammaf_positive (-x, &exp2_adj)); - ret = __scalbnf (tret, -exp2_adj); - math_check_force_underflow_nonneg (ret); - } - } - ret = math_narrow_eval (ret); - } - if (isinf (ret) && x != 0) - { - if (*signgamp < 0) - { - ret = math_narrow_eval (-copysignf (FLT_MAX, ret) * FLT_MAX); - ret = -ret; - } - else - ret = math_narrow_eval (copysignf (FLT_MAX, ret) * FLT_MAX); - return ret; } - else if (ret == 0) + if (i <= -0.5) + w = 1 / w; + f *= w; + uint64_t rt = asuint64 (f); + float r = f; + /* Deal with exceptional cases. */ + if (__glibc_unlikely (((rt + 2) & 0xfffffff) < 8)) { - if (*signgamp < 0) - { - ret = math_narrow_eval (-copysignf (FLT_MIN, ret) * FLT_MIN); - ret = -ret; - } - else - ret = math_narrow_eval (copysignf (FLT_MIN, ret) * FLT_MIN); - return ret; + for (unsigned j = 0; j < sizeof (tb) / sizeof (tb[0]); j++) + if (t == tb[j].u) + return tb[j].f + tb[j].df; } - else - return ret; + return r; } libm_alias_finite (__ieee754_gammaf_r, __gammaf_r) diff --git a/sysdeps/ieee754/flt-32/e_log10f.c b/sysdeps/ieee754/flt-32/e_log10f.c index 791895e042..03d9e281f3 100644 --- a/sysdeps/ieee754/flt-32/e_log10f.c +++ b/sysdeps/ieee754/flt-32/e_log10f.c @@ -1,54 +1,161 @@ -/* e_log10f.c -- float version of e_log10.c. - */ - -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ +/* Correctly-rounded radix-10 logarithm function for binary32 value. + +Copyright (c) 2022-2023 Alexei Sibidanov. + +This file is part of the CORE-MATH project +project (file src/binary32/log10/log10f.c, revision bc385c2). 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+*/ #include <math.h> -#include <math_private.h> -#include <fix-int-fp-convert-zero.h> +#include <stdint.h> #include <libm-alias-finite.h> +#include "math_config.h" -static const float -two25 = 3.3554432000e+07, /* 0x4c000000 */ -ivln10 = 4.3429449201e-01, /* 0x3ede5bd9 */ -log10_2hi = 3.0102920532e-01, /* 0x3e9a2080 */ -log10_2lo = 7.9034151668e-07; /* 0x355427db */ +static __attribute__ ((noinline)) float +as_special (float x) +{ + uint32_t ux = asuint (x); + if (ux == 0x7f800000u) + return x; /* +inf */ + uint32_t ax = ux << 1; + if (ax == 0u) + /* -0.0 */ + return __math_divzerof (1); + if (ax > 0xff000000u) + return x + x; /* nan */ + return __math_invalidf (x); +} float -__ieee754_log10f(float x) +__ieee754_log10f (float x) { - float y,z; - int32_t i,k,hx; - - GET_FLOAT_WORD(hx,x); - - k=0; - if (hx < 0x00800000) { /* x < 2**-126 */ - if (__builtin_expect((hx&0x7fffffff)==0, 0)) - return -two25/fabsf (x); /* log(+-0)=-inf */ - if (__builtin_expect(hx<0, 0)) - return (x-x)/(x-x); /* log(-#) = NaN */ - k -= 25; x *= two25; /* subnormal number, scale up x */ - GET_FLOAT_WORD(hx,x); + static const double tr[] = + { + 0x1p+0, 0x1.f81f82p-1, 0x1.f07c1fp-1, 0x1.e9131acp-1, + 0x1.e1e1e1ep-1, 0x1.dae6077p-1, 0x1.d41d41dp-1, 0x1.cd85689p-1, + 0x1.c71c71cp-1, 0x1.c0e0704p-1, 0x1.bacf915p-1, 0x1.b4e81b5p-1, + 0x1.af286bdp-1, 0x1.a98ef6p-1, 0x1.a41a41ap-1, 0x1.9ec8e95p-1, + 0x1.999999ap-1, 0x1.948b0fdp-1, 0x1.8f9c19p-1, 0x1.8acb90fp-1, + 0x1.8618618p-1, 0x1.8181818p-1, 0x1.7d05f41p-1, 0x1.78a4c81p-1, + 0x1.745d174p-1, 0x1.702e05cp-1, 0x1.6c16c17p-1, 0x1.6816817p-1, + 0x1.642c859p-1, 0x1.605816p-1, 0x1.5c9882cp-1, 0x1.58ed231p-1, + 0x1.5555555p-1, 0x1.51d07ebp-1, 0x1.4e5e0a7p-1, 0x1.4afd6ap-1, + 0x1.47ae148p-1, 0x1.446f865p-1, 0x1.4141414p-1, 0x1.3e22cbdp-1, + 0x1.3b13b14p-1, 0x1.3813814p-1, 0x1.3521cfbp-1, 0x1.323e34ap-1, + 0x1.2f684bep-1, 0x1.2c9fb4ep-1, 0x1.29e412ap-1, 0x1.27350b9p-1, + 0x1.2492492p-1, 0x1.21fb781p-1, 0x1.1f7047ep-1, 0x1.1cf06aep-1, + 0x1.1a7b961p-1, 
0x1.1811812p-1, 0x1.15b1e5fp-1, 0x1.135c811p-1, + 0x1.1111111p-1, 0x1.0ecf56cp-1, 0x1.0c9715p-1, 0x1.0a6810ap-1, + 0x1.0842108p-1, 0x1.0624dd3p-1, 0x1.041041p-1, 0x1.0204081p-1, + 0.5 + }; + static const double tl[] = + { + -0x1.d45fd6237ebe3p-47, 0x1.b947689311b6ep-8, 0x1.b5e909c96d7d5p-7, + 0x1.45f4f59ed2165p-6, 0x1.af5f92cbd8f1ep-6, 0x1.0ba01a606de8cp-5, + 0x1.3ed119b9a2b7bp-5, 0x1.714834298eec2p-5, 0x1.a30a9d98357fbp-5, + 0x1.d41d512670813p-5, 0x1.02428c0f65519p-4, 0x1.1a23444eecc3ep-4, + 0x1.31b30543f4cb4p-4, 0x1.48f3ed39bfd04p-4, 0x1.5fe8049a0e423p-4, + 0x1.769140a6aa008p-4, 0x1.8cf1836c98cb3p-4, 0x1.a30a9d55541a1p-4, + 0x1.b8de4d1ee823ep-4, 0x1.ce6e4202ca2e6p-4, 0x1.e3bc1accace07p-4, + 0x1.f8c9683b5abd4p-4, 0x1.06cbd68ca9a6ep-3, 0x1.11142f19df73p-3, + 0x1.1b3e71fa7a97fp-3, 0x1.254b4d37a46e3p-3, 0x1.2f3b6912cbf07p-3, + 0x1.390f683115886p-3, 0x1.42c7e7fffc5a8p-3, 0x1.4c65808c78d3cp-3, + 0x1.55e8c50751c55p-3, 0x1.5f52445dec3d8p-3, 0x1.68a288c3f12p-3, + 0x1.71da17bdf0d19p-3, 0x1.7af973608afd9p-3, 0x1.84011952a2579p-3, + 0x1.8cf1837a7ea6p-3, 0x1.95cb2891e43d6p-3, 0x1.9e8e7b0f869ep-3, + 0x1.a73beaa5db18dp-3, 0x1.afd3e394558d3p-3, 0x1.b856cf060d9f1p-3, + 0x1.c0c5134de1ffcp-3, 0x1.c91f1371bc99fp-3, 0x1.d1652ffcd3f53p-3, + 0x1.d997c6f635e75p-3, 0x1.e1b733ab90f3bp-3, 0x1.e9c3ceadac856p-3, + 0x1.f1bdeec43a305p-3, 0x1.f9a5e7a5fa3fep-3, 0x1.00be05ac02f2bp-2, + 0x1.04a054d81a2d4p-2, 0x1.087a0835957fbp-2, 0x1.0c4b457099517p-2, + 0x1.101431aa1fe51p-2, 0x1.13d4f08b98dd8p-2, 0x1.178da53edb892p-2, + 0x1.1b3e71e9f9d58p-2, 0x1.1ee777defdeedp-2, 0x1.2288d7b48e23bp-2, + 0x1.2622b0f52e49fp-2, 0x1.29b522a4c6314p-2, 0x1.2d404b0e30f8p-2, + 0x1.30c4478f3fbe5p-2, 0x1.34413509f7915p-2 + }; + static const union + { + float f; + uint32_t u; + } st[] = + { + { 0x1p+0 }, { 0x1.4p+3 }, { 0x1.9p+6 }, { 0x1.f4p+9 }, + { 0x1.388p+13 }, { 0x1.86ap+16 }, { 0x1.e848p+19 }, { 0x1.312dp+23 }, + { 0x1.7d784p+26 }, { 0x1.dcd65p+29 }, { 0x1.2a05f2p+33 }, { 0 }, + { 0 }, { 0 }, { 0 }, { 0 } + }; + static 
const double b[] = + { + 0x1.bcb7b15c5a2f8p-2, -0x1.bcbb1dbb88ebap-3, 0x1.2871c39d521c6p-3 + }; + static const double c[] = + { + 0x1.bcb7b1526e50ep-2, -0x1.bcb7b1526e53dp-3, 0x1.287a7636f3fa2p-3, + -0x1.bcb7b146a14b3p-4, 0x1.63c627d5219cbp-4, -0x1.2880736c8762dp-4, + 0x1.fc1ecf913961ap-5 + }; + uint32_t ux = asuint (x); + if (__glibc_unlikely (ux < (1 << 23) || ux >= 0x7f800000u)) + { + if (ux == 0 || ux >= 0x7f800000u) + return as_special (x); + /* subnormal */ + int n = __builtin_clz (ux) - 8; + ux <<= n; + ux -= n << 23; + } + unsigned m = ux & ((1 << 23) - 1), j = (m + (1 << (23 - 7))) >> (23 - 6); + double ix = tr[j], l = tl[j]; + int e = ((int) ux >> 23) - 127; + unsigned je = e + 1; + je = (je * 0x4d104d4) >> 28; + if (__glibc_unlikely (ux == st[je].u)) + return je; + + double tz = asdouble (((int64_t) m | ((int64_t) 1023 << 23)) << (52 - 23)); + double z = tz * ix - 1, z2 = z * z; + double r + = ((e * 0x1.34413509f79ffp-2 + l) + z * b[0]) + z2 * (b[1] + z * b[2]); + float ub = r, lb = r + 0x1.b008p-34; + if (__glibc_unlikely (ub != lb)) + { + double f = z + * ((c[0] + z * c[1]) + + z2 + * ((c[2] + z * c[3]) + + z2 * (c[4] + z * c[5] + z2 * c[6]))); + f -= 0x1.0cee0ed4ca7e9p-54 * e; + f += l - tl[0]; + double el = e * 0x1.34413509f7ap-2; + r = el + f; + ub = r; + tz = r; + if (__glibc_unlikely (!((asuint64 (tz) & ((1 << 28) - 1))))) + { + double dr = (el - r) + f; + r += dr * 32; + ub = r; } - if (__builtin_expect(hx >= 0x7f800000, 0)) return x+x; - k += (hx>>23)-127; - i = ((uint32_t)k&0x80000000)>>31; - hx = (hx&0x007fffff)|((0x7f-i)<<23); - y = (float)(k+i); - if (FIX_INT_FP_CONVERT_ZERO && y == 0.0f) - y = 0.0f; - SET_FLOAT_WORD(x,hx); - z = y*log10_2lo + ivln10*__ieee754_logf(x); - return z+y*log10_2hi; + } + return ub; } libm_alias_finite (__ieee754_log10f, __log10f) diff --git a/sysdeps/ieee754/flt-32/math_config.h b/sysdeps/ieee754/flt-32/math_config.h index 729f22cd4f..dc07ebd459 100644 --- a/sysdeps/ieee754/flt-32/math_config.h +++ 
b/sysdeps/ieee754/flt-32/math_config.h @@ -166,9 +166,9 @@ extern const struct exp2f_data uint64_t tab[1 << EXP2F_TABLE_BITS]; double shift_scaled; double poly[EXP2F_POLY_ORDER]; - double shift; double invln2_scaled; double poly_scaled[EXP2F_POLY_ORDER]; + double shift; } __exp2f_data attribute_hidden; #define LOGF_TABLE_BITS 4 diff --git a/sysdeps/ieee754/flt-32/s_exp10m1f.c b/sysdeps/ieee754/flt-32/s_exp10m1f.c new file mode 100644 index 0000000000..ea3173a174 --- /dev/null +++ b/sysdeps/ieee754/flt-32/s_exp10m1f.c @@ -0,0 +1,227 @@ +/* Implementation of the exp10m1 function for binary32. + +Copyright (c) 2022-2024 Alexei Sibidanov. Paul Zimmermann. + +The original version of this file was copied from the CORE-MATH +project (file src/binary32/exp10m1/exp10m1f.c, revision c46b85b). + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ */ + +#include <math.h> +#include "math_config.h" +#include <libm-alias-float.h> + +float +__exp10m1f (float x) +{ + const double iln10h = 0x1.a934f09p+1 * 16; + const double iln10l = 0x1.e68dc57f2496p-29 * 16; + double z = x; + uint32_t ux = asuint (x); + uint32_t ax = ux & (~0u >> 1); + if (__glibc_unlikely (ux > 0xc0f0d2f1u)) + { /* x < -7.52575 */ + if (ax > (0xffu << 23)) + return x + x; /* nan */ + return (ux == 0xff800000) ? -0x1p+0f : -0x1p+0f + 0x1p-26f; + } + else if (__glibc_unlikely (ax > 0x421a209au)) + { /* x > 38.5318 */ + if (ax >= asuint (INFINITY)) + return x + x; /* +Inf or NaN */ + return __math_oflowf (0); + } + else if (__glibc_unlikely (ax < 0x3d89c604u)) + { /* |x| < 0.1549/log(10) */ + double z2 = z * z, r; + if (__glibc_unlikely (ax < 0x3d1622fbu)) + { /* |x| < 8.44e-2/log(10) */ + if (__glibc_unlikely (ax < 0x3c8b76a3u)) + { /* |x| < 3.92e-2/log(10) */ + if (__glibc_unlikely (ax < 0x3bcced04u)) + { /* |x| < 1.44e-2/log(10) */ + if (__glibc_unlikely (ax < 0x3acf33ebu)) + { /* |x| < 3.64e-3/log(10 */ + if (__glibc_unlikely (ax < 0x395a966bu)) + { /* |x| < 4.8e-4/log(10 */ + if (__glibc_unlikely (ax < 0x36fe4a4bu)) + { /* |x| < 1.745e-5/log(10) */ + if (__glibc_unlikely (ax < 0x32407f39u)) + { /* |x| < 2.58e-8/log(10) */ + if (__glibc_unlikely (ax < 0x245e5bd9u)) + { /* |x| < 4.82164e-17 */ + r = 0x1.26bb1bbb55516p+1; + } + else + { + if (__glibc_unlikely (ux == 0x2c994b7bu)) + return 0x1.60f974p-37f - 0x1p-90f; + r = 0x1.26bb1bbb55516p+1 + + z * 0x1.53524c73cea69p+1; + } + } + else + { + if (__glibc_unlikely (ux == 0xb6fa215bu)) + return -0x1.1ff87ep-16 + 0x1p-68; + r = 0x1.26bb1bbb55516p+1 + + z * (0x1.53524c73ea62fp+1 + + z * 0x1.0470591de2c75p+1); + } + } + else + { + static const double cp[] = + { + 0x1.26bb1bbb55515p+1, 0x1.53524c73cea69p+1, + 0x1.0470595038cc2p+1, 0x1.2bd7609fe1561p+0 + }; + r = (cp[0] + z * cp[1]) + + z2 * (cp[2] + z * cp[3]); + } + } + else + { + static const double cp[] = + { + 0x1.26bb1bbb55516p+1, 
0x1.53524c73ce6dbp+1, + 0x1.0470591de3024p+1, 0x1.2bd76b79060e6p+0, + 0x1.1429ffd3a963dp-1 + }; + r = (cp[0] + z * cp[1]) + + z2 * (cp[2] + z * (cp[3] + z * cp[4])); + } + } + else + { + static const double cp[] = + { + 0x1.26bb1bbb55516p+1, 0x1.53524c73cea67p+1, + 0x1.0470591dc2953p+1, 0x1.2bd760a004d64p+0, + 0x1.142a85da6f072p-1, 0x1.a7ed70725b00ep-3 + }; + r = (cp[0] + z * cp[1]) + z2 + * ((cp[2] + z * cp[3]) + + z2 * (cp[4] + z * cp[5])); + } + } + else + { + static const double cp[] = + { + 0x1.26bb1bbb55516p+1, 0x1.53524c73ceadep+1, + 0x1.0470591de2bb4p+1, 0x1.2bd76099a9d33p+0, + 0x1.1429ffd829b0bp-1, 0x1.a7f2a6a0f7dc8p-3, + 0x1.16e4dfbce0f56p-4 + }; + r = (cp[0] + z * cp[1]) + + z2 * ((cp[2] + z * cp[3]) + + z2 * (cp[4] + z * (cp[5] + z * cp[6]))); + } + } + else + { + static const double cp[] = + { + 0x1.26bb1bbb55515p+1, 0x1.53524c73cea6ap+1, + 0x1.0470591de476p+1, 0x1.2bd7609fd4ee2p+0, + 0x1.1429ff70a9b48p-1, 0x1.a7ed71259ba5bp-3, + 0x1.16f3004fb3ac1p-4, 0x1.4116b0388aa9fp-6 + }; + r = ((cp[0] + z * cp[1]) + z2 * (cp[2] + z * cp[3])) + + (z2 * z2) * ((cp[4] + z * cp[5]) + z2 * (cp[6] + + z * cp[7])); + } + } + else + { + static const double cp[] = + { + 0x1.26bb1bbb55515p+1, 0x1.53524c73cea42p+1, 0x1.0470591de2d1dp+1, + 0x1.2bd760a010a53p+0, 0x1.1429ffd16170cp-1, 0x1.a7ed6b2a0d97fp-3, + 0x1.16e4e37fa51e4p-4, 0x1.4147fe4c1676fp-6, 0x1.4897c4b3e329ap-8 + }; + r = ((cp[0] + z * cp[1]) + z2 * (cp[2] + z * cp[3])) + + (z2 * z2) * ((cp[4] + z * cp[5]) + + z2 * (cp[6] + z * (cp[7] + z * cp[8]))); + } + r *= z; + return r; + } + else + { + /* -7.52575 < x < -0.1549/log(10) or 0.1549/log(10) < x < 38.5318 */ + static const double tb[] = + { + 0x1p+0, 0x1.0b5586cf9890fp+0, 0x1.172b83c7d517bp+0, + 0x1.2387a6e756238p+0, 0x1.306fe0a31b715p+0, 0x1.3dea64c123422p+0, + 0x1.4bfdad5362a27p+0, 0x1.5ab07dd485429p+0, 0x1.6a09e667f3bcdp+0, + 0x1.7a11473eb0187p+0, 0x1.8ace5422aa0dap+0, 0x1.9c49182a3f09p+0, + 0x1.ae89f995ad3adp+0, 0x1.c199bdd85529cp+0, 0x1.d5818dcfba487p+0, + 
0x1.ea4afa2a490dap+0 + }; + static const double c[] = + { + 0x1.62e42fefa398bp-5, 0x1.ebfbdff84555ap-11, 0x1.c6b08d4ad86d3p-17, + 0x1.3b2ad1b1716a2p-23, 0x1.5d7472718ce9dp-30, 0x1.4a1d7f457ac56p-37 + }; + + if (__glibc_unlikely ((ux << 11) == 0)) + { + uint32_t k = (ux >> 21) - 0x1fc; + if (k <= 0xb) + { + if (k == 0) + return 10.0f - 1.0f; + if (k == 4) + return 100.0f - 1.0f; + if (k == 6) + return 1000.0f - 1.0f; + if (k == 8) + return 10000.0f - 1.0f; + if (k == 9) + return 100000.0f - 1.0f; + if (k == 10) + return 1000000.0f - 1.0f; + if (k == 11) + return 10000000.0f - 1.0f; + } + } + double a = iln10h * z; + double ia = floor (a); + double h = (a - ia) + iln10l * z; + int64_t i = ia; + int64_t j = i & 0xf; + int64_t e = i - j; + e >>= 4; + double s = tb[j]; + s *= asdouble ((e + 0x3ffull) << 52); + double h2 = h * h; + double c0 = c[0] + h * c[1]; + double c2 = c[2] + h * c[3]; + double c4 = c[4] + h * c[5]; + c0 += h2 * (c2 + h2 * c4); + double w = s * h; + return (s - 1.0) + w * c0; + } +} +#ifndef __exp10m1f +libm_alias_float (__exp10m1, exp10m1) +#endif diff --git a/sysdeps/ieee754/flt-32/s_exp2m1f.c b/sysdeps/ieee754/flt-32/s_exp2m1f.c new file mode 100644 index 0000000000..325ffb11b0 --- /dev/null +++ b/sysdeps/ieee754/flt-32/s_exp2m1f.c @@ -0,0 +1,194 @@ +/* Correctly-rounded base-2 exponent function biased by 1 for binary32 value. + +Copyright (c) 2022-2024 Alexei Sibidanov. + +The original version of this file was copied from the CORE-MATH +project (file src/binary32/exp2m1/exp2m1f.c, revision baf5f6b). 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#include <fenv.h> +#include <math.h> +#include "math_config.h" +#include <libm-alias-float.h> +#include <math-narrow-eval.h> +#include <float.h> + +float +__exp2m1f (float x) +{ + double z = x; + uint32_t ux = asuint (x); + uint32_t ax = ux & (~0u >> 1); + if (__glibc_unlikely (ux >= 0xc1c80000u)) + { /* x <= -25 */ + if (ax > (0xffu << 23)) + return x + x; /* nan */ + return (ux == 0xff800000) ? -0x1p+0f : -0x1p+0f + 0x1p-26f; + } + else if (__glibc_unlikely (ax >= 0x43000000u)) + { /* x >= 128 */ + if (ax >= asuint (INFINITY)) + return x + x; /* +Inf or NaN */ + /* exp2m1 (MAX_EXP) should not overflow when rounding towards zero + or towards -Inf. We round FLT_MAX + 2^103 which is in the middle + between FLT_MAX and 2^128 (the next number with unbounded range). 
*/ + float ret = math_narrow_eval (FLT_MAX + 0x1p103f); + if (x == FLT_MAX_EXP && ret == FLT_MAX) + return ret; + return __math_oflowf (0); + } + else if (__glibc_unlikely (ax < 0x3df95f1fu)) + { /* |x| < 8.44e-2/log(2) */ + double z2 = z * z, r; + if (__glibc_unlikely (ax < 0x3d67a4ccu)) + { /* |x| < 3.92e-2/log(2) */ + if (__glibc_unlikely (ax < 0x3caa2feeu)) + { /* |x| < 1.44e-2/log(2) */ + if (__glibc_unlikely (ax < 0x3bac1405u)) + { /* |x| < 3.64e-3/log(2) */ + if (__glibc_unlikely (ax < 0x3a358876u)) + { /* |x| < 4.8e-4/log(2) */ + if (__glibc_unlikely (ax < 0x37d32ef6u)) + { /* |x| < 1.745e-5/log(2) */ + if (__glibc_unlikely (ax < 0x331fdd82u)) + { /* |x| < 2.58e-8/log(2) */ + if (__glibc_unlikely (ax < 0x2538aa3bu)) + /* |x| < 1.60171e-16 */ + r = 0x1.62e42fefa39efp-1; + else + r = 0x1.62e42fefa39fp-1 + + z * 0x1.ebfbdff82c58fp-3; + } + else + { + if (__glibc_unlikely (ux == 0xb3d85005u)) + return -0x1.2bdf76p-24 - 0x1.8p-77; + if (__glibc_unlikely (ux == 0x3338428du)) + return 0x1.fee08ap-26 + 0x1p-80; + static const double c[] = + { + 0x1.62e42fefa39efp-1, 0x1.ebfbdff8548fdp-3, + 0x1.c6b08d704a06dp-5 + }; + r = c[0] + z * (c[1] + z * c[2]); + } + } + else + { + if (__glibc_unlikely (ux == 0x388bca4fu)) + return 0x1.839702p-15 - 0x1.8p-68; + static const double c[] = + { + 0x1.62e42fefa39efp-1, 0x1.ebfbdff82c58fp-3, + 0x1.c6b08dc82b347p-5, 0x1.3b2ab6fbad172p-7 + }; + r = (c[0] + z * c[1]) + z2 * (c[2] + z * c[3]); + } + } + else + { + static const double c[] = + { + 0x1.62e42fefa39efp-1, 0x1.ebfbdff82c068p-3, + 0x1.c6b08d704a6dcp-5, 0x1.3b2ac262c3eedp-7, + 0x1.5d87fe7af779ap-10 + }; + r = (c[0] + z * c[1]) + + z2 * (c[2] + z * (c[3] + z * c[4])); + } + } + else + { + static const double c[] = + { + 0x1.62e42fefa39fp-1, 0x1.ebfbdff82c58dp-3, + 0x1.c6b08d7011d13p-5, 0x1.3b2ab6fbd267dp-7, + 0x1.5d88a81cea49ep-10, 0x1.430912ea9b963p-13 + }; + r = (c[0] + z * c[1]) + + z2 * ((c[2] + z * c[3]) + z2 * (c[4] + z * c[5])); + } + } + else + { + static const double 
c[] = + { + 0x1.62e42fefa39efp-1, 0x1.ebfbdff82c639p-3, + 0x1.c6b08d7049f1cp-5, 0x1.3b2ab6f5243bdp-7, + 0x1.5d87fe80a9e6cp-10, 0x1.430d0b9257fa8p-13, + 0x1.ffcbfc4cf0952p-17 + }; + r = (c[0] + z * c[1]) + + z2 * ((c[2] + z * c[3]) + + z2 * (c[4] + z * (c[5] + z * c[6]))); + } + } + else + { + static const double c[] = + { + 0x1.62e42fefa39efp-1, 0x1.ebfbdff82c591p-3, + 0x1.c6b08d704cf6bp-5, 0x1.3b2ab6fba00cep-7, + 0x1.5d87fdfdaadb4p-10, 0x1.4309137333066p-13, + 0x1.ffe5e90daf7ddp-17, 0x1.62c0220eed731p-20 + }; + r = ((c[0] + z * c[1]) + z2 * (c[2] + z * c[3])) + + (z2 * z2) * ((c[4] + z * c[5]) + z2 * (c[6] + z * c[7])); + } + r *= z; + return r; + } + else + { + static const double c[] = + { + 0x1.62e42fefa398bp-5, 0x1.ebfbdff84555ap-11, + 0x1.c6b08d4ad86d3p-17, 0x1.3b2ad1b1716a2p-23, + 0x1.5d7472718ce9dp-30, 0x1.4a1d7f457ac56p-37 + }; + static const double tb[] = + { + 0x1p+0, 0x1.0b5586cf9890fp+0, 0x1.172b83c7d517bp+0, + 0x1.2387a6e756238p+0, 0x1.306fe0a31b715p+0, 0x1.3dea64c123422p+0, + 0x1.4bfdad5362a27p+0, 0x1.5ab07dd485429p+0, 0x1.6a09e667f3bcdp+0, + 0x1.7a11473eb0187p+0, 0x1.8ace5422aa0dap+0, 0x1.9c49182a3f09p+0, + 0x1.ae89f995ad3adp+0, 0x1.c199bdd85529cp+0, 0x1.d5818dcfba487p+0, + 0x1.ea4afa2a490dap+0 + }; + double a = 16.0 * z; + double ia = floor (a); + double h = a - ia; + double h2 = h * h; + int64_t i = ia, j = i & 0xf, e = i - j; + e >>= 4; + double s = tb[j]; + s *= asdouble ((e + 0x3ffull) << 52); + double c0 = c[0] + h * c[1]; + double c2 = c[2] + h * c[3]; + double c4 = c[4] + h * c[5]; + c0 += h2 * (c2 + h2 * c4); + double w = s * h; + return (s - 1.0) + w * c0; + } +} +#ifndef __exp2m1f +libm_alias_float (__exp2m1, exp2m1) +#endif diff --git a/sysdeps/ieee754/flt-32/s_expm1f.c b/sysdeps/ieee754/flt-32/s_expm1f.c index 35f7b5214a..edd7c9acf8 100644 --- a/sysdeps/ieee754/flt-32/s_expm1f.c +++ b/sysdeps/ieee754/flt-32/s_expm1f.c @@ -1,132 +1,124 @@ -/* s_expm1f.c -- float version of s_expm1.c. 
- */ +/* Correctly-rounded natural exponent function biased by 1 for binary32 value. -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ +Copyright (c) 2022-2024 Alexei Sibidanov. + +This file is part of the CORE-MATH project +project (file src/binary32/expm1/expm1f.c, revision bc385c2). + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+*/ -#include <errno.h> -#include <float.h> #include <math.h> -#include <math-barriers.h> -#include <math_private.h> #include <math-underflow.h> #include <libm-alias-float.h> - -static const float huge = 1.0e+30; -static const float tiny = 1.0e-30; - -static const float -one = 1.0, -o_threshold = 8.8721679688e+01,/* 0x42b17180 */ -ln2_hi = 6.9313812256e-01,/* 0x3f317180 */ -ln2_lo = 9.0580006145e-06,/* 0x3717f7d1 */ -invln2 = 1.4426950216e+00,/* 0x3fb8aa3b */ - /* scaled coefficients related to expm1 */ -Q1 = -3.3333335072e-02, /* 0xbd088889 */ -Q2 = 1.5873016091e-03, /* 0x3ad00d01 */ -Q3 = -7.9365076090e-05, /* 0xb8a670cd */ -Q4 = 4.0082177293e-06, /* 0x36867e54 */ -Q5 = -2.0109921195e-07; /* 0xb457edbb */ +#include "math_config.h" float -__expm1f(float x) +__expm1f (float x) { - float y,hi,lo,c,t,e,hxs,hfx,r1; - int32_t k,xsb; - uint32_t hx; - - GET_FLOAT_WORD(hx,x); - xsb = hx&0x80000000; /* sign bit of x */ - if(xsb==0) y=x; else y= -x; /* y = |x| */ - hx &= 0x7fffffff; /* high word of |x| */ - - /* filter out huge and non-finite argument */ - if(hx >= 0x4195b844) { /* if |x|>=27*ln2 */ - if(hx >= 0x42b17218) { /* if |x|>=88.721... */ - if(hx>0x7f800000) - return x+x; /* NaN */ - if(hx==0x7f800000) - return (xsb==0)? 
x:-1.0;/* exp(+-inf)={inf,-1} */ - if(x > o_threshold) { - __set_errno (ERANGE); - return huge*huge; /* overflow */ - } - } - if(xsb!=0) { /* x < -27*ln2, return -1.0 with inexact */ - math_force_eval(x+tiny);/* raise inexact */ - return tiny-one; /* return -1 */ - } - } - - /* argument reduction */ - if(hx > 0x3eb17218) { /* if |x| > 0.5 ln2 */ - if(hx < 0x3F851592) { /* and |x| < 1.5 ln2 */ - if(xsb==0) - {hi = x - ln2_hi; lo = ln2_lo; k = 1;} - else - {hi = x + ln2_hi; lo = -ln2_lo; k = -1;} - } else { - k = invln2*x+((xsb==0)?(float)0.5:(float)-0.5); - t = k; - hi = x - t*ln2_hi; /* t*ln2_hi is exact here */ - lo = t*ln2_lo; - } - x = hi - lo; - c = (hi-x)-lo; - } - else if(hx < 0x33000000) { /* when |x|<2**-25, return x */ - math_check_force_underflow (x); - t = huge+x; /* return x with inexact flags when x!=0 */ - return x - (t-(huge+x)); + static const double c[] = + { + 1, 0x1.62e42fef4c4e7p-6, 0x1.ebfd1b232f475p-13, 0x1.c6b19384ecd93p-20 + }; + static const double ch[] = + { + 0x1.62e42fefa39efp-6, 0x1.ebfbdff82c58fp-13, 0x1.c6b08d702e0edp-20, + 0x1.3b2ab6fb92e5ep-27, 0x1.5d886e6d54203p-35, 0x1.430976b8ce6efp-43 + }; + static const double td[] = + { + 0x1p+0, 0x1.059b0d3158574p+0, 0x1.0b5586cf9890fp+0, + 0x1.11301d0125b51p+0, 0x1.172b83c7d517bp+0, 0x1.1d4873168b9aap+0, + 0x1.2387a6e756238p+0, 0x1.29e9df51fdee1p+0, 0x1.306fe0a31b715p+0, + 0x1.371a7373aa9cbp+0, 0x1.3dea64c123422p+0, 0x1.44e086061892dp+0, + 0x1.4bfdad5362a27p+0, 0x1.5342b569d4f82p+0, 0x1.5ab07dd485429p+0, + 0x1.6247eb03a5585p+0, 0x1.6a09e667f3bcdp+0, 0x1.71f75e8ec5f74p+0, + 0x1.7a11473eb0187p+0, 0x1.82589994cce13p+0, 0x1.8ace5422aa0dbp+0, + 0x1.93737b0cdc5e5p+0, 0x1.9c49182a3f09p+0, 0x1.a5503b23e255dp+0, + 0x1.ae89f995ad3adp+0, 0x1.b7f76f2fb5e47p+0, 0x1.c199bdd85529cp+0, + 0x1.cb720dcef9069p+0, 0x1.d5818dcfba487p+0, 0x1.dfc97337b9b5fp+0, + 0x1.ea4afa2a490dap+0, 0x1.f50765b6e454p+0 + }; + const double iln2 = 0x1.71547652b82fep+5; + const double big = 0x1.8p52; + double z = x; + uint32_t ux = 
asuint (x); + uint32_t ax = ux << 1; + if (__glibc_likely (ax < 0x7c400000u)) + { /* |x| < 0.15625 */ + if (__glibc_unlikely (ax < 0x676a09e8u)) + { /* |x| < 0x1.6a09e8p-24 */ + if (__glibc_unlikely (ax == 0x0u)) + return x; /* x = +-0 */ + return fmaf (fabsf (x), 0x1p-25f, x); } - else k = 0; - - /* x is now in primary range */ - hfx = (float)0.5*x; - hxs = x*hfx; - r1 = one+hxs*(Q1+hxs*(Q2+hxs*(Q3+hxs*(Q4+hxs*Q5)))); - t = (float)3.0-r1*hfx; - e = hxs*((r1-t)/((float)6.0 - x*t)); - if(k==0) return x - (x*e-hxs); /* c is 0 */ - else { - e = (x*(e-c)-c); - e -= hxs; - if(k== -1) return (float)0.5*(x-e)-(float)0.5; - if(k==1) { - if(x < (float)-0.25) return -(float)2.0*(e-(x+(float)0.5)); - else return one+(float)2.0*(x-e); - } - if (k <= -2 || k>56) { /* suffice to return exp(x)-1 */ - int32_t i; - y = one-(e-x); - GET_FLOAT_WORD(i,y); - SET_FLOAT_WORD(y,i+(k<<23)); /* add k to y's exponent */ - return y-one; - } - t = one; - if(k<23) { - int32_t i; - SET_FLOAT_WORD(t,0x3f800000 - (0x1000000>>k)); /* t=1-2^-k */ - y = t-(e-x); - GET_FLOAT_WORD(i,y); - SET_FLOAT_WORD(y,i+(k<<23)); /* add k to y's exponent */ - } else { - int32_t i; - SET_FLOAT_WORD(t,((0x7f-k)<<23)); /* 2^-k */ - y = x-(e+t); - y += one; - GET_FLOAT_WORD(i,y); - SET_FLOAT_WORD(y,i+(k<<23)); /* add k to y's exponent */ - } + static const double b[] = + { + 0x1.fffffffffffc2p-2, 0x1.55555555555fep-3, 0x1.555555559767fp-5, + 0x1.1111111098dc1p-7, 0x1.6c16bca988aa9p-10, 0x1.a01a07658483fp-13, + 0x1.a05b04d2c3503p-16, 0x1.71de3a960b5e3p-19 + }; + double z2 = z * z, z4 = z2 * z2; + double r = z + z2 + * ((b[0] + z * b[1]) + z2 * (b[2] + z * b[3]) + + z4 * ((b[4] + z * b[5]) + z2 * (b[6] + z * b[7]))); + return r; + } + if (__glibc_unlikely (ax >= 0x8562e430u)) + { /* |x| > 88.72 */ + if (ax > (0xffu << 24)) + return x + x; /* nan */ + if (__glibc_unlikely (ux >> 31)) + { /* x < 0 */ + if (ax == (0xffu << 24)) + return -1.0f; + return -1.0f + 0x1p-26f; } - return y; + if (ax == (0xffu << 24)) + return 
INFINITY; + return __math_oflowf (0); + } + double a = iln2 * z; + double ia = roundeven (a); + double h = a - ia; + double h2 = h * h; + uint64_t u = asuint64 (ia + big); + double c2 = c[2] + h * c[3], c0 = c[0] + h * c[1]; + const uint64_t *tdl = (uint64_t *) ((void *) td); + double sv = asdouble (tdl[u & 0x1f] + ((u >> 5) << 52)); + double r = (c0 + h2 * c2) * sv - 1.0; + float ub = r, lb = r - sv * 0x1.3b3p-33; + if (__glibc_unlikely (ub != lb)) + { + if (__glibc_unlikely (ux > 0xc18aa123u)) /* x < -17.32 */ + return -1.0f + 0x1p-26f; + const double iln2h = 0x1.7154765p+5; + const double iln2l = 0x1.5c17f0bbbe88p-26; + double s = sv; + h = (iln2h * z - ia) + iln2l * z; + h2 = h * h; + double w = s * h; + r = (s - 1) + w + * ((ch[0] + h * ch[1]) + + h2 * ((ch[2] + h * ch[3]) + h2 * (ch[4] + h * ch[5]))); + ub = r; + } + return ub; } libm_alias_float (__expm1, expm1) diff --git a/sysdeps/ieee754/flt-32/s_log10p1f.c b/sysdeps/ieee754/flt-32/s_log10p1f.c new file mode 100644 index 0000000000..64deb1eeda --- /dev/null +++ b/sysdeps/ieee754/flt-32/s_log10p1f.c @@ -0,0 +1,182 @@ +/* Correctly-rounded biased argument base-10 logarithm function for binary32 value. + +Copyright (c) 2022-2023 Alexei Sibidanov. + +This file is part of the CORE-MATH project +(file src/binary32/log10p1/log10p1f.c revision bc385c2). + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#include <stdint.h> +#include <errno.h> +#include <fenv.h> +#include <libm-alias-float.h> +#include "math_config.h" + +static __attribute__ ((noinline)) float +as_special (float x) +{ + uint32_t ux = asuint (x); + if (ux == 0x7f800000u) + return x; /* +inf */ + uint32_t ax = ux << 1; + if (ax == 0x17fu << 24) + /* x+1 = 0.0 */ + return __math_divzerof (1); + if (ax > 0xff000000u) + return x + x; /* nan */ + return __math_invalidf (x); +} + +float +__log10p1f (float x) +{ + static const double tr[] = + { + 0x1p+0, 0x1.f81f82p-1, 0x1.f07c1fp-1, 0x1.e9131acp-1, + 0x1.e1e1e1ep-1, 0x1.dae6077p-1, 0x1.d41d41dp-1, 0x1.cd85689p-1, + 0x1.c71c71cp-1, 0x1.c0e0704p-1, 0x1.bacf915p-1, 0x1.b4e81b5p-1, + 0x1.af286bdp-1, 0x1.a98ef6p-1, 0x1.a41a41ap-1, 0x1.9ec8e95p-1, + 0x1.999999ap-1, 0x1.948b0fdp-1, 0x1.8f9c19p-1, 0x1.8acb90fp-1, + 0x1.8618618p-1, 0x1.8181818p-1, 0x1.7d05f41p-1, 0x1.78a4c81p-1, + 0x1.745d174p-1, 0x1.702e05cp-1, 0x1.6c16c17p-1, 0x1.6816817p-1, + 0x1.642c859p-1, 0x1.605816p-1, 0x1.5c9882cp-1, 0x1.58ed231p-1, + 0x1.5555555p-1, 0x1.51d07ebp-1, 0x1.4e5e0a7p-1, 0x1.4afd6ap-1, + 0x1.47ae148p-1, 0x1.446f865p-1, 0x1.4141414p-1, 0x1.3e22cbdp-1, + 0x1.3b13b14p-1, 0x1.3813814p-1, 0x1.3521cfbp-1, 0x1.323e34ap-1, + 0x1.2f684bep-1, 0x1.2c9fb4ep-1, 0x1.29e412ap-1, 0x1.27350b9p-1, + 0x1.2492492p-1, 0x1.21fb781p-1, 0x1.1f7047ep-1, 0x1.1cf06aep-1, + 0x1.1a7b961p-1, 0x1.1811812p-1, 0x1.15b1e5fp-1, 0x1.135c811p-1, + 0x1.1111111p-1, 0x1.0ecf56cp-1, 0x1.0c9715p-1, 0x1.0a6810ap-1, + 0x1.0842108p-1, 0x1.0624dd3p-1, 
0x1.041041p-1, 0x1.0204081p-1, + 0.5 + }; + static const double tl[] = + { + 0x1.562ec497ef351p-43, 0x1.b9476892ea99cp-8, 0x1.b5e909c959eecp-7, + 0x1.45f4f59ec84fp-6, 0x1.af5f92cbcf2aap-6, 0x1.0ba01a6069052p-5, + 0x1.3ed119b99dd41p-5, 0x1.714834298a088p-5, 0x1.a30a9d98309c1p-5, + 0x1.d41d51266b9d9p-5, 0x1.02428c0f62dfcp-4, 0x1.1a23444eea521p-4, + 0x1.31b30543f2597p-4, 0x1.48f3ed39bd5e7p-4, 0x1.5fe8049a0bd06p-4, + 0x1.769140a6a78eap-4, 0x1.8cf1836c96595p-4, 0x1.a30a9d5551a84p-4, + 0x1.b8de4d1ee5b21p-4, 0x1.ce6e4202c7bc9p-4, 0x1.e3bc1accaa6eap-4, + 0x1.f8c9683b584b7p-4, 0x1.06cbd68ca86ep-3, 0x1.11142f19de3a2p-3, + 0x1.1b3e71fa795fp-3, 0x1.254b4d37a3354p-3, 0x1.2f3b6912cab79p-3, + 0x1.390f6831144f7p-3, 0x1.42c7e7fffb21ap-3, 0x1.4c65808c779aep-3, + 0x1.55e8c507508c7p-3, 0x1.5f52445deb049p-3, 0x1.68a288c3efe72p-3, + 0x1.71da17bdef98bp-3, 0x1.7af9736089c4bp-3, 0x1.84011952a11ebp-3, + 0x1.8cf1837a7d6d1p-3, 0x1.95cb2891e3048p-3, 0x1.9e8e7b0f85651p-3, + 0x1.a73beaa5d9dfep-3, 0x1.afd3e39454544p-3, 0x1.b856cf060c662p-3, + 0x1.c0c5134de0c6dp-3, 0x1.c91f1371bb611p-3, 0x1.d1652ffcd2bc5p-3, + 0x1.d997c6f634ae6p-3, 0x1.e1b733ab8fbadp-3, 0x1.e9c3ceadab4c8p-3, + 0x1.f1bdeec438f77p-3, 0x1.f9a5e7a5f906fp-3, 0x1.00be05ac02564p-2, + 0x1.04a054d81990cp-2, 0x1.087a083594e33p-2, 0x1.0c4b457098b4fp-2, + 0x1.101431aa1f48ap-2, 0x1.13d4f08b98411p-2, 0x1.178da53edaecbp-2, + 0x1.1b3e71e9f9391p-2, 0x1.1ee777defd526p-2, 0x1.2288d7b48d874p-2, + 0x1.2622b0f52dad8p-2, 0x1.29b522a4c594cp-2, 0x1.2d404b0e305b9p-2, + 0x1.30c4478f3f21dp-2, 0x1.34413509f6f4dp-2 + }; + static const union + { + float f; + uint32_t u; + } st[] = + { + { 0x0p+0 }, { 0x1.2p+3 }, { 0x1.8cp+6 }, + { 0x1.f38p+9 }, { 0x1.3878p+13 }, { 0x1.869fp+16 }, + { 0x1.e847ep+19 }, { 0x1.312cfep+23 } + }; + double z = x; + uint32_t ux = asuint (x); + if (__glibc_unlikely (ux >= 0x17fu << 23)) /* x <= -1 */ + return as_special (x); + uint32_t ax = ux & (~0u >> 1); + if (__glibc_unlikely (ax == 0)) + return copysign (0, x); + if 
(__glibc_unlikely (ax >= (0xff << 23))) /* +inf, nan */ + return as_special (x); + int ie = ux; + ie >>= 23; + unsigned int je = ie - 126; + je = (je * 0x9a209a8) >> 29; + if (__glibc_unlikely (ux == st[je].u)) + return je; + + uint64_t tz = asuint64 (z + 1.0); + uint64_t m = tz & (~(uint64_t) 0 >> 12); + int32_t e = (tz >> 52) - 1023, j = ((m + ((int64_t) 1 << 45)) >> 46); + tz = m | ((uint64_t) 0x3ff << 52); + double ix = tr[j], l = tl[j]; + double off = e * 0x1.34413509f79ffp-2 + l; + double v = asdouble (tz) * ix - 1; + + static const double h[] = + { + 0x1.bcb7b150bf6d8p-2, -0x1.bcb7b1738c07ep-3, + 0x1.287de19e795c5p-3, -0x1.bca44edc44bc4p-4 + }; + double v2 = v * v; + double f = (h[0] + v * h[1]) + v2 * (h[2] + v * h[3]); + double r = off + v * f; + float ub = r; + float lb = r + 0x1.5cp-42; + if (__glibc_unlikely (ub != lb)) + { + if (__glibc_unlikely (ax < 0x3d32743eu)) + { /* 0x1.64e87cp-5f */ + if (__glibc_unlikely (ux == 0xa6aba8afu)) + return -0x1.2a33bcp-51f + 0x1p-76f; + if (__glibc_unlikely (ux == 0xaf39b9a7u)) + return -0x1.42a342p-34f + 0x1p-59f; + if (__glibc_unlikely (ux == 0x399a7c00u)) + return 0x1.0c53cap-13f + 0x1p-38f; + z /= 2.0 + z; + double z2 = z * z, z4 = z2 * z2; + static const double c[] = + { + 0x1.bcb7b1526e50fp-1, 0x1.287a76370129dp-2, + 0x1.63c62378fa3dbp-3, 0x1.fca4139a42374p-4 + }; + float ret = z * ((c[0] + z2 * c[1]) + z4 * (c[2] + z2 * c[3])); + if (x != 0.0f && ret == 0.0) + __set_errno (ERANGE); + return ret; + } + if (__glibc_unlikely (ux == 0x7956ba5eu)) + return 0x1.16bebap+5f + 0x1p-20f; + if (__glibc_unlikely (ux == 0xbd86ffb9u)) + return -0x1.e53536p-6f + 0x1p-31f; + static const double c[] = + { + 0x1.bcb7b1526e50ep-2, -0x1.bcb7b1526e53dp-3, 0x1.287a7636f3fa2p-3, + -0x1.bcb7b146a14b3p-4, 0x1.63c627d5219cbp-4, -0x1.2880736c8762dp-4, + 0x1.fc1ecf913961ap-5 + }; + f = v + * ((c[0] + v * c[1]) + + v2 * ((c[2] + v * c[3]) + v2 * (c[4] + v * c[5] + v2 * c[6]))); + f += l - tl[0]; + double el = e * 0x1.34413509f79ffp-2; + r 
= el + f; + ub = r; + } + return ub; +} +libm_alias_float (__log10p1, log10p1) diff --git a/sysdeps/ieee754/flt-32/s_log1pf.c b/sysdeps/ieee754/flt-32/s_log1pf.c index 555f0f82c8..d1686e78aa 100644 --- a/sysdeps/ieee754/flt-32/s_log1pf.c +++ b/sysdeps/ieee754/flt-32/s_log1pf.c @@ -1,116 +1,181 @@ -/* s_log1pf.c -- float version of s_log1p.c. - */ +/* Correctly-rounded biased argument natural logarithm function for binary32 + value. -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ +Copyright (c) 2023, 2024 Alexei Sibidanov. -#include <float.h> -#include <math.h> -#include <math-barriers.h> -#include <math_private.h> -#include <math-underflow.h> -#include <libc-diag.h> +This file is part of the CORE-MATH project +(file src/binary32/log1p/log1pf.c revision bc385c2). 
-static const float -ln2_hi = 6.9313812256e-01, /* 0x3f317180 */ -ln2_lo = 9.0580006145e-06, /* 0x3717f7d1 */ -two25 = 3.355443200e+07, /* 0x4c000000 */ -Lp1 = 6.6666668653e-01, /* 3F2AAAAB */ -Lp2 = 4.0000000596e-01, /* 3ECCCCCD */ -Lp3 = 2.8571429849e-01, /* 3E924925 */ -Lp4 = 2.2222198546e-01, /* 3E638E29 */ -Lp5 = 1.8183572590e-01, /* 3E3A3325 */ -Lp6 = 1.5313838422e-01, /* 3E1CD04F */ -Lp7 = 1.4798198640e-01; /* 3E178897 */ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: -static const float zero = 0.0; +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. -float -__log1pf(float x) +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+*/ + +#include <math.h> +#include <stdint.h> +#include <errno.h> +#include <libm-alias-float.h> +#include "math_config.h" + +static __attribute__ ((noinline)) float +as_special (float x) { - float hfsq,f,c,s,z,R,u; - int32_t k,hx,hu,ax; + uint32_t t = asuint (x); + if (t == 0xbf800000u) + return __math_divzerof (1); + if (t == 0x7f800000u) + return x; /* +inf */ + uint32_t ax = t << 1; + if (ax > 0xff000000u) + return x + x; /* nan */ + return __math_invalidf (0.0f); +} - GET_FLOAT_WORD(hx,x); - ax = hx&0x7fffffff; +float +__log1pf (float x) +{ + static const double x0[] = + { + 0x1.f81f82p-1, 0x1.e9131acp-1, 0x1.dae6077p-1, 0x1.cd85689p-1, + 0x1.c0e0704p-1, 0x1.b4e81b5p-1, 0x1.a98ef6p-1, 0x1.9ec8e95p-1, + 0x1.948b0fdp-1, 0x1.8acb90fp-1, 0x1.8181818p-1, 0x1.78a4c81p-1, + 0x1.702e05cp-1, 0x1.6816817p-1, 0x1.605816p-1, 0x1.58ed231p-1, + 0x1.51d07ebp-1, 0x1.4afd6ap-1, 0x1.446f865p-1, 0x1.3e22cbdp-1, + 0x1.3813814p-1, 0x1.323e34ap-1, 0x1.2c9fb4ep-1, 0x1.27350b9p-1, + 0x1.21fb781p-1, 0x1.1cf06aep-1, 0x1.1811812p-1, 0x1.135c811p-1, + 0x1.0ecf56cp-1, 0x1.0a6810ap-1, 0x1.0624dd3p-1, 0x1.0204081p-1 + }; + static const double lixb[] = + { + 0x1.fc0a8909b4218p-7, 0x1.77458f51aac89p-5, 0x1.341d793afb997p-4, + 0x1.a926d3a5ebd2ap-4, 0x1.0d77e7a8a823dp-3, 0x1.44d2b6c557102p-3, + 0x1.7ab89040accecp-3, 0x1.af3c94ecab3d6p-3, 0x1.e27076d54e6c9p-3, + 0x1.0a324e3888ad5p-2, 0x1.22941fc0c7357p-2, 0x1.3a64c56ae3fdbp-2, + 0x1.51aad874af21fp-2, 0x1.686c81d300eap-2, 0x1.7eaf83c7fa9b5p-2, + 0x1.947941aa610ecp-2, 0x1.a9cec9a3f023bp-2, 0x1.beb4d9ea4156ep-2, + 0x1.d32fe7f35e5c7p-2, 0x1.e7442617b817ap-2, 0x1.faf588dd5ed1p-2, + 0x1.0723e5c635c39p-1, 0x1.109f39d53c99p-1, 0x1.19ee6b38a4668p-1, + 0x1.23130d7f93c3bp-1, 0x1.2c0e9ec9b0b85p-1, 0x1.34e289cb35eccp-1, + 0x1.3d9026ad3d3f3p-1, 0x1.4618bc1eadbbbp-1, 0x1.4e7d8127dd8a9p-1, + 0x1.56bf9d5967092p-1, 0x1.5ee02a926936ep-1 + }; + static const double lix[] = + { + 0x1.fc0a890fc03e4p-7, 0x1.77458f532dcfcp-5, 0x1.341d793bbd1d1p-4, + 
0x1.a926d3a6ad563p-4, 0x1.0d77e7a908e59p-3, 0x1.44d2b6c5b7d1ep-3, + 0x1.7ab890410d909p-3, 0x1.af3c94ed0bff3p-3, 0x1.e27076d5af2e6p-3, + 0x1.0a324e38b90e3p-2, 0x1.22941fc0f7966p-2, 0x1.3a64c56b145eap-2, + 0x1.51aad874df82dp-2, 0x1.686c81d3314afp-2, 0x1.7eaf83c82afc3p-2, + 0x1.947941aa916fbp-2, 0x1.a9cec9a42084ap-2, 0x1.beb4d9ea71b7cp-2, + 0x1.d32fe7f38ebd5p-2, 0x1.e7442617e8788p-2, 0x1.faf588dd8f31fp-2, + 0x1.0723e5c64df4p-1, 0x1.109f39d554c97p-1, 0x1.19ee6b38bc96fp-1, + 0x1.23130d7fabf43p-1, 0x1.2c0e9ec9c8e8cp-1, 0x1.34e289cb4e1d3p-1, + 0x1.3d9026ad556fbp-1, 0x1.4618bc1ec5ec2p-1, 0x1.4e7d8127f5bb1p-1, + 0x1.56bf9d597f399p-1, 0x1.5ee02a9281675p-1 + }; + static const double b[] = + { + 0x1p+0, + -0x1p-1, + 0x1.5555555556f6bp-2, + -0x1.00000000029b9p-2, + 0x1.9999988d176e4p-3, + -0x1.55555418889a7p-3, + 0x1.24adeca50e2bcp-3, + -0x1.001ba33bf57cfp-3 + }; - k = 1; - if (hx < 0x3ed413d7) { /* x < 0.41422 */ - if(ax>=0x3f800000) { /* x <= -1.0 */ - if(x==(float)-1.0) return -two25/zero; /* log1p(-1)=-inf */ - else return (x-x)/(x-x); /* log1p(x<-1)=NaN */ - } - if(ax<0x31000000) { /* |x| < 2**-29 */ - math_force_eval(two25+x); /* raise inexact */ - if (ax<0x24800000) /* |x| < 2**-54 */ - { - math_check_force_underflow (x); - return x; - } - else - return x - x*x*(float)0.5; - } - if(hx>0||hx<=((int32_t)0xbe95f61f)) { - k=0;f=x;hu=1;} /* -0.2929<x<0.41422 */ + double z = x; + uint32_t ux = asuint (x); + uint32_t ax = ux & (~0u >> 1); + if (__glibc_likely (ax < 0x3c880000)) + { + if (__glibc_unlikely (ax < 0x33000000)) + { + if (!ax) + return x; + return fmaf (x, -x, x); } - if (hx >= 0x7f800000) return x+x; - if(k!=0) { - if(hx<0x5a000000) { - u = (float)1.0+x; - GET_FLOAT_WORD(hu,u); - k = (hu>>23)-127; - /* correction term */ - c = (k>0)? 
(float)1.0-(u-x):x-(u-(float)1.0); - c /= u; - } else { - u = x; - GET_FLOAT_WORD(hu,u); - k = (hu>>23)-127; - c = 0; - } - hu &= 0x007fffff; - if(hu<0x3504f7) { - SET_FLOAT_WORD(u,hu|0x3f800000);/* normalize u */ - } else { - k += 1; - SET_FLOAT_WORD(u,hu|0x3f000000); /* normalize u/2 */ - hu = (0x00800000-hu)>>2; + double z2 = z * z, z4 = z2 * z2; + double f = z2 + * ((b[1] + z * b[2]) + z2 * (b[3] + z * b[4]) + + z4 * ((b[5] + z * b[6]) + z2 * b[7])); + double r = z + f; + if (__glibc_unlikely ((asuint64 (r) & 0xfffffffll) == 0)) + r += 0x1p14 * (f + (z - r)); + return r; + } + else + { + if (__glibc_unlikely (ux >= 0xbf800000u || ax >= 0x7f800000)) + return as_special (x); + uint64_t tp = asuint64 (z + 1); + int e = tp >> 52; + uint64_t m52 = tp & (~(uint64_t) 0 >> 12); + unsigned int j = (tp >> (52 - 5)) & 31; + e -= 0x3ff; + double xd = asdouble (m52 | ((uint64_t) 0x3ff << 52)); + z = xd * x0[j] - 1; + static const double c[] = + { + -0x1.3902c33434e7fp-43, 0x1.ffffffe1cbed5p-1, -0x1.ffffff7d1b014p-2, + 0x1.5564e0ed3613ap-2, -0x1.0012232a00d4ap-2 + }; + const double ln2 = 0x1.62e42fefa39efp-1; + double z2 = z * z, + r = (ln2 * e + lixb[j]) + + z * ((c[1] + z * c[2]) + z2 * (c[3] + z * c[4])); + float ub = r; + float lb = r + 2.2e-11; + if (__glibc_unlikely (ub != lb)) + { + double z4 = z2 * z2, + f = z + * ((b[0] + z * b[1]) + z2 * (b[2] + z * b[3]) + + z4 * ((b[4] + z * b[5]) + z2 * (b[6] + z * b[7]))); + const double ln2l = 0x1.7f7d1cf79abcap-20, ln2h = 0x1.62e4p-1; + double Lh = ln2h * e; + double Ll = ln2l * e; + double rl = f + Ll + lix[j]; + double tr = rl + Lh; + if (__glibc_unlikely ((asuint64 (tr) & 0xfffffffll) == 0)) + { + if (x == -0x1.247ab0p-6) + return -0x1.271f0ep-6f - 0x1p-31f; + if (x == -0x1.3a415ep-5) + return -0x1.407112p-5f + 0x1p-30f; + if (x == 0x1.fb035ap-2) + return 0x1.9bddc2p-2f + 0x1p-27f; + tr += 64 * (rl + (Lh - tr)); } - f = u-(float)1.0; - } - hfsq=(float)0.5*f*f; - if(hu==0) { /* |f| < 2**-20 */ - if(f==zero) { - if(k==0) 
return zero; - else {c += k*ln2_lo; return k*ln2_hi+c;} + else if (rl + (Lh - tr) == 0.0) + { + if (x == 0x1.b7fd86p-4) + return 0x1.a1ece2p-4f + 0x1p-29f; + if (x == -0x1.3a415ep-5) + return -0x1.407112p-5f + 0x1p-30f; + if (x == 0x1.43c7e2p-6) + return 0x1.409f80p-6f + 0x1p-31f; } - R = hfsq*(1.0f-0.66666666666666666f*f); - if(k==0) return f-R; else - return k*ln2_hi-((R-(k*ln2_lo+c))-f); + ub = tr; } - s = f/((float)2.0+f); - z = s*s; - R = z*(Lp1+z*(Lp2+z*(Lp3+z*(Lp4+z*(Lp5+z*(Lp6+z*Lp7)))))); - if (k == 0) - return f - (hfsq - s * (hfsq + R)); - else - { - /* With GCC 7 when compiling with -Os the compiler warns - that c might be used uninitialized. This can't be true - because k must be 0 for c to be uninitialized and we - handled that computation earlier without using c. */ - DIAG_PUSH_NEEDS_COMMENT; - DIAG_IGNORE_Os_NEEDS_COMMENT (7, "-Wmaybe-uninitialized"); - return k * ln2_hi - ((hfsq - (s * (hfsq + R) - + (k * ln2_lo + c))) - f); - DIAG_POP_NEEDS_COMMENT; - } + return ub; + } } +libm_alias_float (__log1p, log1p) +strong_alias (__log1pf, __logp1f) +libm_alias_float (__logp1, logp1) diff --git a/sysdeps/ieee754/flt-32/s_log2p1f.c b/sysdeps/ieee754/flt-32/s_log2p1f.c new file mode 100644 index 0000000000..09e77dc08a --- /dev/null +++ b/sysdeps/ieee754/flt-32/s_log2p1f.c @@ -0,0 +1,248 @@ +/* Correctly-rounded biased argument base-2 logarithm function for binary32 + value. + +Copyright (c) 2022-2024 Alexei Sibidanov. + +This file is part of the CORE-MATH project +(file src/binary32/log2p1/log2p1f.c revision bc385c2). 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+*/ + +#include <errno.h> +#include <math.h> +#include <math-underflow.h> +#include <libm-alias-float.h> +#include "math_config.h" + +float +__log2p1f (float x) +{ + static const double ix[] = + { + 0x1p+0, 0x1.fc07f01fcp-1, 0x1.f81f81f82p-1, + 0x1.f44659e4ap-1, 0x1.f07c1f07cp-1, 0x1.ecc07b302p-1, + 0x1.e9131abfp-1, 0x1.e573ac902p-1, 0x1.e1e1e1e1ep-1, + 0x1.de5d6e3f8p-1, 0x1.dae6076bap-1, 0x1.d77b654b8p-1, + 0x1.d41d41d42p-1, 0x1.d0cb58f6ep-1, 0x1.cd8568904p-1, + 0x1.ca4b3055ep-1, 0x1.c71c71c72p-1, 0x1.c3f8f01c4p-1, + 0x1.c0e070382p-1, 0x1.bdd2b8994p-1, 0x1.bacf914c2p-1, + 0x1.b7d6c3ddap-1, 0x1.b4e81b4e8p-1, 0x1.b2036406cp-1, + 0x1.af286bca2p-1, 0x1.ac5701ac6p-1, 0x1.a98ef606ap-1, + 0x1.a6d01a6dp-1, 0x1.a41a41a42p-1, 0x1.a16d3f97ap-1, + 0x1.9ec8e951p-1, 0x1.9c2d14ee4p-1, 0x1.99999999ap-1, + 0x1.970e4f80cp-1, 0x1.948b0fcd6p-1, 0x1.920fb49dp-1, + 0x1.8f9c18f9cp-1, 0x1.8d3018d3p-1, 0x1.8acb90f6cp-1, + 0x1.886e5f0acp-1, 0x1.861861862p-1, 0x1.83c977ab2p-1, + 0x1.818181818p-1, 0x1.7f405fd02p-1, 0x1.7d05f417ep-1, + 0x1.7ad2208ep-1, 0x1.78a4c8178p-1, 0x1.767dce434p-1, + 0x1.745d1745ep-1, 0x1.724287f46p-1, 0x1.702e05c0cp-1, + 0x1.6e1f76b44p-1, 0x1.6c16c16c2p-1, 0x1.6a13cd154p-1, + 0x1.681681682p-1, 0x1.661ec6a52p-1, 0x1.642c8590cp-1, + 0x1.623fa7702p-1, 0x1.605816058p-1, 0x1.5e75bb8dp-1, + 0x1.5c9882b94p-1, 0x1.5ac056b02p-1, 0x1.58ed23082p-1, + 0x1.571ed3c5p-1, 0x1.555555556p-1, 0x1.5390948f4p-1, + 0x1.51d07eae2p-1, 0x1.501501502p-1, 0x1.4e5e0a73p-1, + 0x1.4cab88726p-1, 0x1.4afd6a052p-1, 0x1.49539e3b2p-1, + 0x1.47ae147aep-1, 0x1.460cbc7f6p-1, 0x1.446f86562p-1, + 0x1.42d6625d6p-1, 0x1.414141414p-1, 0x1.3fb013fbp-1, + 0x1.3e22cbce4p-1, 0x1.3c995a47cp-1, 0x1.3b13b13b2p-1, + 0x1.3991c2c18p-1, 0x1.381381382p-1, 0x1.3698df3dep-1, + 0x1.3521cfb2cp-1, 0x1.33ae45b58p-1, 0x1.323e34a2cp-1, + 0x1.30d19013p-1, 0x1.2f684bda2p-1, 0x1.2e025c04cp-1, + 0x1.2c9fb4d82p-1, 0x1.2b404ad02p-1, 0x1.29e4129e4p-1, + 0x1.288b01288p-1, 0x1.27350b882p-1, 0x1.25e22708p-1, + 0x1.24924924ap-1, 
0x1.23456789ap-1, 0x1.21fb78122p-1, + 0x1.20b470c68p-1, 0x1.1f7047dc2p-1, 0x1.1e2ef3b4p-1, + 0x1.1cf06ada2p-1, 0x1.1bb4a4046p-1, 0x1.1a7b9611ap-1, + 0x1.19453808cp-1, 0x1.181181182p-1, 0x1.16e068942p-1, + 0x1.15b1e5f76p-1, 0x1.1485f0e0ap-1, 0x1.135c81136p-1, + 0x1.12358e75ep-1, 0x1.111111112p-1, 0x1.0fef010fep-1, + 0x1.0ecf56be6p-1, 0x1.0db20a89p-1, 0x1.0c9714fbcp-1, + 0x1.0b7e6ec26p-1, 0x1.0a6810a68p-1, 0x1.0953f3902p-1, + 0x1.084210842p-1, 0x1.073260a48p-1, 0x1.0624dd2f2p-1, + 0x1.05197f7d8p-1, 0x1.041041042p-1, 0x1.03091b52p-1, + 0x1.020408102p-1, 0x1.01010101p-1, 0x1p-1 + }; + + static const double lix[] = { + 0x0p+0, -0x1.6fe50b6f1eafap-7, -0x1.6e79685c160d5p-6, + -0x1.11cd1d51955bap-5, -0x1.6bad37591e03p-5, -0x1.c4dfab908ddb5p-5, + -0x1.0eb389fab4795p-4, -0x1.3aa2fdd26ae99p-4, -0x1.663f6faca846bp-4, + -0x1.918a16e4cb157p-4, -0x1.bc84240a78a13p-4, -0x1.e72ec1181cfb1p-4, + -0x1.08c588cd964e4p-3, -0x1.1dcd19759f2e3p-3, -0x1.32ae9e27627c6p-3, + -0x1.476a9f989a58ap-3, -0x1.5c01a39fa6533p-3, -0x1.70742d4eed455p-3, + -0x1.84c2bd02d6434p-3, -0x1.98edd077e9f0ap-3, -0x1.acf5e2db31eeap-3, + -0x1.c0db6cddaa82dp-3, -0x1.d49ee4c33121ap-3, -0x1.e840be751d775p-3, + -0x1.fbc16b9003e0bp-3, -0x1.0790adbae3fcp-2, -0x1.11307dad465b5p-2, + -0x1.1ac05b2924cc5p-2, -0x1.24407ab0cc41p-2, -0x1.2db10fc4ea424p-2, + -0x1.37124cea58697p-2, -0x1.406463b1d455dp-2, -0x1.49a784bcbaa37p-2, + -0x1.52dbdfc4f341dp-2, -0x1.5c01a39ff2c9bp-2, -0x1.6518fe46abaa5p-2, + -0x1.6e221cd9d6933p-2, -0x1.771d2ba7f5791p-2, -0x1.800a56315ee2ap-2, + -0x1.88e9c72df8611p-2, -0x1.91bba891d495fp-2, -0x1.9a8023920fa4dp-2, + -0x1.a33760a7fbca6p-2, -0x1.abe18797d2effp-2, -0x1.b47ebf734b923p-2, + -0x1.bd0f2e9eb2b84p-2, -0x1.c592fad2be1aap-2, -0x1.ce0a4923cf5e6p-2, + -0x1.d6753e02f4ebcp-2, -0x1.ded3fd445afp-2, -0x1.e726aa1e558fep-2, + -0x1.ef6d67325ba38p-2, -0x1.f7a8568c8aea6p-2, -0x1.ffd799a81be87p-2, + 0x1.f804ae8d33c4p-2, 0x1.efec61b04af4ep-2, 0x1.e7df5fe572606p-2, + 0x1.dfdd89d5b0009p-2, 0x1.d7e6c0abbd924p-2, 
0x1.cffae611a74d6p-2, + 0x1.c819dc2d8578cp-2, 0x1.c043859e5bdbcp-2, 0x1.b877c57b47c04p-2, + 0x1.b0b67f4f29a66p-2, 0x1.a8ff97183ed07p-2, 0x1.a152f14293c74p-2, + 0x1.99b072a9289cap-2, 0x1.921800927e284p-2, 0x1.8a8980ac4113p-2, + 0x1.8304d90c2859dp-2, 0x1.7b89f02cbd49ap-2, 0x1.7418aceb84ab1p-2, + 0x1.6cb0f68656c95p-2, 0x1.6552b49993dc2p-2, 0x1.5dfdcf1eacd7bp-2, + 0x1.56b22e6b97c18p-2, 0x1.4f6fbb2ce6943p-2, 0x1.48365e6957b42p-2, + 0x1.4106017c0dbcfp-2, 0x1.39de8e15727d9p-2, 0x1.32bfee37489bcp-2, + 0x1.2baa0c34989c3p-2, 0x1.249cd2b177fd5p-2, 0x1.1d982c9d50468p-2, + 0x1.169c0536677acp-2, 0x1.0fa848045f67bp-2, 0x1.08bce0d9a7c6p-2, + 0x1.01d9bbcf66a2cp-2, 0x1.f5fd8a90e2d85p-3, 0x1.e857d3d3af1e5p-3, + 0x1.dac22d3ec5f4ep-3, 0x1.cd3c712db459ap-3, 0x1.bfc67a7ff3c22p-3, + 0x1.b2602497678f4p-3, 0x1.a5094b555a1f8p-3, 0x1.97c1cb136b96fp-3, + 0x1.8a8980ac8652dp-3, 0x1.7d60496c83f66p-3, 0x1.7046031c7cdafp-3, + 0x1.633a8bf460335p-3, 0x1.563dc2a08b102p-3, 0x1.494f863bbc1dep-3, + 0x1.3c6fb6507a37ep-3, 0x1.2f9e32d5257ecp-3, 0x1.22dadc2a627efp-3, + 0x1.1625931802e49p-3, 0x1.097e38cef9519p-3, 0x1.f9c95dc138295p-4, + 0x1.e0b1ae90505f6p-4, 0x1.c7b528b5fcffap-4, 0x1.aed391abb17a1p-4, + 0x1.960caf9bd35eap-4, 0x1.7d60496e3edebp-4, 0x1.64ce26bf2108ep-4, + 0x1.4c560fe5b573bp-4, 0x1.33f7cde24adfbp-4, 0x1.1bb32a5ed9353p-4, + 0x1.0387efbd3006ep-4, 0x1.d6ebd1f1d0955p-5, 0x1.a6f9c37a8beabp-5, + 0x1.77394c9d6762cp-5, 0x1.47aa07358e1a4p-5, 0x1.184b8e4d490efp-5, + 0x1.d23afc4d95c78p-6, 0x1.743ee8678a7cbp-6, 0x1.16a21e243bf78p-6, + 0x1.72c7ba20c907ep-7, 0x1.720d9c0536e17p-8, 0x0p+0 + }; + + double z = x; + uint32_t ux = asuint (x); + uint32_t ax = ux & (~0u >> 1); + if (__glibc_unlikely (ux >= 0x17fu << 23)) + { /* x <= -1 */ + if (ux == (0x17fu << 23)) + return __math_divzerof (1); + if (ux > (0x1ffu << 23)) + return x + x; /* nan */ + return __math_invalidf (x); + } + else if (__glibc_unlikely (ax >= (0xff << 23))) + { /* +inf, nan */ + if (ax > (0xff << 23)) + return x + x; /* nan */ + return 
INFINITY; + } + else if (__glibc_likely (ax < 0x3cb7aa26u)) + { /* |x| < 0x1.6f544cp-6 */ + double z2 = z * z, z4 = z2 * z2; + if ( __glibc_likely (ax < 0x3b9d9d34u)) + { /* |x| < 0x1.3b3a68p-8 */ + if (__glibc_likely (ax < 0x39638a7eu)) + { /* |x| < 0x1.c714fcp-13 */ + if (__glibc_likely (ax < 0x329c5639u)) + { /* |x| < 0x1.38ac72p-26 */ + static const double c[] = + { + 0x1.71547652b82fep+0, -0x1.71547652b82ffp-1 + }; + return z * (c[0] + z * c[1]); + } + else + { + if (__glibc_unlikely (ux == 0x32ff7045u)) + return 0x1.70851ap-25f - 0x1.8p-80f; + if (__glibc_unlikely (ux == 0xb395efbbu)) + return -0x1.b0a00ap-24f + 0x1p-76f; + if (__glibc_unlikely (ux == 0x35a14df7u)) + return 0x1.d16d2p-20f + 0x1p-72f; + if (__glibc_unlikely (ux == 0x3841cb81u)) + return 0x1.17949ep-14f + 0x1p-67f; + static const double c[] = + { + 0x1.71547652b82fep+0, -0x1.71547652b82fdp-1, + 0x1.ec709ead0c9a7p-2, -0x1.7154773c1cb29p-2 + }; + return z * ((c[0] + z * c[1]) + z2 * (c[2] + z * c[3])); + } + } + else + { + if (__glibc_unlikely (ux == 0xbac9363du)) + return -0x1.2282aap-9f + 0x1p-61f; + static const double c[] = + { + 0x1.71547652b82fep+0, -0x1.71547652b83p-1, + 0x1.ec709dc28f51bp-2, -0x1.7154765157748p-2, + 0x1.2778a510a3682p-2, -0x1.ec745df1551fcp-3 + }; + return z + * ((c[0] + z * c[1]) + z2 * (c[2] + z * c[3]) + + z4 * ((c[4] + z * c[5]))); + } + } + else + { + static const double c[] = + { + 0x1.71547652b82fep+0, -0x1.71547652b82fbp-1, + 0x1.ec709dc3b6a73p-2, -0x1.71547652dc09p-2, + 0x1.2776c1a88901p-2, -0x1.ec7095bd4d208p-3, + 0x1.a66bec7fc8f7p-3, -0x1.71a900fc3f3f9p-3 + }; + return z + * ((c[0] + z * c[1]) + z2 * (c[2] + z * c[3]) + + z4 * ((c[4] + z * c[5]) + z2 * (c[6] + z * c[7]))); + } + } + else + { /* |x| >= 0x1.6f544cp-6 */ + float h, l; + /* With gcc 6.3.0, if we return 0x1.e90026p+4f + 0x1.fp-21 + in the second exceptional case, with rounding up it yields + 0x1.e90026p+4 which is incorrect, thus we use this workaround. 
See + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112367. */ + if (__glibc_unlikely (ux == 0x52928e33u)) + { + h = 0x1.318ffap+5f; + l = 0x1.fp-20f; + return h + l; + } + if (__glibc_unlikely (ux == 0x4ebd09e3u)) + { + h = 0x1.e90026p+4f; + l = 0x1.fp-21; + return h + l; + } + uint64_t tp = asuint64 (z + 1.0); + uint64_t m = tp & (~(uint64_t) 0 >> 12); + int e = (tp >> 52) - 0x3ff; + int j = (m + ((int64_t) 1 << (52 - 8))) >> (52 - 7), k = j > 53; + e += k; + double xd = asdouble (m | (uint64_t) 0x3ff << 52); + z = fma (xd, ix[j], -1.0); + static const double c[] = + { + 0x1.71547652b82fep+0, -0x1.71547652b82ffp-1, 0x1.ec709dc32988bp-2, + -0x1.715476521ec2bp-2, 0x1.277801a1ad904p-2, -0x1.ec731704d6a88p-3 + }; + double z2 = z * z; + double c0 = c[0] + z * c[1]; + double c2 = c[2] + z * c[3]; + double c4 = c[4] + z * c[5]; + c0 += z2 * (c2 + z2 * c4); + return (z * c0 - lix[j]) + e; + } +} +libm_alias_float (__log2p1, log2p1) diff --git a/sysdeps/m68k/m680x0/fpu/math_errf.c b/sysdeps/ieee754/flt-32/w_log1pf.c index 1cc8931700..1cc8931700 100644 --- a/sysdeps/m68k/m680x0/fpu/math_errf.c +++ b/sysdeps/ieee754/flt-32/w_log1pf.c diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h index d10f4a8800..3daf991001 100644 --- a/sysdeps/loongarch/dl-tlsdesc-dynamic.h +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h @@ -19,9 +19,6 @@ <https://www.gnu.org/licenses/>. */ #define FRAME_SIZE (-((-14 * SZREG) & ALMASK)) -#define FRAME_SIZE_LSX (-((-32 * SZVREG) & ALMASK)) -#define FRAME_SIZE_LASX (-((-32 * SZXREG) & ALMASK)) -#define FRAME_SIZE_FLOAT (-((-24 * SZFREG) & ALMASK)) /* Handler for dynamic TLS symbols. Prototype: @@ -155,7 +152,7 @@ Hign address dynamic_block1 <----- dtv5 */ #ifdef USE_LASX #define V_REG_S xvst #define V_REG_L xvld - #define V_SPACE FRAME_SIZE_LASX + #define V_SPACE (-((-32 * SZXREG) & ALMASK)) /* Space for LASX registers. 
*/ #define V_REG(n) $xr##n #define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, \ 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 @@ -163,7 +160,7 @@ Hign address dynamic_block1 <----- dtv5 */ #elif defined USE_LSX #define V_REG_S vst #define V_REG_L vld - #define V_SPACE FRAME_SIZE_LSX + #define V_SPACE (-((-32 * SZVREG) & ALMASK)) /* Space for LSX registers. */ #define V_REG(n) $vr##n #define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, \ 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 @@ -171,7 +168,7 @@ Hign address dynamic_block1 <----- dtv5 */ #else #define V_REG_S fst.d #define V_REG_L fld.d - #define V_SPACE FRAME_SIZE_FLOAT + #define V_SPACE (-((-24 * SZFREG) & ALMASK)) /* Space for FLOAT registers. */ #define V_REG(n) $f##n #define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23 #define V_REGSZ SZFREG diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S index b6cfd6121d..be76c07cce 100644 --- a/sysdeps/loongarch/dl-tlsdesc.S +++ b/sysdeps/loongarch/dl-tlsdesc.S @@ -69,6 +69,12 @@ _dl_tlsdesc_undefweak: #define Lslow Lslow_lasx #include "dl-tlsdesc-dynamic.h" #undef FRAME_SIZE +#undef V_REG_S +#undef V_REG_L +#undef V_SPACE +#undef V_REG +#undef V_REGS +#undef V_REGSZ #undef USE_LASX #undef _dl_tlsdesc_dynamic #undef Lret @@ -80,6 +86,12 @@ _dl_tlsdesc_undefweak: #define Lslow Lslow_lsx #include "dl-tlsdesc-dynamic.h" #undef FRAME_SIZE +#undef V_REG_S +#undef V_REG_L +#undef V_SPACE +#undef V_REG +#undef V_REGS +#undef V_REGSZ #undef USE_LSX #undef _dl_tlsdesc_dynamic #undef Lret diff --git a/sysdeps/loongarch/lp64/libm-test-ulps b/sysdeps/loongarch/lp64/libm-test-ulps index 833dca40e4..ecd9cc5873 100644 --- a/sysdeps/loongarch/lp64/libm-test-ulps +++ b/sysdeps/loongarch/lp64/libm-test-ulps @@ -977,22 +977,18 @@ ldouble: 3 Function: "exp10m1": double: 4 -float: 2 ldouble: 3 Function: "exp10m1_downward": double: 3 -float: 3 ldouble: 6 Function: "exp10m1_towardzero": double: 2 -float: 3 ldouble: 6 Function: 
"exp10m1_upward": double: 5 -float: 3 ldouble: 6 Function: "exp2": @@ -1014,22 +1010,18 @@ ldouble: 2 Function: "exp2m1": double: 2 -float: 2 ldouble: 2 Function: "exp2m1_downward": double: 3 -float: 3 ldouble: 3 Function: "exp2m1_towardzero": double: 3 -float: 2 ldouble: 4 Function: "exp2m1_upward": double: 3 -float: 3 ldouble: 5 Function: "exp_downward": @@ -1046,22 +1038,18 @@ float: 1 Function: "expm1": double: 1 -float: 1 ldouble: 2 Function: "expm1_downward": double: 1 -float: 1 ldouble: 2 Function: "expm1_towardzero": double: 1 -float: 2 ldouble: 4 Function: "expm1_upward": double: 1 -float: 1 ldouble: 3 Function: "gamma": @@ -1207,42 +1195,34 @@ ldouble: 1 Function: "log10p1": double: 2 -float: 2 ldouble: 3 Function: "log10p1_downward": double: 2 -float: 3 ldouble: 4 Function: "log10p1_towardzero": double: 3 -float: 2 ldouble: 3 Function: "log10p1_upward": double: 2 -float: 3 ldouble: 4 Function: "log1p": double: 1 -float: 1 ldouble: 3 Function: "log1p_downward": double: 2 -float: 2 ldouble: 3 Function: "log1p_towardzero": double: 2 -float: 2 ldouble: 3 Function: "log1p_upward": double: 2 -float: 2 ldouble: 2 Function: "log2": @@ -1264,22 +1244,18 @@ ldouble: 1 Function: "log2p1": double: 2 -float: 2 ldouble: 3 Function: "log2p1_downward": double: 2 -float: 2 ldouble: 3 Function: "log2p1_towardzero": double: 2 -float: 2 ldouble: 2 Function: "log2p1_upward": double: 2 -float: 2 ldouble: 3 Function: "log_downward": @@ -1294,22 +1270,18 @@ ldouble: 2 Function: "logp1": double: 1 -float: 1 ldouble: 3 Function: "logp1_downward": double: 2 -float: 2 ldouble: 3 Function: "logp1_towardzero": double: 2 -float: 2 ldouble: 3 Function: "logp1_upward": double: 2 -float: 2 ldouble: 2 Function: "pow": @@ -1432,22 +1404,18 @@ ldouble: 3 Function: "tgamma": double: 9 -float: 8 ldouble: 4 Function: "tgamma_downward": double: 9 -float: 7 ldouble: 5 Function: "tgamma_towardzero": double: 9 -float: 7 ldouble: 5 Function: "tgamma_upward": double: 9 -float: 8 ldouble: 4 Function: 
"y0": diff --git a/sysdeps/loongarch/nptl/thread_pointer.h b/sysdeps/loongarch/nptl/thread_pointer.h new file mode 100644 index 0000000000..cc96062c93 --- /dev/null +++ b/sysdeps/loongarch/nptl/thread_pointer.h @@ -0,0 +1,30 @@ +/* __thread_pointer definition. loongarch version. + Copyright (C) 2021-2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _SYS_THREAD_POINTER_H +#define _SYS_THREAD_POINTER_H + +register void *__thread_self asm ("$tp"); + +static inline void * +__thread_pointer (void) +{ + return __thread_self; +} + +#endif /* _SYS_THREAD_POINTER_H */ diff --git a/sysdeps/loongarch/nptl/tls.h b/sysdeps/loongarch/nptl/tls.h index ac1a92ea7b..675c0d27c1 100644 --- a/sysdeps/loongarch/nptl/tls.h +++ b/sysdeps/loongarch/nptl/tls.h @@ -26,8 +26,8 @@ #include <stddef.h> #include <stdint.h> #include <dl-dtv.h> +#include "thread_pointer.h" -register void *__thread_self asm ("$tp"); #define READ_THREAD_POINTER() ({ __thread_self; }) /* Get system call information. 
*/ diff --git a/sysdeps/m68k/coldfire/fpu/libm-test-ulps b/sysdeps/m68k/coldfire/fpu/libm-test-ulps index 1b25a70e3f..8130d491e8 100644 --- a/sysdeps/m68k/coldfire/fpu/libm-test-ulps +++ b/sysdeps/m68k/coldfire/fpu/libm-test-ulps @@ -131,12 +131,6 @@ Function: "log10": double: 1 float: 2 -Function: "log1p": -float: 1 - -Function: "logp1": -float: 1 - Function: "sincos": double: 1 float: 1 @@ -146,7 +140,6 @@ double: 1 Function: "tgamma": double: 1 -float: 1 Function: "y0": double: 2 diff --git a/sysdeps/m68k/m680x0/fpu/libm-test-ulps b/sysdeps/m68k/m680x0/fpu/libm-test-ulps index 6eacfb6b6f..3964b83b81 100644 --- a/sysdeps/m68k/m680x0/fpu/libm-test-ulps +++ b/sysdeps/m68k/m680x0/fpu/libm-test-ulps @@ -882,19 +882,15 @@ ldouble: 3 Function: "exp10m1": double: 1 -float: 1 Function: "exp10m1_downward": double: 1 -float: 1 Function: "exp10m1_towardzero": double: 1 -float: 1 Function: "exp10m1_upward": double: 3 -float: 1 Function: "exp2": double: 1 @@ -907,19 +903,15 @@ double: 1 Function: "exp2m1": double: 1 -float: 1 Function: "exp2m1_downward": double: 2 -float: 1 Function: "exp2m1_towardzero": double: 2 -float: 1 Function: "exp2m1_upward": double: 1 -float: 1 Function: "exp_upward": double: 1 @@ -1099,19 +1091,15 @@ double: 1 Function: "log2p1": double: 1 -float: 2 Function: "log2p1_downward": double: 1 -float: 2 Function: "log2p1_towardzero": double: 1 -float: 2 Function: "log2p1_upward": double: 1 -float: 2 Function: "logp1": double: 1 @@ -1208,22 +1196,18 @@ float: 1 Function: "tgamma": double: 3 -float: 9 ldouble: 9 Function: "tgamma_downward": double: 3 -float: 9 ldouble: 9 Function: "tgamma_towardzero": double: 3 -float: 9 ldouble: 9 Function: "tgamma_upward": double: 2 -float: 9 ldouble: 9 Function: "y0": diff --git a/sysdeps/m68k/m680x0/fpu/w_log1pf.c b/sysdeps/m68k/m680x0/fpu/w_log1pf.c new file mode 100644 index 0000000000..5f363cc66b --- /dev/null +++ b/sysdeps/m68k/m680x0/fpu/w_log1pf.c @@ -0,0 +1,20 @@ +/* Wrapper for __log1p that handles setting 
errno. + Copyright (C) 2015-2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <math-type-macros-float.h> +#include <w_log1p_template.c> diff --git a/sysdeps/mach/hurd/faccessat.c b/sysdeps/mach/hurd/faccessat.c index 998e31962f..6d3d123abb 100644 --- a/sysdeps/mach/hurd/faccessat.c +++ b/sysdeps/mach/hurd/faccessat.c @@ -185,6 +185,15 @@ __faccessat_common (int fd, const char *file, int type, int at_flags, return errfunc (err); } + /* If all we wanted was to check for a file existing at the path, + then we already got our answer, and we don't need to call + file_check_access (). */ + if (type == F_OK) + { + __mach_port_deallocate (__mach_task_self (), io); + return 0; + } + /* Find out what types of access we are allowed to this file. */ err = __file_check_access (io, &allowed); __mach_port_deallocate (__mach_task_self (), io); diff --git a/sysdeps/mach/hurd/fcntl.c b/sysdeps/mach/hurd/fcntl.c index 9451ed5649..2583f54ef2 100644 --- a/sysdeps/mach/hurd/fcntl.c +++ b/sysdeps/mach/hurd/fcntl.c @@ -148,6 +148,7 @@ __libc_fcntl (int fd, int cmd, ...) cmd = F_SETLKW64; break; default: + va_end (ap); return __hurd_fail (EINVAL); } @@ -204,7 +205,10 @@ __libc_fcntl (int fd, int cmd, ...) 
&& fl->l_start != fl64.l_start) || (sizeof fl->l_len != sizeof fl64.l_len && fl->l_len != fl64.l_len)) - return __hurd_fail (EOVERFLOW); + { + va_end (ap); + return __hurd_fail (EOVERFLOW); + } } result = err ? __hurd_dfail (fd, err) : 0; diff --git a/sysdeps/mach/hurd/x86_64/vm_param.h b/sysdeps/mach/hurd/x86_64/vm_param.h index 460445f49e..a6e63a3a1a 100644 --- a/sysdeps/mach/hurd/x86_64/vm_param.h +++ b/sysdeps/mach/hurd/x86_64/vm_param.h @@ -15,7 +15,7 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#ifndef _X86_86_VM_PARAM_H +#ifndef _X86_64_VM_PARAM_H #define _X86_64_VM_PARAM_H /* Arbitrary start of the brk. This is after usual binary and library mappings. */ diff --git a/sysdeps/microblaze/libm-test-ulps b/sysdeps/microblaze/libm-test-ulps index d3666eb7d4..328e31582b 100644 --- a/sysdeps/microblaze/libm-test-ulps +++ b/sysdeps/microblaze/libm-test-ulps @@ -191,7 +191,6 @@ float: 1 Function: "expm1": double: 1 -float: 1 Function: "gamma": double: 4 @@ -225,7 +224,6 @@ float: 2 Function: "log1p": double: 1 -float: 1 Function: "log2": double: 2 @@ -233,7 +231,6 @@ float: 1 Function: "logp1": double: 1 -float: 1 Function: "pow": float: 1 @@ -257,7 +254,6 @@ float: 2 Function: "tgamma": double: 5 -float: 4 Function: "y0": double: 2 diff --git a/sysdeps/mips/mips32/libm-test-ulps b/sysdeps/mips/mips32/libm-test-ulps index d95230724b..c319e0642c 100644 --- a/sysdeps/mips/mips32/libm-test-ulps +++ b/sysdeps/mips/mips32/libm-test-ulps @@ -779,19 +779,15 @@ float: 1 Function: "exp10m1": double: 3 -float: 2 Function: "exp10m1_downward": double: 3 -float: 3 Function: "exp10m1_towardzero": double: 2 -float: 3 Function: "exp10m1_upward": double: 4 -float: 4 Function: "exp2": double: 1 @@ -811,19 +807,15 @@ float: 1 Function: "exp2m1": double: 2 -float: 2 Function: "exp2m1_downward": double: 3 -float: 3 Function: "exp2m1_towardzero": double: 3 -float: 2 Function: "exp2m1_upward": double: 3 -float: 3 Function: "exp_downward": 
double: 1 @@ -839,19 +831,15 @@ float: 1 Function: "expm1": double: 1 -float: 1 Function: "expm1_downward": double: 1 -float: 1 Function: "expm1_towardzero": double: 1 -float: 2 Function: "expm1_upward": double: 1 -float: 1 Function: "gamma": double: 4 @@ -967,35 +955,27 @@ float: 2 Function: "log10p1": double: 1 -float: 2 Function: "log10p1_downward": double: 2 -float: 3 Function: "log10p1_towardzero": double: 3 -float: 2 Function: "log10p1_upward": double: 2 -float: 3 Function: "log1p": double: 1 -float: 1 Function: "log1p_downward": double: 2 -float: 2 Function: "log1p_towardzero": double: 2 -float: 2 Function: "log1p_upward": double: 2 -float: 2 Function: "log2": double: 2 @@ -1015,22 +995,18 @@ float: 3 Function: "log2p1": double: 1 -float: 2 ldouble: 1 Function: "log2p1_downward": double: 2 -float: 2 ldouble: 2 Function: "log2p1_towardzero": double: 2 -float: 2 ldouble: 2 Function: "log2p1_upward": double: 2 -float: 2 ldouble: 1 Function: "log_downward": @@ -1045,19 +1021,15 @@ float: 2 Function: "logp1": double: 1 -float: 1 Function: "logp1_downward": double: 2 -float: 2 Function: "logp1_towardzero": double: 2 -float: 2 Function: "logp1_upward": double: 2 -float: 2 Function: "pow": double: 1 @@ -1156,19 +1128,15 @@ float: 3 Function: "tgamma": double: 9 -float: 8 Function: "tgamma_downward": double: 9 -float: 7 Function: "tgamma_towardzero": double: 9 -float: 7 Function: "tgamma_upward": double: 9 -float: 8 Function: "y0": double: 3 diff --git a/sysdeps/mips/mips64/libm-test-ulps b/sysdeps/mips/mips64/libm-test-ulps index ce33d9b4b3..365b860c54 100644 --- a/sysdeps/mips/mips64/libm-test-ulps +++ b/sysdeps/mips/mips64/libm-test-ulps @@ -978,22 +978,18 @@ ldouble: 3 Function: "exp10m1": double: 3 -float: 2 ldouble: 3 Function: "exp10m1_downward": double: 3 -float: 3 ldouble: 6 Function: "exp10m1_towardzero": double: 2 -float: 3 ldouble: 6 Function: "exp10m1_upward": double: 4 -float: 4 ldouble: 6 Function: "exp2": @@ -1018,22 +1014,18 @@ ldouble: 2 Function: 
"exp2m1": double: 2 -float: 2 ldouble: 2 Function: "exp2m1_downward": double: 3 -float: 3 ldouble: 3 Function: "exp2m1_towardzero": double: 3 -float: 2 ldouble: 4 Function: "exp2m1_upward": double: 3 -float: 3 ldouble: 5 Function: "exp_downward": @@ -1050,22 +1042,18 @@ float: 1 Function: "expm1": double: 1 -float: 1 ldouble: 2 Function: "expm1_downward": double: 1 -float: 1 ldouble: 2 Function: "expm1_towardzero": double: 1 -float: 2 ldouble: 4 Function: "expm1_upward": double: 1 -float: 1 ldouble: 3 Function: "gamma": @@ -1212,42 +1200,34 @@ ldouble: 1 Function: "log10p1": double: 1 -float: 2 ldouble: 3 Function: "log10p1_downward": double: 2 -float: 3 ldouble: 4 Function: "log10p1_towardzero": double: 3 -float: 2 ldouble: 3 Function: "log10p1_upward": double: 2 -float: 3 ldouble: 4 Function: "log1p": double: 1 -float: 1 ldouble: 3 Function: "log1p_downward": double: 2 -float: 2 ldouble: 3 Function: "log1p_towardzero": double: 2 -float: 2 ldouble: 3 Function: "log1p_upward": double: 2 -float: 2 ldouble: 2 Function: "log2": @@ -1272,22 +1252,18 @@ ldouble: 1 Function: "log2p1": double: 1 -float: 2 ldouble: 3 Function: "log2p1_downward": double: 2 -float: 2 ldouble: 3 Function: "log2p1_towardzero": double: 2 -float: 2 ldouble: 2 Function: "log2p1_upward": double: 2 -float: 2 ldouble: 3 Function: "log_downward": @@ -1305,22 +1281,18 @@ ldouble: 1 Function: "logp1": double: 1 -float: 1 ldouble: 3 Function: "logp1_downward": double: 2 -float: 2 ldouble: 3 Function: "logp1_towardzero": double: 2 -float: 2 ldouble: 3 Function: "logp1_upward": double: 2 -float: 2 ldouble: 2 Function: "pow": @@ -1444,22 +1416,18 @@ ldouble: 3 Function: "tgamma": double: 9 -float: 8 ldouble: 4 Function: "tgamma_downward": double: 9 -float: 7 ldouble: 5 Function: "tgamma_towardzero": double: 9 -float: 7 ldouble: 5 Function: "tgamma_upward": double: 9 -float: 8 ldouble: 4 Function: "y0": diff --git a/sysdeps/nios2/libm-test-ulps b/sysdeps/nios2/libm-test-ulps index c8d1a722f7..5240767c0e 
100644 --- a/sysdeps/nios2/libm-test-ulps +++ b/sysdeps/nios2/libm-test-ulps @@ -197,7 +197,6 @@ float: 1 Function: "expm1": double: 1 -float: 1 Function: "gamma": double: 4 @@ -231,7 +230,6 @@ float: 2 Function: "log1p": double: 1 -float: 1 Function: "log2": double: 2 @@ -239,7 +237,6 @@ float: 1 Function: "logp1": double: 1 -float: 1 Function: "pow": double: 1 @@ -266,7 +263,6 @@ float: 2 Function: "tgamma": double: 9 -float: 8 Function: "y0": double: 3 diff --git a/sysdeps/nptl/_Fork.c b/sysdeps/nptl/_Fork.c index ef199ddbc3..52c90e61e3 100644 --- a/sysdeps/nptl/_Fork.c +++ b/sysdeps/nptl/_Fork.c @@ -17,11 +17,18 @@ <https://www.gnu.org/licenses/>. */ #include <arch-fork.h> +#include <libc-lock.h> #include <pthreadP.h> pid_t _Fork (void) { + /* Block all signals to avoid revealing the inconsistent TCB state + to a signal handler after fork. The abort lock should be AS-safe + to avoid deadlock if _Fork is called from a signal handler. */ + internal_sigset_t original_sigmask; + __abort_lock_rdlock (&original_sigmask); + pid_t pid = arch_fork (&THREAD_SELF->tid); if (pid == 0) { @@ -44,6 +51,8 @@ _Fork (void) INTERNAL_SYSCALL_CALL (set_robust_list, &self->robust_head, sizeof (struct robust_list_head)); } + + __abort_lock_unlock (&original_sigmask); return pid; } libc_hidden_def (_Fork) diff --git a/sysdeps/nptl/pthreadP.h b/sysdeps/nptl/pthreadP.h index 7d9b95e6ac..c2db165052 100644 --- a/sysdeps/nptl/pthreadP.h +++ b/sysdeps/nptl/pthreadP.h @@ -517,6 +517,7 @@ libc_hidden_proto (__pthread_kill) extern int __pthread_cancel (pthread_t th); extern int __pthread_kill_internal (pthread_t threadid, int signo) attribute_hidden; +extern int __pthread_raise_internal (int signo) attribute_hidden; extern void __pthread_exit (void *value) __attribute__ ((__noreturn__)); libc_hidden_proto (__pthread_exit) extern int __pthread_join (pthread_t threadid, void **thread_return); diff --git a/sysdeps/or1k/fpu/libm-test-ulps b/sysdeps/or1k/fpu/libm-test-ulps index 59b9f072f5..9ced4b0052 
100644 --- a/sysdeps/or1k/fpu/libm-test-ulps +++ b/sysdeps/or1k/fpu/libm-test-ulps @@ -804,19 +804,15 @@ float: 1 Function: "expm1": double: 1 -float: 1 Function: "expm1_downward": double: 1 -float: 1 Function: "expm1_towardzero": double: 1 -float: 2 Function: "expm1_upward": double: 1 -float: 1 Function: "gamma": double: 4 @@ -1066,19 +1062,15 @@ float: 3 Function: "tgamma": double: 9 -float: 8 Function: "tgamma_downward": double: 9 -float: 9 Function: "tgamma_towardzero": double: 9 -float: 8 Function: "tgamma_upward": double: 9 -float: 8 Function: "y0": double: 3 diff --git a/sysdeps/or1k/nofpu/libm-test-ulps b/sysdeps/or1k/nofpu/libm-test-ulps index 726855faaa..c7ae0f002b 100644 --- a/sysdeps/or1k/nofpu/libm-test-ulps +++ b/sysdeps/or1k/nofpu/libm-test-ulps @@ -802,19 +802,15 @@ float: 1 Function: "expm1": double: 1 -float: 1 Function: "expm1_downward": double: 1 -float: 1 Function: "expm1_towardzero": double: 1 -float: 2 Function: "expm1_upward": double: 1 -float: 1 Function: "gamma": double: 4 @@ -927,19 +923,15 @@ float: 2 Function: "log1p": double: 1 -float: 1 Function: "log1p_downward": double: 2 -float: 2 Function: "log1p_towardzero": double: 2 -float: 2 Function: "log1p_upward": double: 2 -float: 2 Function: "log2": float: 1 @@ -955,19 +947,15 @@ double: 1 Function: "logp1": double: 1 -float: 1 Function: "logp1_downward": double: 2 -float: 2 Function: "logp1_towardzero": double: 2 -float: 2 Function: "logp1_upward": double: 2 -float: 2 Function: "pow": double: 1 @@ -1064,19 +1052,15 @@ float: 3 Function: "tgamma": double: 9 -float: 8 Function: "tgamma_downward": double: 9 -float: 9 Function: "tgamma_towardzero": double: 9 -float: 8 Function: "tgamma_upward": double: 9 -float: 8 Function: "y0": double: 3 diff --git a/sysdeps/posix/profil.c b/sysdeps/posix/profil.c index 3b3c87e25a..6f4aa00eeb 100644 --- a/sysdeps/posix/profil.c +++ b/sysdeps/posix/profil.c @@ -82,7 +82,7 @@ __profil (u_short *sample_buffer, size_t size, size_t offset, u_int scale) if 
(__setitimer (ITIMER_PROF, &otimer, NULL) < 0) return -1; samples = NULL; - return __sigaction (SIGPROF, &oact, NULL); + return __libc_sigaction (SIGPROF, &oact, NULL); } if (samples) @@ -90,7 +90,7 @@ __profil (u_short *sample_buffer, size_t size, size_t offset, u_int scale) /* Was already turned on. Restore old timer and signal handler first. */ if (__setitimer (ITIMER_PROF, &otimer, NULL) < 0 - || __sigaction (SIGPROF, &oact, NULL) < 0) + || __libc_sigaction (SIGPROF, &oact, NULL) < 0) return -1; } #else @@ -114,7 +114,7 @@ __profil (u_short *sample_buffer, size_t size, size_t offset, u_int scale) #endif act.sa_flags |= SA_RESTART; __sigfillset (&act.sa_mask); - if (__sigaction (SIGPROF, &act, oact_ptr) < 0) + if (__libc_sigaction (SIGPROF, &act, oact_ptr) < 0) return -1; timer.it_value.tv_sec = 0; diff --git a/sysdeps/posix/tempname.c b/sysdeps/posix/tempname.c index c00fe0c181..fc30958a0c 100644 --- a/sysdeps/posix/tempname.c +++ b/sysdeps/posix/tempname.c @@ -117,6 +117,8 @@ random_bits (random_value *r, random_value s) succeed. 
*/ #if !_LIBC *r = mix_random_values (v, clock ()); +#else + *r = v; #endif return false; } diff --git a/sysdeps/powerpc/fpu/libm-test-ulps b/sysdeps/powerpc/fpu/libm-test-ulps index 2b5d2b940d..8d0c18eed1 100644 --- a/sysdeps/powerpc/fpu/libm-test-ulps +++ b/sysdeps/powerpc/fpu/libm-test-ulps @@ -870,6 +870,7 @@ float128: 1 ldouble: 2 Function: Imaginary part of "csin": +float: 1 float128: 1 ldouble: 1 @@ -1186,25 +1187,21 @@ ldouble: 4 Function: "exp10m1": double: 4 -float: 2 float128: 3 ldouble: 4 Function: "exp10m1_downward": double: 3 -float: 3 float128: 6 ldouble: 9 Function: "exp10m1_towardzero": double: 2 -float: 3 float128: 6 ldouble: 9 Function: "exp10m1_upward": double: 5 -float: 3 float128: 6 ldouble: 8 @@ -1231,25 +1228,21 @@ ldouble: 2 Function: "exp2m1": double: 2 -float: 2 float128: 2 ldouble: 3 Function: "exp2m1_downward": double: 3 -float: 3 float128: 3 ldouble: 7 Function: "exp2m1_towardzero": double: 3 -float: 2 float128: 4 ldouble: 8 Function: "exp2m1_upward": double: 3 -float: 3 float128: 5 ldouble: 9 @@ -1270,25 +1263,21 @@ ldouble: 1 Function: "expm1": double: 1 -float: 1 float128: 2 ldouble: 1 Function: "expm1_downward": double: 1 -float: 1 float128: 2 ldouble: 7 Function: "expm1_towardzero": double: 1 -float: 2 float128: 4 ldouble: 6 Function: "expm1_upward": double: 1 -float: 1 float128: 3 ldouble: 6 @@ -1508,49 +1497,41 @@ ldouble: 2 Function: "log10p1": double: 2 -float: 2 float128: 3 ldouble: 3 Function: "log10p1_downward": double: 2 -float: 3 float128: 4 ldouble: 6 Function: "log10p1_towardzero": double: 3 -float: 2 float128: 3 ldouble: 6 Function: "log10p1_upward": double: 2 -float: 3 float128: 4 ldouble: 6 Function: "log1p": double: 1 -float: 1 float128: 3 ldouble: 2 Function: "log1p_downward": double: 1 -float: 2 float128: 3 ldouble: 2 Function: "log1p_towardzero": double: 2 -float: 2 float128: 3 ldouble: 3 Function: "log1p_upward": double: 2 -float: 2 float128: 2 ldouble: 3 @@ -1580,25 +1561,21 @@ ldouble: 4 Function: "log2p1": 
double: 2 -float: 2 float128: 3 ldouble: 2 Function: "log2p1_downward": double: 2 -float: 2 float128: 3 ldouble: 4 Function: "log2p1_towardzero": double: 2 -float: 2 float128: 2 ldouble: 5 Function: "log2p1_upward": double: 2 -float: 2 float128: 3 ldouble: 6 @@ -1620,25 +1597,21 @@ ldouble: 1 Function: "logp1": double: 1 -float: 1 float128: 3 ldouble: 2 Function: "logp1_downward": double: 1 -float: 2 float128: 3 ldouble: 2 Function: "logp1_towardzero": double: 2 -float: 2 float128: 3 ldouble: 3 Function: "logp1_upward": double: 2 -float: 2 float128: 2 ldouble: 3 @@ -1828,25 +1801,21 @@ ldouble: 6 Function: "tgamma": double: 9 -float: 8 float128: 4 ldouble: 5 Function: "tgamma_downward": double: 9 -float: 7 float128: 5 ldouble: 6 Function: "tgamma_towardzero": double: 9 -float: 7 float128: 5 ldouble: 5 Function: "tgamma_upward": double: 9 -float: 8 float128: 4 ldouble: 5 diff --git a/sysdeps/powerpc/nofpu/libm-test-ulps b/sysdeps/powerpc/nofpu/libm-test-ulps index d56327ac33..20036c779c 100644 --- a/sysdeps/powerpc/nofpu/libm-test-ulps +++ b/sysdeps/powerpc/nofpu/libm-test-ulps @@ -987,22 +987,18 @@ ldouble: 4 Function: "exp10m1": double: 3 -float: 2 ldouble: 3 Function: "exp10m1_downward": double: 3 -float: 3 ldouble: 9 Function: "exp10m1_towardzero": double: 2 -float: 3 ldouble: 9 Function: "exp10m1_upward": double: 4 -float: 4 ldouble: 10 Function: "exp2": @@ -1027,22 +1023,18 @@ ldouble: 2 Function: "exp2m1": double: 2 -float: 2 ldouble: 2 Function: "exp2m1_downward": double: 3 -float: 3 ldouble: 7 Function: "exp2m1_towardzero": double: 3 -float: 2 ldouble: 8 Function: "exp2m1_upward": double: 3 -float: 3 ldouble: 9 Function: "exp_downward": @@ -1062,22 +1054,18 @@ ldouble: 1 Function: "expm1": double: 1 -float: 1 ldouble: 1 Function: "expm1_downward": double: 1 -float: 1 ldouble: 5 Function: "expm1_towardzero": double: 1 -float: 2 ldouble: 5 Function: "expm1_upward": double: 1 -float: 1 ldouble: 6 Function: "fma": @@ -1269,42 +1257,34 @@ ldouble: 2 Function: 
"log10p1": double: 1 -float: 2 ldouble: 3 Function: "log10p1_downward": double: 2 -float: 3 ldouble: 6 Function: "log10p1_towardzero": double: 3 -float: 2 ldouble: 6 Function: "log10p1_upward": double: 2 -float: 3 ldouble: 6 Function: "log1p": double: 1 -float: 1 ldouble: 2 Function: "log1p_downward": double: 2 -float: 2 ldouble: 2 Function: "log1p_towardzero": double: 2 -float: 2 ldouble: 2 Function: "log1p_upward": double: 2 -float: 2 ldouble: 2 Function: "log2": @@ -1329,22 +1309,18 @@ ldouble: 4 Function: "log2p1": double: 1 -float: 2 ldouble: 2 Function: "log2p1_downward": double: 2 -float: 2 ldouble: 4 Function: "log2p1_towardzero": double: 2 -float: 2 ldouble: 5 Function: "log2p1_upward": double: 2 -float: 2 ldouble: 6 Function: "log_downward": @@ -1362,22 +1338,18 @@ ldouble: 1 Function: "logp1": double: 1 -float: 1 ldouble: 2 Function: "logp1_downward": double: 2 -float: 2 ldouble: 2 Function: "logp1_towardzero": double: 2 -float: 2 ldouble: 2 Function: "logp1_upward": double: 2 -float: 2 ldouble: 2 Function: "mul_downward_ldouble": @@ -1560,22 +1532,18 @@ ldouble: 6 Function: "tgamma": double: 9 -float: 8 ldouble: 5 Function: "tgamma_downward": double: 9 -float: 7 ldouble: 5 Function: "tgamma_towardzero": double: 9 -float: 7 ldouble: 5 Function: "tgamma_upward": double: 9 -float: 8 ldouble: 4 Function: "y0": diff --git a/sysdeps/powerpc/powerpc64/le/Makefile b/sysdeps/powerpc/powerpc64/le/Makefile index 9d568d4f44..b77775cf95 100644 --- a/sysdeps/powerpc/powerpc64/le/Makefile +++ b/sysdeps/powerpc/powerpc64/le/Makefile @@ -129,6 +129,10 @@ CFLAGS-tst-strtod-round.c += $(type-float128-CFLAGS) CFLAGS-tst-wcstod-round.c += $(type-float128-CFLAGS) CFLAGS-tst-strtod-nan-locale.c += $(type-float128-CFLAGS) CFLAGS-tst-wcstod-nan-locale.c += $(type-float128-CFLAGS) +CFLAGS-tst-strtod1i.c += $(type-float128-CFLAGS) +CFLAGS-tst-strtod3.c += $(type-float128-CFLAGS) +CFLAGS-tst-strtod4.c += $(type-float128-CFLAGS) +CFLAGS-tst-strtod5i.c += $(type-float128-CFLAGS) 
CFLAGS-tst-strtod6.c += $(type-float128-CFLAGS) CFLAGS-tst-strfrom.c += $(type-float128-CFLAGS) CFLAGS-tst-strfrom-locale.c += $(type-float128-CFLAGS) diff --git a/sysdeps/powerpc/powerpc64/le/power9/strncpy.S b/sysdeps/powerpc/powerpc64/le/power9/strncpy.S index eccb2ffbb0..58139ad9e8 100644 --- a/sysdeps/powerpc/powerpc64/le/power9/strncpy.S +++ b/sysdeps/powerpc/powerpc64/le/power9/strncpy.S @@ -43,7 +43,7 @@ # endif #endif -#define FRAMESIZE (FRAME_MIN_SIZE+8) +#define FRAMESIZE (FRAME_MIN_SIZE+16) /* Implements the function diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcat-power7.c b/sysdeps/powerpc/powerpc64/multiarch/strcat-power7.c index d453b6535d..6eda080fe2 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strcat-power7.c +++ b/sysdeps/powerpc/powerpc64/multiarch/strcat-power7.c @@ -8,12 +8,12 @@ The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/ >. */ + <https://www.gnu.org/licenses/>. */ #include <string.h> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcat-power8.c b/sysdeps/powerpc/powerpc64/multiarch/strcat-power8.c index 0a77812b38..b9cbce2d92 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strcat-power8.c +++ b/sysdeps/powerpc/powerpc64/multiarch/strcat-power8.c @@ -8,12 +8,12 @@ The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/ >. */ + <https://www.gnu.org/licenses/>. */ #include <string.h> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcat-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strcat-ppc64.c index fd3b08d7e7..67f7d4052c 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strcat-ppc64.c +++ b/sysdeps/powerpc/powerpc64/multiarch/strcat-ppc64.c @@ -8,12 +8,12 @@ The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/ >. */ + <https://www.gnu.org/licenses/>. */ #include <string.h> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncat-power7.c b/sysdeps/powerpc/powerpc64/multiarch/strncat-power7.c index a848fcd8b4..f3bccf297c 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strncat-power7.c +++ b/sysdeps/powerpc/powerpc64/multiarch/strncat-power7.c @@ -8,12 +8,12 @@ The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/ >. */ + <https://www.gnu.org/licenses/>. 
*/ #include <string.h> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncat-power8.c b/sysdeps/powerpc/powerpc64/multiarch/strncat-power8.c index a31b918879..3b0ea87ec2 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strncat-power8.c +++ b/sysdeps/powerpc/powerpc64/multiarch/strncat-power8.c @@ -13,7 +13,7 @@ You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/ >. */ + <https://www.gnu.org/licenses/>. */ #include <string.h> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncat-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strncat-ppc64.c index 7f4c9f1d4b..b467fdecc1 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strncat-ppc64.c +++ b/sysdeps/powerpc/powerpc64/multiarch/strncat-ppc64.c @@ -8,12 +8,12 @@ The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/ >. */ + <https://www.gnu.org/licenses/>. */ #include <string.h> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncpy.c b/sysdeps/powerpc/powerpc64/multiarch/strncpy.c index af855e6a5c..6e4e2ea14b 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strncpy.c +++ b/sysdeps/powerpc/powerpc64/multiarch/strncpy.c @@ -9,12 +9,12 @@ The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/ >. */ + <https://www.gnu.org/licenses/>. */ /* Define multiple versions only for definition in libc. */ #if IS_IN (libc) diff --git a/sysdeps/pthread/Makefile b/sysdeps/pthread/Makefile index 04ea56559e..4c1dc04b20 100644 --- a/sysdeps/pthread/Makefile +++ b/sysdeps/pthread/Makefile @@ -174,6 +174,7 @@ tests += \ tst-join13 \ tst-join14 \ tst-join15 \ + tst-join16 \ tst-key1 \ tst-key2 \ tst-key3 \ @@ -215,6 +216,7 @@ tests += \ tst-pt-vfork2 \ tst-pthread-exit-signal \ tst-pthread-mutexattr \ + tst-pthread-mutexattr-2 \ tst-pthread-raise-blocked-self \ tst-pthread-setuid-loop \ tst-pthread_cancel-exited \ @@ -291,6 +293,7 @@ tests-time64 += \ tst-cnd-timedwait-time64 \ tst-cond11-time64 \ tst-join14-time64 \ + tst-join16-time64 \ tst-mtx-timedlock-time64 \ tst-rwlock14-time64 \ tst-sem5-time64 \ diff --git a/sysdeps/pthread/tst-join16-time64.c b/sysdeps/pthread/tst-join16-time64.c new file mode 100644 index 0000000000..730cc56563 --- /dev/null +++ b/sysdeps/pthread/tst-join16-time64.c @@ -0,0 +1 @@ +#include "tst-join16.c" diff --git a/sysdeps/pthread/tst-join16.c b/sysdeps/pthread/tst-join16.c new file mode 100644 index 0000000000..8bf37b5e42 --- /dev/null +++ b/sysdeps/pthread/tst-join16.c @@ -0,0 +1,87 @@ +/* Test pthread_timedjoin_np and pthread_clockjoin_np with an invalid timeout. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <errno.h> +#include <pthread.h> +#include <stdio.h> +#include <time.h> +#include <support/check.h> +#include <support/xthread.h> +#include <support/xtime.h> + + +#define CLOCK_USE_TIMEDJOIN (-1) + +static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; + +static void * +tf (void *arg) +{ + xpthread_mutex_lock (&lock); + xpthread_mutex_unlock (&lock); + return (void *) 42l; +} + +static int +do_test_clock (clockid_t clockid) +{ + const clockid_t clockid_for_get = + (clockid == CLOCK_USE_TIMEDJOIN) ? CLOCK_REALTIME : clockid; + + xpthread_mutex_lock (&lock); + pthread_t th = xpthread_create (NULL, tf, NULL); + + void *status; + int ret; + struct timespec timeout = xclock_now (clockid_for_get); + timeout.tv_sec += 2; + timeout.tv_nsec = -1; + if (clockid == CLOCK_USE_TIMEDJOIN) + ret = pthread_timedjoin_np (th, &status, &timeout); + else + ret = pthread_clockjoin_np (th, &status, clockid, &timeout); + TEST_COMPARE (ret, EINVAL); + timeout.tv_nsec = 1000000000; + if (clockid == CLOCK_USE_TIMEDJOIN) + ret = pthread_timedjoin_np (th, &status, &timeout); + else + ret = pthread_clockjoin_np (th, &status, clockid, &timeout); + TEST_COMPARE (ret, EINVAL); + xpthread_mutex_unlock (&lock); + timeout.tv_nsec = 0; + ret = pthread_join (th, &status); + TEST_COMPARE (ret, 0); + if (status != (void *) 42l) + FAIL_EXIT1 ("return value %p, expected %p\n", status, (void *) 42l); + + return 0; +} + +static int +do_test (void) +{ + puts ("testing pthread_timedjoin_np"); + do_test_clock (CLOCK_USE_TIMEDJOIN); + puts ("testing CLOCK_REALTIME"); + do_test_clock 
(CLOCK_REALTIME); + puts ("testing CLOCK_MONOTONIC"); + do_test_clock (CLOCK_MONOTONIC); + return 0; +} + +#include <support/test-driver.c> diff --git a/sysdeps/pthread/tst-pthread-mutexattr-2.c b/sysdeps/pthread/tst-pthread-mutexattr-2.c new file mode 100644 index 0000000000..ec92e55322 --- /dev/null +++ b/sysdeps/pthread/tst-pthread-mutexattr-2.c @@ -0,0 +1,60 @@ +/* Test pthread_mutexattr_gettype and pthread_mutexattr_settype with + valid and invalid kinds. + + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <errno.h> +#include <stdio.h> +#include <pthread.h> + +#include <support/check.h> +#include <support/xthread.h> + + +static void +do_test_kind (int test_kind, const char *name) +{ + printf ("testing %s\n", name); + pthread_mutexattr_t attr; + xpthread_mutexattr_init (&attr); + int ret = pthread_mutexattr_settype (&attr, test_kind); + TEST_COMPARE (ret, 0); + int kind = 123456789; + ret = pthread_mutexattr_gettype (&attr, &kind); + TEST_COMPARE (ret, 0); + TEST_COMPARE (kind, test_kind); +} + +int +do_test (void) +{ + do_test_kind (PTHREAD_MUTEX_NORMAL, "PTHREAD_MUTEX_NORMAL"); + do_test_kind (PTHREAD_MUTEX_ERRORCHECK, "PTHREAD_MUTEX_ERRORCHECK"); + do_test_kind (PTHREAD_MUTEX_RECURSIVE, "PTHREAD_MUTEX_RECURSIVE"); + do_test_kind (PTHREAD_MUTEX_DEFAULT, "PTHREAD_MUTEX_DEFAULT"); + + /* Also test an invalid kind. */ + pthread_mutexattr_t attr; + xpthread_mutexattr_init (&attr); + int ret = pthread_mutexattr_settype (&attr, 123456789); + TEST_COMPARE (ret, EINVAL); + + return 0; +} + +#include <support/test-driver.c> diff --git a/sysdeps/riscv/nofpu/libm-test-ulps b/sysdeps/riscv/nofpu/libm-test-ulps index 9ad64d1d85..cccc864a7a 100644 --- a/sysdeps/riscv/nofpu/libm-test-ulps +++ b/sysdeps/riscv/nofpu/libm-test-ulps @@ -974,7 +974,6 @@ ldouble: 3 Function: "exp10m1": double: 2 -float: 1 ldouble: 1 Function: "exp2": @@ -996,7 +995,6 @@ ldouble: 2 Function: "exp2m1": double: 1 -float: 1 ldouble: 1 Function: "exp_downward": @@ -1013,22 +1011,18 @@ float: 1 Function: "expm1": double: 1 -float: 1 ldouble: 2 Function: "expm1_downward": double: 1 -float: 1 ldouble: 2 Function: "expm1_towardzero": double: 1 -float: 2 ldouble: 4 Function: "expm1_upward": double: 1 -float: 1 ldouble: 3 Function: "gamma": @@ -1173,27 +1167,22 @@ ldouble: 1 Function: "log10p1": double: 1 -float: 1 ldouble: 3 Function: "log1p": double: 1 -float: 1 ldouble: 3 Function: "log1p_downward": double: 1 -float: 2 ldouble: 3 Function: "log1p_towardzero": double: 2 -float: 2 ldouble: 3 
Function: "log1p_upward": double: 2 -float: 2 ldouble: 2 Function: "log2": @@ -1215,7 +1204,6 @@ ldouble: 1 Function: "log2p1": double: 1 -float: 1 ldouble: 3 Function: "log_downward": @@ -1230,22 +1218,18 @@ ldouble: 1 Function: "logp1": double: 1 -float: 1 ldouble: 3 Function: "logp1_downward": double: 1 -float: 2 ldouble: 3 Function: "logp1_towardzero": double: 2 -float: 2 ldouble: 3 Function: "logp1_upward": double: 2 -float: 2 ldouble: 2 Function: "pow": @@ -1361,22 +1345,18 @@ ldouble: 3 Function: "tgamma": double: 9 -float: 8 ldouble: 4 Function: "tgamma_downward": double: 5 -float: 5 ldouble: 5 Function: "tgamma_towardzero": double: 5 -float: 4 ldouble: 5 Function: "tgamma_upward": double: 4 -float: 4 ldouble: 4 Function: "y0": diff --git a/sysdeps/riscv/rvd/libm-test-ulps b/sysdeps/riscv/rvd/libm-test-ulps index 233186f29a..14fc7633af 100644 --- a/sysdeps/riscv/rvd/libm-test-ulps +++ b/sysdeps/riscv/rvd/libm-test-ulps @@ -977,22 +977,18 @@ ldouble: 3 Function: "exp10m1": double: 4 -float: 2 ldouble: 3 Function: "exp10m1_downward": double: 3 -float: 3 ldouble: 6 Function: "exp10m1_towardzero": double: 2 -float: 3 ldouble: 6 Function: "exp10m1_upward": double: 5 -float: 3 ldouble: 6 Function: "exp2": @@ -1014,22 +1010,18 @@ ldouble: 2 Function: "exp2m1": double: 2 -float: 2 ldouble: 2 Function: "exp2m1_downward": double: 3 -float: 3 ldouble: 3 Function: "exp2m1_towardzero": double: 3 -float: 2 ldouble: 4 Function: "exp2m1_upward": double: 3 -float: 3 ldouble: 5 Function: "exp_downward": @@ -1046,22 +1038,18 @@ float: 1 Function: "expm1": double: 1 -float: 1 ldouble: 2 Function: "expm1_downward": double: 1 -float: 1 ldouble: 2 Function: "expm1_towardzero": double: 1 -float: 2 ldouble: 4 Function: "expm1_upward": double: 1 -float: 1 ldouble: 3 Function: "gamma": @@ -1207,42 +1195,34 @@ ldouble: 1 Function: "log10p1": double: 2 -float: 2 ldouble: 3 Function: "log10p1_downward": double: 2 -float: 3 ldouble: 4 Function: "log10p1_towardzero": double: 3 -float: 2 
ldouble: 3 Function: "log10p1_upward": double: 2 -float: 3 ldouble: 4 Function: "log1p": double: 1 -float: 1 ldouble: 3 Function: "log1p_downward": double: 1 -float: 2 ldouble: 3 Function: "log1p_towardzero": double: 2 -float: 2 ldouble: 3 Function: "log1p_upward": double: 2 -float: 2 ldouble: 2 Function: "log2": @@ -1264,22 +1244,18 @@ ldouble: 1 Function: "log2p1": double: 2 -float: 2 ldouble: 3 Function: "log2p1_downward": double: 2 -float: 2 ldouble: 3 Function: "log2p1_towardzero": double: 2 -float: 2 ldouble: 2 Function: "log2p1_upward": double: 2 -float: 2 ldouble: 3 Function: "log_downward": @@ -1294,22 +1270,18 @@ ldouble: 2 Function: "logp1": double: 1 -float: 1 ldouble: 3 Function: "logp1_downward": double: 1 -float: 2 ldouble: 3 Function: "logp1_towardzero": double: 2 -float: 2 ldouble: 3 Function: "logp1_upward": double: 2 -float: 2 ldouble: 2 Function: "pow": @@ -1431,22 +1403,18 @@ ldouble: 3 Function: "tgamma": double: 9 -float: 8 ldouble: 4 Function: "tgamma_downward": double: 9 -float: 7 ldouble: 5 Function: "tgamma_towardzero": double: 9 -float: 7 ldouble: 5 Function: "tgamma_upward": double: 8 -float: 8 ldouble: 4 Function: "y0": diff --git a/sysdeps/riscv/start.S b/sysdeps/riscv/start.S index ede186ef23..fd6f1ca038 100644 --- a/sysdeps/riscv/start.S +++ b/sysdeps/riscv/start.S @@ -87,6 +87,7 @@ load_gp: ret .section .preinit_array,"aw" + .align PTRLOG .dc.a load_gp /* Define a symbol for the first piece of initialized data. 
*/ diff --git a/sysdeps/s390/fpu/libm-test-ulps b/sysdeps/s390/fpu/libm-test-ulps index e10b5c69ae..a25bb505b3 100644 --- a/sysdeps/s390/fpu/libm-test-ulps +++ b/sysdeps/s390/fpu/libm-test-ulps @@ -228,7 +228,7 @@ ldouble: 3 Function: Real part of "cacosh_upward": double: 4 -float: 3 +float: 4 ldouble: 6 Function: Imaginary part of "cacosh_upward": @@ -720,6 +720,7 @@ float: 1 ldouble: 1 Function: Imaginary part of "csin": +float: 1 ldouble: 1 Function: Real part of "csin_downward": @@ -978,22 +979,18 @@ ldouble: 3 Function: "exp10m1": double: 4 -float: 2 ldouble: 3 Function: "exp10m1_downward": double: 3 -float: 3 ldouble: 6 Function: "exp10m1_towardzero": double: 2 -float: 3 ldouble: 6 Function: "exp10m1_upward": double: 5 -float: 3 ldouble: 6 Function: "exp2": @@ -1015,22 +1012,18 @@ ldouble: 2 Function: "exp2m1": double: 2 -float: 2 ldouble: 2 Function: "exp2m1_downward": double: 3 -float: 3 ldouble: 3 Function: "exp2m1_towardzero": double: 3 -float: 2 ldouble: 4 Function: "exp2m1_upward": double: 3 -float: 3 ldouble: 5 Function: "exp_downward": @@ -1047,22 +1040,18 @@ float: 1 Function: "expm1": double: 1 -float: 1 ldouble: 2 Function: "expm1_downward": double: 1 -float: 1 ldouble: 2 Function: "expm1_towardzero": double: 1 -float: 2 ldouble: 4 Function: "expm1_upward": double: 1 -float: 1 ldouble: 3 Function: "gamma": @@ -1208,42 +1197,34 @@ ldouble: 1 Function: "log10p1": double: 2 -float: 2 ldouble: 3 Function: "log10p1_downward": double: 2 -float: 3 ldouble: 4 Function: "log10p1_towardzero": double: 3 -float: 2 ldouble: 3 Function: "log10p1_upward": double: 2 -float: 3 ldouble: 4 Function: "log1p": double: 1 -float: 1 ldouble: 3 Function: "log1p_downward": double: 1 -float: 2 ldouble: 3 Function: "log1p_towardzero": double: 2 -float: 2 ldouble: 3 Function: "log1p_upward": double: 2 -float: 2 ldouble: 2 Function: "log2": @@ -1263,22 +1244,18 @@ ldouble: 1 Function: "log2p1": double: 2 -float: 2 ldouble: 3 Function: "log2p1_downward": double: 2 -float: 2 
ldouble: 3 Function: "log2p1_towardzero": double: 2 -float: 2 ldouble: 2 Function: "log2p1_upward": double: 2 -float: 2 ldouble: 3 Function: "log_downward": @@ -1292,22 +1269,18 @@ ldouble: 1 Function: "logp1": double: 1 -float: 1 ldouble: 3 Function: "logp1_downward": double: 1 -float: 2 ldouble: 3 Function: "logp1_towardzero": double: 2 -float: 2 ldouble: 3 Function: "logp1_upward": double: 2 -float: 2 ldouble: 2 Function: "pow": @@ -1429,22 +1402,18 @@ ldouble: 3 Function: "tgamma": double: 9 -float: 8 ldouble: 4 Function: "tgamma_downward": double: 9 -float: 7 ldouble: 5 Function: "tgamma_towardzero": double: 9 -float: 7 ldouble: 5 Function: "tgamma_upward": double: 9 -float: 8 ldouble: 4 Function: "y0": diff --git a/sysdeps/s390/utf16-utf32-z9.c b/sysdeps/s390/utf16-utf32-z9.c index 51dd979607..9ac8a82df8 100644 --- a/sysdeps/s390/utf16-utf32-z9.c +++ b/sysdeps/s390/utf16-utf32-z9.c @@ -397,8 +397,8 @@ gconv_end (struct __gconv_step *data) { \ register const unsigned char* pInput asm ("8") = inptr; \ register size_t inlen asm ("9") = inend - inptr; \ - register unsigned char* pOutput asm ("10") = outptr; \ - register size_t outlen asm ("11") = outend - outptr; \ + register unsigned char* pOutput asm ("6") = outptr; \ + register size_t outlen asm ("7") = outend - outptr; \ unsigned long tmp, tmp2, tmp3; \ asm volatile (".machine push\n\t" \ ".machine \"z13\"\n\t" \ @@ -707,8 +707,8 @@ gconv_end (struct __gconv_step *data) { \ register const unsigned char* pInput asm ("8") = inptr; \ register size_t inlen asm ("9") = inend - inptr; \ - register unsigned char* pOutput asm ("10") = outptr; \ - register size_t outlen asm ("11") = outend - outptr; \ + register unsigned char* pOutput asm ("6") = outptr; \ + register size_t outlen asm ("7") = outend - outptr; \ unsigned long tmp, tmp2, tmp3; \ asm volatile (".machine push\n\t" \ ".machine \"z13\"\n\t" \ diff --git a/sysdeps/s390/utf8-utf16-z9.c b/sysdeps/s390/utf8-utf16-z9.c index 421747f058..d19fc3c97b 100644 --- 
a/sysdeps/s390/utf8-utf16-z9.c +++ b/sysdeps/s390/utf8-utf16-z9.c @@ -170,8 +170,8 @@ gconv_end (struct __gconv_step *data) { \ register const unsigned char* pInput __asm__ ("8") = inptr; \ register size_t inlen __asm__ ("9") = inend - inptr; \ - register unsigned char* pOutput __asm__ ("10") = outptr; \ - register size_t outlen __asm__("11") = outend - outptr; \ + register unsigned char* pOutput __asm__ ("6") = outptr; \ + register size_t outlen __asm__("7") = outend - outptr; \ unsigned long cc = 0; \ \ __asm__ __volatile__ (".machine push \n\t" \ @@ -242,8 +242,8 @@ gconv_end (struct __gconv_step *data) { \ register const unsigned char* pInput asm ("8") = inptr; \ register size_t inlen asm ("9") = inend - inptr; \ - register unsigned char* pOutput asm ("10") = outptr; \ - register size_t outlen asm("11") = outend - outptr; \ + register unsigned char* pOutput asm ("6") = outptr; \ + register size_t outlen asm("7") = outend - outptr; \ unsigned long tmp, tmp2, tmp3; \ asm volatile (".machine push\n\t" \ ".machine \"z13\"\n\t" \ @@ -818,8 +818,8 @@ gconv_end (struct __gconv_step *data) { \ register const unsigned char* pInput asm ("8") = inptr; \ register size_t inlen asm ("9") = inend - inptr; \ - register unsigned char* pOutput asm ("10") = outptr; \ - register size_t outlen asm ("11") = outend - outptr; \ + register unsigned char* pOutput asm ("6") = outptr; \ + register size_t outlen asm ("7") = outend - outptr; \ unsigned long tmp, tmp2, tmp3; \ asm volatile (".machine push\n\t" \ ".machine \"z13\"\n\t" \ diff --git a/sysdeps/s390/utf8-utf32-z9.c b/sysdeps/s390/utf8-utf32-z9.c index 9c4189a5f0..e4323d634c 100644 --- a/sysdeps/s390/utf8-utf32-z9.c +++ b/sysdeps/s390/utf8-utf32-z9.c @@ -170,8 +170,8 @@ gconv_end (struct __gconv_step *data) { \ register const unsigned char* pInput __asm__ ("8") = inptr; \ register size_t inlen __asm__ ("9") = inend - inptr; \ - register unsigned char* pOutput __asm__ ("10") = outptr; \ - register size_t outlen __asm__("11") = 
outend - outptr; \ + register unsigned char* pOutput __asm__ ("6") = outptr; \ + register size_t outlen __asm__("7") = outend - outptr; \ unsigned long cc = 0; \ \ __asm__ __volatile__ (".machine push \n\t" \ @@ -475,8 +475,8 @@ gconv_end (struct __gconv_step *data) { \ register const unsigned char* pInput asm ("8") = inptr; \ register size_t inlen asm ("9") = inend - inptr; \ - register unsigned char* pOutput asm ("10") = outptr; \ - register size_t outlen asm("11") = outend - outptr; \ + register unsigned char* pOutput asm ("6") = outptr; \ + register size_t outlen asm("7") = outend - outptr; \ unsigned long tmp, tmp2, tmp3; \ asm volatile (".machine push\n\t" \ ".machine \"z13\"\n\t" \ @@ -864,8 +864,8 @@ gconv_end (struct __gconv_step *data) { \ register const unsigned char* pInput asm ("8") = inptr; \ register size_t inlen asm ("9") = inend - inptr; \ - register unsigned char* pOutput asm ("10") = outptr; \ - register size_t outlen asm ("11") = outend - outptr; \ + register unsigned char* pOutput asm ("6") = outptr; \ + register size_t outlen asm ("7") = outend - outptr; \ unsigned long tmp, tmp2; \ asm volatile (".machine push\n\t" \ ".machine \"z13\"\n\t" \ diff --git a/sysdeps/sh/libm-test-ulps b/sysdeps/sh/libm-test-ulps index 3c84259941..8562796de8 100644 --- a/sysdeps/sh/libm-test-ulps +++ b/sysdeps/sh/libm-test-ulps @@ -394,11 +394,9 @@ float: 1 Function: "expm1": double: 1 -float: 1 Function: "expm1_towardzero": double: 1 -float: 2 Function: "fma_towardzero": double: 1 @@ -462,11 +460,9 @@ float: 1 Function: "log1p": double: 1 -float: 1 Function: "log1p_towardzero": double: 2 -float: 2 Function: "log2": double: 2 @@ -477,11 +473,9 @@ double: 2 Function: "logp1": double: 1 -float: 1 Function: "logp1_towardzero": double: 2 -float: 2 Function: "pow": double: 1 @@ -532,11 +526,9 @@ float: 2 Function: "tgamma": double: 9 -float: 8 Function: "tgamma_towardzero": double: 9 -float: 7 Function: "y0": double: 3 diff --git a/sysdeps/sparc/fpu/libm-test-ulps 
b/sysdeps/sparc/fpu/libm-test-ulps index 1bb8b7c5f4..6ea02058e9 100644 --- a/sysdeps/sparc/fpu/libm-test-ulps +++ b/sysdeps/sparc/fpu/libm-test-ulps @@ -978,22 +978,18 @@ ldouble: 3 Function: "exp10m1": double: 3 -float: 2 ldouble: 3 Function: "exp10m1_downward": double: 3 -float: 3 ldouble: 6 Function: "exp10m1_towardzero": double: 2 -float: 3 ldouble: 6 Function: "exp10m1_upward": double: 4 -float: 4 ldouble: 6 Function: "exp2": @@ -1018,22 +1014,18 @@ ldouble: 2 Function: "exp2m1": double: 2 -float: 2 ldouble: 2 Function: "exp2m1_downward": double: 3 -float: 3 ldouble: 3 Function: "exp2m1_towardzero": double: 3 -float: 2 ldouble: 4 Function: "exp2m1_upward": double: 3 -float: 3 ldouble: 5 Function: "exp_downward": @@ -1050,22 +1042,18 @@ float: 1 Function: "expm1": double: 1 -float: 1 ldouble: 2 Function: "expm1_downward": double: 1 -float: 1 ldouble: 2 Function: "expm1_towardzero": double: 1 -float: 2 ldouble: 4 Function: "expm1_upward": double: 1 -float: 1 ldouble: 3 Function: "gamma": @@ -1212,42 +1200,34 @@ ldouble: 1 Function: "log10p1": double: 1 -float: 2 ldouble: 3 Function: "log10p1_downward": double: 2 -float: 3 ldouble: 4 Function: "log10p1_towardzero": double: 3 -float: 2 ldouble: 3 Function: "log10p1_upward": double: 2 -float: 3 ldouble: 4 Function: "log1p": double: 1 -float: 1 ldouble: 3 Function: "log1p_downward": double: 2 -float: 2 ldouble: 3 Function: "log1p_towardzero": double: 2 -float: 2 ldouble: 3 Function: "log1p_upward": double: 2 -float: 2 ldouble: 2 Function: "log2": @@ -1272,22 +1252,18 @@ ldouble: 1 Function: "log2p1": double: 1 -float: 2 ldouble: 3 Function: "log2p1_downward": double: 2 -float: 2 ldouble: 3 Function: "log2p1_towardzero": double: 2 -float: 2 ldouble: 2 Function: "log2p1_upward": double: 2 -float: 2 ldouble: 3 Function: "log_downward": @@ -1305,22 +1281,18 @@ ldouble: 1 Function: "logp1": double: 1 -float: 1 ldouble: 3 Function: "logp1_downward": double: 2 -float: 2 ldouble: 3 Function: "logp1_towardzero": double: 2 
-float: 2 ldouble: 3 Function: "logp1_upward": double: 2 -float: 2 ldouble: 2 Function: "pow": @@ -1444,22 +1416,18 @@ ldouble: 3 Function: "tgamma": double: 9 -float: 8 ldouble: 4 Function: "tgamma_downward": double: 9 -float: 7 ldouble: 5 Function: "tgamma_towardzero": double: 9 -float: 7 ldouble: 5 Function: "tgamma_upward": double: 9 -float: 8 ldouble: 4 Function: "y0": diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile index 59998c7af4..527c7a5ae8 100644 --- a/sysdeps/unix/sysv/linux/Makefile +++ b/sysdeps/unix/sysv/linux/Makefile @@ -95,6 +95,8 @@ sysdep_routines += \ process_vm_writev \ pselect32 \ readahead \ + sched_getattr \ + sched_setattr \ setfsgid \ setfsuid \ setvmaname \ @@ -224,9 +226,11 @@ tests += \ tst-process_mrelease \ tst-quota \ tst-rlimit-infinity \ + tst-sched_setattr \ tst-scm_rights \ tst-sigtimedwait \ tst-sync_file_range \ + tst-syscall-restart \ tst-sysconf-iov_max \ tst-sysvmsg-linux \ tst-sysvsem-linux \ @@ -395,6 +399,16 @@ $(objpfx)tst-mount-compile.out: ../sysdeps/unix/sysv/linux/tst-mount-compile.py < /dev/null > $@ 2>&1; $(evaluate-test) $(objpfx)tst-mount-compile.out: $(sysdeps-linux-python-deps) +tests-special += \ + $(objpfx)tst-sched-consts.out \ + # tests-special +$(objpfx)tst-sched-consts.out: ../sysdeps/unix/sysv/linux/tst-sched-consts.py + $(sysdeps-linux-python) \ + ../sysdeps/unix/sysv/linux/tst-sched-consts.py \ + $(sysdeps-linux-python-cc) \ + < /dev/null > $@ 2>&1; $(evaluate-test) +$(objpfx)tst-sched-consts.out: $(sysdeps-linux-python-deps) + tst-rseq-disable-ENV = GLIBC_TUNABLES=glibc.pthread.rseq=0 endif # $(subdir) == misc diff --git a/sysdeps/unix/sysv/linux/Versions b/sysdeps/unix/sysv/linux/Versions index 268ba1b6ac..213ff5f1fe 100644 --- a/sysdeps/unix/sysv/linux/Versions +++ b/sysdeps/unix/sysv/linux/Versions @@ -328,6 +328,10 @@ libc { posix_spawnattr_getcgroup_np; posix_spawnattr_setcgroup_np; } + GLIBC_2.41 { + sched_getattr; + sched_setattr; + } GLIBC_PRIVATE { # functions 
used in other libraries __syscall_rt_sigqueueinfo; diff --git a/sysdeps/unix/sysv/linux/aarch64/libc.abilist b/sysdeps/unix/sysv/linux/aarch64/libc.abilist index 68eeca1c08..38db77e4f7 100644 --- a/sysdeps/unix/sysv/linux/aarch64/libc.abilist +++ b/sysdeps/unix/sysv/linux/aarch64/libc.abilist @@ -2748,3 +2748,5 @@ GLIBC_2.39 stdc_trailing_zeros_ui F GLIBC_2.39 stdc_trailing_zeros_ul F GLIBC_2.39 stdc_trailing_zeros_ull F GLIBC_2.39 stdc_trailing_zeros_us F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F diff --git a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist index b685106954..98687cae0d 100644 --- a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist +++ b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist @@ -128,3 +128,8 @@ GLIBC_2.40 _ZGVsMxvv_hypot F GLIBC_2.40 _ZGVsMxvv_hypotf F GLIBC_2.40 _ZGVsMxvv_pow F GLIBC_2.40 _ZGVsMxvv_powf F +GLIBC_2.41 _ZGVnN2v_logp1 F +GLIBC_2.41 _ZGVnN2v_logp1f F +GLIBC_2.41 _ZGVnN4v_logp1f F +GLIBC_2.41 _ZGVsMxv_logp1 F +GLIBC_2.41 _ZGVsMxv_logp1f F diff --git a/sysdeps/unix/sysv/linux/alpha/libc.abilist b/sysdeps/unix/sysv/linux/alpha/libc.abilist index 34c187b721..637bfce9fb 100644 --- a/sysdeps/unix/sysv/linux/alpha/libc.abilist +++ b/sysdeps/unix/sysv/linux/alpha/libc.abilist @@ -3095,6 +3095,8 @@ GLIBC_2.4 wcstold F GLIBC_2.4 wcstold_l F GLIBC_2.4 wprintf F GLIBC_2.4 wscanf F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/arc/Makefile b/sysdeps/unix/sysv/linux/arc/Makefile index eca9a9fd3a..dd3da52c39 100644 --- a/sysdeps/unix/sysv/linux/arc/Makefile +++ b/sysdeps/unix/sysv/linux/arc/Makefile @@ -11,12 +11,3 @@ ifeq ($(subdir),misc) sysdep_headers += sys/cachectl.h sysdep_routines += cacheflush endif - -abi-variants := arcle arcbe - -ifeq (,$(filter $(default-abi),$(abi-variants))) -$(error Unknown ABI $(default-abi), must be one of 
$(abi-variants)) -endif - -abi-arcle-condition := !defined __BIG_ENDIAN__ -abi-arcbe-condition := defined __BIG_ENDIAN__ diff --git a/sysdeps/unix/sysv/linux/arc/libc.abilist b/sysdeps/unix/sysv/linux/arc/libc.abilist index 916c18ea94..4a305cf730 100644 --- a/sysdeps/unix/sysv/linux/arc/libc.abilist +++ b/sysdeps/unix/sysv/linux/arc/libc.abilist @@ -2509,3 +2509,5 @@ GLIBC_2.39 stdc_trailing_zeros_ui F GLIBC_2.39 stdc_trailing_zeros_ul F GLIBC_2.39 stdc_trailing_zeros_ull F GLIBC_2.39 stdc_trailing_zeros_us F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F diff --git a/sysdeps/unix/sysv/linux/arc/shlib-versions b/sysdeps/unix/sysv/linux/arc/shlib-versions index 343c0a0450..62c1bd57b3 100644 --- a/sysdeps/unix/sysv/linux/arc/shlib-versions +++ b/sysdeps/unix/sysv/linux/arc/shlib-versions @@ -1,7 +1,3 @@ DEFAULT GLIBC_2.32 -%ifdef HAVE_ARC_BE -ld=ld-linux-arceb.so.2 -%else ld=ld-linux-arc.so.2 -%endif diff --git a/sysdeps/unix/sysv/linux/arm/be/libc.abilist b/sysdeps/unix/sysv/linux/arm/be/libc.abilist index ea95de282a..1d54f71b14 100644 --- a/sysdeps/unix/sysv/linux/arm/be/libc.abilist +++ b/sysdeps/unix/sysv/linux/arm/be/libc.abilist @@ -2801,6 +2801,8 @@ GLIBC_2.4 xdrstdio_create F GLIBC_2.4 xencrypt F GLIBC_2.4 xprt_register F GLIBC_2.4 xprt_unregister F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/arm/bits/hwcap.h b/sysdeps/unix/sysv/linux/arm/bits/hwcap.h index b104de473c..7b573facb9 100644 --- a/sysdeps/unix/sysv/linux/arm/bits/hwcap.h +++ b/sysdeps/unix/sysv/linux/arm/bits/hwcap.h @@ -49,3 +49,11 @@ #define HWCAP_ASIMDFHM 33554432 #define HWCAP_ASIMDBF16 67108864 #define HWCAP_I8MM 134217728 + +#define HWCAP2_AES 1 +#define HWCAP2_PMULL 2 +#define HWCAP2_SHA1 4 +#define HWCAP2_SHA2 8 +#define HWCAP2_CRC32 16 +#define HWCAP2_SB 32 +#define HWCAP2_SSBS 64 diff --git a/sysdeps/unix/sysv/linux/arm/le/libc.abilist 
b/sysdeps/unix/sysv/linux/arm/le/libc.abilist index 1cdbc983e1..ff7e8bc40b 100644 --- a/sysdeps/unix/sysv/linux/arm/le/libc.abilist +++ b/sysdeps/unix/sysv/linux/arm/le/libc.abilist @@ -2798,6 +2798,8 @@ GLIBC_2.4 xdrstdio_create F GLIBC_2.4 xencrypt F GLIBC_2.4 xprt_register F GLIBC_2.4 xprt_unregister F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/bits/mman-linux.h b/sysdeps/unix/sysv/linux/bits/mman-linux.h index 522333c50a..161a885096 100644 --- a/sysdeps/unix/sysv/linux/bits/mman-linux.h +++ b/sysdeps/unix/sysv/linux/bits/mman-linux.h @@ -43,6 +43,7 @@ #define MAP_PRIVATE 0x02 /* Changes are private. */ #define MAP_SHARED_VALIDATE 0x03 /* Share changes and validate extension flags. */ +#define MAP_DROPPABLE 0x08 /* Zero memory under memory pressure. */ #define MAP_TYPE 0x0f /* Mask for type of mapping. */ /* Other flags. */ diff --git a/sysdeps/unix/sysv/linux/bits/mman-shared.h b/sysdeps/unix/sysv/linux/bits/mman-shared.h index d8ed4436b6..7a9874204d 100644 --- a/sysdeps/unix/sysv/linux/bits/mman-shared.h +++ b/sysdeps/unix/sysv/linux/bits/mman-shared.h @@ -42,7 +42,7 @@ # define MLOCK_ONFAULT 1U # endif -/* Access rights for pkey_alloc. */ +/* Access restrictions for pkey_alloc. */ # ifndef PKEY_DISABLE_ACCESS # define PKEY_DISABLE_ACCESS 0x1 # define PKEY_DISABLE_WRITE 0x2 @@ -59,16 +59,16 @@ int memfd_create (const char *__name, unsigned int __flags) __THROW; int mlock2 (const void *__addr, size_t __length, unsigned int __flags) __THROW; /* Allocate a new protection key, with the PKEY_DISABLE_* bits - specified in ACCESS_RIGHTS. The protection key mask for the + specified in ACCESS_RESTRICTIONS. The protection key mask for the current thread is updated to match the access privilege for the new key. 
*/ -int pkey_alloc (unsigned int __flags, unsigned int __access_rights) __THROW; +int pkey_alloc (unsigned int __flags, unsigned int __access_restrictions) __THROW; -/* Update the access rights for the current thread for KEY, which must +/* Update the access restrictions for the current thread for KEY, which must have been allocated using pkey_alloc. */ -int pkey_set (int __key, unsigned int __access_rights) __THROW; +int pkey_set (int __key, unsigned int __access_restrictions) __THROW; -/* Return the access rights for the current thread for KEY, which must +/* Return the access restrictions for the current thread for KEY, which must have been allocated using pkey_alloc. */ int pkey_get (int __key) __THROW; diff --git a/sysdeps/unix/sysv/linux/bits/sched.h b/sysdeps/unix/sysv/linux/bits/sched.h index a53e1362a0..f03f5f5ab3 100644 --- a/sysdeps/unix/sysv/linux/bits/sched.h +++ b/sysdeps/unix/sysv/linux/bits/sched.h @@ -29,15 +29,60 @@ #define SCHED_FIFO 1 #define SCHED_RR 2 #ifdef __USE_GNU +# define SCHED_NORMAL 0 # define SCHED_BATCH 3 # define SCHED_ISO 4 # define SCHED_IDLE 5 # define SCHED_DEADLINE 6 +/* Flags that can be used in policy values. */ # define SCHED_RESET_ON_FORK 0x40000000 -#endif -#ifdef __USE_GNU +/* Flags for the sched_flags field in struct sched_attr. */ +#define SCHED_FLAG_RESET_ON_FORK 0x01 +#define SCHED_FLAG_RECLAIM 0x02 +#define SCHED_FLAG_DL_OVERRUN 0x04 +#define SCHED_FLAG_KEEP_POLICY 0x08 +#define SCHED_FLAG_KEEP_PARAMS 0x10 +#define SCHED_FLAG_UTIL_CLAMP_MIN 0x20 +#define SCHED_FLAG_UTIL_CLAMP_MAX 0x40 + +/* Combinations of sched_flags fields. */ +#define SCHED_FLAG_KEEP_ALL \ + (SCHED_FLAG_KEEP_POLICY | SCHED_FLAG_KEEP_PARAMS) +#define SCHED_FLAG_UTIL_CLAMP \ + (SCHED_FLAG_UTIL_CLAMP_MIN | SCHED_FLAG_UTIL_CLAMP_MAX) + +/* Use "" to work around incorrect macro expansion of the + __has_include argument (GCC PR 80005). 
*/ +# ifdef __has_include +# if __has_include ("linux/sched/types.h") +/* Some older Linux versions defined sched_param in <linux/sched/types.h>. */ +# define sched_param __glibc_mask_sched_param +# include <linux/sched/types.h> +# undef sched_param +# endif +# endif +# ifndef SCHED_ATTR_SIZE_VER0 +# include <linux/types.h> +# define SCHED_ATTR_SIZE_VER0 48 +# define SCHED_ATTR_SIZE_VER1 56 +struct sched_attr +{ + __u32 size; + __u32 sched_policy; + __u64 sched_flags; + __s32 sched_nice; + __u32 sched_priority; + __u64 sched_runtime; + __u64 sched_deadline; + __u64 sched_period; + __u32 sched_util_min; + __u32 sched_util_max; + /* Additional fields may be added at the end. */ +}; +# endif /* !SCHED_ATTR_SIZE_VER0 */ + /* Cloning flags. */ # define CSIGNAL 0x000000ff /* Signal mask to be sent at exit. */ # define CLONE_VM 0x00000100 /* Set if VM shared between processes. */ @@ -97,6 +142,17 @@ extern int getcpu (unsigned int *, unsigned int *) __THROW; /* Switch process to namespace of type NSTYPE indicated by FD. */ extern int setns (int __fd, int __nstype) __THROW; + +/* Apply the scheduling attributes from *ATTR to the process or thread TID. */ +int sched_setattr (pid_t tid, struct sched_attr *attr, unsigned int flags) + __THROW __nonnull ((2)); + +/* Obtain the scheduling attributes of the process or thread TID and + store it in *ATTR. */ +int sched_getattr (pid_t tid, struct sched_attr *attr, unsigned int size, + unsigned int flags) + __THROW __nonnull ((2)) __attr_access ((__write_only__, 2, 3)); + #endif __END_DECLS diff --git a/sysdeps/unix/sysv/linux/bits/uio-ext.h b/sysdeps/unix/sysv/linux/bits/uio-ext.h index ead7a09156..85ed21bac5 100644 --- a/sysdeps/unix/sysv/linux/bits/uio-ext.h +++ b/sysdeps/unix/sysv/linux/bits/uio-ext.h @@ -48,6 +48,8 @@ extern ssize_t process_vm_writev (pid_t __pid, const struct iovec *__lvec, #define RWF_NOWAIT 0x00000008 /* per-IO nonblocking mode. */ #define RWF_APPEND 0x00000010 /* per-IO O_APPEND. 
*/ #define RWF_NOAPPEND 0x00000020 /* per-IO negation of O_APPEND */ +#define RWF_ATOMIC 0x00000040 /* Write is to be issued with torn-write + prevention. */ __END_DECLS diff --git a/sysdeps/unix/sysv/linux/configure b/sysdeps/unix/sysv/linux/configure index 4d642e953b..8cda8e9451 100644..100755 --- a/sysdeps/unix/sysv/linux/configure +++ b/sysdeps/unix/sysv/linux/configure @@ -1,3 +1,4 @@ +ac_default_prefix=/usr # This file is generated from configure.ac by Autoconf. DO NOT EDIT! # Local configure fragment for sysdeps/unix/sysv/linux. @@ -116,7 +117,9 @@ if test -n "$sysheaders"; then CPPFLAGS=$OLD_CPPFLAGS fi -if test "$prefix" = "/usr/local" -o "$prefix" = "/usr/local/" -o "$prefix" = "NONE"; then + + +if test "$prefix" = "/usr/local" -o "$prefix" = "/usr/local/"; then if test $enable_sanity = yes; then echo "\ *** On GNU/Linux systems the GNU C Library should not be installed into diff --git a/sysdeps/unix/sysv/linux/configure.ac b/sysdeps/unix/sysv/linux/configure.ac index c177e082a9..4c43cda877 100644 --- a/sysdeps/unix/sysv/linux/configure.ac +++ b/sysdeps/unix/sysv/linux/configure.ac @@ -71,7 +71,9 @@ if test -n "$sysheaders"; then CPPFLAGS=$OLD_CPPFLAGS fi -if test "$prefix" = "/usr/local" -o "$prefix" = "/usr/local/" -o "$prefix" = "NONE"; then +AC_PREFIX_DEFAULT([/usr]) + +if test "$prefix" = "/usr/local" -o "$prefix" = "/usr/local/"; then if test $enable_sanity = yes; then echo "\ *** On GNU/Linux systems the GNU C Library should not be installed into diff --git a/sysdeps/unix/sysv/linux/csky/libc.abilist b/sysdeps/unix/sysv/linux/csky/libc.abilist index 96d45961e2..c3ed65467d 100644 --- a/sysdeps/unix/sysv/linux/csky/libc.abilist +++ b/sysdeps/unix/sysv/linux/csky/libc.abilist @@ -2785,3 +2785,5 @@ GLIBC_2.39 stdc_trailing_zeros_ui F GLIBC_2.39 stdc_trailing_zeros_ul F GLIBC_2.39 stdc_trailing_zeros_ull F GLIBC_2.39 stdc_trailing_zeros_us F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F diff --git a/sysdeps/unix/sysv/linux/getdents64.c 
b/sysdeps/unix/sysv/linux/getdents64.c index 227fbf21ae..795bd935f0 100644 --- a/sysdeps/unix/sysv/linux/getdents64.c +++ b/sysdeps/unix/sysv/linux/getdents64.c @@ -33,100 +33,3 @@ __getdents64 (int fd, void *buf, size_t nbytes) } libc_hidden_def (__getdents64) weak_alias (__getdents64, getdents64) - -#if _DIRENT_MATCHES_DIRENT64 -strong_alias (__getdents64, __getdents) -#else -# include <shlib-compat.h> - -# if SHLIB_COMPAT(libc, GLIBC_2_1, GLIBC_2_2) -# include <olddirent.h> -# include <unistd.h> - -static ssize_t -handle_overflow (int fd, __off64_t offset, ssize_t count) -{ - /* If this is the first entry in the buffer, we can report the - error. */ - if (offset == 0) - { - __set_errno (EOVERFLOW); - return -1; - } - - /* Otherwise, seek to the overflowing entry, so that the next call - will report the error, and return the data read so far. */ - if (__lseek64 (fd, offset, SEEK_SET) != 0) - return -1; - return count; -} - -ssize_t -__old_getdents64 (int fd, char *buf, size_t nbytes) -{ - /* We do not move the individual directory entries. This is only - possible if the target type (struct __old_dirent64) is smaller - than the source type. */ - _Static_assert (offsetof (struct __old_dirent64, d_name) - <= offsetof (struct dirent64, d_name), - "__old_dirent64 is larger than dirent64"); - _Static_assert (__alignof__ (struct __old_dirent64) - <= __alignof__ (struct dirent64), - "alignment of __old_dirent64 is larger than dirent64"); - - ssize_t retval = INLINE_SYSCALL_CALL (getdents64, fd, buf, nbytes); - if (retval > 0) - { - /* This is the marker for the first entry. Offset 0 is reserved - for the first entry (see rewinddir). Here, we use it as a - marker for the first entry in the buffer. We never actually - seek to offset 0 because handle_overflow reports the error - directly, so it does not matter that the offset is incorrect - if entries have been read from the descriptor before (so that - the descriptor is not actually at offset 0). 
*/ - __off64_t previous_offset = 0; - - char *p = buf; - char *end = buf + retval; - while (p < end) - { - struct dirent64 *source = (struct dirent64 *) p; - - /* Copy out the fixed-size data. */ - __ino_t ino = source->d_ino; - __off64_t offset = source->d_off; - unsigned int reclen = source->d_reclen; - unsigned char type = source->d_type; - - /* Check for ino_t overflow. */ - if (__glibc_unlikely (ino != source->d_ino)) - return handle_overflow (fd, previous_offset, p - buf); - - /* Convert to the target layout. Use a separate struct and - memcpy to side-step aliasing issues. */ - struct __old_dirent64 result; - result.d_ino = ino; - result.d_off = offset; - result.d_reclen = reclen; - result.d_type = type; - - /* Write the fixed-sized part of the result to the - buffer. */ - size_t result_name_offset = offsetof (struct __old_dirent64, d_name); - memcpy (p, &result, result_name_offset); - - /* Adjust the position of the name if necessary. Copy - everything until the end of the record, including the - terminating NUL byte. */ - if (result_name_offset != offsetof (struct dirent64, d_name)) - memmove (p + result_name_offset, source->d_name, - reclen - offsetof (struct dirent64, d_name)); - - p += reclen; - previous_offset = offset; - } - } - return retval; -} -# endif /* SHLIB_COMPAT(libc, GLIBC_2_1, GLIBC_2_2) */ -#endif /* _DIRENT_MATCHES_DIRENT64 */ diff --git a/sysdeps/unix/sysv/linux/gettimeofday.c b/sysdeps/unix/sysv/linux/gettimeofday.c index 7ab147c614..5ca8916611 100644 --- a/sysdeps/unix/sysv/linux/gettimeofday.c +++ b/sysdeps/unix/sysv/linux/gettimeofday.c @@ -37,11 +37,11 @@ __gettimeofday_syscall (struct timeval *restrict tv, void *restrict tz) } # undef INIT_ARCH -# define INIT_ARCH() \ - void *vdso_gettimeofday = dl_vdso_vsym (HAVE_GETTIMEOFDAY_VSYSCALL) +# define INIT_ARCH() libc_ifunc (__gettimeofday, - vdso_gettimeofday ? VDSO_IFUNC_RET (vdso_gettimeofday) - : (void *) __gettimeofday_syscall) + GLRO(dl_vdso_gettimeofday) != NULL + ? 
VDSO_IFUNC_RET (GLRO(dl_vdso_gettimeofday)) + : (void *) __gettimeofday_syscall) # else int diff --git a/sysdeps/unix/sysv/linux/hppa/libc.abilist b/sysdeps/unix/sysv/linux/hppa/libc.abilist index fbcd60c2b3..8de7644a59 100644 --- a/sysdeps/unix/sysv/linux/hppa/libc.abilist +++ b/sysdeps/unix/sysv/linux/hppa/libc.abilist @@ -2821,6 +2821,8 @@ GLIBC_2.4 sys_errlist D 0x400 GLIBC_2.4 sys_nerr D 0x4 GLIBC_2.4 unlinkat F GLIBC_2.4 unshare F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/i386/libc.abilist b/sysdeps/unix/sysv/linux/i386/libc.abilist index c989b433c0..4fedf775d4 100644 --- a/sysdeps/unix/sysv/linux/i386/libc.abilist +++ b/sysdeps/unix/sysv/linux/i386/libc.abilist @@ -3005,6 +3005,8 @@ GLIBC_2.4 sys_errlist D 0x210 GLIBC_2.4 sys_nerr D 0x4 GLIBC_2.4 unlinkat F GLIBC_2.4 unshare F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/include/bits/mman-shared.h b/sysdeps/unix/sysv/linux/include/bits/mman-shared.h new file mode 100644 index 0000000000..7c14b5df3c --- /dev/null +++ b/sysdeps/unix/sysv/linux/include/bits/mman-shared.h @@ -0,0 +1,16 @@ +#include <sysdeps/unix/sysv/linux/bits/mman-shared.h> + +#ifndef _ISOMAC + +extern __typeof (pkey_alloc) __pkey_alloc; +libc_hidden_proto (__pkey_alloc) +extern __typeof (pkey_free) __pkey_free; +libc_hidden_proto (__pkey_free) +extern __typeof (pkey_mprotect) __pkey_mprotect; +libc_hidden_proto (__pkey_mprotect) +extern __typeof (pkey_get) __pkey_get; +libc_hidden_proto (__pkey_get) +extern __typeof (pkey_set) __pkey_set; +libc_hidden_proto (__pkey_set) + +#endif diff --git a/sysdeps/unix/sysv/linux/internal-signals.h b/sysdeps/unix/sysv/linux/internal-signals.h index a6fae59aaa..6e3a3d7692 100644 --- a/sysdeps/unix/sysv/linux/internal-signals.h +++ 
b/sysdeps/unix/sysv/linux/internal-signals.h @@ -90,6 +90,15 @@ internal_signal_restore_set (const internal_sigset_t *set) __NSIG_BYTES); } +static inline void +internal_signal_unblock_signal (int sig) +{ + internal_sigset_t set; + internal_sigemptyset (&set); + internal_sigaddset (&set, sig); + INTERNAL_SYSCALL_CALL (rt_sigprocmask, SIG_UNBLOCK, &set, NULL, + __NSIG_BYTES); +} /* It is used on timer_create code directly on sigwaitinfo call, so it can not use the internal_sigset_t definitions. */ diff --git a/sysdeps/unix/sysv/linux/internal-sigset.h b/sysdeps/unix/sysv/linux/internal-sigset.h index 5d7020b42d..4b19affd75 100644 --- a/sysdeps/unix/sysv/linux/internal-sigset.h +++ b/sysdeps/unix/sysv/linux/internal-sigset.h @@ -21,7 +21,7 @@ #include <sigsetops.h> -typedef struct +typedef struct _internal_sigset_t { unsigned long int __val[__NSIG_WORDS]; } internal_sigset_t; diff --git a/sysdeps/unix/sysv/linux/loongarch/bits/rseq.h b/sysdeps/unix/sysv/linux/loongarch/bits/rseq.h new file mode 100644 index 0000000000..7468a05243 --- /dev/null +++ b/sysdeps/unix/sysv/linux/loongarch/bits/rseq.h @@ -0,0 +1,36 @@ +/* Restartable Sequences Linux LoongArch architecture header. + Copyright (C) 2024 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#ifndef _SYS_RSEQ_H +# error "Never use <bits/rseq.h> directly; include <sys/rseq.h> instead." +#endif + +/* RSEQ_SIG is a signature required before each abort handler code. + + It is a 32-bit value that maps to actual architecture code compiled + into applications and libraries. It needs to be defined for each + architecture. When choosing this value, it needs to be taken into + account that generating invalid instructions may have ill effects on + tools like objdump, and may also have impact on the CPU speculative + execution efficiency in some cases. + + RSEQ_SIG uses the following break instruction: + + 0x002a0010 break 0x10 +*/ + +#define RSEQ_SIG 0x002a0010 diff --git a/sysdeps/unix/sysv/linux/loongarch/fixup-asm-unistd.h b/sysdeps/unix/sysv/linux/loongarch/fixup-asm-unistd.h new file mode 100644 index 0000000000..0062756b5c --- /dev/null +++ b/sysdeps/unix/sysv/linux/loongarch/fixup-asm-unistd.h @@ -0,0 +1,21 @@ +/* Regularize <asm/unistd.h> definitions. LoongArch version. + Copyright (C) 2024 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* To avoid the messy usage of the fstat, newfstatat, and statx system calls, we +only use statx. 
*/ +#undef __NR_fstat +#undef __NR_newfstatat diff --git a/sysdeps/unix/sysv/linux/loongarch/lp64/libc.abilist b/sysdeps/unix/sysv/linux/loongarch/lp64/libc.abilist index 0023ec1fa1..0024282289 100644 --- a/sysdeps/unix/sysv/linux/loongarch/lp64/libc.abilist +++ b/sysdeps/unix/sysv/linux/loongarch/lp64/libc.abilist @@ -2269,3 +2269,5 @@ GLIBC_2.39 stdc_trailing_zeros_ui F GLIBC_2.39 stdc_trailing_zeros_ul F GLIBC_2.39 stdc_trailing_zeros_ull F GLIBC_2.39 stdc_trailing_zeros_us F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F diff --git a/sysdeps/unix/sysv/linux/m68k/coldfire/libc.abilist b/sysdeps/unix/sysv/linux/m68k/coldfire/libc.abilist index d9bd6a9b56..142595eb3e 100644 --- a/sysdeps/unix/sysv/linux/m68k/coldfire/libc.abilist +++ b/sysdeps/unix/sysv/linux/m68k/coldfire/libc.abilist @@ -2781,6 +2781,8 @@ GLIBC_2.4 xdrstdio_create F GLIBC_2.4 xencrypt F GLIBC_2.4 xprt_register F GLIBC_2.4 xprt_unregister F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist b/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist index 439796d693..85e7746c10 100644 --- a/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist +++ b/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist @@ -2948,6 +2948,8 @@ GLIBC_2.4 sys_errlist D 0x210 GLIBC_2.4 sys_nerr D 0x4 GLIBC_2.4 unlinkat F GLIBC_2.4 unshare F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist b/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist index 1069d3252c..91dc1b8378 100644 --- a/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist +++ b/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist @@ -2834,3 +2834,5 @@ GLIBC_2.39 stdc_trailing_zeros_ui F GLIBC_2.39 stdc_trailing_zeros_ul F GLIBC_2.39 stdc_trailing_zeros_ull F GLIBC_2.39 
stdc_trailing_zeros_us F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F diff --git a/sysdeps/unix/sysv/linux/microblaze/le/libc.abilist b/sysdeps/unix/sysv/linux/microblaze/le/libc.abilist index 17abe08c8b..3440e90f6f 100644 --- a/sysdeps/unix/sysv/linux/microblaze/le/libc.abilist +++ b/sysdeps/unix/sysv/linux/microblaze/le/libc.abilist @@ -2831,3 +2831,5 @@ GLIBC_2.39 stdc_trailing_zeros_ui F GLIBC_2.39 stdc_trailing_zeros_ul F GLIBC_2.39 stdc_trailing_zeros_ull F GLIBC_2.39 stdc_trailing_zeros_us F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F diff --git a/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist index 799e508950..5ee7b8c52f 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist +++ b/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist @@ -2909,6 +2909,8 @@ GLIBC_2.4 renameat F GLIBC_2.4 symlinkat F GLIBC_2.4 unlinkat F GLIBC_2.4 unshare F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/mips/mips32/nofpu/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips32/nofpu/libc.abilist index 1c10996cbc..6cb6328e7c 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/nofpu/libc.abilist +++ b/sysdeps/unix/sysv/linux/mips/mips32/nofpu/libc.abilist @@ -2907,6 +2907,8 @@ GLIBC_2.4 renameat F GLIBC_2.4 symlinkat F GLIBC_2.4 unlinkat F GLIBC_2.4 unshare F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist index 03d9655f26..ae7474c0f0 100644 --- a/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist +++ b/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist @@ -2915,6 +2915,8 @@ GLIBC_2.4 renameat F GLIBC_2.4 symlinkat F GLIBC_2.4 unlinkat F GLIBC_2.4 
unshare F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist index 05e402ed30..cdf040dec2 100644 --- a/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist +++ b/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist @@ -2817,6 +2817,8 @@ GLIBC_2.4 renameat F GLIBC_2.4 symlinkat F GLIBC_2.4 unlinkat F GLIBC_2.4 unshare F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/nios2/arch-syscall.h b/sysdeps/unix/sysv/linux/nios2/arch-syscall.h index f94e212995..b1d0fb6f6e 100644 --- a/sysdeps/unix/sysv/linux/nios2/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/nios2/arch-syscall.h @@ -24,6 +24,7 @@ #define __NR_clock_settime 112 #define __NR_clock_settime64 404 #define __NR_clone 220 +#define __NR_clone3 435 #define __NR_close 57 #define __NR_close_range 436 #define __NR_connect 203 diff --git a/sysdeps/unix/sysv/linux/nios2/libc.abilist b/sysdeps/unix/sysv/linux/nios2/libc.abilist index 3aa81766aa..773d4c5873 100644 --- a/sysdeps/unix/sysv/linux/nios2/libc.abilist +++ b/sysdeps/unix/sysv/linux/nios2/libc.abilist @@ -2873,3 +2873,5 @@ GLIBC_2.39 stdc_trailing_zeros_ui F GLIBC_2.39 stdc_trailing_zeros_ul F GLIBC_2.39 stdc_trailing_zeros_ull F GLIBC_2.39 stdc_trailing_zeros_us F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F diff --git a/sysdeps/unix/sysv/linux/olddirent.h b/sysdeps/unix/sysv/linux/olddirent.h index 239f790648..065ca41a6e 100644 --- a/sysdeps/unix/sysv/linux/olddirent.h +++ b/sysdeps/unix/sysv/linux/olddirent.h @@ -34,8 +34,6 @@ extern struct __old_dirent64 *__old_readdir64 (DIR *__dirp); libc_hidden_proto (__old_readdir64); extern int __old_readdir64_r (DIR *__dirp, struct __old_dirent64 *__entry, struct __old_dirent64 
**__result); -extern __ssize_t __old_getdents64 (int __fd, char *__buf, size_t __nbytes) - attribute_hidden; int __old_scandir64 (const char * __dir, struct __old_dirent64 *** __namelist, int (*__selector) (const struct __old_dirent64 *), diff --git a/sysdeps/unix/sysv/linux/or1k/libc.abilist b/sysdeps/unix/sysv/linux/or1k/libc.abilist index 959e59e7e7..c356a11b1c 100644 --- a/sysdeps/unix/sysv/linux/or1k/libc.abilist +++ b/sysdeps/unix/sysv/linux/or1k/libc.abilist @@ -2259,3 +2259,5 @@ GLIBC_2.40 getcontext F GLIBC_2.40 makecontext F GLIBC_2.40 setcontext F GLIBC_2.40 swapcontext F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F diff --git a/sysdeps/unix/sysv/linux/pkey_get.c b/sysdeps/unix/sysv/linux/pkey_get.c index 743bd09268..ed9b6297d1 100644 --- a/sysdeps/unix/sysv/linux/pkey_get.c +++ b/sysdeps/unix/sysv/linux/pkey_get.c @@ -17,10 +17,13 @@ <https://www.gnu.org/licenses/>. */ #include <errno.h> +#include <sys/mman.h> int -pkey_get (int key) +__pkey_get (int key) { __set_errno (ENOSYS); return -1; } +libc_hidden_def (__pkey_get) +weak_alias (__pkey_get, pkey_get) diff --git a/sysdeps/unix/sysv/linux/pkey_mprotect.c b/sysdeps/unix/sysv/linux/pkey_mprotect.c index b7afa7946d..e321a7eb1b 100644 --- a/sysdeps/unix/sysv/linux/pkey_mprotect.c +++ b/sysdeps/unix/sysv/linux/pkey_mprotect.c @@ -22,7 +22,7 @@ #include <sysdep.h> int -pkey_mprotect (void *addr, size_t len, int prot, int pkey) +__pkey_mprotect (void *addr, size_t len, int prot, int pkey) { if (pkey == -1) /* If the key is -1, the system call is precisely equivalent to @@ -30,3 +30,5 @@ pkey_mprotect (void *addr, size_t len, int prot, int pkey) return __mprotect (addr, len, prot); return INLINE_SYSCALL_CALL (pkey_mprotect, addr, len, prot, pkey); } +libc_hidden_def (__pkey_mprotect) +weak_alias (__pkey_mprotect, pkey_mprotect) diff --git a/sysdeps/unix/sysv/linux/pkey_set.c b/sysdeps/unix/sysv/linux/pkey_set.c index d5d59e3031..9cb14af0ec 100644 --- a/sysdeps/unix/sysv/linux/pkey_set.c +++ 
b/sysdeps/unix/sysv/linux/pkey_set.c @@ -17,10 +17,13 @@ <https://www.gnu.org/licenses/>. */ #include <errno.h> +#include <sys/mman.h> int -pkey_set (int key, unsigned int access_rights) +__pkey_set (int key, unsigned int access_restrictions) { __set_errno (ENOSYS); return -1; } +libc_hidden_def (__pkey_set) +weak_alias (__pkey_set, pkey_set) diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist index 9714305608..7937f94cf0 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist @@ -3138,6 +3138,8 @@ GLIBC_2.4 wcstold F GLIBC_2.4 wcstold_l F GLIBC_2.4 wprintf F GLIBC_2.4 wscanf F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libc.abilist index 0beb52c542..d6e35f31d2 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libc.abilist +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libc.abilist @@ -3183,6 +3183,8 @@ GLIBC_2.4 wcstold F GLIBC_2.4 wcstold_l F GLIBC_2.4 wprintf F GLIBC_2.4 wscanf F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist index cfc2ebd3ec..2268d6890d 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist @@ -2892,6 +2892,8 @@ GLIBC_2.4 wcstold F GLIBC_2.4 wcstold_l F GLIBC_2.4 wprintf F GLIBC_2.4 wscanf F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git 
a/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S b/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S index 164311d2bd..e57cb6e82e 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S @@ -56,7 +56,6 @@ ENTRY (__clone) /* Save fn, args, stack across syscall. */ mr r30,r3 /* Function in r30. */ - mr r29,r5 /* Flags in r29. */ mr r31,r6 /* Argument in r31. */ /* 'flags' argument is first parameter to clone syscall. @@ -77,14 +76,12 @@ ENTRY (__clone) CHECK_SCV_SUPPORT r28 0f /* This is equivalent to DO_CALL_SCV, but we cannot use the macro here because it uses CFI directives and we just called cfi_endproc. */ - mflr r9 - std r9,FRAME_LR_SAVE(r1) + mflr r29 .machine "push" .machine "power9" scv 0 .machine "pop" - ld r9,FRAME_LR_SAVE(r1) - mtlr r9 + mtlr r29 /* Check for child process. */ /* When using scv, error is indicated by negative r3. */ diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone3.S b/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone3.S index 900c354c9c..913f341d7d 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone3.S +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone3.S @@ -39,14 +39,12 @@ because it uses CFI directives and we just called cfi_endproc. */ # define DO_CLONE3_SVC_CALL(jumpfalse) \ CHECK_SCV_SUPPORT r28 jumpfalse; \ - mflr r9; \ - std r9, FRAME_LR_SAVE(r1); \ + mflr r31; \ .machine "push"; \ .machine "power9"; \ scv 0; \ .machine "pop"; \ - ld r9, FRAME_LR_SAVE(r1); \ - mtlr r9; \ + mtlr r31; \ /* With scv an, an error is a value -4095 <= x < 0. */ \ cmpdi cr1, r3, 0; \ b 1f; @@ -66,13 +64,15 @@ ENTRY(__clone3) /* Save some regs in the "red zone". 
*/ #ifdef USE_PPC_SCV - std r28, -24(r1) - cfi_offset (r28, -24) + std r28, -32(r1) + cfi_offset (r28, -32) #endif - std r29, -16(r1) - std r30, -8(r1) - cfi_offset (r29, -16) - cfi_offset (r30, -8) + std r29, -24(r1) + std r30, -16(r1) + std r31, -8(r1) + cfi_offset (r29, -24) + cfi_offset (r30, -16) + cfi_offset (r31, -8) /* Save func and arg across syscall. */ mr r30, r5 /* Function in r30. */ @@ -132,16 +132,19 @@ L(parent): /* Parent. Restore registers & return. */ #ifdef USE_PPC_SCV - cfi_offset (r28, -24) - ld r28, -24(r1) + cfi_offset (r28, -32) + ld r28, -32(r1) cfi_restore (r28) #endif - cfi_offset (r29,-16) - cfi_offset (r30,-8) - ld r29, -16(r1) - ld r30, -8(r1) + cfi_offset (r29,-24) + cfi_offset (r30,-16) + cfi_offset (r31,-8) + ld r29, -24(r1) + ld r30, -16(r1) + ld r31, -8(r1) cfi_restore (r29) cfi_restore (r30) + cfi_restore (r31) #ifdef USE_PPC_SCV beq cr1, 0f diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist index 8c9efc5a16..7f61b14bc8 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist @@ -2968,3 +2968,5 @@ GLIBC_2.39 stdc_trailing_zeros_ui F GLIBC_2.39 stdc_trailing_zeros_ul F GLIBC_2.39 stdc_trailing_zeros_ull F GLIBC_2.39 stdc_trailing_zeros_us F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_get.c b/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_get.c index 217ecd004f..ac9ea3a1c1 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_get.c +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_get.c @@ -21,7 +21,7 @@ #include <sys/mman.h> int -pkey_get (int key) +__pkey_get (int key) { if (key < 0 || key > PKEY_MAX) { @@ -40,3 +40,5 @@ pkey_get (int key) return PKEY_DISABLE_WRITE; return 0; } +libc_hidden_def (__pkey_get) +weak_alias (__pkey_get, pkey_get) diff --git 
a/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_set.c b/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_set.c index cc67b0acff..ac10b5de22 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_set.c +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_set.c @@ -21,7 +21,7 @@ #include <sys/mman.h> int -pkey_set (int key, unsigned int rights) +__pkey_set (int key, unsigned int rights) { if (key < 0 || key > PKEY_MAX || rights > 3) { @@ -46,3 +46,5 @@ pkey_set (int key, unsigned int rights) pkey_write (amr); return 0; } +libc_hidden_def (__pkey_set) +weak_alias (__pkey_set, pkey_set) diff --git a/sysdeps/unix/sysv/linux/readdir64.c b/sysdeps/unix/sysv/linux/readdir64.c index e6f5108c0a..e6b8867b7a 100644 --- a/sysdeps/unix/sysv/linux/readdir64.c +++ b/sysdeps/unix/sysv/linux/readdir64.c @@ -26,17 +26,13 @@ #undef __readdir #undef readdir -/* Read a directory entry from DIRP. */ -struct dirent64 * -__readdir64 (DIR *dirp) +/* Read a directory entry from DIRP. No locking. */ +static struct dirent64 * +__readdir64_unlocked (DIR *dirp) { struct dirent64 *dp; int saved_errno = errno; -#if IS_IN (libc) - __libc_lock_lock (dirp->lock); -#endif - if (dirp->offset >= dirp->size) { /* We've emptied out our buffer. Refill it. */ @@ -53,9 +49,6 @@ __readdir64 (DIR *dirp) do not set errno in that case, to indicate success. */ if (bytes == 0 || errno == ENOENT) __set_errno (saved_errno); -#if IS_IN (libc) - __libc_lock_unlock (dirp->lock); -#endif return NULL; } dirp->size = (size_t) bytes; @@ -68,10 +61,16 @@ __readdir64 (DIR *dirp) dirp->offset += dp->d_reclen; dirp->filepos = dp->d_off; -#if IS_IN (libc) - __libc_lock_unlock (dirp->lock); -#endif + return dp; +} +/* Read a directory entry from DIRP. 
*/ +struct dirent64 * +__readdir64 (DIR *dirp) +{ + __libc_lock_lock (dirp->lock); + struct dirent64 *dp = __readdir64_unlocked (dirp); + __libc_lock_unlock (dirp->lock); return dp; } libc_hidden_def (__readdir64) @@ -99,45 +98,54 @@ __old_readdir64 (DIR *dirp) struct __old_dirent64 *dp; int saved_errno = errno; -#if IS_IN (libc) __libc_lock_lock (dirp->lock); -#endif - if (dirp->offset >= dirp->size) + while (1) { - /* We've emptied out our buffer. Refill it. */ + errno = 0; + struct dirent64 *newdp = __readdir64_unlocked (dirp); + if (newdp == NULL) + { + if (errno == 0 && dirp->errcode != 0) + __set_errno (dirp->errcode); + else if (errno == 0) + __set_errno (saved_errno); + dp = NULL; + break; + } - size_t maxread = dirp->allocation; - ssize_t bytes; + /* Convert to the target layout. Use a separate struct and + memcpy to side-step aliasing issues. */ + struct __old_dirent64 result; + result.d_ino = newdp->d_ino; + result.d_off = newdp->d_off; + result.d_reclen = newdp->d_reclen; + result.d_type = newdp->d_type; - bytes = __old_getdents64 (dirp->fd, dirp->data, maxread); - if (bytes <= 0) + /* Check for ino_t overflow. */ + if (__glibc_unlikely (result.d_ino != newdp->d_ino)) { - /* Linux may fail with ENOENT on some file systems if the - directory inode is marked as dead (deleted). POSIX - treats this as a regular end-of-directory condition, so - do not set errno in that case, to indicate success. */ - if (bytes == 0 || errno == ENOENT) - __set_errno (saved_errno); -#if IS_IN (libc) - __libc_lock_unlock (dirp->lock); -#endif - return NULL; + dirp->errcode = ENAMETOOLONG; + continue; } - dirp->size = (size_t) bytes; - /* Reset the offset into the buffer. */ - dirp->offset = 0; - } + /* Overwrite the fixed-sized part. 
*/ + dp = (struct __old_dirent64 *) newdp; + memcpy (dp, &result, offsetof (struct __old_dirent64, d_name)); - dp = (struct __old_dirent64 *) &dirp->data[dirp->offset]; - dirp->offset += dp->d_reclen; - dirp->filepos = dp->d_off; + /* Move the name. */ + _Static_assert (offsetof (struct __old_dirent64, d_name) + <= offsetof (struct dirent64, d_name), + "old struct must be smaller"); + if (offsetof (struct __old_dirent64, d_name) + != offsetof (struct dirent64, d_name)) + memmove (dp->d_name, newdp->d_name, strlen (newdp->d_name) + 1); -#if IS_IN (libc) - __libc_lock_unlock (dirp->lock); -#endif + __set_errno (saved_errno); + break; + } + __libc_lock_unlock (dirp->lock); return dp; } libc_hidden_def (__old_readdir64) diff --git a/sysdeps/unix/sysv/linux/readdir64_r.c b/sysdeps/unix/sysv/linux/readdir64_r.c index e87882ee06..c42a161ffc 100644 --- a/sysdeps/unix/sysv/linux/readdir64_r.c +++ b/sysdeps/unix/sysv/linux/readdir64_r.c @@ -37,7 +37,7 @@ __readdir64_r (DIR *dirp, struct dirent64 *entry, struct dirent64 **result) __libc_lock_lock (dirp->lock); - do + while (1) { if (dirp->offset >= dirp->size) { @@ -79,26 +79,21 @@ __readdir64_r (DIR *dirp, struct dirent64 *entry, struct dirent64 **result) dirp->filepos = dp->d_off; - if (reclen > offsetof (struct dirent64, d_name) + NAME_MAX + 1) + if (reclen <= offsetof (struct dirent64, d_name) + NAME_MAX + 1) + break; + + /* The record is very long. It could still fit into the + caller-supplied buffer if we can skip padding at the end. */ + size_t namelen = _D_EXACT_NAMLEN (dp); + if (namelen <= NAME_MAX) { - /* The record is very long. It could still fit into the - caller-supplied buffer if we can skip padding at the - end. */ - size_t namelen = _D_EXACT_NAMLEN (dp); - if (namelen <= NAME_MAX) - reclen = offsetof (struct dirent64, d_name) + namelen + 1; - else - { - /* The name is too long. Ignore this file. 
*/ - dirp->errcode = ENAMETOOLONG; - dp->d_ino = 0; - continue; - } + reclen = offsetof (struct dirent64, d_name) + namelen + 1; + break; } - /* Skip deleted and ignored files. */ + /* The name is too long. Ignore this file. */ + dirp->errcode = ENAMETOOLONG; } - while (dp->d_ino == 0); if (dp != NULL) { @@ -135,91 +130,37 @@ attribute_compat_text_section __old_readdir64_r (DIR *dirp, struct __old_dirent64 *entry, struct __old_dirent64 **result) { - struct __old_dirent64 *dp; - size_t reclen; - const int saved_errno = errno; - int ret; - - __libc_lock_lock (dirp->lock); - - do + while (1) { - if (dirp->offset >= dirp->size) - { - /* We've emptied out our buffer. Refill it. */ - - size_t maxread = dirp->allocation; - ssize_t bytes; - - maxread = dirp->allocation; - - bytes = __old_getdents64 (dirp->fd, dirp->data, maxread); - if (bytes <= 0) - { - /* On some systems getdents fails with ENOENT when the - open directory has been rmdir'd already. POSIX.1 - requires that we treat this condition like normal EOF. */ - if (bytes < 0 && errno == ENOENT) - { - bytes = 0; - __set_errno (saved_errno); - } - if (bytes < 0) - dirp->errcode = errno; - - dp = NULL; - break; - } - dirp->size = (size_t) bytes; + struct dirent64 new_entry; + struct dirent64 *newp; + int ret = __readdir64_r (dirp, &new_entry, &newp); - /* Reset the offset into the buffer. */ - dirp->offset = 0; + if (ret != 0) + return ret; + else if (newp == NULL) + { + *result = NULL; + return 0; } - - dp = (struct __old_dirent64 *) &dirp->data[dirp->offset]; - - reclen = dp->d_reclen; - - dirp->offset += reclen; - - dirp->filepos = dp->d_off; - - if (reclen > offsetof (struct __old_dirent64, d_name) + NAME_MAX + 1) + else { - /* The record is very long. It could still fit into the - caller-supplied buffer if we can skip padding at the - end. 
*/ - size_t namelen = _D_EXACT_NAMLEN (dp); - if (namelen <= NAME_MAX) - reclen = offsetof (struct __old_dirent64, d_name) + namelen + 1; - else + entry->d_ino = newp->d_ino; + if (entry->d_ino != newp->d_ino) { - /* The name is too long. Ignore this file. */ - dirp->errcode = ENAMETOOLONG; - dp->d_ino = 0; + dirp->errcode = EOVERFLOW; continue; } + size_t namelen = strlen (newp->d_name); + entry->d_off = newp->d_off; + entry->d_reclen = (offsetof (struct __old_dirent64, d_name) + + namelen + 1); + entry->d_type = newp->d_type; + memcpy (entry->d_name, newp->d_name, namelen + 1); + *result = entry; + return 0; } - - /* Skip deleted and ignored files. */ } - while (dp->d_ino == 0); - - if (dp != NULL) - { - *result = memcpy (entry, dp, reclen); - entry->d_reclen = reclen; - ret = 0; - } - else - { - *result = NULL; - ret = dirp->errcode; - } - - __libc_lock_unlock (dirp->lock); - - return ret; } compat_symbol (libc, __old_readdir64_r, readdir64_r, GLIBC_2_1); diff --git a/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist b/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist index 6397a9cb91..4187241f50 100644 --- a/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist +++ b/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist @@ -2512,3 +2512,5 @@ GLIBC_2.39 stdc_trailing_zeros_ul F GLIBC_2.39 stdc_trailing_zeros_ull F GLIBC_2.39 stdc_trailing_zeros_us F GLIBC_2.40 __riscv_hwprobe F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F diff --git a/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist b/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist index 71bbf94f66..8935beccac 100644 --- a/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist +++ b/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist @@ -2712,3 +2712,5 @@ GLIBC_2.39 stdc_trailing_zeros_ul F GLIBC_2.39 stdc_trailing_zeros_ull F GLIBC_2.39 stdc_trailing_zeros_us F GLIBC_2.40 __riscv_hwprobe F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F diff --git a/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist 
b/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist index a7467e2850..e69dc7ccf6 100644 --- a/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist +++ b/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist @@ -3136,6 +3136,8 @@ GLIBC_2.4 wcstold F GLIBC_2.4 wcstold_l F GLIBC_2.4 wprintf F GLIBC_2.4 wscanf F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist b/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist index fd1cb2972d..7d860001d8 100644 --- a/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist +++ b/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist @@ -2929,6 +2929,8 @@ GLIBC_2.4 wcstold F GLIBC_2.4 wcstold_l F GLIBC_2.4 wprintf F GLIBC_2.4 wscanf F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/sched_getattr.c b/sysdeps/unix/sysv/linux/sched_getattr.c new file mode 100644 index 0000000000..e6b9970fcb --- /dev/null +++ b/sysdeps/unix/sysv/linux/sched_getattr.c @@ -0,0 +1,27 @@ +/* Reading scheduling policy and attributes. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <sched.h> +#include <sysdep.h> + +int +sched_getattr (pid_t pid, struct sched_attr *attr, unsigned int size, + unsigned int flags) +{ + return INLINE_SYSCALL_CALL (sched_getattr, pid, attr, size, flags); +} diff --git a/sysdeps/unix/sysv/linux/sched_setattr.c b/sysdeps/unix/sysv/linux/sched_setattr.c new file mode 100644 index 0000000000..25403fb454 --- /dev/null +++ b/sysdeps/unix/sysv/linux/sched_setattr.c @@ -0,0 +1,26 @@ +/* Setting scheduling policy and attributes. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <sched.h> +#include <sysdep.h> + +int +sched_setattr (pid_t pid, struct sched_attr *attr, unsigned int flags) +{ + return INLINE_SYSCALL_CALL (sched_setattr, pid, attr, flags); +} diff --git a/sysdeps/unix/sysv/linux/sh/be/libc.abilist b/sysdeps/unix/sysv/linux/sh/be/libc.abilist index ff6e6b1a13..fcb8161841 100644 --- a/sysdeps/unix/sysv/linux/sh/be/libc.abilist +++ b/sysdeps/unix/sysv/linux/sh/be/libc.abilist @@ -2828,6 +2828,8 @@ GLIBC_2.4 sys_errlist D 0x210 GLIBC_2.4 sys_nerr D 0x4 GLIBC_2.4 unlinkat F GLIBC_2.4 unshare F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/sh/le/libc.abilist b/sysdeps/unix/sysv/linux/sh/le/libc.abilist index 449d92bbc5..3fd078d125 100644 --- a/sysdeps/unix/sysv/linux/sh/le/libc.abilist +++ b/sysdeps/unix/sysv/linux/sh/le/libc.abilist @@ -2825,6 +2825,8 @@ GLIBC_2.4 sys_errlist D 0x210 GLIBC_2.4 sys_nerr D 0x4 GLIBC_2.4 unlinkat F GLIBC_2.4 unshare F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S b/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S index c9cf9bb055..c84244f56b 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S @@ -29,7 +29,11 @@ ENTRY (__clone) save %sp,-96,%sp save %sp,-96,%sp +#ifdef __sparcv9 flushw +#else + ta 3 +#endif restore cfi_def_cfa_register(%fp) cfi_window_save diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist b/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist index e615be759a..1ce1fe9da7 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist @@ -3157,6 +3157,8 @@ GLIBC_2.4 wcstold F GLIBC_2.4 wcstold_l F GLIBC_2.4 wprintf F GLIBC_2.4 wscanf F +GLIBC_2.41 sched_getattr F 
+GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/syscall_cancel.S b/sysdeps/unix/sysv/linux/sparc/sparc32/syscall_cancel.S index aa5c658ce1..45c6ae7a86 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc32/syscall_cancel.S +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/syscall_cancel.S @@ -42,15 +42,19 @@ __syscall_cancel_arch_start: __syscall_do_cancel() */ ld [%i0], %g2 andcc %g2, TCB_CANCELED_BITMASK, %g0 +#ifdef __sparcv9 bne,pn %icc, 2f +#else + bne 2f +#endif /* Issue a 6 argument syscall. */ - mov %i1, %g1 - mov %i2, %o0 + mov %i2, %o0 mov %i3, %o1 mov %i4, %o2 mov %i5, %o3 ld [%fp+92], %o4 ld [%fp+96], %o5 + mov %i1, %g1 ta 0x10 .globl __syscall_cancel_arch_end @@ -60,8 +64,13 @@ __syscall_cancel_arch_end: sub %g0, %o0, %o0 1: mov %o0, %i0 +#ifdef __sparcv9 return %i7+8 nop +#else + jmp %i7+8 + restore +#endif 2: call __syscall_do_cancel, 0 diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h b/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h index d2d68f5312..c2ffbb5c8f 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h @@ -107,6 +107,7 @@ ENTRY(name); \ #else /* __ASSEMBLER__ */ #define __SYSCALL_STRING \ + "mov %[scn], %%g1;" \ "ta 0x10;" \ "bcc 1f;" \ " nop;" \ @@ -114,7 +115,7 @@ ENTRY(name); \ "1:" #define __SYSCALL_CLOBBERS \ - "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ + "g1", "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", \ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", \ diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist b/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist index bd36431dd7..07507b86f6 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist @@ -2793,6 
+2793,8 @@ GLIBC_2.4 sys_errlist D 0x430 GLIBC_2.4 sys_nerr D 0x4 GLIBC_2.4 unlinkat F GLIBC_2.4 unshare F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/syscall_cancel.S b/sysdeps/unix/sysv/linux/sparc/sparc64/syscall_cancel.S index 21b0728d5a..6c8d1330cb 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc64/syscall_cancel.S +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/syscall_cancel.S @@ -46,13 +46,13 @@ __syscall_cancel_arch_start: andcc %g2, TCB_CANCELED_BITMASK, %g0 bne,pn %xcc, 2f /* Issue a 6 argument syscall. */ - mov %i1, %g1 - mov %i2, %o0 + mov %i2, %o0 mov %i3, %o1 mov %i4, %o2 mov %i5, %o3 ldx [%fp + STACK_BIAS + 176], %o4 ldx [%fp + STACK_BIAS + 184], %o5 + mov %i1, %g1 ta 0x6d .global __syscall_cancel_arch_end diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h b/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h index 96047424e9..5598fab08a 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h @@ -106,6 +106,7 @@ ENTRY(name); \ #else /* __ASSEMBLER__ */ #define __SYSCALL_STRING \ + "mov %[scn], %%g1;" \ "ta 0x6d;" \ "bcc,pt %%xcc, 1f;" \ " nop;" \ @@ -113,7 +114,7 @@ ENTRY(name); \ "1:" #define __SYSCALL_CLOBBERS \ - "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ + "g1", "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", \ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", \ diff --git a/sysdeps/unix/sysv/linux/sparc/sysdep.h b/sysdeps/unix/sysv/linux/sparc/sysdep.h index dcabb57fe2..c287740a8c 100644 --- a/sysdeps/unix/sysv/linux/sparc/sysdep.h +++ b/sysdeps/unix/sysv/linux/sparc/sysdep.h @@ -50,97 +50,109 @@ #undef INTERNAL_SYSCALL_NCS #define INTERNAL_SYSCALL_NCS(name, nr, args...) 
\ - internal_syscall##nr(__SYSCALL_STRING, name, args) + _internal_syscall##nr(__SYSCALL_STRING, "p", name, args) -#define internal_syscall0(string,name,dummy...) \ +#define _internal_syscall0(string,nc,name,dummy...) \ ({ \ - register long int __g1 __asm__ ("g1") = (name); \ register long __o0 __asm__ ("o0"); \ + long int _name = (long int) (name); \ __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1) : \ + [scn] nc (_name) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall0(string,name,args...) \ + _internal_syscall0(string, "i", name, args) -#define internal_syscall1(string,name,arg1) \ +#define _internal_syscall1(string,nc,name,arg1) \ ({ \ long int _arg1 = (long int) (arg1); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0) : \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall1(string,name,args...) \ + _internal_syscall1(string, "i", name, args) -#define internal_syscall2(string,name,arg1,arg2) \ +#define _internal_syscall2(string,nc,name,arg1,arg2) \ ({ \ long int _arg1 = (long int) (arg1); \ long int _arg2 = (long int) (arg2); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0), "r" (__o1) : \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name), "r" (__o1) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall2(string,name,args...) 
\ + _internal_syscall2(string, "i", name, args) -#define internal_syscall3(string,name,arg1,arg2,arg3) \ +#define _internal_syscall3(string,nc,name,arg1,arg2,arg3) \ ({ \ long int _arg1 = (long int) (arg1); \ long int _arg2 = (long int) (arg2); \ long int _arg3 = (long int) (arg3); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ register long int __o2 __asm__ ("o2") = _arg3; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0), "r" (__o1), \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name), "r" (__o1), \ "r" (__o2) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall3(string,name,args...) \ + _internal_syscall3(string, "i", name, args) -#define internal_syscall4(string,name,arg1,arg2,arg3,arg4) \ +#define _internal_syscall4(string,nc,name,arg1,arg2,arg3,arg4) \ ({ \ long int _arg1 = (long int) (arg1); \ long int _arg2 = (long int) (arg2); \ long int _arg3 = (long int) (arg3); \ long int _arg4 = (long int) (arg4); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ register long int __o2 __asm__ ("o2") = _arg3; \ register long int __o3 __asm__ ("o3") = _arg4; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0), "r" (__o1), \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name), "r" (__o1), \ "r" (__o2), "r" (__o3) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall4(string,name,args...) 
\ + _internal_syscall4(string, "i", name, args) -#define internal_syscall5(string,name,arg1,arg2,arg3,arg4,arg5) \ +#define _internal_syscall5(string,nc,name,arg1,arg2,arg3,arg4,arg5) \ ({ \ long int _arg1 = (long int) (arg1); \ long int _arg2 = (long int) (arg2); \ long int _arg3 = (long int) (arg3); \ long int _arg4 = (long int) (arg4); \ long int _arg5 = (long int) (arg5); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ register long int __o2 __asm__ ("o2") = _arg3; \ register long int __o3 __asm__ ("o3") = _arg4; \ register long int __o4 __asm__ ("o4") = _arg5; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0), "r" (__o1), \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name), "r" (__o1), \ "r" (__o2), "r" (__o3), "r" (__o4) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall5(string,name,args...) \ + _internal_syscall5(string, "i", name, args) -#define internal_syscall6(string,name,arg1,arg2,arg3,arg4,arg5,arg6) \ +#define _internal_syscall6(string,nc,name,arg1,arg2,arg3,arg4,arg5,arg6)\ ({ \ long int _arg1 = (long int) (arg1); \ long int _arg2 = (long int) (arg2); \ @@ -148,20 +160,22 @@ long int _arg4 = (long int) (arg4); \ long int _arg5 = (long int) (arg5); \ long int _arg6 = (long int) (arg6); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ register long int __o2 __asm__ ("o2") = _arg3; \ register long int __o3 __asm__ ("o3") = _arg4; \ register long int __o4 __asm__ ("o4") = _arg5; \ register long int __o5 __asm__ ("o5") = _arg6; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0), "r" (__o1), \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name), "r" (__o1), \ "r" (__o2), "r" (__o3), "r" (__o4), 
\ "r" (__o5) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall6(string,name,args...) \ + _internal_syscall6(string, "i", name, args) #define INLINE_CLONE_SYSCALL(arg1,arg2,arg3,arg4,arg5) \ ({ \ @@ -170,15 +184,15 @@ long int _arg3 = (long int) (arg3); \ long int _arg4 = (long int) (arg4); \ long int _arg5 = (long int) (arg5); \ + long int _name = __NR_clone; \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ register long int __o2 __asm__ ("o2") = _arg3; \ register long int __o3 __asm__ ("o3") = _arg4; \ register long int __o4 __asm__ ("o4") = _arg5; \ - register long int __g1 __asm__ ("g1") = __NR_clone; \ __asm __volatile (__SYSCALL_STRING : \ "=r" (__o0), "=r" (__o1) : \ - "r" (__g1), "0" (__o0), "1" (__o1), \ + [scn] "i" (_name), "0" (__o0), "1" (__o1), \ "r" (__o2), "r" (__o3), "r" (__o4) : \ __SYSCALL_CLOBBERS); \ if (__glibc_unlikely ((unsigned long int) (__o0) > -4096UL)) \ diff --git a/sysdeps/unix/sysv/linux/spawni.c b/sysdeps/unix/sysv/linux/spawni.c index f57e92815e..1edf62c6d7 100644 --- a/sysdeps/unix/sysv/linux/spawni.c +++ b/sysdeps/unix/sysv/linux/spawni.c @@ -383,7 +383,11 @@ __spawnix (int *pid, const char *file, args.pidfd = 0; args.xflags = xflags; - internal_signal_block_all (&args.oldmask); + /* Avoid the potential issues if caller sets a SIG_IGN for SIGABRT, calls + abort, and another thread issues posix_spawn just after the sigaction + returns. With default options (not setting POSIX_SPAWN_SETSIGDEF), the + process can still see SIG_DFL for SIGABRT, where it should be SIG_IGN. */ + __abort_lock_rdlock (&args.oldmask); /* The clone flags used will create a new child that will run in the same memory space (CLONE_VM) and the execution of calling thread will be @@ -474,7 +478,7 @@ __spawnix (int *pid, const char *file, if ((ec == 0) && (pid != NULL)) *pid = use_pidfd ? 
args.pidfd : new_pid; - internal_signal_restore_set (&args.oldmask); + __abort_lock_unlock (&args.oldmask); __pthread_setcancelstate (state, NULL); diff --git a/sysdeps/unix/sysv/linux/sys/pidfd.h b/sysdeps/unix/sysv/linux/sys/pidfd.h index 9f88d297e8..85d976939b 100644 --- a/sysdeps/unix/sysv/linux/sys/pidfd.h +++ b/sysdeps/unix/sysv/linux/sys/pidfd.h @@ -20,6 +20,7 @@ #include <fcntl.h> #include <bits/types/siginfo_t.h> +#include <sys/ioctl.h> #define PIDFD_NONBLOCK O_NONBLOCK #define PIDFD_THREAD O_EXCL @@ -28,6 +29,19 @@ #define PIDFD_SIGNAL_THREAD_GROUP (1UL << 1) #define PIDFD_SIGNAL_PROCESS_GROUP (1UL << 2) +#define PIDFS_IOCTL_MAGIC 0xFF + +#define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1) +#define PIDFD_GET_IPC_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 2) +#define PIDFD_GET_MNT_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 3) +#define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4) +#define PIDFD_GET_PID_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 5) +#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 6) +#define PIDFD_GET_TIME_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 7) +#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8) +#define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9) +#define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10) + /* Returns a file descriptor that refers to the process PID. The close-on-exec is set on the file descriptor. */ extern int pidfd_open (__pid_t __pid, unsigned int __flags) __THROW; diff --git a/sysdeps/unix/sysv/linux/syscall-names.list b/sysdeps/unix/sysv/linux/syscall-names.list index 7871f93b94..aa5b479e2a 100644 --- a/sysdeps/unix/sysv/linux/syscall-names.list +++ b/sysdeps/unix/sysv/linux/syscall-names.list @@ -21,8 +21,8 @@ # This file can list all potential system calls. The names are only # used if the installed kernel headers also provide them. -# The list of system calls is current as of Linux 6.10. -kernel 6.10 +# The list of system calls is current as of Linux 6.11. 
+kernel 6.11 FAST_atomic_update FAST_cmpxchg @@ -653,6 +653,7 @@ uname unlink unlinkat unshare +uretprobe uselib userfaultfd usr26 diff --git a/sysdeps/unix/sysv/linux/syscalls.list b/sysdeps/unix/sysv/linux/syscalls.list index 9ac42c3436..f1cfe8dc13 100644 --- a/sysdeps/unix/sysv/linux/syscalls.list +++ b/sysdeps/unix/sysv/linux/syscalls.list @@ -102,8 +102,8 @@ name_to_handle_at EXTRA name_to_handle_at i:isppi name_to_handle_at setns EXTRA setns i:ii setns memfd_create EXTRA memfd_create i:si memfd_create -pkey_alloc EXTRA pkey_alloc i:ii pkey_alloc -pkey_free EXTRA pkey_free i:i pkey_free +pkey_alloc EXTRA pkey_alloc i:ii __pkey_alloc pkey_alloc +pkey_free EXTRA pkey_free i:i __pkey_free pkey_free gettid EXTRA gettid Ei: __gettid gettid tgkill EXTRA tgkill i:iii __tgkill tgkill close_range - close_range i:iii __close_range close_range diff --git a/sysdeps/unix/sysv/linux/time.c b/sysdeps/unix/sysv/linux/time.c index f8b0cee41c..a56ef6fa30 100644 --- a/sysdeps/unix/sysv/linux/time.c +++ b/sysdeps/unix/sysv/linux/time.c @@ -33,11 +33,10 @@ time_syscall (time_t *t) } # undef INIT_ARCH -# define INIT_ARCH() \ - void *vdso_time = dl_vdso_vsym (HAVE_TIME_VSYSCALL); +# define INIT_ARCH() libc_ifunc (time, - vdso_time ? VDSO_IFUNC_RET (vdso_time) - : (void *) time_syscall); + GLRO(dl_vdso_time) != NULL ? 
VDSO_IFUNC_RET (GLRO(dl_vdso_time)) + : (void *) time_syscall); # else time_t diff --git a/sysdeps/unix/sysv/linux/tst-mman-consts.py b/sysdeps/unix/sysv/linux/tst-mman-consts.py index a1137eb1d5..2191bea36f 100644 --- a/sysdeps/unix/sysv/linux/tst-mman-consts.py +++ b/sysdeps/unix/sysv/linux/tst-mman-consts.py @@ -33,7 +33,7 @@ def main(): help='C compiler (including options) to use') args = parser.parse_args() linux_version_headers = glibcsyscalls.linux_kernel_version(args.cc) - linux_version_glibc = (6, 10) + linux_version_glibc = (6, 11) sys.exit(glibcextract.compare_macro_consts( '#define _GNU_SOURCE 1\n' '#include <sys/mman.h>\n', diff --git a/sysdeps/unix/sysv/linux/tst-mount-consts.py b/sysdeps/unix/sysv/linux/tst-mount-consts.py index 675f1790b6..b71d8a489d 100755 --- a/sysdeps/unix/sysv/linux/tst-mount-consts.py +++ b/sysdeps/unix/sysv/linux/tst-mount-consts.py @@ -42,7 +42,7 @@ def main(): # Constants in glibc were updated to match Linux v6.10. When glibc # constants are updated this value should be updated to match the # released kernel version from which the constants were taken. 
- linux_version_glibc = (6, 10) + linux_version_glibc = (6, 11) def check(cte, exclude=None): return glibcextract.compare_macro_consts( '#include <sys/mount.h>\n', diff --git a/sysdeps/unix/sysv/linux/tst-pidfd-consts.py b/sysdeps/unix/sysv/linux/tst-pidfd-consts.py index 9824fd214d..9d53102cbd 100644 --- a/sysdeps/unix/sysv/linux/tst-pidfd-consts.py +++ b/sysdeps/unix/sysv/linux/tst-pidfd-consts.py @@ -39,7 +39,7 @@ def main(): sys.exit (77) linux_version_headers = glibcsyscalls.linux_kernel_version(args.cc) - linux_version_glibc = (6, 10) + linux_version_glibc = (6, 11) sys.exit(glibcextract.compare_macro_consts( '#include <sys/pidfd.h>\n', '#include <asm/fcntl.h>\n' diff --git a/sysdeps/unix/sysv/linux/tst-sched-consts.py b/sysdeps/unix/sysv/linux/tst-sched-consts.py new file mode 100644 index 0000000000..f06ac400bd --- /dev/null +++ b/sysdeps/unix/sysv/linux/tst-sched-consts.py @@ -0,0 +1,56 @@ +#!/usr/bin/python3 +# Test that glibc's sched.h constants match the kernel's. +# Copyright (C) 2018-2024 Free Software Foundation, Inc. +# This file is part of the GNU C Library. +# +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# <https://www.gnu.org/licenses/>. 
+ +import argparse +import sys + +import glibcextract +import glibcsyscalls + + +def main(): + """The main entry point.""" + parser = argparse.ArgumentParser( + description="Test that glibc's sched.h constants " + "match the kernel's.") + parser.add_argument('--cc', metavar='CC', + help='C compiler (including options) to use') + args = parser.parse_args() + linux_version_headers = glibcsyscalls.linux_kernel_version(args.cc) + linux_version_glibc = (6, 11) + sys.exit(glibcextract.compare_macro_consts( + '#define _GNU_SOURCE 1\n' + '#include <sched.h>\n', + '#define _GNU_SOURCE 1\n' + '#include <linux/sched.h>\n' + '#include <linux/sched/types.h>\n', + args.cc, + 'SCHED_.*', + # SCHED_ISO is reserved, but not implemented in the kernel. + # SCHED_OTHER is the standard name for SCHED_NORMAL. + # SCHED_FLAG_ALL will receive more and more flags, so + # exposing it to userspace does not seem useful. + 'SCHED_ISO' + '|SCHED_OTHER' + '|SCHED_FLAG_ALL', + linux_version_glibc > linux_version_headers, + linux_version_headers > linux_version_glibc)) + +if __name__ == '__main__': + main() diff --git a/sysdeps/unix/sysv/linux/tst-sched_setattr.c b/sysdeps/unix/sysv/linux/tst-sched_setattr.c new file mode 100644 index 0000000000..fbb73c31a7 --- /dev/null +++ b/sysdeps/unix/sysv/linux/tst-sched_setattr.c @@ -0,0 +1,107 @@ +/* Tests for sched_setattr and sched_getattr. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <sched.h> + +#include <errno.h> +#include <stddef.h> +#include <string.h> +#include <support/check.h> +#include <sys/resource.h> +#include <unistd.h> + +/* Padding struct to detect unexpected writes. */ +union +{ + struct sched_attr attr; + /* Hopefully the kernel will never need as much. */ + unsigned char padding[4096]; +} u; + +static void +check_unused (void) +{ + TEST_VERIFY (u.attr.size < sizeof (u)); + for (unsigned int i = u.attr.size; i < sizeof (u); ++i) + TEST_COMPARE (u.padding[i], 0xcc); +} + +static int +do_test (void) +{ + _Static_assert (SCHED_OTHER == SCHED_NORMAL, + "SCHED_OTHER, SCHED_NORMAL values"); + TEST_VERIFY (sizeof (struct sched_attr) < sizeof (u)); + + /* Check that reading and re-applying the current policy works. */ + memset (&u, 0xcc, sizeof (u)); + /* Compiler barrier to bypass write access attribute. */ + volatile unsigned int size = sizeof (u); + TEST_COMPARE (sched_getattr (0, (struct sched_attr *) &u, size, 0), 0); + check_unused (); + TEST_COMPARE (sched_setattr (0, &u.attr, 0), 0); /* Apply unchanged. */ + + /* Try to switch to the SCHED_OTHER policy. */ + memset (&u, 0, sizeof (u)); + u.attr.size = sizeof (u); /* With padding, kernel should accept zeroes. */ + u.attr.sched_policy = SCHED_OTHER; /* Should be the default. */ + { + errno = 0; + int prio = getpriority (PRIO_PROCESS, 0); + if (errno != 0) + prio = 0; + u.attr.sched_nice = prio; + } + TEST_COMPARE (sched_setattr (0, &u.attr, 0), 0); + + /* Non-zero values not known to the kernel result in an E2BIG error. 
*/ + memset (&u, 0, sizeof (u)); + TEST_COMPARE (sched_getattr (0, (struct sched_attr *) &u, size, 0), 0); + u.padding[u.attr.size] = 0xcc; + u.attr.size = sizeof (u); + errno = 0; + TEST_COMPARE (sched_setattr (0, &u.attr, 0), -1); + TEST_COMPARE (errno, E2BIG); + + memset (&u, 0xcc, sizeof (u)); + TEST_COMPARE (sched_getattr (0, (struct sched_attr *) &u, size, 0), 0); + TEST_COMPARE (u.attr.sched_policy, SCHED_OTHER); + check_unused (); + + /* Raise the niceless level to 19 and observe its effect. */ + TEST_COMPARE (nice (19), 19); + TEST_COMPARE (sched_getattr (0, &u.attr, sizeof (u.attr), 0), 0); + TEST_COMPARE (u.attr.sched_policy, SCHED_OTHER); + TEST_COMPARE (u.attr.sched_nice, 19); + check_unused (); + + /* Invalid buffer arguments result in EINVAL (not EFAULT). */ + { + errno = 0; + void *volatile null_pointer = NULL; /* compiler barrier. */ + TEST_COMPARE (sched_setattr (0, null_pointer, 0), -1); + TEST_COMPARE (errno, EINVAL); + errno = 0; + TEST_COMPARE (sched_getattr (0, null_pointer, size, 0), -1); + TEST_COMPARE (errno, EINVAL); + } + + return 0; +} + +#include <support/test-driver.c> diff --git a/sysdeps/unix/sysv/linux/tst-syscall-restart.c b/sysdeps/unix/sysv/linux/tst-syscall-restart.c new file mode 100644 index 0000000000..0ee7dc8517 --- /dev/null +++ b/sysdeps/unix/sysv/linux/tst-syscall-restart.c @@ -0,0 +1,112 @@ +/* Test if a syscall is correctly restarted. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <support/xsignal.h> +#include <support/check.h> +#include <support/process_state.h> +#include <support/xunistd.h> +#include <support/xthread.h> +#include <sys/wait.h> + +static int +check_pid (pid_t pid) +{ + /* Wait until the child has called pause and it blocking on kernel. */ + support_process_state_wait (pid, support_process_state_sleeping); + + TEST_COMPARE (kill (pid, SIGSTOP), 0); + + /* Adding process_state_tracing_stop ('t') allows the test to work under + trace programs such as ptrace. */ + support_process_state_wait (pid, support_process_state_stopped + | support_process_state_tracing_stop); + + TEST_COMPARE (kill (pid, SIGCONT), 0); + + enum support_process_state state + = support_process_state_wait (pid, support_process_state_sleeping + | support_process_state_zombie); + + TEST_COMPARE (state, support_process_state_sleeping); + + TEST_COMPARE (kill (pid, SIGTERM), 0); + + siginfo_t info; + TEST_COMPARE (waitid (P_PID, pid, &info, WEXITED), 0); + TEST_COMPARE (info.si_signo, SIGCHLD); + TEST_COMPARE (info.si_code, CLD_KILLED); + TEST_COMPARE (info.si_status, SIGTERM); + TEST_COMPARE (info.si_pid, pid); + + return 0; +} + +static void * +tf (void *closure) +{ + pause (); + return NULL; +} + +static void +child_mt (void) +{ + /* Let only the created thread to handle signals. 
*/ + sigset_t set; + sigfillset (&set); + xpthread_sigmask (SIG_BLOCK, &set, NULL); + + sigdelset (&set, SIGSTOP); + sigdelset (&set, SIGCONT); + sigdelset (&set, SIGTERM); + + pthread_attr_t attr; + xpthread_attr_init (&attr); + TEST_COMPARE (pthread_attr_setsigmask_np (&attr, &set), 0); + + xpthread_join (xpthread_create (&attr, tf, NULL)); +} + +static void +do_test_syscall (bool multithread) +{ + pid_t pid = xfork (); + if (pid == 0) + { + if (multithread) + child_mt (); + else + pause (); + _exit (127); + } + + check_pid (pid); +} + +static int +do_test (void) +{ + /* Check for both single and multi thread, since they use different syscall + mechanisms. */ + do_test_syscall (false); + do_test_syscall (true); + + return 0; +} + +#include <support/test-driver.c> diff --git a/sysdeps/unix/sysv/linux/x86/pkey_get.c b/sysdeps/unix/sysv/linux/x86/pkey_get.c index eb988a5c45..0fb52ba18c 100644 --- a/sysdeps/unix/sysv/linux/x86/pkey_get.c +++ b/sysdeps/unix/sysv/linux/x86/pkey_get.c @@ -18,9 +18,10 @@ #include <arch-pkey.h> #include <errno.h> +#include <sys/mman.h> int -pkey_get (int key) +__pkey_get (int key) { if (key < 0 || key > 15) { @@ -31,3 +32,5 @@ pkey_get (int key) return (pkru >> (2 * key)) & 3; return 0; } +libc_hidden_def (__pkey_get) +weak_alias (__pkey_get, pkey_get) diff --git a/sysdeps/unix/sysv/linux/x86/pkey_set.c b/sysdeps/unix/sysv/linux/x86/pkey_set.c index 3fd2c95580..51e3d68d92 100644 --- a/sysdeps/unix/sysv/linux/x86/pkey_set.c +++ b/sysdeps/unix/sysv/linux/x86/pkey_set.c @@ -18,9 +18,10 @@ #include <arch-pkey.h> #include <errno.h> +#include <sys/mman.h> int -pkey_set (int key, unsigned int rights) +__pkey_set (int key, unsigned int rights) { if (key < 0 || key > 15 || rights > 3) { @@ -33,3 +34,5 @@ pkey_set (int key, unsigned int rights) pkey_write (pkru); return 0; } +libc_hidden_def (__pkey_set) +weak_alias (__pkey_set, pkey_set) diff --git a/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h 
b/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h index 5d86e75dd5..dfc10d0c7e 100644 --- a/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h @@ -359,6 +359,7 @@ #define __NR_unlink 87 #define __NR_unlinkat 263 #define __NR_unshare 272 +#define __NR_uretprobe 335 #define __NR_uselib 134 #define __NR_userfaultfd 323 #define __NR_ustat 136 diff --git a/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist b/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist index aea7848ed6..5acf49dbe8 100644 --- a/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist +++ b/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist @@ -2744,6 +2744,8 @@ GLIBC_2.4 sys_errlist D 0x420 GLIBC_2.4 sys_nerr D 0x4 GLIBC_2.4 unlinkat F GLIBC_2.4 unshare F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h b/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h index 6c35068d8e..9340daa967 100644 --- a/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h @@ -351,6 +351,7 @@ #define __NR_unlink 1073741911 #define __NR_unlinkat 1073742087 #define __NR_unshare 1073742096 +#define __NR_uretprobe 1073742159 #define __NR_userfaultfd 1073742147 #define __NR_ustat 1073741960 #define __NR_utime 1073741956 diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist b/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist index 4ab3681914..02d1bb97dc 100644 --- a/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist +++ b/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist @@ -2763,3 +2763,5 @@ GLIBC_2.39 stdc_trailing_zeros_ui F GLIBC_2.39 stdc_trailing_zeros_ul F GLIBC_2.39 stdc_trailing_zeros_ull F GLIBC_2.39 stdc_trailing_zeros_us F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps index 
c2e36dcbdf..e3c811549c 100644 --- a/sysdeps/x86_64/fpu/libm-test-ulps +++ b/sysdeps/x86_64/fpu/libm-test-ulps @@ -1062,6 +1062,7 @@ float128: 1 ldouble: 1 Function: Imaginary part of "csin": +float: 1 float128: 1 Function: Real part of "csin_downward": @@ -1431,25 +1432,21 @@ float: 1 Function: "exp10m1": double: 4 -float: 2 float128: 3 ldouble: 4 Function: "exp10m1_downward": double: 3 -float: 3 float128: 6 ldouble: 6 Function: "exp10m1_towardzero": double: 2 -float: 3 float128: 6 ldouble: 5 Function: "exp10m1_upward": double: 5 -float: 4 float128: 6 ldouble: 6 @@ -1499,25 +1496,21 @@ float: 1 Function: "exp2m1": double: 2 -float: 2 float128: 2 ldouble: 3 Function: "exp2m1_downward": double: 3 -float: 3 float128: 3 ldouble: 6 Function: "exp2m1_towardzero": double: 3 -float: 2 float128: 4 ldouble: 5 Function: "exp2m1_upward": double: 3 -float: 3 float128: 5 ldouble: 6 @@ -1558,25 +1551,21 @@ float: 1 Function: "expm1": double: 1 -float: 1 float128: 2 ldouble: 3 Function: "expm1_downward": double: 1 -float: 1 float128: 2 ldouble: 4 Function: "expm1_towardzero": double: 1 -float: 2 float128: 4 ldouble: 4 Function: "expm1_upward": double: 1 -float: 1 float128: 3 ldouble: 4 @@ -1809,49 +1798,41 @@ float: 1 Function: "log10p1": double: 2 -float: 2 float128: 3 ldouble: 4 Function: "log10p1_downward": double: 2 -float: 3 float128: 4 ldouble: 8 Function: "log10p1_towardzero": double: 3 -float: 2 float128: 3 ldouble: 8 Function: "log10p1_upward": double: 2 -float: 3 float128: 4 ldouble: 6 Function: "log1p": double: 1 -float: 1 float128: 3 ldouble: 2 Function: "log1p_downward": double: 2 -float: 2 float128: 3 ldouble: 4 Function: "log1p_towardzero": double: 2 -float: 2 float128: 3 ldouble: 4 Function: "log1p_upward": double: 2 -float: 2 float128: 2 ldouble: 3 @@ -1921,25 +1902,21 @@ float: 1 Function: "log2p1": double: 2 -float: 2 float128: 3 ldouble: 4 Function: "log2p1_downward": double: 2 -float: 2 float128: 3 ldouble: 4 Function: "log2p1_towardzero": double: 2 -float: 2 
float128: 2 ldouble: 4 Function: "log2p1_upward": double: 2 -float: 2 float128: 3 ldouble: 5 @@ -1981,25 +1958,21 @@ float: 3 Function: "logp1": double: 1 -float: 1 float128: 3 ldouble: 2 Function: "logp1_downward": double: 2 -float: 2 float128: 3 ldouble: 4 Function: "logp1_towardzero": double: 2 -float: 2 float128: 3 ldouble: 4 Function: "logp1_upward": double: 2 -float: 2 float128: 2 ldouble: 3 @@ -2263,25 +2236,21 @@ double: 1 Function: "tgamma": double: 9 -float: 8 float128: 4 ldouble: 5 Function: "tgamma_downward": double: 9 -float: 7 float128: 5 ldouble: 6 Function: "tgamma_towardzero": double: 9 -float: 7 float128: 5 ldouble: 6 Function: "tgamma_upward": double: 9 -float: 8 float128: 4 ldouble: 5 diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile index cbe09d49f4..e823d2fcc6 100644 --- a/sysdeps/x86_64/fpu/multiarch/Makefile +++ b/sysdeps/x86_64/fpu/multiarch/Makefile @@ -11,6 +11,8 @@ CFLAGS-s_log1p-fma.c = -mfma -mavx2 CFLAGS-s_sin-fma.c = -mfma -mavx2 CFLAGS-s_tan-fma.c = -mfma -mavx2 CFLAGS-s_sincos-fma.c = -mfma -mavx2 +CFLAGS-s_exp10m1f-fma.c = -mfma -mavx2 +CFLAGS-s_exp2m1f-fma.c = -mfma -mavx2 CFLAGS-e_exp2f-fma.c = -mfma -mavx2 CFLAGS-e_expf-fma.c = -mfma -mavx2 @@ -72,6 +74,8 @@ libm-sysdep_routines += \ s_ceilf-sse4_1 \ s_cosf-fma \ s_cosf-sse2 \ + s_exp10m1f-fma \ + s_exp2m1f-fma \ s_expm1-fma \ s_floor-sse4_1 \ s_floorf-sse4_1 \ diff --git a/sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c b/sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c new file mode 100644 index 0000000000..3dda04e2dd --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c @@ -0,0 +1,4 @@ +#define __exp10m1f __exp10m1f_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/flt-32/s_exp10m1f.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c b/sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c new file mode 100644 index 0000000000..8040b7ed79 --- /dev/null +++ 
b/sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c @@ -0,0 +1,33 @@ +/* Multiple versions of exp10m1. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <sysdeps/x86/isa-level.h> +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL +# include <libm-alias-float.h> + +extern float __redirect_exp10m1f (float); + +# define SYMBOL_NAME exp10m1f +# include "ifunc-fma.h" + +libc_ifunc_redirected (__redirect_exp10m1f, __exp10m1f, IFUNC_SELECTOR ()); +libm_alias_float (__exp10m1, exp10m1) + +# define __exp10m1f __exp10m1f_sse2 +#endif +#include <sysdeps/ieee754/flt-32/s_exp10m1f.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_exp2m1f-fma.c b/sysdeps/x86_64/fpu/multiarch/s_exp2m1f-fma.c new file mode 100644 index 0000000000..bfa00eae4e --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_exp2m1f-fma.c @@ -0,0 +1,4 @@ +#define __exp2m1f __exp2m1f_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/flt-32/s_exp2m1f.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_exp2m1f.c b/sysdeps/x86_64/fpu/multiarch/s_exp2m1f.c new file mode 100644 index 0000000000..8e2d7ec384 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_exp2m1f.c @@ -0,0 +1,33 @@ +/* Multiple versions of exp2m1. + Copyright (C) 2024 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <sysdeps/x86/isa-level.h> +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL +# include <libm-alias-float.h> + +extern float __redirect_exp2m1f (float); + +# define SYMBOL_NAME exp2m1f +# include "ifunc-fma.h" + +libc_ifunc_redirected (__redirect_exp2m1f, __exp2m1f, IFUNC_SELECTOR ()); +libm_alias_float (__exp2m1, exp2m1) + +# define __exp2m1f __exp2m1f_sse2 +#endif +#include <sysdeps/ieee754/flt-32/s_exp2m1f.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_fma.c b/sysdeps/x86_64/fpu/multiarch/s_fma.c index 10e6e625b3..92409a2779 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_fma.c +++ b/sysdeps/x86_64/fpu/multiarch/s_fma.c @@ -9,7 +9,7 @@ The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public diff --git a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c index 85725e29c0..b5b63fd125 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c +++ b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c @@ -9,7 +9,7 @@ The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public diff --git a/sysdeps/x86_64/multiarch/strcmp-evex.S b/sysdeps/x86_64/multiarch/strcmp-evex.S index 06730ab2a1..cea034f394 100644 --- a/sysdeps/x86_64/multiarch/strcmp-evex.S +++ b/sysdeps/x86_64/multiarch/strcmp-evex.S @@ -209,7 +209,9 @@ returned. */ .section SECTION(.text), "ax", @progbits - .align 16 + /* Align 64 bytes here. This is to get the L(loop) block ideally + aligned for the DSB. */ + .align 64 .type STRCMP, @function .globl STRCMP # ifdef USE_AS_STRCASECMP_L @@ -509,9 +511,7 @@ L(ret4): ret # endif - /* 32 byte align here ensures the main loop is ideally aligned - for DSB. */ - .p2align 5 + .p2align 4,, 4 L(more_3x_vec): /* Safe to compare 4x vectors. */ VMOVU (VEC_SIZE)(%rdi), %VMM(0) @@ -1426,10 +1426,9 @@ L(less_32_till_page): L(ret_zero_page_cross_slow_case0): xorl %eax, %eax ret -# endif - - +# else .p2align 4,, 10 +# endif L(less_16_till_page): cmpl $((VEC_SIZE - 8) / SIZE_OF_CHAR), %eax ja L(less_8_till_page) @@ -1482,8 +1481,12 @@ L(less_16_till_page): # endif jmp L(prepare_loop_aligned) - - +# ifndef USE_AS_STRNCMP + /* Fits in aligning bytes. 
*/ +L(ret_zero_4_loop): + xorl %eax, %eax + ret +# endif .p2align 4,, 10 L(less_8_till_page): @@ -1554,6 +1557,7 @@ L(ret_less_8_wcs): # ifdef USE_AS_STRNCMP .p2align 4,, 2 +L(ret_zero_4_loop): L(ret_zero_page_cross_slow_case1): xorl %eax, %eax ret @@ -1586,10 +1590,6 @@ L(less_4_loop): subq $-(CHAR_PER_VEC * 4), %rdx # endif jmp L(prepare_loop_aligned) - -L(ret_zero_4_loop): - xorl %eax, %eax - ret L(ret_less_4_loop): xorl %r8d, %eax subl %r8d, %eax |