diff options
Diffstat (limited to 'sysdeps')
279 files changed, 5908 insertions, 3107 deletions
diff --git a/sysdeps/aarch64/fpu/Versions b/sysdeps/aarch64/fpu/Versions index cc15ce2d1e..015211f5f4 100644 --- a/sysdeps/aarch64/fpu/Versions +++ b/sysdeps/aarch64/fpu/Versions @@ -135,4 +135,11 @@ libmvec { _ZGVsMxv_tanh; _ZGVsMxv_tanhf; } + GLIBC_2.41 { + _ZGVnN2v_logp1; + _ZGVnN2v_logp1f; + _ZGVnN4v_logp1f; + _ZGVsMxv_logp1; + _ZGVsMxv_logp1f; + } } diff --git a/sysdeps/aarch64/fpu/acoshf_advsimd.c b/sysdeps/aarch64/fpu/acoshf_advsimd.c index 8916dcbf40..004474acf9 100644 --- a/sysdeps/aarch64/fpu/acoshf_advsimd.c +++ b/sysdeps/aarch64/fpu/acoshf_advsimd.c @@ -25,35 +25,32 @@ const static struct data { struct v_log1pf_data log1pf_consts; uint32x4_t one; - uint16x4_t thresh; -} data = { - .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE, - .one = V4 (0x3f800000), - .thresh = V4 (0x2000) /* top(asuint(SquareLim) - asuint(1)). */ -}; +} data = { .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE, .one = V4 (0x3f800000) }; + +#define Thresh vdup_n_u16 (0x2000) /* top(asuint(SquareLim) - asuint(1)). */ static float32x4_t NOINLINE VPCS_ATTR special_case (float32x4_t x, float32x4_t y, uint16x4_t special, - const struct v_log1pf_data d) + const struct v_log1pf_data *d) { return v_call_f32 (acoshf, x, log1pf_inline (y, d), vmovl_u16 (special)); } /* Vector approximation for single-precision acosh, based on log1p. Maximum error depends on WANT_SIMD_EXCEPT. With SIMD fp exceptions enabled, it - is 2.78 ULP: - __v_acoshf(0x1.07887p+0) got 0x1.ef9e9cp-3 - want 0x1.ef9ea2p-3. + is 3.00 ULP: + _ZGVnN4v_acoshf(0x1.01df3ap+0) got 0x1.ef0a82p-4 + want 0x1.ef0a7cp-4. With exceptions disabled, we can compute u with a shorter dependency chain, - which gives maximum error of 3.07 ULP: - __v_acoshf(0x1.01f83ep+0) got 0x1.fbc7fap-4 - want 0x1.fbc7f4p-4. */ + which gives maximum error of 3.22 ULP: + _ZGVnN4v_acoshf(0x1.007ef2p+0) got 0x1.fdcdccp-5 + want 0x1.fdcdd2p-5. 
*/ VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (acosh) (float32x4_t x) { const struct data *d = ptr_barrier (&data); uint32x4_t ix = vreinterpretq_u32_f32 (x); - uint16x4_t special = vcge_u16 (vsubhn_u32 (ix, d->one), d->thresh); + uint16x4_t special = vcge_u16 (vsubhn_u32 (ix, d->one), Thresh); #if WANT_SIMD_EXCEPT /* Mask special lanes with 1 to side-step spurious invalid or overflow. Use @@ -64,15 +61,16 @@ VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (acosh) (float32x4_t x) float32x4_t xm1 = v_zerofy_f32 (vsubq_f32 (x, v_f32 (1)), p); float32x4_t u = vfmaq_f32 (vaddq_f32 (xm1, xm1), xm1, xm1); #else - float32x4_t xm1 = vsubq_f32 (x, v_f32 (1)); - float32x4_t u = vmulq_f32 (xm1, vaddq_f32 (x, v_f32 (1.0f))); + float32x4_t xm1 = vsubq_f32 (x, vreinterpretq_f32_u32 (d->one)); + float32x4_t u + = vmulq_f32 (xm1, vaddq_f32 (x, vreinterpretq_f32_u32 (d->one))); #endif float32x4_t y = vaddq_f32 (xm1, vsqrtq_f32 (u)); if (__glibc_unlikely (v_any_u16h (special))) - return special_case (x, y, special, d->log1pf_consts); - return log1pf_inline (y, d->log1pf_consts); + return special_case (x, y, special, &d->log1pf_consts); + return log1pf_inline (y, &d->log1pf_consts); } libmvec_hidden_def (V_NAME_F1 (acosh)) HALF_WIDTH_ALIAS_F1 (acosh) diff --git a/sysdeps/aarch64/fpu/advsimd_f32_protos.h b/sysdeps/aarch64/fpu/advsimd_f32_protos.h index 097d403ffe..5909bb4ce9 100644 --- a/sysdeps/aarch64/fpu/advsimd_f32_protos.h +++ b/sysdeps/aarch64/fpu/advsimd_f32_protos.h @@ -36,6 +36,7 @@ libmvec_hidden_proto (V_NAME_F2(hypot)); libmvec_hidden_proto (V_NAME_F1(log10)); libmvec_hidden_proto (V_NAME_F1(log1p)); libmvec_hidden_proto (V_NAME_F1(log2)); +libmvec_hidden_proto (V_NAME_F1(logp1)); libmvec_hidden_proto (V_NAME_F1(log)); libmvec_hidden_proto (V_NAME_F2(pow)); libmvec_hidden_proto (V_NAME_F1(sin)); diff --git a/sysdeps/aarch64/fpu/asinhf_advsimd.c b/sysdeps/aarch64/fpu/asinhf_advsimd.c index 09fd8a6143..eb789b91b6 100644 --- a/sysdeps/aarch64/fpu/asinhf_advsimd.c +++ 
b/sysdeps/aarch64/fpu/asinhf_advsimd.c @@ -20,16 +20,16 @@ #include "v_math.h" #include "v_log1pf_inline.h" -#define SignMask v_u32 (0x80000000) - const static struct data { struct v_log1pf_data log1pf_consts; + float32x4_t one; uint32x4_t big_bound; #if WANT_SIMD_EXCEPT uint32x4_t tiny_bound; #endif } data = { + .one = V4 (1), .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE, .big_bound = V4 (0x5f800000), /* asuint(0x1p64). */ #if WANT_SIMD_EXCEPT @@ -38,20 +38,27 @@ const static struct data }; static float32x4_t NOINLINE VPCS_ATTR -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) +special_case (float32x4_t x, uint32x4_t sign, float32x4_t y, + uint32x4_t special, const struct data *d) { - return v_call_f32 (asinhf, x, y, special); + return v_call_f32 ( + asinhf, x, + vreinterpretq_f32_u32 (veorq_u32 ( + sign, vreinterpretq_u32_f32 (log1pf_inline (y, &d->log1pf_consts)))), + special); } /* Single-precision implementation of vector asinh(x), using vector log1p. - Worst-case error is 2.66 ULP, at roughly +/-0.25: - __v_asinhf(0x1.01b04p-2) got 0x1.fe163ep-3 want 0x1.fe1638p-3. */ + Worst-case error is 2.59 ULP: + _ZGVnN4v_asinhf(0x1.d86124p-3) got 0x1.d449bep-3 + want 0x1.d449c4p-3. */ VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (asinh) (float32x4_t x) { const struct data *dat = ptr_barrier (&data); - uint32x4_t iax = vbicq_u32 (vreinterpretq_u32_f32 (x), SignMask); - float32x4_t ax = vreinterpretq_f32_u32 (iax); + float32x4_t ax = vabsq_f32 (x); + uint32x4_t iax = vreinterpretq_u32_f32 (ax); uint32x4_t special = vcgeq_u32 (iax, dat->big_bound); + uint32x4_t sign = veorq_u32 (vreinterpretq_u32_f32 (x), iax); float32x4_t special_arg = x; #if WANT_SIMD_EXCEPT @@ -68,13 +75,13 @@ VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (asinh) (float32x4_t x) /* asinh(x) = log(x + sqrt(x * x + 1)). For positive x, asinh(x) = log1p(x + x * x / (1 + sqrt(x * x + 1))). 
*/ float32x4_t d - = vaddq_f32 (v_f32 (1), vsqrtq_f32 (vfmaq_f32 (v_f32 (1), x, x))); - float32x4_t y = log1pf_inline ( - vaddq_f32 (ax, vdivq_f32 (vmulq_f32 (ax, ax), d)), dat->log1pf_consts); + = vaddq_f32 (v_f32 (1), vsqrtq_f32 (vfmaq_f32 (dat->one, ax, ax))); + float32x4_t y = vaddq_f32 (ax, vdivq_f32 (vmulq_f32 (ax, ax), d)); if (__glibc_unlikely (v_any_u32 (special))) - return special_case (special_arg, vbslq_f32 (SignMask, x, y), special); - return vbslq_f32 (SignMask, x, y); + return special_case (special_arg, sign, y, special, dat); + return vreinterpretq_f32_u32 (veorq_u32 ( + sign, vreinterpretq_u32_f32 (log1pf_inline (y, &dat->log1pf_consts)))); } libmvec_hidden_def (V_NAME_F1 (asinh)) HALF_WIDTH_ALIAS_F1 (asinh) diff --git a/sysdeps/aarch64/fpu/atanhf_advsimd.c b/sysdeps/aarch64/fpu/atanhf_advsimd.c index ae488f7b54..818b6c92ad 100644 --- a/sysdeps/aarch64/fpu/atanhf_advsimd.c +++ b/sysdeps/aarch64/fpu/atanhf_advsimd.c @@ -40,15 +40,17 @@ const static struct data #define Half v_u32 (0x3f000000) static float32x4_t NOINLINE VPCS_ATTR -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) +special_case (float32x4_t x, float32x4_t halfsign, float32x4_t y, + uint32x4_t special) { - return v_call_f32 (atanhf, x, y, special); + return v_call_f32 (atanhf, vbslq_f32 (AbsMask, x, halfsign), + vmulq_f32 (halfsign, y), special); } /* Approximation for vector single-precision atanh(x) using modified log1p. - The maximum error is 3.08 ULP: - __v_atanhf(0x1.ff215p-5) got 0x1.ffcb7cp-5 - want 0x1.ffcb82p-5. */ + The maximum error is 2.93 ULP: + _ZGVnN4v_atanhf(0x1.f43d7p-5) got 0x1.f4dcfep-5 + want 0x1.f4dcf8p-5. 
*/ VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (atanh) (float32x4_t x) { const struct data *d = ptr_barrier (&data); @@ -68,11 +70,19 @@ VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (atanh) (float32x4_t x) uint32x4_t special = vcgeq_u32 (iax, d->one); #endif - float32x4_t y = vdivq_f32 (vaddq_f32 (ax, ax), vsubq_f32 (v_f32 (1), ax)); - y = log1pf_inline (y, d->log1pf_consts); + float32x4_t y = vdivq_f32 (vaddq_f32 (ax, ax), + vsubq_f32 (vreinterpretq_f32_u32 (d->one), ax)); + y = log1pf_inline (y, &d->log1pf_consts); + /* If exceptions not required, pass ax to special-case for shorter dependency + chain. If exceptions are required ax will have been zerofied, so have to + pass x. */ if (__glibc_unlikely (v_any_u32 (special))) - return special_case (x, vmulq_f32 (halfsign, y), special); +#if WANT_SIMD_EXCEPT + return special_case (x, halfsign, y, special); +#else + return special_case (ax, halfsign, y, special); +#endif return vmulq_f32 (halfsign, y); } libmvec_hidden_def (V_NAME_F1 (atanh)) diff --git a/sysdeps/aarch64/fpu/bits/math-vector.h b/sysdeps/aarch64/fpu/bits/math-vector.h index 7484150131..f295fe185d 100644 --- a/sysdeps/aarch64/fpu/bits/math-vector.h +++ b/sysdeps/aarch64/fpu/bits/math-vector.h @@ -113,6 +113,10 @@ # define __DECL_SIMD_log2 __DECL_SIMD_aarch64 # undef __DECL_SIMD_log2f # define __DECL_SIMD_log2f __DECL_SIMD_aarch64 +# undef __DECL_SIMD_logp1 +# define __DECL_SIMD_logp1 __DECL_SIMD_aarch64 +# undef __DECL_SIMD_logp1f +# define __DECL_SIMD_logp1f __DECL_SIMD_aarch64 # undef __DECL_SIMD_pow # define __DECL_SIMD_pow __DECL_SIMD_aarch64 # undef __DECL_SIMD_powf @@ -180,6 +184,7 @@ __vpcs __f32x4_t _ZGVnN4v_logf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_log10f (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_log1pf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_log2f (__f32x4_t); +__vpcs __f32x4_t _ZGVnN4v_logp1f (__f32x4_t); __vpcs __f32x4_t _ZGVnN4vv_powf (__f32x4_t, __f32x4_t); __vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_sinhf (__f32x4_t); @@ 
-207,6 +212,7 @@ __vpcs __f64x2_t _ZGVnN2v_log (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_log10 (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_log1p (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_log2 (__f64x2_t); +__vpcs __f64x2_t _ZGVnN2v_logp1 (__f64x2_t); __vpcs __f64x2_t _ZGVnN2vv_pow (__f64x2_t, __f64x2_t); __vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_sinh (__f64x2_t); @@ -239,6 +245,7 @@ __sv_f32_t _ZGVsMxv_logf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_log10f (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_log1pf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_log2f (__sv_f32_t, __sv_bool_t); +__sv_f32_t _ZGVsMxv_logp1f (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxvv_powf (__sv_f32_t, __sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_sinf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_sinhf (__sv_f32_t, __sv_bool_t); @@ -266,6 +273,7 @@ __sv_f64_t _ZGVsMxv_log (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_log10 (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_log1p (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_log2 (__sv_f64_t, __sv_bool_t); +__sv_f64_t _ZGVsMxv_logp1 (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxvv_pow (__sv_f64_t, __sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_sin (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_sinh (__sv_f64_t, __sv_bool_t); diff --git a/sysdeps/aarch64/fpu/cos_advsimd.c b/sysdeps/aarch64/fpu/cos_advsimd.c index 3924c9ce44..11a89b1530 100644 --- a/sysdeps/aarch64/fpu/cos_advsimd.c +++ b/sysdeps/aarch64/fpu/cos_advsimd.c @@ -22,7 +22,7 @@ static const struct data { float64x2_t poly[7]; - float64x2_t range_val, shift, inv_pi, half_pi, pi_1, pi_2, pi_3; + float64x2_t range_val, inv_pi, pi_1, pi_2, pi_3; } data = { /* Worst-case error is 3.3 ulp in [-pi/2, pi/2]. 
*/ .poly = { V2 (-0x1.555555555547bp-3), V2 (0x1.1111111108a4dp-7), @@ -30,11 +30,9 @@ static const struct data V2 (-0x1.ae633919987c6p-26), V2 (0x1.60e277ae07cecp-33), V2 (-0x1.9e9540300a1p-41) }, .inv_pi = V2 (0x1.45f306dc9c883p-2), - .half_pi = V2 (0x1.921fb54442d18p+0), .pi_1 = V2 (0x1.921fb54442d18p+1), .pi_2 = V2 (0x1.1a62633145c06p-53), .pi_3 = V2 (0x1.c1cd129024e09p-106), - .shift = V2 (0x1.8p52), .range_val = V2 (0x1p23) }; @@ -68,10 +66,9 @@ float64x2_t VPCS_ATTR V_NAME_D1 (cos) (float64x2_t x) #endif /* n = rint((|x|+pi/2)/pi) - 0.5. */ - n = vfmaq_f64 (d->shift, d->inv_pi, vaddq_f64 (r, d->half_pi)); - odd = vshlq_n_u64 (vreinterpretq_u64_f64 (n), 63); - n = vsubq_f64 (n, d->shift); - n = vsubq_f64 (n, v_f64 (0.5)); + n = vrndaq_f64 (vfmaq_f64 (v_f64 (0.5), r, d->inv_pi)); + odd = vshlq_n_u64 (vreinterpretq_u64_s64 (vcvtq_s64_f64 (n)), 63); + n = vsubq_f64 (n, v_f64 (0.5f)); /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ r = vfmsq_f64 (r, d->pi_1, n); diff --git a/sysdeps/aarch64/fpu/cosf_advsimd.c b/sysdeps/aarch64/fpu/cosf_advsimd.c index d0c285b03a..85a1b37373 100644 --- a/sysdeps/aarch64/fpu/cosf_advsimd.c +++ b/sysdeps/aarch64/fpu/cosf_advsimd.c @@ -22,7 +22,7 @@ static const struct data { float32x4_t poly[4]; - float32x4_t range_val, inv_pi, half_pi, shift, pi_1, pi_2, pi_3; + float32x4_t range_val, inv_pi, pi_1, pi_2, pi_3; } data = { /* 1.886 ulp error. */ .poly = { V4 (-0x1.555548p-3f), V4 (0x1.110df4p-7f), V4 (-0x1.9f42eap-13f), @@ -33,8 +33,6 @@ static const struct data .pi_3 = V4 (-0x1.ee59dap-49f), .inv_pi = V4 (0x1.45f306p-2f), - .shift = V4 (0x1.8p+23f), - .half_pi = V4 (0x1.921fb6p0f), .range_val = V4 (0x1p20f) }; @@ -69,9 +67,8 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (cos) (float32x4_t x) #endif /* n = rint((|x|+pi/2)/pi) - 0.5. 
*/ - n = vfmaq_f32 (d->shift, d->inv_pi, vaddq_f32 (r, d->half_pi)); - odd = vshlq_n_u32 (vreinterpretq_u32_f32 (n), 31); - n = vsubq_f32 (n, d->shift); + n = vrndaq_f32 (vfmaq_f32 (v_f32 (0.5), r, d->inv_pi)); + odd = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 31); n = vsubq_f32 (n, v_f32 (0.5f)); /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ diff --git a/sysdeps/aarch64/fpu/coshf_sve.c b/sysdeps/aarch64/fpu/coshf_sve.c index e5d8a299c6..7ad6efa0fc 100644 --- a/sysdeps/aarch64/fpu/coshf_sve.c +++ b/sysdeps/aarch64/fpu/coshf_sve.c @@ -23,37 +23,42 @@ static const struct data { struct sv_expf_data expf_consts; - uint32_t special_bound; + float special_bound; } data = { .expf_consts = SV_EXPF_DATA, /* 0x1.5a92d8p+6: expf overflows above this, so have to use special case. */ - .special_bound = 0x42ad496c, + .special_bound = 0x1.5a92d8p+6, }; static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t pg) +special_case (svfloat32_t x, svfloat32_t half_e, svfloat32_t half_over_e, + svbool_t pg) { - return sv_call_f32 (coshf, x, y, pg); + return sv_call_f32 (coshf, x, svadd_x (svptrue_b32 (), half_e, half_over_e), + pg); } /* Single-precision vector cosh, using vector expf. - Maximum error is 1.89 ULP: - _ZGVsMxv_coshf (-0x1.65898cp+6) got 0x1.f00aep+127 - want 0x1.f00adcp+127. */ + Maximum error is 2.77 ULP: + _ZGVsMxv_coshf(-0x1.5b38f4p+1) got 0x1.e45946p+2 + want 0x1.e4594cp+2. */ svfloat32_t SV_NAME_F1 (cosh) (svfloat32_t x, svbool_t pg) { const struct data *d = ptr_barrier (&data); - svfloat32_t ax = svabs_x (pg, x); - svbool_t special = svcmpge (pg, svreinterpret_u32 (ax), d->special_bound); + svbool_t special = svacge (pg, x, d->special_bound); - /* Calculate cosh by exp(x) / 2 + exp(-x) / 2. */ - svfloat32_t t = expf_inline (ax, pg, &d->expf_consts); - svfloat32_t half_t = svmul_x (pg, t, 0.5); - svfloat32_t half_over_t = svdivr_x (pg, t, 0.5); + /* Calculate cosh by exp(x) / 2 + exp(-x) / 2. 
+ Note that x is passed to exp here, rather than |x|. This is to avoid using + destructive unary ABS for better register usage. However it means the + routine is not exactly symmetrical, as the exp helper is slightly less + accurate in the negative range. */ + svfloat32_t e = expf_inline (x, pg, &d->expf_consts); + svfloat32_t half_e = svmul_x (svptrue_b32 (), e, 0.5); + svfloat32_t half_over_e = svdivr_x (pg, e, 0.5); if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svadd_x (pg, half_t, half_over_t), special); + return special_case (x, half_e, half_over_e, special); - return svadd_x (pg, half_t, half_over_t); + return svadd_x (svptrue_b32 (), half_e, half_over_e); } diff --git a/sysdeps/aarch64/fpu/exp10f_sve.c b/sysdeps/aarch64/fpu/exp10f_sve.c index e09b2f3b27..8aa3fa9c43 100644 --- a/sysdeps/aarch64/fpu/exp10f_sve.c +++ b/sysdeps/aarch64/fpu/exp10f_sve.c @@ -18,74 +18,83 @@ <https://www.gnu.org/licenses/>. */ #include "sv_math.h" -#include "poly_sve_f32.h" -/* For x < -SpecialBound, the result is subnormal and not handled correctly by +/* For x < -Thres, the result is subnormal and not handled correctly by FEXPA. */ -#define SpecialBound 37.9 +#define Thres 37.9 static const struct data { - float poly[5]; - float shift, log10_2, log2_10_hi, log2_10_lo, special_bound; + float log2_10_lo, c0, c2, c4; + float c1, c3, log10_2; + float shift, log2_10_hi, thres; } data = { /* Coefficients generated using Remez algorithm with minimisation of relative error. rel error: 0x1.89dafa3p-24 abs error: 0x1.167d55p-23 in [-log10(2)/2, log10(2)/2] maxerr: 0.52 +0.5 ulp. */ - .poly = { 0x1.26bb16p+1f, 0x1.5350d2p+1f, 0x1.04744ap+1f, 0x1.2d8176p+0f, - 0x1.12b41ap-1f }, + .c0 = 0x1.26bb16p+1f, + .c1 = 0x1.5350d2p+1f, + .c2 = 0x1.04744ap+1f, + .c3 = 0x1.2d8176p+0f, + .c4 = 0x1.12b41ap-1f, /* 1.5*2^17 + 127, a shift value suitable for FEXPA. 
*/ - .shift = 0x1.903f8p17f, + .shift = 0x1.803f8p17f, .log10_2 = 0x1.a934fp+1, .log2_10_hi = 0x1.344136p-2, .log2_10_lo = -0x1.ec10cp-27, - .special_bound = SpecialBound, + .thres = Thres, }; -static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) +static inline svfloat32_t +sv_exp10f_inline (svfloat32_t x, const svbool_t pg, const struct data *d) { - return sv_call_f32 (exp10f, x, y, special); -} - -/* Single-precision SVE exp10f routine. Implements the same algorithm - as AdvSIMD exp10f. - Worst case error is 1.02 ULPs. - _ZGVsMxv_exp10f(-0x1.040488p-4) got 0x1.ba5f9ep-1 - want 0x1.ba5f9cp-1. */ -svfloat32_t SV_NAME_F1 (exp10) (svfloat32_t x, const svbool_t pg) -{ - const struct data *d = ptr_barrier (&data); /* exp10(x) = 2^(n/N) * 10^r = 2^n * (1 + poly (r)), with poly(r) in [1/sqrt(2), sqrt(2)] and x = r + n * log10(2) / N, with r in [-log10(2)/2N, log10(2)/2N]. */ - /* Load some constants in quad-word chunks to minimise memory access (last - lane is wasted). */ - svfloat32_t log10_2_and_inv = svld1rq (svptrue_b32 (), &d->log10_2); + svfloat32_t lane_consts = svld1rq (svptrue_b32 (), &d->log2_10_lo); /* n = round(x/(log10(2)/N)). */ svfloat32_t shift = sv_f32 (d->shift); - svfloat32_t z = svmla_lane (shift, x, log10_2_and_inv, 0); - svfloat32_t n = svsub_x (pg, z, shift); + svfloat32_t z = svmad_x (pg, sv_f32 (d->log10_2), x, shift); + svfloat32_t n = svsub_x (svptrue_b32 (), z, shift); /* r = x - n*log10(2)/N. */ - svfloat32_t r = svmls_lane (x, n, log10_2_and_inv, 1); - r = svmls_lane (r, n, log10_2_and_inv, 2); + svfloat32_t r = svmsb_x (pg, sv_f32 (d->log2_10_hi), n, x); + r = svmls_lane (r, n, lane_consts, 0); - svbool_t special = svacgt (pg, x, d->special_bound); svfloat32_t scale = svexpa (svreinterpret_u32 (z)); /* Polynomial evaluation: poly(r) ~ exp10(r)-1. 
*/ - svfloat32_t r2 = svmul_x (pg, r, r); - svfloat32_t poly - = svmla_x (pg, svmul_x (pg, r, d->poly[0]), - sv_pairwise_poly_3_f32_x (pg, r, r2, d->poly + 1), r2); - - if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svmla_x (pg, scale, scale, poly), special); + svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, lane_consts, 2); + svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, lane_consts, 3); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); + svfloat32_t p14 = svmla_x (pg, p12, p34, r2); + svfloat32_t p0 = svmul_lane (r, lane_consts, 1); + svfloat32_t poly = svmla_x (pg, p0, r2, p14); return svmla_x (pg, scale, scale, poly); } + +static svfloat32_t NOINLINE +special_case (svfloat32_t x, svbool_t special, const struct data *d) +{ + return sv_call_f32 (exp10f, x, sv_exp10f_inline (x, svptrue_b32 (), d), + special); +} + +/* Single-precision SVE exp10f routine. Implements the same algorithm + as AdvSIMD exp10f. + Worst case error is 1.02 ULPs. + _ZGVsMxv_exp10f(-0x1.040488p-4) got 0x1.ba5f9ep-1 + want 0x1.ba5f9cp-1. */ +svfloat32_t SV_NAME_F1 (exp10) (svfloat32_t x, const svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + svbool_t special = svacgt (pg, x, d->thres); + if (__glibc_unlikely (svptest_any (special, special))) + return special_case (x, special, d); + return sv_exp10f_inline (x, pg, d); +} diff --git a/sysdeps/aarch64/fpu/exp2f_sve.c b/sysdeps/aarch64/fpu/exp2f_sve.c index 8a686e3e05..c6216bed9e 100644 --- a/sysdeps/aarch64/fpu/exp2f_sve.c +++ b/sysdeps/aarch64/fpu/exp2f_sve.c @@ -24,54 +24,64 @@ static const struct data { - float poly[5]; + float c0, c2, c4, c1, c3; float shift, thres; } data = { - /* Coefficients copied from the polynomial in AdvSIMD variant, reversed for - compatibility with polynomial helpers. */ - .poly = { 0x1.62e422p-1f, 0x1.ebf9bcp-3f, 0x1.c6bd32p-5f, 0x1.3ce9e4p-7f, - 0x1.59977ap-10f }, + /* Coefficients copied from the polynomial in AdvSIMD variant. 
*/ + .c0 = 0x1.62e422p-1f, + .c1 = 0x1.ebf9bcp-3f, + .c2 = 0x1.c6bd32p-5f, + .c3 = 0x1.3ce9e4p-7f, + .c4 = 0x1.59977ap-10f, /* 1.5*2^17 + 127. */ - .shift = 0x1.903f8p17f, + .shift = 0x1.803f8p17f, /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled correctly by FEXPA. */ .thres = Thres, }; -static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) -{ - return sv_call_f32 (exp2f, x, y, special); -} - -/* Single-precision SVE exp2f routine. Implements the same algorithm - as AdvSIMD exp2f. - Worst case error is 1.04 ULPs. - SV_NAME_F1 (exp2)(0x1.943b9p-1) got 0x1.ba7eb2p+0 - want 0x1.ba7ebp+0. */ -svfloat32_t SV_NAME_F1 (exp2) (svfloat32_t x, const svbool_t pg) +static inline svfloat32_t +sv_exp2f_inline (svfloat32_t x, const svbool_t pg, const struct data *d) { - const struct data *d = ptr_barrier (&data); /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] x = n + r, with r in [-1/2, 1/2]. */ - svfloat32_t shift = sv_f32 (d->shift); - svfloat32_t z = svadd_x (pg, x, shift); - svfloat32_t n = svsub_x (pg, z, shift); - svfloat32_t r = svsub_x (pg, x, n); + svfloat32_t z = svadd_x (svptrue_b32 (), x, d->shift); + svfloat32_t n = svsub_x (svptrue_b32 (), z, d->shift); + svfloat32_t r = svsub_x (svptrue_b32 (), x, n); - svbool_t special = svacgt (pg, x, d->thres); svfloat32_t scale = svexpa (svreinterpret_u32 (z)); /* Polynomial evaluation: poly(r) ~ exp2(r)-1. Evaluate polynomial use hybrid scheme - offset ESTRIN by 1 for coefficients 1 to 4, and apply most significant coefficient directly. 
*/ - svfloat32_t r2 = svmul_x (pg, r, r); - svfloat32_t p14 = sv_pairwise_poly_3_f32_x (pg, r, r2, d->poly + 1); - svfloat32_t p0 = svmul_x (pg, r, d->poly[0]); + svfloat32_t even_coeffs = svld1rq (svptrue_b32 (), &d->c0); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); + svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, even_coeffs, 1); + svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, even_coeffs, 2); + svfloat32_t p14 = svmla_x (pg, p12, r2, p34); + svfloat32_t p0 = svmul_lane (r, even_coeffs, 0); svfloat32_t poly = svmla_x (pg, p0, r2, p14); - if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svmla_x (pg, scale, scale, poly), special); - return svmla_x (pg, scale, scale, poly); } + +static svfloat32_t NOINLINE +special_case (svfloat32_t x, svbool_t special, const struct data *d) +{ + return sv_call_f32 (exp2f, x, sv_exp2f_inline (x, svptrue_b32 (), d), + special); +} + +/* Single-precision SVE exp2f routine. Implements the same algorithm + as AdvSIMD exp2f. + Worst case error is 1.04 ULPs. + _ZGVsMxv_exp2f(-0x1.af994ap-3) got 0x1.ba6a66p-1 + want 0x1.ba6a64p-1. */ +svfloat32_t SV_NAME_F1 (exp2) (svfloat32_t x, const svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + svbool_t special = svacgt (pg, x, d->thres); + if (__glibc_unlikely (svptest_any (special, special))) + return special_case (x, special, d); + return sv_exp2f_inline (x, pg, d); +} diff --git a/sysdeps/aarch64/fpu/expf_advsimd.c b/sysdeps/aarch64/fpu/expf_advsimd.c index 99d2e647aa..5c9cb72620 100644 --- a/sysdeps/aarch64/fpu/expf_advsimd.c +++ b/sysdeps/aarch64/fpu/expf_advsimd.c @@ -22,7 +22,7 @@ static const struct data { float32x4_t poly[5]; - float32x4_t shift, inv_ln2, ln2_hi, ln2_lo; + float32x4_t inv_ln2, ln2_hi, ln2_lo; uint32x4_t exponent_bias; #if !WANT_SIMD_EXCEPT float32x4_t special_bound, scale_thresh; @@ -31,7 +31,6 @@ static const struct data /* maxerr: 1.45358 +0.5 ulp. 
*/ .poly = { V4 (0x1.0e4020p-7f), V4 (0x1.573e2ep-5f), V4 (0x1.555e66p-3f), V4 (0x1.fffdb6p-2f), V4 (0x1.ffffecp-1f) }, - .shift = V4 (0x1.8p23f), .inv_ln2 = V4 (0x1.715476p+0f), .ln2_hi = V4 (0x1.62e4p-1f), .ln2_lo = V4 (0x1.7f7d1cp-20f), @@ -85,7 +84,7 @@ special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1, float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - float32x4_t n, r, r2, scale, p, q, poly, z; + float32x4_t n, r, r2, scale, p, q, poly; uint32x4_t cmp, e; #if WANT_SIMD_EXCEPT @@ -104,11 +103,10 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp) (float32x4_t x) /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] x = ln2*n + r, with r in [-ln2/2, ln2/2]. */ - z = vfmaq_f32 (d->shift, x, d->inv_ln2); - n = vsubq_f32 (z, d->shift); + n = vrndaq_f32 (vmulq_f32 (x, d->inv_ln2)); r = vfmsq_f32 (x, n, d->ln2_hi); r = vfmsq_f32 (r, n, d->ln2_lo); - e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23); + e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 23); scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias)); #if !WANT_SIMD_EXCEPT diff --git a/sysdeps/aarch64/fpu/expf_sve.c b/sysdeps/aarch64/fpu/expf_sve.c index 3ba79bc4f1..da93e01b87 100644 --- a/sysdeps/aarch64/fpu/expf_sve.c +++ b/sysdeps/aarch64/fpu/expf_sve.c @@ -18,33 +18,25 @@ <https://www.gnu.org/licenses/>. */ #include "sv_math.h" +#include "sv_expf_inline.h" + +/* Roughly 87.3. For x < -Thres, the result is subnormal and not handled + correctly by FEXPA. */ +#define Thres 0x1.5d5e2ap+6f static const struct data { - float poly[5]; - float inv_ln2, ln2_hi, ln2_lo, shift, thres; + struct sv_expf_data d; + float thres; } data = { - /* Coefficients copied from the polynomial in AdvSIMD variant, reversed for - compatibility with polynomial helpers. 
*/ - .poly = { 0x1.ffffecp-1f, 0x1.fffdb6p-2f, 0x1.555e66p-3f, 0x1.573e2ep-5f, - 0x1.0e4020p-7f }, - .inv_ln2 = 0x1.715476p+0f, - .ln2_hi = 0x1.62e4p-1f, - .ln2_lo = 0x1.7f7d1cp-20f, - /* 1.5*2^17 + 127. */ - .shift = 0x1.903f8p17f, - /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled - correctly by FEXPA. */ - .thres = 0x1.5d5e2ap+6f, + .d = SV_EXPF_DATA, + .thres = Thres, }; -#define C(i) sv_f32 (d->poly[i]) -#define ExponentBias 0x3f800000 - static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) +special_case (svfloat32_t x, svbool_t special, const struct sv_expf_data *d) { - return sv_call_f32 (expf, x, y, special); + return sv_call_f32 (expf, x, expf_inline (x, svptrue_b32 (), d), special); } /* Optimised single-precision SVE exp function. @@ -54,36 +46,8 @@ special_case (svfloat32_t x, svfloat32_t y, svbool_t special) svfloat32_t SV_NAME_F1 (exp) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); - - /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] - x = ln2*n + r, with r in [-ln2/2, ln2/2]. */ - - /* Load some constants in quad-word chunks to minimise memory access (last - lane is wasted). */ - svfloat32_t invln2_and_ln2 = svld1rq (svptrue_b32 (), &d->inv_ln2); - - /* n = round(x/(ln2/N)). */ - svfloat32_t z = svmla_lane (sv_f32 (d->shift), x, invln2_and_ln2, 0); - svfloat32_t n = svsub_x (pg, z, d->shift); - - /* r = x - n*ln2/N. */ - svfloat32_t r = svmls_lane (x, n, invln2_and_ln2, 1); - r = svmls_lane (r, n, invln2_and_ln2, 2); - - /* scale = 2^(n/N). */ svbool_t is_special_case = svacgt (pg, x, d->thres); - svfloat32_t scale = svexpa (svreinterpret_u32 (z)); - - /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6. 
*/ - svfloat32_t p12 = svmla_x (pg, C (1), C (2), r); - svfloat32_t p34 = svmla_x (pg, C (3), C (4), r); - svfloat32_t r2 = svmul_x (pg, r, r); - svfloat32_t p14 = svmla_x (pg, p12, p34, r2); - svfloat32_t p0 = svmul_x (pg, r, C (0)); - svfloat32_t poly = svmla_x (pg, p0, r2, p14); - if (__glibc_unlikely (svptest_any (pg, is_special_case))) - return special_case (x, svmla_x (pg, scale, scale, poly), is_special_case); - - return svmla_x (pg, scale, scale, poly); + return special_case (x, is_special_case, &d->d); + return expf_inline (x, pg, &d->d); } diff --git a/sysdeps/aarch64/fpu/expm1f_advsimd.c b/sysdeps/aarch64/fpu/expm1f_advsimd.c index a0616ec754..8303ca296e 100644 --- a/sysdeps/aarch64/fpu/expm1f_advsimd.c +++ b/sysdeps/aarch64/fpu/expm1f_advsimd.c @@ -18,27 +18,18 @@ <https://www.gnu.org/licenses/>. */ #include "v_math.h" -#include "poly_advsimd_f32.h" +#include "v_expm1f_inline.h" static const struct data { - float32x4_t poly[5]; - float invln2_and_ln2[4]; - float32x4_t shift; - int32x4_t exponent_bias; + struct v_expm1f_data d; #if WANT_SIMD_EXCEPT uint32x4_t thresh; #else float32x4_t oflow_bound; #endif } data = { - /* Generated using fpminimax with degree=5 in [-log(2)/2, log(2)/2]. */ - .poly = { V4 (0x1.fffffep-2), V4 (0x1.5554aep-3), V4 (0x1.555736p-5), - V4 (0x1.12287cp-7), V4 (0x1.6b55a2p-10) }, - /* Stores constants: invln2, ln2_hi, ln2_lo, 0. */ - .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 }, - .shift = V4 (0x1.8p23f), - .exponent_bias = V4 (0x3f800000), + .d = V_EXPM1F_DATA, #if !WANT_SIMD_EXCEPT /* Value above which expm1f(x) should overflow. 
Absolute value of the underflow bound is greater than this, so it catches both cases - there is @@ -55,67 +46,38 @@ static const struct data #define TinyBound v_u32 (0x34000000 << 1) static float32x4_t VPCS_ATTR NOINLINE -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) +special_case (float32x4_t x, uint32x4_t special, const struct data *d) { - return v_call_f32 (expm1f, x, y, special); + return v_call_f32 ( + expm1f, x, expm1f_inline (v_zerofy_f32 (x, special), &d->d), special); } /* Single-precision vector exp(x) - 1 function. - The maximum error is 1.51 ULP: - _ZGVnN4v_expm1f (0x1.8baa96p-2) got 0x1.e2fb9p-2 - want 0x1.e2fb94p-2. */ + The maximum error is 1.62 ULP: + _ZGVnN4v_expm1f(0x1.85f83p-2) got 0x1.da9f4p-2 + want 0x1.da9f44p-2. */ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (expm1) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - uint32x4_t ix = vreinterpretq_u32_f32 (x); #if WANT_SIMD_EXCEPT + uint32x4_t ix = vreinterpretq_u32_f32 (x); /* If fp exceptions are to be triggered correctly, fall back to scalar for |x| < 2^-23, |x| > oflow_bound, Inf & NaN. Add ix to itself for shift-left by 1, and compare with thresh which was left-shifted offline - this is effectively an absolute compare. */ uint32x4_t special = vcgeq_u32 (vsubq_u32 (vaddq_u32 (ix, ix), TinyBound), d->thresh); - if (__glibc_unlikely (v_any_u32 (special))) - x = v_zerofy_f32 (x, special); #else /* Handles very large values (+ve and -ve), +/-NaN, +/-Inf. */ uint32x4_t special = vcagtq_f32 (x, d->oflow_bound); #endif - /* Reduce argument to smaller range: - Let i = round(x / ln2) - and f = x - i * ln2, then f is in [-ln2/2, ln2/2]. - exp(x) - 1 = 2^i * (expm1(f) + 1) - 1 - where 2^i is exact because i is an integer. 
*/ - float32x4_t invln2_and_ln2 = vld1q_f32 (d->invln2_and_ln2); - float32x4_t j - = vsubq_f32 (vfmaq_laneq_f32 (d->shift, x, invln2_and_ln2, 0), d->shift); - int32x4_t i = vcvtq_s32_f32 (j); - float32x4_t f = vfmsq_laneq_f32 (x, j, invln2_and_ln2, 1); - f = vfmsq_laneq_f32 (f, j, invln2_and_ln2, 2); - - /* Approximate expm1(f) using polynomial. - Taylor expansion for expm1(x) has the form: - x + ax^2 + bx^3 + cx^4 .... - So we calculate the polynomial P(f) = a + bf + cf^2 + ... - and assemble the approximation expm1(f) ~= f + f^2 * P(f). */ - float32x4_t p = v_horner_4_f32 (f, d->poly); - p = vfmaq_f32 (f, vmulq_f32 (f, f), p); - - /* Assemble the result. - expm1(x) ~= 2^i * (p + 1) - 1 - Let t = 2^i. */ - int32x4_t u = vaddq_s32 (vshlq_n_s32 (i, 23), d->exponent_bias); - float32x4_t t = vreinterpretq_f32_s32 (u); - if (__glibc_unlikely (v_any_u32 (special))) - return special_case (vreinterpretq_f32_u32 (ix), - vfmaq_f32 (vsubq_f32 (t, v_f32 (1.0f)), p, t), - special); + return special_case (x, special, d); /* expm1(x) ~= p * t + (t - 1). */ - return vfmaq_f32 (vsubq_f32 (t, v_f32 (1.0f)), p, t); + return expm1f_inline (x, &d->d); } libmvec_hidden_def (V_NAME_F1 (expm1)) HALF_WIDTH_ALIAS_F1 (expm1) diff --git a/sysdeps/aarch64/fpu/log10f_advsimd.c b/sysdeps/aarch64/fpu/log10f_advsimd.c index 9347422a77..82228b599a 100644 --- a/sysdeps/aarch64/fpu/log10f_advsimd.c +++ b/sysdeps/aarch64/fpu/log10f_advsimd.c @@ -22,11 +22,11 @@ static const struct data { - uint32x4_t min_norm; + uint32x4_t off, offset_lower_bound; uint16x8_t special_bound; + uint32x4_t mantissa_mask; float32x4_t poly[8]; float32x4_t inv_ln10, ln2; - uint32x4_t off, mantissa_mask; } data = { /* Use order 9 for log10(1+x), i.e. order 8 for log10(1+x)/x, with x in [-1/3, 1/3] (offset=2/3). Max. relative error: 0x1.068ee468p-25. 
*/ @@ -35,18 +35,22 @@ static const struct data V4 (-0x1.0fc92cp-4f), V4 (0x1.f5f76ap-5f) }, .ln2 = V4 (0x1.62e43p-1f), .inv_ln10 = V4 (0x1.bcb7b2p-2f), - .min_norm = V4 (0x00800000), - .special_bound = V8 (0x7f00), /* asuint32(inf) - min_norm. */ + /* Lower bound is the smallest positive normal float 0x00800000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x00800000 - offset (which wraps around). */ + .offset_lower_bound = V4 (0x00800000 - 0x3f2aaaab), + .special_bound = V8 (0x7f00), /* top16(asuint32(inf) - 0x00800000). */ .off = V4 (0x3f2aaaab), /* 0.666667. */ .mantissa_mask = V4 (0x007fffff), }; static float32x4_t VPCS_ATTR NOINLINE -special_case (float32x4_t x, float32x4_t y, float32x4_t p, float32x4_t r2, - uint16x4_t cmp) +special_case (float32x4_t y, uint32x4_t u_off, float32x4_t p, float32x4_t r2, + uint16x4_t cmp, const struct data *d) { /* Fall back to scalar code. */ - return v_call_f32 (log10f, x, vfmaq_f32 (y, p, r2), vmovl_u16 (cmp)); + return v_call_f32 (log10f, vreinterpretq_f32_u32 (vaddq_u32 (u_off, d->off)), + vfmaq_f32 (y, p, r2), vmovl_u16 (cmp)); } /* Fast implementation of AdvSIMD log10f, @@ -58,15 +62,21 @@ special_case (float32x4_t x, float32x4_t y, float32x4_t p, float32x4_t r2, float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log10) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - uint32x4_t u = vreinterpretq_u32_f32 (x); - uint16x4_t special = vcge_u16 (vsubhn_u32 (u, d->min_norm), - vget_low_u16 (d->special_bound)); + + /* To avoid having to mov x out of the way, keep u after offset has been + applied, and recover x by adding the offset back in the special-case + handler. */ + uint32x4_t u_off = vreinterpretq_u32_f32 (x); /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - u = vsubq_u32 (u, d->off); + u_off = vsubq_u32 (u_off, d->off); float32x4_t n = vcvtq_f32_s32 ( - vshrq_n_s32 (vreinterpretq_s32_u32 (u), 23)); /* signextend. 
*/ - u = vaddq_u32 (vandq_u32 (u, d->mantissa_mask), d->off); + vshrq_n_s32 (vreinterpretq_s32_u32 (u_off), 23)); /* signextend. */ + + uint16x4_t special = vcge_u16 (vsubhn_u32 (u_off, d->offset_lower_bound), + vget_low_u16 (d->special_bound)); + + uint32x4_t u = vaddq_u32 (vandq_u32 (u_off, d->mantissa_mask), d->off); float32x4_t r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f)); /* y = log10(1+r) + n * log10(2). */ @@ -77,7 +87,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log10) (float32x4_t x) y = vmulq_f32 (y, d->inv_ln10); if (__glibc_unlikely (v_any_u16h (special))) - return special_case (x, y, poly, r2, special); + return special_case (y, u_off, poly, r2, special, d); return vfmaq_f32 (y, poly, r2); } libmvec_hidden_def (V_NAME_F1 (log10)) diff --git a/sysdeps/aarch64/fpu/log10f_sve.c b/sysdeps/aarch64/fpu/log10f_sve.c index bdbb49cd32..7913679f67 100644 --- a/sysdeps/aarch64/fpu/log10f_sve.c +++ b/sysdeps/aarch64/fpu/log10f_sve.c @@ -24,6 +24,7 @@ static const struct data float poly_0246[4]; float poly_1357[4]; float ln2, inv_ln10; + uint32_t off, lower; } data = { .poly_1357 = { /* Coefficients copied from the AdvSIMD routine, then rearranged so that coeffs @@ -35,18 +36,23 @@ static const struct data -0x1.0fc92cp-4f }, .ln2 = 0x1.62e43p-1f, .inv_ln10 = 0x1.bcb7b2p-2f, + .off = 0x3f2aaaab, + /* Lower bound is the smallest positive normal float 0x00800000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x00800000 - offset (which wraps around). */ + .lower = 0x00800000 - 0x3f2aaaab }; -#define Min 0x00800000 -#define Max 0x7f800000 -#define Thres 0x7f000000 /* Max - Min. */ -#define Offset 0x3f2aaaab /* 0.666667. */ +#define Thres 0x7f000000 /* asuint32(inf) - 0x00800000. 
*/ #define MantissaMask 0x007fffff static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) +special_case (svuint32_t u_off, svfloat32_t p, svfloat32_t r2, svfloat32_t y, + svbool_t cmp) { - return sv_call_f32 (log10f, x, y, special); + return sv_call_f32 ( + log10f, svreinterpret_f32 (svadd_x (svptrue_b32 (), u_off, data.off)), + svmla_x (svptrue_b32 (), p, r2, y), cmp); } /* Optimised implementation of SVE log10f using the same algorithm and @@ -57,23 +63,25 @@ special_case (svfloat32_t x, svfloat32_t y, svbool_t special) svfloat32_t SV_NAME_F1 (log10) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); - svuint32_t ix = svreinterpret_u32 (x); - svbool_t special = svcmpge (pg, svsub_x (pg, ix, Min), Thres); + + svuint32_t u_off = svreinterpret_u32 (x); + + u_off = svsub_x (pg, u_off, d->off); + svbool_t special = svcmpge (pg, svsub_x (pg, u_off, d->lower), Thres); /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - ix = svsub_x (pg, ix, Offset); svfloat32_t n = svcvt_f32_x ( - pg, svasr_x (pg, svreinterpret_s32 (ix), 23)); /* signextend. */ - ix = svand_x (pg, ix, MantissaMask); - ix = svadd_x (pg, ix, Offset); + pg, svasr_x (pg, svreinterpret_s32 (u_off), 23)); /* signextend. */ + svuint32_t ix = svand_x (pg, u_off, MantissaMask); + ix = svadd_x (pg, ix, d->off); svfloat32_t r = svsub_x (pg, svreinterpret_f32 (ix), 1.0f); /* y = log10(1+r) + n*log10(2) log10(1+r) ~ r * InvLn(10) + P(r) where P(r) is a polynomial. Use order 9 for log10(1+x), i.e. order 8 for log10(1+x)/x, with x in [-1/3, 1/3] (offset=2/3). 
*/ - svfloat32_t r2 = svmul_x (pg, r, r); - svfloat32_t r4 = svmul_x (pg, r2, r2); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); + svfloat32_t r4 = svmul_x (svptrue_b32 (), r2, r2); svfloat32_t p_1357 = svld1rq (svptrue_b32 (), &d->poly_1357[0]); svfloat32_t q_01 = svmla_lane (sv_f32 (d->poly_0246[0]), r, p_1357, 0); svfloat32_t q_23 = svmla_lane (sv_f32 (d->poly_0246[1]), r, p_1357, 1); @@ -88,7 +96,6 @@ svfloat32_t SV_NAME_F1 (log10) (svfloat32_t x, const svbool_t pg) hi = svmul_x (pg, hi, d->inv_ln10); if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svmla_x (svnot_z (pg, special), hi, r2, y), - special); - return svmla_x (pg, hi, r2, y); + return special_case (u_off, hi, r2, y, special); + return svmla_x (svptrue_b32 (), hi, r2, y); } diff --git a/sysdeps/aarch64/fpu/log1p_advsimd.c b/sysdeps/aarch64/fpu/log1p_advsimd.c index ffc418fc9c..114064c696 100644 --- a/sysdeps/aarch64/fpu/log1p_advsimd.c +++ b/sysdeps/aarch64/fpu/log1p_advsimd.c @@ -127,3 +127,5 @@ VPCS_ATTR float64x2_t V_NAME_D1 (log1p) (float64x2_t x) return vfmaq_f64 (y, f2, p); } + +strong_alias (V_NAME_D1 (log1p), V_NAME_D1 (logp1)) diff --git a/sysdeps/aarch64/fpu/log1p_sve.c b/sysdeps/aarch64/fpu/log1p_sve.c index 04f7e5720e..b21cfb2c90 100644 --- a/sysdeps/aarch64/fpu/log1p_sve.c +++ b/sysdeps/aarch64/fpu/log1p_sve.c @@ -116,3 +116,5 @@ svfloat64_t SV_NAME_D1 (log1p) (svfloat64_t x, svbool_t pg) return y; } + +strong_alias (SV_NAME_D1 (log1p), SV_NAME_D1 (logp1)) diff --git a/sysdeps/aarch64/fpu/log1pf_advsimd.c b/sysdeps/aarch64/fpu/log1pf_advsimd.c index dc15334a85..00006fc703 100644 --- a/sysdeps/aarch64/fpu/log1pf_advsimd.c +++ b/sysdeps/aarch64/fpu/log1pf_advsimd.c @@ -18,113 +18,81 @@ <https://www.gnu.org/licenses/>. 
*/ #include "v_math.h" -#include "poly_advsimd_f32.h" +#include "v_log1pf_inline.h" + +#if WANT_SIMD_EXCEPT const static struct data { - float32x4_t poly[8], ln2; - uint32x4_t tiny_bound, minus_one, four, thresh; - int32x4_t three_quarters; + uint32x4_t minus_one, thresh; + struct v_log1pf_data d; } data = { - .poly = { /* Generated using FPMinimax in [-0.25, 0.5]. First two coefficients - (1, -0.5) are not stored as they can be generated more - efficiently. */ - V4 (0x1.5555aap-2f), V4 (-0x1.000038p-2f), V4 (0x1.99675cp-3f), - V4 (-0x1.54ef78p-3f), V4 (0x1.28a1f4p-3f), V4 (-0x1.0da91p-3f), - V4 (0x1.abcb6p-4f), V4 (-0x1.6f0d5ep-5f) }, - .ln2 = V4 (0x1.62e43p-1f), - .tiny_bound = V4 (0x34000000), /* asuint32(0x1p-23). ulp=0.5 at 0x1p-23. */ - .thresh = V4 (0x4b800000), /* asuint32(INFINITY) - tiny_bound. */ + .d = V_LOG1PF_CONSTANTS_TABLE, + .thresh = V4 (0x4b800000), /* asuint32(INFINITY) - TinyBound. */ .minus_one = V4 (0xbf800000), - .four = V4 (0x40800000), - .three_quarters = V4 (0x3f400000) }; -static inline float32x4_t -eval_poly (float32x4_t m, const float32x4_t *p) -{ - /* Approximate log(1+m) on [-0.25, 0.5] using split Estrin scheme. */ - float32x4_t p_12 = vfmaq_f32 (v_f32 (-0.5), m, p[0]); - float32x4_t p_34 = vfmaq_f32 (p[1], m, p[2]); - float32x4_t p_56 = vfmaq_f32 (p[3], m, p[4]); - float32x4_t p_78 = vfmaq_f32 (p[5], m, p[6]); - - float32x4_t m2 = vmulq_f32 (m, m); - float32x4_t p_02 = vfmaq_f32 (m, m2, p_12); - float32x4_t p_36 = vfmaq_f32 (p_34, m2, p_56); - float32x4_t p_79 = vfmaq_f32 (p_78, m2, p[7]); - - float32x4_t m4 = vmulq_f32 (m2, m2); - float32x4_t p_06 = vfmaq_f32 (p_02, m4, p_36); - return vfmaq_f32 (p_06, m4, vmulq_f32 (m4, p_79)); -} +/* asuint32(0x1p-23). ulp=0.5 at 0x1p-23. 
*/ +# define TinyBound v_u32 (0x34000000) static float32x4_t NOINLINE VPCS_ATTR -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) +special_case (float32x4_t x, uint32x4_t cmp, const struct data *d) { - return v_call_f32 (log1pf, x, y, special); + /* Side-step special lanes so fenv exceptions are not triggered + inadvertently. */ + float32x4_t x_nospecial = v_zerofy_f32 (x, cmp); + return v_call_f32 (log1pf, x, log1pf_inline (x_nospecial, &d->d), cmp); } -/* Vector log1pf approximation using polynomial on reduced interval. Accuracy - is roughly 2.02 ULP: - log1pf(0x1.21e13ap-2) got 0x1.fe8028p-3 want 0x1.fe802cp-3. */ +/* Vector log1pf approximation using polynomial on reduced interval. Worst-case + error is 1.69 ULP: + _ZGVnN4v_log1pf(0x1.04418ap-2) got 0x1.cfcbd8p-3 + want 0x1.cfcbdcp-3. */ VPCS_ATTR float32x4_t V_NAME_F1 (log1p) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - uint32x4_t ix = vreinterpretq_u32_f32 (x); uint32x4_t ia = vreinterpretq_u32_f32 (vabsq_f32 (x)); + uint32x4_t special_cases - = vorrq_u32 (vcgeq_u32 (vsubq_u32 (ia, d->tiny_bound), d->thresh), + = vorrq_u32 (vcgeq_u32 (vsubq_u32 (ia, TinyBound), d->thresh), vcgeq_u32 (ix, d->minus_one)); - float32x4_t special_arg = x; -#if WANT_SIMD_EXCEPT if (__glibc_unlikely (v_any_u32 (special_cases))) - /* Side-step special lanes so fenv exceptions are not triggered - inadvertently. */ - x = v_zerofy_f32 (x, special_cases); -#endif + return special_case (x, special_cases, d); - /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m - is in [-0.25, 0.5]): - log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2). - - We approximate log1p(m) with a polynomial, then scale by - k*log(2). Instead of doing this directly, we use an intermediate - scale factor s = 4*k*log(2) to ensure the scale is representable - as a normalised fp32 number. 
*/ + return log1pf_inline (x, &d->d); +} - float32x4_t m = vaddq_f32 (x, v_f32 (1.0f)); +#else - /* Choose k to scale x to the range [-1/4, 1/2]. */ - int32x4_t k - = vandq_s32 (vsubq_s32 (vreinterpretq_s32_f32 (m), d->three_quarters), - v_s32 (0xff800000)); - uint32x4_t ku = vreinterpretq_u32_s32 (k); +const static struct v_log1pf_data data = V_LOG1PF_CONSTANTS_TABLE; - /* Scale x by exponent manipulation. */ - float32x4_t m_scale - = vreinterpretq_f32_u32 (vsubq_u32 (vreinterpretq_u32_f32 (x), ku)); +static float32x4_t NOINLINE VPCS_ATTR +special_case (float32x4_t x, uint32x4_t cmp) +{ + return v_call_f32 (log1pf, x, log1pf_inline (x, ptr_barrier (&data)), cmp); +} - /* Scale up to ensure that the scale factor is representable as normalised - fp32 number, and scale m down accordingly. */ - float32x4_t s = vreinterpretq_f32_u32 (vsubq_u32 (d->four, ku)); - m_scale = vaddq_f32 (m_scale, vfmaq_f32 (v_f32 (-1.0f), v_f32 (0.25f), s)); +/* Vector log1pf approximation using polynomial on reduced interval. Worst-case + error is 1.63 ULP: + _ZGVnN4v_log1pf(0x1.216d12p-2) got 0x1.fdcb12p-3 + want 0x1.fdcb16p-3. */ +VPCS_ATTR float32x4_t V_NAME_F1 (log1p) (float32x4_t x) +{ + uint32x4_t special_cases = vornq_u32 (vcleq_f32 (x, v_f32 (-1)), + vcaleq_f32 (x, v_f32 (0x1p127f))); - /* Evaluate polynomial on the reduced interval. */ - float32x4_t p = eval_poly (m_scale, d->poly); + if (__glibc_unlikely (v_any_u32 (special_cases))) + return special_case (x, special_cases); - /* The scale factor to be applied back at the end - by multiplying float(k) - by 2^-23 we get the unbiased exponent of k. */ - float32x4_t scale_back = vcvtq_f32_s32 (vshrq_n_s32 (k, 23)); + return log1pf_inline (x, ptr_barrier (&data)); +} - /* Apply the scaling back. 
*/ - float32x4_t y = vfmaq_f32 (p, scale_back, d->ln2); +#endif - if (__glibc_unlikely (v_any_u32 (special_cases))) - return special_case (special_arg, y, special_cases); - return y; -} libmvec_hidden_def (V_NAME_F1 (log1p)) HALF_WIDTH_ALIAS_F1 (log1p) +strong_alias (V_NAME_F1 (log1p), V_NAME_F1 (logp1)) +libmvec_hidden_def (V_NAME_F1 (logp1)) +HALF_WIDTH_ALIAS_F1 (logp1) diff --git a/sysdeps/aarch64/fpu/log1pf_sve.c b/sysdeps/aarch64/fpu/log1pf_sve.c index f645cc997e..5256d5e94c 100644 --- a/sysdeps/aarch64/fpu/log1pf_sve.c +++ b/sysdeps/aarch64/fpu/log1pf_sve.c @@ -98,3 +98,5 @@ svfloat32_t SV_NAME_F1 (log1p) (svfloat32_t x, svbool_t pg) return y; } + +strong_alias (SV_NAME_F1 (log1p), SV_NAME_F1 (logp1)) diff --git a/sysdeps/aarch64/fpu/log2f_advsimd.c b/sysdeps/aarch64/fpu/log2f_advsimd.c index db21836749..84effe4fe9 100644 --- a/sysdeps/aarch64/fpu/log2f_advsimd.c +++ b/sysdeps/aarch64/fpu/log2f_advsimd.c @@ -22,9 +22,9 @@ static const struct data { - uint32x4_t min_norm; + uint32x4_t off, offset_lower_bound; uint16x8_t special_bound; - uint32x4_t off, mantissa_mask; + uint32x4_t mantissa_mask; float32x4_t poly[9]; } data = { /* Coefficients generated using Remez algorithm approximate @@ -34,18 +34,22 @@ static const struct data V4 (-0x1.715458p-1f), V4 (0x1.ec701cp-2f), V4 (-0x1.7171a4p-2f), V4 (0x1.27a0b8p-2f), V4 (-0x1.e5143ep-3f), V4 (0x1.9d8ecap-3f), V4 (-0x1.c675bp-3f), V4 (0x1.9e495p-3f) }, - .min_norm = V4 (0x00800000), - .special_bound = V8 (0x7f00), /* asuint32(inf) - min_norm. */ + /* Lower bound is the smallest positive normal float 0x00800000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x0080000 - offset (which wraps around). */ + .offset_lower_bound = V4 (0x00800000 - 0x3f2aaaab), + .special_bound = V8 (0x7f00), /* top16(asuint32(inf) - 0x00800000). */ .off = V4 (0x3f2aaaab), /* 0.666667. 
*/ .mantissa_mask = V4 (0x007fffff), }; static float32x4_t VPCS_ATTR NOINLINE -special_case (float32x4_t x, float32x4_t n, float32x4_t p, float32x4_t r, - uint16x4_t cmp) +special_case (float32x4_t n, uint32x4_t u_off, float32x4_t p, float32x4_t r, + uint16x4_t cmp, const struct data *d) { /* Fall back to scalar code. */ - return v_call_f32 (log2f, x, vfmaq_f32 (n, p, r), vmovl_u16 (cmp)); + return v_call_f32 (log2f, vreinterpretq_f32_u32 (vaddq_u32 (u_off, d->off)), + vfmaq_f32 (n, p, r), vmovl_u16 (cmp)); } /* Fast implementation for single precision AdvSIMD log2, @@ -56,15 +60,21 @@ special_case (float32x4_t x, float32x4_t n, float32x4_t p, float32x4_t r, float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log2) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - uint32x4_t u = vreinterpretq_u32_f32 (x); - uint16x4_t special = vcge_u16 (vsubhn_u32 (u, d->min_norm), - vget_low_u16 (d->special_bound)); + + /* To avoid having to mov x out of the way, keep u after offset has been + applied, and recover x by adding the offset back in the special-case + handler. */ + uint32x4_t u_off = vreinterpretq_u32_f32 (x); /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - u = vsubq_u32 (u, d->off); + u_off = vsubq_u32 (u_off, d->off); float32x4_t n = vcvtq_f32_s32 ( - vshrq_n_s32 (vreinterpretq_s32_u32 (u), 23)); /* signextend. */ - u = vaddq_u32 (vandq_u32 (u, d->mantissa_mask), d->off); + vshrq_n_s32 (vreinterpretq_s32_u32 (u_off), 23)); /* signextend. */ + + uint16x4_t special = vcge_u16 (vsubhn_u32 (u_off, d->offset_lower_bound), + vget_low_u16 (d->special_bound)); + + uint32x4_t u = vaddq_u32 (vandq_u32 (u_off, d->mantissa_mask), d->off); float32x4_t r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f)); /* y = log2(1+r) + n. 
*/ @@ -72,7 +82,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log2) (float32x4_t x) float32x4_t p = v_pw_horner_8_f32 (r, r2, d->poly); if (__glibc_unlikely (v_any_u16h (special))) - return special_case (x, n, p, r, special); + return special_case (n, u_off, p, r, special, d); return vfmaq_f32 (n, p, r); } libmvec_hidden_def (V_NAME_F1 (log2)) diff --git a/sysdeps/aarch64/fpu/log2f_sve.c b/sysdeps/aarch64/fpu/log2f_sve.c index 5031c42483..939d89bfb9 100644 --- a/sysdeps/aarch64/fpu/log2f_sve.c +++ b/sysdeps/aarch64/fpu/log2f_sve.c @@ -23,6 +23,7 @@ static const struct data { float poly_02468[5]; float poly_1357[4]; + uint32_t off, lower; } data = { .poly_1357 = { /* Coefficients copied from the AdvSIMD routine, then rearranged so that coeffs @@ -32,18 +33,23 @@ static const struct data }, .poly_02468 = { 0x1.715476p0f, 0x1.ec701cp-2f, 0x1.27a0b8p-2f, 0x1.9d8ecap-3f, 0x1.9e495p-3f }, + .off = 0x3f2aaaab, + /* Lower bound is the smallest positive normal float 0x00800000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x00800000 - offset (which wraps around). */ + .lower = 0x00800000 - 0x3f2aaaab }; -#define Min (0x00800000) -#define Max (0x7f800000) -#define Thres (0x7f000000) /* Max - Min. */ +#define Thresh (0x7f000000) /* asuint32(inf) - 0x00800000. */ #define MantissaMask (0x007fffff) -#define Off (0x3f2aaaab) /* 0.666667. 
*/ static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp) +special_case (svuint32_t u_off, svfloat32_t p, svfloat32_t r2, svfloat32_t y, + svbool_t cmp) { - return sv_call_f32 (log2f, x, y, cmp); + return sv_call_f32 ( + log2f, svreinterpret_f32 (svadd_x (svptrue_b32 (), u_off, data.off)), + svmla_x (svptrue_b32 (), p, r2, y), cmp); } /* Optimised implementation of SVE log2f, using the same algorithm @@ -55,19 +61,20 @@ svfloat32_t SV_NAME_F1 (log2) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); - svuint32_t u = svreinterpret_u32 (x); - svbool_t special = svcmpge (pg, svsub_x (pg, u, Min), Thres); + svuint32_t u_off = svreinterpret_u32 (x); + + u_off = svsub_x (pg, u_off, d->off); + svbool_t special = svcmpge (pg, svsub_x (pg, u_off, d->lower), Thresh); /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - u = svsub_x (pg, u, Off); svfloat32_t n = svcvt_f32_x ( - pg, svasr_x (pg, svreinterpret_s32 (u), 23)); /* Sign-extend. */ - u = svand_x (pg, u, MantissaMask); - u = svadd_x (pg, u, Off); + pg, svasr_x (pg, svreinterpret_s32 (u_off), 23)); /* Sign-extend. */ + svuint32_t u = svand_x (pg, u_off, MantissaMask); + u = svadd_x (pg, u, d->off); svfloat32_t r = svsub_x (pg, svreinterpret_f32 (u), 1.0f); /* y = log2(1+r) + n. */ - svfloat32_t r2 = svmul_x (pg, r, r); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); /* Evaluate polynomial using pairwise Horner scheme. 
*/ svfloat32_t p_1357 = svld1rq (svptrue_b32 (), &d->poly_1357[0]); @@ -81,6 +88,6 @@ svfloat32_t SV_NAME_F1 (log2) (svfloat32_t x, const svbool_t pg) y = svmla_x (pg, q_01, r2, y); if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svmla_x (svnot_z (pg, special), n, r, y), special); - return svmla_x (pg, n, r, y); + return special_case (u_off, n, r, y, special); + return svmla_x (svptrue_b32 (), n, r, y); } diff --git a/sysdeps/aarch64/fpu/logf_advsimd.c b/sysdeps/aarch64/fpu/logf_advsimd.c index 3c0d0fcdc7..c20dbfd6c0 100644 --- a/sysdeps/aarch64/fpu/logf_advsimd.c +++ b/sysdeps/aarch64/fpu/logf_advsimd.c @@ -21,20 +21,22 @@ static const struct data { - uint32x4_t min_norm; + uint32x4_t off, offset_lower_bound; uint16x8_t special_bound; + uint32x4_t mantissa_mask; float32x4_t poly[7]; - float32x4_t ln2, tiny_bound; - uint32x4_t off, mantissa_mask; + float32x4_t ln2; } data = { /* 3.34 ulp error. */ .poly = { V4 (-0x1.3e737cp-3f), V4 (0x1.5a9aa2p-3f), V4 (-0x1.4f9934p-3f), V4 (0x1.961348p-3f), V4 (-0x1.00187cp-2f), V4 (0x1.555d7cp-2f), V4 (-0x1.ffffc8p-2f) }, .ln2 = V4 (0x1.62e43p-1f), - .tiny_bound = V4 (0x1p-126), - .min_norm = V4 (0x00800000), - .special_bound = V8 (0x7f00), /* asuint32(inf) - min_norm. */ + /* Lower bound is the smallest positive normal float 0x00800000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x00800000 - offset (which wraps around). */ + .offset_lower_bound = V4 (0x00800000 - 0x3f2aaaab), + .special_bound = V8 (0x7f00), /* top16(asuint32(inf) - 0x00800000). */ .off = V4 (0x3f2aaaab), /* 0.666667. 
*/ .mantissa_mask = V4 (0x007fffff) }; @@ -42,32 +44,37 @@ static const struct data #define P(i) d->poly[7 - i] static float32x4_t VPCS_ATTR NOINLINE -special_case (float32x4_t x, float32x4_t y, float32x4_t r2, float32x4_t p, - uint16x4_t cmp) +special_case (float32x4_t p, uint32x4_t u_off, float32x4_t y, float32x4_t r2, + uint16x4_t cmp, const struct data *d) { /* Fall back to scalar code. */ - return v_call_f32 (logf, x, vfmaq_f32 (p, y, r2), vmovl_u16 (cmp)); + return v_call_f32 (logf, vreinterpretq_f32_u32 (vaddq_u32 (u_off, d->off)), + vfmaq_f32 (p, y, r2), vmovl_u16 (cmp)); } float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log) (float32x4_t x) { const struct data *d = ptr_barrier (&data); float32x4_t n, p, q, r, r2, y; - uint32x4_t u; + uint32x4_t u, u_off; uint16x4_t cmp; - u = vreinterpretq_u32_f32 (x); - cmp = vcge_u16 (vsubhn_u32 (u, d->min_norm), - vget_low_u16 (d->special_bound)); + /* To avoid having to mov x out of the way, keep u after offset has been + applied, and recover x by adding the offset back in the special-case + handler. */ + u_off = vreinterpretq_u32_f32 (x); /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - u = vsubq_u32 (u, d->off); + u_off = vsubq_u32 (u_off, d->off); n = vcvtq_f32_s32 ( - vshrq_n_s32 (vreinterpretq_s32_u32 (u), 23)); /* signextend. */ - u = vandq_u32 (u, d->mantissa_mask); + vshrq_n_s32 (vreinterpretq_s32_u32 (u_off), 23)); /* signextend. */ + u = vandq_u32 (u_off, d->mantissa_mask); u = vaddq_u32 (u, d->off); r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f)); + cmp = vcge_u16 (vsubhn_u32 (u_off, d->offset_lower_bound), + vget_low_u16 (d->special_bound)); + /* y = log(1+r) + n*ln2. */ r2 = vmulq_f32 (r, r); /* n*ln2 + r + r2*(P1 + r*P2 + r2*(P3 + r*P4 + r2*(P5 + r*P6 + r2*P7))). 
*/ @@ -80,7 +87,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log) (float32x4_t x) p = vfmaq_f32 (r, d->ln2, n); if (__glibc_unlikely (v_any_u16h (cmp))) - return special_case (x, y, r2, p, cmp); + return special_case (p, u_off, y, r2, cmp, d); return vfmaq_f32 (p, y, r2); } libmvec_hidden_def (V_NAME_F1 (log)) diff --git a/sysdeps/aarch64/fpu/logf_sve.c b/sysdeps/aarch64/fpu/logf_sve.c index d64e810cfe..5b9324678d 100644 --- a/sysdeps/aarch64/fpu/logf_sve.c +++ b/sysdeps/aarch64/fpu/logf_sve.c @@ -24,6 +24,7 @@ static const struct data float poly_0135[4]; float poly_246[3]; float ln2; + uint32_t off, lower; } data = { .poly_0135 = { /* Coefficients copied from the AdvSIMD routine in math/, then rearranged so @@ -32,19 +33,24 @@ static const struct data -0x1.3e737cp-3f, 0x1.5a9aa2p-3f, 0x1.961348p-3f, 0x1.555d7cp-2f }, .poly_246 = { -0x1.4f9934p-3f, -0x1.00187cp-2f, -0x1.ffffc8p-2f }, - .ln2 = 0x1.62e43p-1f + .ln2 = 0x1.62e43p-1f, + .off = 0x3f2aaaab, + /* Lower bound is the smallest positive normal float 0x00800000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x00800000 - offset (which wraps around). */ + .lower = 0x00800000 - 0x3f2aaaab }; -#define Min (0x00800000) -#define Max (0x7f800000) -#define Thresh (0x7f000000) /* Max - Min. */ +#define Thresh (0x7f000000) /* asuint32(inf) - 0x00800000. */ #define Mask (0x007fffff) -#define Off (0x3f2aaaab) /* 0.666667. 
*/ static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp) +special_case (svuint32_t u_off, svfloat32_t p, svfloat32_t r2, svfloat32_t y, + svbool_t cmp) { - return sv_call_f32 (logf, x, y, cmp); + return sv_call_f32 ( + logf, svreinterpret_f32 (svadd_x (svptrue_b32 (), u_off, data.off)), + svmla_x (svptrue_b32 (), p, r2, y), cmp); } /* Optimised implementation of SVE logf, using the same algorithm and @@ -55,19 +61,21 @@ svfloat32_t SV_NAME_F1 (log) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); - svuint32_t u = svreinterpret_u32 (x); - svbool_t cmp = svcmpge (pg, svsub_x (pg, u, Min), Thresh); + svuint32_t u_off = svreinterpret_u32 (x); + + u_off = svsub_x (pg, u_off, d->off); + svbool_t cmp = svcmpge (pg, svsub_x (pg, u_off, d->lower), Thresh); /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - u = svsub_x (pg, u, Off); svfloat32_t n = svcvt_f32_x ( - pg, svasr_x (pg, svreinterpret_s32 (u), 23)); /* Sign-extend. */ - u = svand_x (pg, u, Mask); - u = svadd_x (pg, u, Off); + pg, svasr_x (pg, svreinterpret_s32 (u_off), 23)); /* Sign-extend. */ + + svuint32_t u = svand_x (pg, u_off, Mask); + u = svadd_x (pg, u, d->off); svfloat32_t r = svsub_x (pg, svreinterpret_f32 (u), 1.0f); /* y = log(1+r) + n*ln2. */ - svfloat32_t r2 = svmul_x (pg, r, r); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); /* n*ln2 + r + r2*(P6 + r*P5 + r2*(P4 + r*P3 + r2*(P2 + r*P1 + r2*P0))). 
*/ svfloat32_t p_0135 = svld1rq (svptrue_b32 (), &d->poly_0135[0]); svfloat32_t p = svmla_lane (sv_f32 (d->poly_246[0]), r, p_0135, 1); @@ -80,6 +88,6 @@ svfloat32_t SV_NAME_F1 (log) (svfloat32_t x, const svbool_t pg) p = svmla_x (pg, r, n, d->ln2); if (__glibc_unlikely (svptest_any (pg, cmp))) - return special_case (x, svmla_x (svnot_z (pg, cmp), p, r2, y), cmp); + return special_case (u_off, p, r2, y, cmp); return svmla_x (pg, p, r2, y); } diff --git a/sysdeps/aarch64/fpu/sin_advsimd.c b/sysdeps/aarch64/fpu/sin_advsimd.c index a0d9d3b819..718125cbad 100644 --- a/sysdeps/aarch64/fpu/sin_advsimd.c +++ b/sysdeps/aarch64/fpu/sin_advsimd.c @@ -22,7 +22,7 @@ static const struct data { float64x2_t poly[7]; - float64x2_t range_val, inv_pi, shift, pi_1, pi_2, pi_3; + float64x2_t range_val, inv_pi, pi_1, pi_2, pi_3; } data = { .poly = { V2 (-0x1.555555555547bp-3), V2 (0x1.1111111108a4dp-7), V2 (-0x1.a01a019936f27p-13), V2 (0x1.71de37a97d93ep-19), @@ -34,12 +34,13 @@ static const struct data .pi_1 = V2 (0x1.921fb54442d18p+1), .pi_2 = V2 (0x1.1a62633145c06p-53), .pi_3 = V2 (0x1.c1cd129024e09p-106), - .shift = V2 (0x1.8p52), }; #if WANT_SIMD_EXCEPT -# define TinyBound v_u64 (0x3000000000000000) /* asuint64 (0x1p-255). */ -# define Thresh v_u64 (0x1160000000000000) /* RangeVal - TinyBound. */ +/* asuint64(0x1p-253), below which multiply by inv_pi underflows. */ +# define TinyBound v_u64 (0x3020000000000000) +/* RangeVal - TinyBound (0x4160000000000000 - 0x3020000000000000). */ +# define Thresh v_u64 (0x1140000000000000) #endif #define C(i) d->poly[i] @@ -72,16 +73,15 @@ float64x2_t VPCS_ATTR V_NAME_D1 (sin) (float64x2_t x) fenv). These lanes will be fixed by special-case handler later. */ uint64x2_t ir = vreinterpretq_u64_f64 (vabsq_f64 (x)); cmp = vcgeq_u64 (vsubq_u64 (ir, TinyBound), Thresh); - r = vbslq_f64 (cmp, vreinterpretq_f64_u64 (cmp), x); + r = vreinterpretq_f64_u64 (vbicq_u64 (vreinterpretq_u64_f64 (x), cmp)); #else r = x; cmp = vcageq_f64 (x, d->range_val); #endif /* n = rint(|x|/pi). 
*/ - n = vfmaq_f64 (d->shift, d->inv_pi, r); - odd = vshlq_n_u64 (vreinterpretq_u64_f64 (n), 63); - n = vsubq_f64 (n, d->shift); + n = vrndaq_f64 (vmulq_f64 (r, d->inv_pi)); + odd = vshlq_n_u64 (vreinterpretq_u64_s64 (vcvtq_s64_f64 (n)), 63); /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ r = vfmsq_f64 (r, d->pi_1, n); diff --git a/sysdeps/aarch64/fpu/sinf_advsimd.c b/sysdeps/aarch64/fpu/sinf_advsimd.c index 375dfc3331..6ee9a23d5b 100644 --- a/sysdeps/aarch64/fpu/sinf_advsimd.c +++ b/sysdeps/aarch64/fpu/sinf_advsimd.c @@ -22,7 +22,7 @@ static const struct data { float32x4_t poly[4]; - float32x4_t range_val, inv_pi, shift, pi_1, pi_2, pi_3; + float32x4_t range_val, inv_pi, pi_1, pi_2, pi_3; } data = { /* 1.886 ulp error. */ .poly = { V4 (-0x1.555548p-3f), V4 (0x1.110df4p-7f), V4 (-0x1.9f42eap-13f), @@ -33,13 +33,14 @@ static const struct data .pi_3 = V4 (-0x1.ee59dap-49f), .inv_pi = V4 (0x1.45f306p-2f), - .shift = V4 (0x1.8p+23f), .range_val = V4 (0x1p20f) }; #if WANT_SIMD_EXCEPT -# define TinyBound v_u32 (0x21000000) /* asuint32(0x1p-61f). */ -# define Thresh v_u32 (0x28800000) /* RangeVal - TinyBound. */ +/* asuint32(0x1p-59f), below which multiply by inv_pi underflows. */ +# define TinyBound v_u32 (0x22000000) +/* RangeVal - TinyBound. */ +# define Thresh v_u32 (0x27800000) #endif #define C(i) d->poly[i] @@ -64,23 +65,22 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sin) (float32x4_t x) /* If fenv exceptions are to be triggered correctly, set any special lanes to 1 (which is neutral w.r.t. fenv). These lanes will be fixed by special-case handler later. */ - r = vbslq_f32 (cmp, vreinterpretq_f32_u32 (cmp), x); + r = vreinterpretq_f32_u32 (vbicq_u32 (vreinterpretq_u32_f32 (x), cmp)); #else r = x; cmp = vcageq_f32 (x, d->range_val); #endif - /* n = rint(|x|/pi) */ - n = vfmaq_f32 (d->shift, d->inv_pi, r); - odd = vshlq_n_u32 (vreinterpretq_u32_f32 (n), 31); - n = vsubq_f32 (n, d->shift); + /* n = rint(|x|/pi). 
*/ + n = vrndaq_f32 (vmulq_f32 (r, d->inv_pi)); + odd = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 31); - /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2) */ + /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ r = vfmsq_f32 (r, d->pi_1, n); r = vfmsq_f32 (r, d->pi_2, n); r = vfmsq_f32 (r, d->pi_3, n); - /* y = sin(r) */ + /* y = sin(r). */ r2 = vmulq_f32 (r, r); y = vfmaq_f32 (C (2), C (3), r2); y = vfmaq_f32 (C (1), y, r2); diff --git a/sysdeps/aarch64/fpu/sinhf_advsimd.c b/sysdeps/aarch64/fpu/sinhf_advsimd.c index 6bb7482dc2..c6ed7598e7 100644 --- a/sysdeps/aarch64/fpu/sinhf_advsimd.c +++ b/sysdeps/aarch64/fpu/sinhf_advsimd.c @@ -23,15 +23,13 @@ static const struct data { struct v_expm1f_data expm1f_consts; - uint32x4_t halff; #if WANT_SIMD_EXCEPT uint32x4_t tiny_bound, thresh; #else - uint32x4_t oflow_bound; + float32x4_t oflow_bound; #endif } data = { .expm1f_consts = V_EXPM1F_DATA, - .halff = V4 (0x3f000000), #if WANT_SIMD_EXCEPT /* 0x1.6a09e8p-32, below which expm1f underflows. */ .tiny_bound = V4 (0x2fb504f4), @@ -39,14 +37,15 @@ static const struct data .thresh = V4 (0x12fbbbb3), #else /* 0x1.61814ep+6, above which expm1f helper overflows. */ - .oflow_bound = V4 (0x42b0c0a7), + .oflow_bound = V4 (0x1.61814ep+6), #endif }; static float32x4_t NOINLINE VPCS_ATTR -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) +special_case (float32x4_t x, float32x4_t t, float32x4_t halfsign, + uint32x4_t special) { - return v_call_f32 (sinhf, x, y, special); + return v_call_f32 (sinhf, x, vmulq_f32 (t, halfsign), special); } /* Approximation for vector single-precision sinh(x) using expm1. 
@@ -60,15 +59,15 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sinh) (float32x4_t x) uint32x4_t ix = vreinterpretq_u32_f32 (x); float32x4_t ax = vabsq_f32 (x); - uint32x4_t iax = vreinterpretq_u32_f32 (ax); - uint32x4_t sign = veorq_u32 (ix, iax); - float32x4_t halfsign = vreinterpretq_f32_u32 (vorrq_u32 (sign, d->halff)); + float32x4_t halfsign = vreinterpretq_f32_u32 ( + vbslq_u32 (v_u32 (0x80000000), ix, vreinterpretq_u32_f32 (v_f32 (0.5)))); #if WANT_SIMD_EXCEPT - uint32x4_t special = vcgeq_u32 (vsubq_u32 (iax, d->tiny_bound), d->thresh); + uint32x4_t special = vcgeq_u32 ( + vsubq_u32 (vreinterpretq_u32_f32 (ax), d->tiny_bound), d->thresh); ax = v_zerofy_f32 (ax, special); #else - uint32x4_t special = vcgeq_u32 (iax, d->oflow_bound); + uint32x4_t special = vcageq_f32 (x, d->oflow_bound); #endif /* Up to the point that expm1f overflows, we can use it to calculate sinhf @@ -80,7 +79,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sinh) (float32x4_t x) /* Fall back to the scalar variant for any lanes that should trigger an exception. */ if (__glibc_unlikely (v_any_u32 (special))) - return special_case (x, vmulq_f32 (t, halfsign), special); + return special_case (x, t, halfsign, special); return vmulq_f32 (t, halfsign); } diff --git a/sysdeps/aarch64/fpu/sv_expf_inline.h b/sysdeps/aarch64/fpu/sv_expf_inline.h index 23963b5f8e..6166df6553 100644 --- a/sysdeps/aarch64/fpu/sv_expf_inline.h +++ b/sysdeps/aarch64/fpu/sv_expf_inline.h @@ -24,19 +24,20 @@ struct sv_expf_data { - float poly[5]; - float inv_ln2, ln2_hi, ln2_lo, shift; + float c1, c3, inv_ln2; + float ln2_lo, c0, c2, c4; + float ln2_hi, shift; }; /* Coefficients copied from the polynomial in AdvSIMD variant, reversed for compatibility with polynomial helpers. Shift is 1.5*2^17 + 127. 
*/ #define SV_EXPF_DATA \ { \ - .poly = { 0x1.ffffecp-1f, 0x1.fffdb6p-2f, 0x1.555e66p-3f, 0x1.573e2ep-5f, \ - 0x1.0e4020p-7f }, \ - \ - .inv_ln2 = 0x1.715476p+0f, .ln2_hi = 0x1.62e4p-1f, \ - .ln2_lo = 0x1.7f7d1cp-20f, .shift = 0x1.803f8p17f, \ + /* Coefficients copied from the polynomial in AdvSIMD variant. */ \ + .c0 = 0x1.ffffecp-1f, .c1 = 0x1.fffdb6p-2f, .c2 = 0x1.555e66p-3f, \ + .c3 = 0x1.573e2ep-5f, .c4 = 0x1.0e4020p-7f, .inv_ln2 = 0x1.715476p+0f, \ + .ln2_hi = 0x1.62e4p-1f, .ln2_lo = 0x1.7f7d1cp-20f, \ + .shift = 0x1.803f8p17f, \ } #define C(i) sv_f32 (d->poly[i]) @@ -47,26 +48,25 @@ expf_inline (svfloat32_t x, const svbool_t pg, const struct sv_expf_data *d) /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] x = ln2*n + r, with r in [-ln2/2, ln2/2]. */ - /* Load some constants in quad-word chunks to minimise memory access. */ - svfloat32_t c4_invln2_and_ln2 = svld1rq (svptrue_b32 (), &d->poly[4]); + svfloat32_t lane_consts = svld1rq (svptrue_b32 (), &d->ln2_lo); /* n = round(x/(ln2/N)). */ - svfloat32_t z = svmla_lane (sv_f32 (d->shift), x, c4_invln2_and_ln2, 1); + svfloat32_t z = svmad_x (pg, sv_f32 (d->inv_ln2), x, d->shift); svfloat32_t n = svsub_x (pg, z, d->shift); /* r = x - n*ln2/N. */ - svfloat32_t r = svmls_lane (x, n, c4_invln2_and_ln2, 2); - r = svmls_lane (r, n, c4_invln2_and_ln2, 3); + svfloat32_t r = svmsb_x (pg, sv_f32 (d->ln2_hi), n, x); + r = svmls_lane (r, n, lane_consts, 0); /* scale = 2^(n/N). */ - svfloat32_t scale = svexpa (svreinterpret_u32_f32 (z)); + svfloat32_t scale = svexpa (svreinterpret_u32 (z)); /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6. 
*/ - svfloat32_t p12 = svmla_x (pg, C (1), C (2), r); - svfloat32_t p34 = svmla_lane (C (3), r, c4_invln2_and_ln2, 0); - svfloat32_t r2 = svmul_f32_x (pg, r, r); + svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, lane_consts, 2); + svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, lane_consts, 3); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); svfloat32_t p14 = svmla_x (pg, p12, p34, r2); - svfloat32_t p0 = svmul_f32_x (pg, r, C (0)); + svfloat32_t p0 = svmul_lane (r, lane_consts, 1); svfloat32_t poly = svmla_x (pg, p0, r2, p14); return svmla_x (pg, scale, scale, poly); diff --git a/sysdeps/aarch64/fpu/tanhf_advsimd.c b/sysdeps/aarch64/fpu/tanhf_advsimd.c index 50defd6ef0..3ced9b7a41 100644 --- a/sysdeps/aarch64/fpu/tanhf_advsimd.c +++ b/sysdeps/aarch64/fpu/tanhf_advsimd.c @@ -28,13 +28,16 @@ static const struct data /* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for negative). */ .boring_bound = V4 (0x41102cb3), .large_bound = V4 (0x7f800000), - .onef = V4 (0x3f800000), }; static float32x4_t NOINLINE VPCS_ATTR -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) +special_case (float32x4_t x, uint32x4_t is_boring, float32x4_t boring, + float32x4_t q, uint32x4_t special) { - return v_call_f32 (tanhf, x, y, special); + return v_call_f32 ( + tanhf, x, + vbslq_f32 (is_boring, boring, vdivq_f32 (q, vaddq_f32 (q, v_f32 (2.0)))), + special); } /* Approximation for single-precision vector tanh(x), using a simplified @@ -50,7 +53,9 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (tanh) (float32x4_t x) uint32x4_t iax = vreinterpretq_u32_f32 (ax); uint32x4_t sign = veorq_u32 (ix, iax); uint32x4_t is_boring = vcgtq_u32 (iax, d->boring_bound); - float32x4_t boring = vreinterpretq_f32_u32 (vorrq_u32 (sign, d->onef)); + /* expm1 exponent bias is 1.0f reinterpreted to int. 
*/ + float32x4_t boring = vreinterpretq_f32_u32 (vorrq_u32 ( + sign, vreinterpretq_u32_s32 (d->expm1f_consts.exponent_bias))); #if WANT_SIMD_EXCEPT /* If fp exceptions are to be triggered properly, set all special and boring @@ -66,10 +71,12 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (tanh) (float32x4_t x) /* tanh(x) = (e^2x - 1) / (e^2x + 1). */ float32x4_t q = expm1f_inline (vmulq_n_f32 (x, 2), &d->expm1f_consts); - float32x4_t y = vdivq_f32 (q, vaddq_f32 (q, v_f32 (2.0))); + if (__glibc_unlikely (v_any_u32 (special))) - return special_case (vreinterpretq_f32_u32 (ix), - vbslq_f32 (is_boring, boring, y), special); + return special_case (vreinterpretq_f32_u32 (ix), is_boring, boring, q, + special); + + float32x4_t y = vdivq_f32 (q, vaddq_f32 (q, v_f32 (2.0))); return vbslq_f32 (is_boring, boring, y); } libmvec_hidden_def (V_NAME_F1 (tanh)) diff --git a/sysdeps/aarch64/fpu/v_expm1f_inline.h b/sysdeps/aarch64/fpu/v_expm1f_inline.h index 59b552da6b..1daedfdd51 100644 --- a/sysdeps/aarch64/fpu/v_expm1f_inline.h +++ b/sysdeps/aarch64/fpu/v_expm1f_inline.h @@ -21,48 +21,47 @@ #define AARCH64_FPU_V_EXPM1F_INLINE_H #include "v_math.h" -#include "poly_advsimd_f32.h" +#include "math_config.h" struct v_expm1f_data { - float32x4_t poly[5]; - float invln2_and_ln2[4]; - float32x4_t shift; + float32x4_t c0, c2; int32x4_t exponent_bias; + float c1, c3, inv_ln2, c4; + float ln2_hi, ln2_lo; }; /* Coefficients generated using fpminimax with degree=5 in [-log(2)/2, - log(2)/2]. Exponent bias is asuint(1.0f). - invln2_and_ln2 Stores constants: invln2, ln2_lo, ln2_hi, 0. */ + log(2)/2]. Exponent bias is asuint(1.0f). 
*/ #define V_EXPM1F_DATA \ { \ - .poly = { V4 (0x1.fffffep-2), V4 (0x1.5554aep-3), V4 (0x1.555736p-5), \ - V4 (0x1.12287cp-7), V4 (0x1.6b55a2p-10) }, \ - .shift = V4 (0x1.8p23f), .exponent_bias = V4 (0x3f800000), \ - .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 }, \ + .c0 = V4 (0x1.fffffep-2), .c1 = 0x1.5554aep-3, .c2 = V4 (0x1.555736p-5), \ + .c3 = 0x1.12287cp-7, .c4 = 0x1.6b55a2p-10, \ + .exponent_bias = V4 (0x3f800000), .inv_ln2 = 0x1.715476p+0f, \ + .ln2_hi = 0x1.62e4p-1f, .ln2_lo = 0x1.7f7d1cp-20f, \ } static inline float32x4_t expm1f_inline (float32x4_t x, const struct v_expm1f_data *d) { - /* Helper routine for calculating exp(x) - 1. - Copied from v_expm1f_1u6.c, with all special-case handling removed - the - calling routine should handle special values if required. */ + /* Helper routine for calculating exp(x) - 1. */ + + float32x2_t ln2 = vld1_f32 (&d->ln2_hi); + float32x4_t lane_consts = vld1q_f32 (&d->c1); /* Reduce argument: f in [-ln2/2, ln2/2], i is exact. */ - float32x4_t invln2_and_ln2 = vld1q_f32 (d->invln2_and_ln2); - float32x4_t j - = vsubq_f32 (vfmaq_laneq_f32 (d->shift, x, invln2_and_ln2, 0), d->shift); + float32x4_t j = vrndaq_f32 (vmulq_laneq_f32 (x, lane_consts, 2)); int32x4_t i = vcvtq_s32_f32 (j); - float32x4_t f = vfmsq_laneq_f32 (x, j, invln2_and_ln2, 1); - f = vfmsq_laneq_f32 (f, j, invln2_and_ln2, 2); + float32x4_t f = vfmsq_lane_f32 (x, j, ln2, 0); + f = vfmsq_lane_f32 (f, j, ln2, 1); - /* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f). - Uses Estrin scheme, where the main _ZGVnN4v_expm1f routine uses - Horner. */ + /* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f). 
*/ float32x4_t f2 = vmulq_f32 (f, f); float32x4_t f4 = vmulq_f32 (f2, f2); - float32x4_t p = v_estrin_4_f32 (f, f2, f4, d->poly); + float32x4_t p01 = vfmaq_laneq_f32 (d->c0, f, lane_consts, 0); + float32x4_t p23 = vfmaq_laneq_f32 (d->c2, f, lane_consts, 1); + float32x4_t p = vfmaq_f32 (p01, f2, p23); + p = vfmaq_laneq_f32 (p, f4, lane_consts, 3); p = vfmaq_f32 (f, f2, p); /* t = 2^i. */ diff --git a/sysdeps/aarch64/fpu/v_log1pf_inline.h b/sysdeps/aarch64/fpu/v_log1pf_inline.h index 643a6cdcfc..73e45a942e 100644 --- a/sysdeps/aarch64/fpu/v_log1pf_inline.h +++ b/sysdeps/aarch64/fpu/v_log1pf_inline.h @@ -25,54 +25,81 @@ struct v_log1pf_data { - float32x4_t poly[8], ln2; uint32x4_t four; int32x4_t three_quarters; + float c0, c3, c5, c7; + float32x4_t c4, c6, c1, c2, ln2; }; /* Polynomial generated using FPMinimax in [-0.25, 0.5]. First two coefficients (1, -0.5) are not stored as they can be generated more efficiently. */ #define V_LOG1PF_CONSTANTS_TABLE \ { \ - .poly \ - = { V4 (0x1.5555aap-2f), V4 (-0x1.000038p-2f), V4 (0x1.99675cp-3f), \ - V4 (-0x1.54ef78p-3f), V4 (0x1.28a1f4p-3f), V4 (-0x1.0da91p-3f), \ - V4 (0x1.abcb6p-4f), V4 (-0x1.6f0d5ep-5f) }, \ - .ln2 = V4 (0x1.62e43p-1f), .four = V4 (0x40800000), \ - .three_quarters = V4 (0x3f400000) \ + .c0 = 0x1.5555aap-2f, .c1 = V4 (-0x1.000038p-2f), \ + .c2 = V4 (0x1.99675cp-3f), .c3 = -0x1.54ef78p-3f, \ + .c4 = V4 (0x1.28a1f4p-3f), .c5 = -0x1.0da91p-3f, \ + .c6 = V4 (0x1.abcb6p-4f), .c7 = -0x1.6f0d5ep-5f, \ + .ln2 = V4 (0x1.62e43p-1f), .four = V4 (0x40800000), \ + .three_quarters = V4 (0x3f400000) \ } static inline float32x4_t -eval_poly (float32x4_t m, const float32x4_t *c) +eval_poly (float32x4_t m, const struct v_log1pf_data *d) { - /* Approximate log(1+m) on [-0.25, 0.5] using pairwise Horner (main routine - uses split Estrin, but this way reduces register pressure in the calling - routine). */ - float32x4_t q = vfmaq_f32 (v_f32 (-0.5), m, c[0]); + /* Approximate log(1+m) on [-0.25, 0.5] using pairwise Horner. 
*/ + float32x4_t c0357 = vld1q_f32 (&d->c0); + float32x4_t q = vfmaq_laneq_f32 (v_f32 (-0.5), m, c0357, 0); float32x4_t m2 = vmulq_f32 (m, m); - q = vfmaq_f32 (m, m2, q); - float32x4_t p = v_pw_horner_6_f32 (m, m2, c + 1); + float32x4_t p67 = vfmaq_laneq_f32 (d->c6, m, c0357, 3); + float32x4_t p45 = vfmaq_laneq_f32 (d->c4, m, c0357, 2); + float32x4_t p23 = vfmaq_laneq_f32 (d->c2, m, c0357, 1); + float32x4_t p = vfmaq_f32 (p45, m2, p67); + p = vfmaq_f32 (p23, m2, p); + p = vfmaq_f32 (d->c1, m, p); p = vmulq_f32 (m2, p); - return vfmaq_f32 (q, m2, p); + p = vfmaq_f32 (m, m2, p); + return vfmaq_f32 (p, m2, q); } static inline float32x4_t -log1pf_inline (float32x4_t x, const struct v_log1pf_data d) +log1pf_inline (float32x4_t x, const struct v_log1pf_data *d) { - /* Helper for calculating log(x + 1). Copied from log1pf_2u1.c, with no - special-case handling. See that file for details of the algorithm. */ + /* Helper for calculating log(x + 1). */ + + /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m + is in [-0.25, 0.5]): + log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2). + + We approximate log1p(m) with a polynomial, then scale by + k*log(2). Instead of doing this directly, we use an intermediate + scale factor s = 4*k*log(2) to ensure the scale is representable + as a normalised fp32 number. */ float32x4_t m = vaddq_f32 (x, v_f32 (1.0f)); + + /* Choose k to scale x to the range [-1/4, 1/2]. */ int32x4_t k - = vandq_s32 (vsubq_s32 (vreinterpretq_s32_f32 (m), d.three_quarters), + = vandq_s32 (vsubq_s32 (vreinterpretq_s32_f32 (m), d->three_quarters), v_s32 (0xff800000)); uint32x4_t ku = vreinterpretq_u32_s32 (k); - float32x4_t s = vreinterpretq_f32_u32 (vsubq_u32 (d.four, ku)); + + /* Scale up to ensure that the scale factor is representable as normalised + fp32 number, and scale m down accordingly. */ + float32x4_t s = vreinterpretq_f32_u32 (vsubq_u32 (d->four, ku)); + + /* Scale x by exponent manipulation. 
*/ float32x4_t m_scale = vreinterpretq_f32_u32 (vsubq_u32 (vreinterpretq_u32_f32 (x), ku)); m_scale = vaddq_f32 (m_scale, vfmaq_f32 (v_f32 (-1.0f), v_f32 (0.25f), s)); - float32x4_t p = eval_poly (m_scale, d.poly); + + /* Evaluate polynomial on the reduced interval. */ + float32x4_t p = eval_poly (m_scale, d); + + /* The scale factor to be applied back at the end - by multiplying float(k) + by 2^-23 we get the unbiased exponent of k. */ float32x4_t scale_back = vmulq_f32 (vcvtq_f32_s32 (k), v_f32 (0x1.0p-23f)); - return vfmaq_f32 (p, scale_back, d.ln2); + + /* Apply the scaling back. */ + return vfmaq_f32 (p, scale_back, d->ln2); } #endif diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps index a7a4a94265..8c333fcd29 100644 --- a/sysdeps/aarch64/libm-test-ulps +++ b/sysdeps/aarch64/libm-test-ulps @@ -1082,24 +1082,24 @@ float: 1 ldouble: 3 Function: "exp10m1": -double: 2 -float: 1 -ldouble: 1 +double: 4 +float: 2 +ldouble: 3 Function: "exp10m1_downward": -double: 1 -float: 1 -ldouble: 3 +double: 3 +float: 3 +ldouble: 6 Function: "exp10m1_towardzero": -double: 1 -float: 1 -ldouble: 3 +double: 2 +float: 3 +ldouble: 6 Function: "exp10m1_upward": -double: 3 -float: 1 -ldouble: 3 +double: 5 +float: 3 +ldouble: 6 Function: "exp2": double: 1 @@ -1130,24 +1130,24 @@ float: 1 ldouble: 2 Function: "exp2m1": -double: 1 -float: 1 -ldouble: 1 +double: 2 +float: 2 +ldouble: 2 Function: "exp2m1_downward": -double: 1 -float: 1 -ldouble: 2 +double: 3 +float: 3 +ldouble: 3 Function: "exp2m1_towardzero": -double: 2 -float: 1 -ldouble: 2 +double: 3 +float: 2 +ldouble: 4 Function: "exp2m1_upward": -double: 1 -float: 1 -ldouble: 2 +double: 3 +float: 3 +ldouble: 5 Function: "exp_advsimd": double: 1 @@ -1356,24 +1356,24 @@ float: 2 ldouble: 1 Function: "log10p1": -double: 1 -float: 1 +double: 2 +float: 2 ldouble: 3 Function: "log10p1_downward": double: 2 -float: 1 -ldouble: 2 +float: 3 +ldouble: 4 Function: "log10p1_towardzero": -double: 2 +double: 3 float: 2 
-ldouble: 2 +ldouble: 3 Function: "log10p1_upward": double: 2 -float: 1 -ldouble: 3 +float: 3 +ldouble: 4 Function: "log1p": double: 1 @@ -1653,22 +1653,18 @@ ldouble: 3 Function: "tgamma": double: 9 -float: 8 ldouble: 4 Function: "tgamma_downward": double: 9 -float: 7 ldouble: 5 Function: "tgamma_towardzero": double: 9 -float: 7 ldouble: 5 Function: "tgamma_upward": double: 9 -float: 8 ldouble: 4 Function: "y0": diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S index 7ef77ee8c9..b76dde1557 100644 --- a/sysdeps/aarch64/memset.S +++ b/sysdeps/aarch64/memset.S @@ -1,4 +1,5 @@ -/* Copyright (C) 2012-2024 Free Software Foundation, Inc. +/* Generic optimized memset using SIMD. + Copyright (C) 2012-2024 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -17,7 +18,6 @@ <https://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include "memset-reg.h" #ifndef MEMSET # define MEMSET memset @@ -25,130 +25,131 @@ /* Assumptions: * - * ARMv8-a, AArch64, unaligned accesses + * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses. * */ -ENTRY (MEMSET) +#define dstin x0 +#define valw w1 +#define count x2 +#define dst x3 +#define dstend x4 +#define zva_val x5 +#define off x3 +#define dstend2 x5 +ENTRY (MEMSET) PTR_ARG (0) SIZE_ARG (2) dup v0.16B, valw + cmp count, 16 + b.lo L(set_small) + add dstend, dstin, count + cmp count, 64 + b.hs L(set_128) - cmp count, 96 - b.hi L(set_long) - cmp count, 16 - b.hs L(set_medium) - mov val, v0.D[0] + /* Set 16..63 bytes. */ + mov off, 16 + and off, off, count, lsr 1 + sub dstend2, dstend, off + str q0, [dstin] + str q0, [dstin, off] + str q0, [dstend2, -16] + str q0, [dstend, -16] + ret + .p2align 4 /* Set 0..15 bytes. 
*/ - tbz count, 3, 1f - str val, [dstin] - str val, [dstend, -8] - ret - nop -1: tbz count, 2, 2f - str valw, [dstin] - str valw, [dstend, -4] +L(set_small): + add dstend, dstin, count + cmp count, 4 + b.lo 2f + lsr off, count, 3 + sub dstend2, dstend, off, lsl 2 + str s0, [dstin] + str s0, [dstin, off, lsl 2] + str s0, [dstend2, -4] + str s0, [dstend, -4] ret + + /* Set 0..3 bytes. */ 2: cbz count, 3f + lsr off, count, 1 strb valw, [dstin] - tbz count, 1, 3f - strh valw, [dstend, -2] + strb valw, [dstin, off] + strb valw, [dstend, -1] 3: ret - /* Set 17..96 bytes. */ -L(set_medium): - str q0, [dstin] - tbnz count, 6, L(set96) - str q0, [dstend, -16] - tbz count, 5, 1f - str q0, [dstin, 16] - str q0, [dstend, -32] -1: ret - .p2align 4 - /* Set 64..96 bytes. Write 64 bytes from the start and - 32 bytes from the end. */ -L(set96): - str q0, [dstin, 16] +L(set_128): + bic dst, dstin, 15 + cmp count, 128 + b.hi L(set_long) + stp q0, q0, [dstin] stp q0, q0, [dstin, 32] + stp q0, q0, [dstend, -64] stp q0, q0, [dstend, -32] ret - .p2align 3 - nop + .p2align 4 L(set_long): - and valw, valw, 255 - bic dst, dstin, 15 str q0, [dstin] - cmp count, 256 - ccmp valw, 0, 0, cs - b.eq L(try_zva) -L(no_zva): - sub count, dstend, dst /* Count is 16 too large. */ - sub dst, dst, 16 /* Dst is biased by -32. */ - sub count, count, 64 + 16 /* Adjust count and bias for loop. */ -1: stp q0, q0, [dst, 32] - stp q0, q0, [dst, 64]! -L(tail64): - subs count, count, 64 - b.hi 1b -2: stp q0, q0, [dstend, -64] + str q0, [dst, 16] + tst valw, 255 + b.ne L(no_zva) +#ifndef ZVA64_ONLY + mrs zva_val, dczid_el0 + and zva_val, zva_val, 31 + cmp zva_val, 4 /* ZVA size is 64 bytes. */ + b.ne L(zva_128) +#endif + stp q0, q0, [dst, 32] + bic dst, dstin, 63 + sub count, dstend, dst /* Count is now 64 too large. */ + sub count, count, 64 + 64 /* Adjust count and bias for loop. */ + + /* Write last bytes before ZVA loop. 
*/ + stp q0, q0, [dstend, -64] stp q0, q0, [dstend, -32] + + .p2align 4 +L(zva64_loop): + add dst, dst, 64 + dc zva, dst + subs count, count, 64 + b.hi L(zva64_loop) ret -L(try_zva): -#ifndef ZVA64_ONLY .p2align 3 - mrs tmp1, dczid_el0 - tbnz tmp1w, 4, L(no_zva) - and tmp1w, tmp1w, 15 - cmp tmp1w, 4 /* ZVA size is 64 bytes. */ - b.ne L(zva_128) - nop -#endif - /* Write the first and last 64 byte aligned block using stp rather - than using DC ZVA. This is faster on some cores. - */ - .p2align 4 -L(zva_64): - str q0, [dst, 16] +L(no_zva): + sub count, dstend, dst /* Count is 32 too large. */ + sub count, count, 64 + 32 /* Adjust count and bias for loop. */ +L(no_zva_loop): stp q0, q0, [dst, 32] - bic dst, dst, 63 stp q0, q0, [dst, 64] - stp q0, q0, [dst, 96] - sub count, dstend, dst /* Count is now 128 too large. */ - sub count, count, 128+64+64 /* Adjust count and bias for loop. */ - add dst, dst, 128 -1: dc zva, dst add dst, dst, 64 subs count, count, 64 - b.hi 1b - stp q0, q0, [dst, 0] - stp q0, q0, [dst, 32] + b.hi L(no_zva_loop) stp q0, q0, [dstend, -64] stp q0, q0, [dstend, -32] ret #ifndef ZVA64_ONLY - .p2align 3 + .p2align 4 L(zva_128): - cmp tmp1w, 5 /* ZVA size is 128 bytes. */ - b.ne L(zva_other) + cmp zva_val, 5 /* ZVA size is 128 bytes. */ + b.ne L(no_zva) - str q0, [dst, 16] stp q0, q0, [dst, 32] stp q0, q0, [dst, 64] stp q0, q0, [dst, 96] bic dst, dst, 127 sub count, dstend, dst /* Count is now 128 too large. */ - sub count, count, 128+128 /* Adjust count and bias for loop. */ - add dst, dst, 128 -1: dc zva, dst - add dst, dst, 128 + sub count, count, 128 + 128 /* Adjust count and bias for loop. */ +1: add dst, dst, 128 + dc zva, dst subs count, count, 128 b.hi 1b stp q0, q0, [dstend, -128] @@ -156,35 +157,6 @@ L(zva_128): stp q0, q0, [dstend, -64] stp q0, q0, [dstend, -32] ret - -L(zva_other): - mov tmp2w, 4 - lsl zva_lenw, tmp2w, tmp1w - add tmp1, zva_len, 64 /* Max alignment bytes written. 
*/ - cmp count, tmp1 - blo L(no_zva) - - sub tmp2, zva_len, 1 - add tmp1, dst, zva_len - add dst, dst, 16 - subs count, tmp1, dst /* Actual alignment bytes to write. */ - bic tmp1, tmp1, tmp2 /* Aligned dc zva start address. */ - beq 2f -1: stp q0, q0, [dst], 64 - stp q0, q0, [dst, -32] - subs count, count, 64 - b.hi 1b -2: mov dst, tmp1 - sub count, dstend, tmp1 /* Remaining bytes to write. */ - subs count, count, zva_len - b.lo 4f -3: dc zva, dst - add dst, dst, zva_len - subs count, count, zva_len - b.hs 3b -4: add count, count, zva_len - sub dst, dst, 32 /* Bias dst for tail loop. */ - b L(tail64) #endif END (MEMSET) diff --git a/sysdeps/aarch64/multiarch/memset_a64fx.S b/sysdeps/aarch64/multiarch/memset_a64fx.S index 2e6d882fc9..f665b5a891 100644 --- a/sysdeps/aarch64/multiarch/memset_a64fx.S +++ b/sysdeps/aarch64/multiarch/memset_a64fx.S @@ -18,7 +18,6 @@ <https://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include <sysdeps/aarch64/memset-reg.h> /* Assumptions: * @@ -36,6 +35,14 @@ .arch armv8.2-a+sve +#define dstin x0 +#define valw w1 +#define count x2 +#define dst x3 +#define dstend x4 +#define tmp1 x5 +#define tmp2 x6 + .macro st1b_unroll first=0, last=7 st1b z0.b, p0, [dst, \first, mul vl] .if \last-\first diff --git a/sysdeps/aarch64/multiarch/memset_emag.S b/sysdeps/aarch64/multiarch/memset_emag.S index 6d714ed0e1..cf1b25f2ed 100644 --- a/sysdeps/aarch64/multiarch/memset_emag.S +++ b/sysdeps/aarch64/multiarch/memset_emag.S @@ -18,7 +18,6 @@ <https://www.gnu.org/licenses/>. 
*/ #include <sysdep.h> -#include "memset-reg.h" /* Assumptions: * @@ -26,6 +25,13 @@ * */ +#define dstin x0 +#define val x1 +#define valw w1 +#define count x2 +#define dst x3 +#define dstend x4 + ENTRY (__memset_emag) PTR_ARG (0) diff --git a/sysdeps/aarch64/multiarch/memset_kunpeng.S b/sysdeps/aarch64/multiarch/memset_kunpeng.S index 7b21550137..f815c20b03 100644 --- a/sysdeps/aarch64/multiarch/memset_kunpeng.S +++ b/sysdeps/aarch64/multiarch/memset_kunpeng.S @@ -18,7 +18,6 @@ <https://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include <sysdeps/aarch64/memset-reg.h> /* Assumptions: * @@ -26,6 +25,12 @@ * */ +#define dstin x0 +#define valw w1 +#define count x2 +#define dst x3 +#define dstend x4 + ENTRY (__memset_kunpeng) PTR_ARG (0) diff --git a/sysdeps/aarch64/multiarch/memset_oryon1.S b/sysdeps/aarch64/multiarch/memset_oryon1.S index b43a43b54e..6fa28a9bd0 100644 --- a/sysdeps/aarch64/multiarch/memset_oryon1.S +++ b/sysdeps/aarch64/multiarch/memset_oryon1.S @@ -19,12 +19,18 @@ <https://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include "memset-reg.h" /* Assumptions: ARMv8-a, AArch64, unaligned accesses */ +#define dstin x0 +#define val x1 +#define valw w1 +#define count x2 +#define dst x3 +#define dstend x4 + ENTRY (__memset_oryon1) PTR_ARG (0) diff --git a/sysdeps/aarch64/strlen.S b/sysdeps/aarch64/strlen.S index ab2a576cdb..352fb40d3a 100644 --- a/sysdeps/aarch64/strlen.S +++ b/sysdeps/aarch64/strlen.S @@ -1,4 +1,5 @@ -/* Copyright (C) 2012-2024 Free Software Foundation, Inc. +/* Generic optimized strlen using SIMD. + Copyright (C) 2012-2024 Free Software Foundation, Inc. This file is part of the GNU C Library. 
@@ -56,36 +57,50 @@ ENTRY (STRLEN) shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */ fmov synd, dend lsr synd, synd, shift - cbz synd, L(loop) + cbz synd, L(next16) rbit synd, synd clz result, synd lsr result, result, 2 ret +L(next16): + ldr data, [src, 16] + cmeq vhas_nul.16b, vdata.16b, 0 + shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */ + fmov synd, dend + cbz synd, L(loop) + add src, src, 16 +#ifndef __AARCH64EB__ + rbit synd, synd +#endif + sub result, src, srcin + clz tmp, synd + add result, result, tmp, lsr 2 + ret + .p2align 5 L(loop): - ldr data, [src, 16] + ldr data, [src, 32]! cmeq vhas_nul.16b, vdata.16b, 0 - umaxp vend.16b, vhas_nul.16b, vhas_nul.16b + addhn vend.8b, vhas_nul.8h, vhas_nul.8h fmov synd, dend cbnz synd, L(loop_end) - ldr data, [src, 32]! + ldr data, [src, 16] cmeq vhas_nul.16b, vdata.16b, 0 - umaxp vend.16b, vhas_nul.16b, vhas_nul.16b + addhn vend.8b, vhas_nul.8h, vhas_nul.8h fmov synd, dend cbz synd, L(loop) - sub src, src, 16 + add src, src, 16 L(loop_end): - shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */ - sub result, src, srcin - fmov synd, dend + sub result, shift, src, lsl 2 /* (srcin - src) << 2. 
*/ #ifndef __AARCH64EB__ rbit synd, synd + sub result, result, 3 #endif - add result, result, 16 clz tmp, synd - add result, result, tmp, lsr 2 + sub result, tmp, result + lsr result, result, 2 ret END (STRLEN) diff --git a/sysdeps/alpha/fpu/libm-test-ulps b/sysdeps/alpha/fpu/libm-test-ulps index e28c2af683..5c6175406b 100644 --- a/sysdeps/alpha/fpu/libm-test-ulps +++ b/sysdeps/alpha/fpu/libm-test-ulps @@ -1410,22 +1410,18 @@ ldouble: 3 Function: "tgamma": double: 9 -float: 8 ldouble: 4 Function: "tgamma_downward": double: 9 -float: 7 ldouble: 5 Function: "tgamma_towardzero": double: 9 -float: 7 ldouble: 5 Function: "tgamma_upward": double: 9 -float: 8 ldouble: 4 Function: "y0": diff --git a/sysdeps/arc/bits/endianness.h b/sysdeps/arc/bits/endianness.h index 8f17ca84b4..4a6e7d4c15 100644 --- a/sysdeps/arc/bits/endianness.h +++ b/sysdeps/arc/bits/endianness.h @@ -5,11 +5,7 @@ # error "Never use <bits/endian.h> directly; include <endian.h> instead." #endif -/* ARC has selectable endianness. */ -#ifdef __BIG_ENDIAN__ -# define __BYTE_ORDER __BIG_ENDIAN -#else -# define __BYTE_ORDER __LITTLE_ENDIAN -#endif +/* Only little-endian ARC is supported. */ +#define __BYTE_ORDER __LITTLE_ENDIAN #endif /* bits/endianness.h */ diff --git a/sysdeps/arc/configure b/sysdeps/arc/configure index 08e4f23f89..b4192983bf 100644 --- a/sysdeps/arc/configure +++ b/sysdeps/arc/configure @@ -10,7 +10,7 @@ libc_cv_asm_line_sep='`' printf "%s\n" "#define ASM_LINE_SEP $libc_cv_asm_line_sep" >>confdefs.h -# For big endian ABI, generate a symbol for selecting right dynamic linker +# ARC big endian ABI is no longer supported. { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for egrep -e" >&5 @@ -175,13 +175,8 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_arc_be" >&5 printf "%s\n" "$libc_cv_arc_be" >&6; } if test $libc_cv_arc_be = yes; then - # For shlib-versions. 
- printf "%s\n" "#define HAVE_ARC_BE 1" >>confdefs.h - - config_vars="$config_vars -default-abi = arcbe" -else - config_vars="$config_vars -default-abi = arcle" + as_fn_error $? "big-endian not supported" "$LINENO" 5 fi +config_vars="$config_vars +default-abi = arcle" diff --git a/sysdeps/arc/configure.ac b/sysdeps/arc/configure.ac index 619da4e088..48dc6f5b9b 100644 --- a/sysdeps/arc/configure.ac +++ b/sysdeps/arc/configure.ac @@ -9,7 +9,7 @@ libc_cv_have_sdata_section=no libc_cv_asm_line_sep='`' AC_DEFINE_UNQUOTED(ASM_LINE_SEP, $libc_cv_asm_line_sep) -# For big endian ABI, generate a symbol for selecting right dynamic linker +# ARC big endian ABI is no longer supported. AC_CACHE_CHECK([for big endian], [libc_cv_arc_be], [AC_EGREP_CPP(yes,[#ifdef __BIG_ENDIAN__ @@ -17,9 +17,6 @@ AC_CACHE_CHECK([for big endian], #endif ], libc_cv_arc_be=yes, libc_cv_arc_be=no)]) if test $libc_cv_arc_be = yes; then - # For shlib-versions. - AC_DEFINE(HAVE_ARC_BE) - LIBC_CONFIG_VAR([default-abi], [arcbe]) -else - LIBC_CONFIG_VAR([default-abi], [arcle]) + AC_MSG_ERROR([big-endian not supported]) fi +LIBC_CONFIG_VAR([default-abi], [arcle]) diff --git a/sysdeps/arc/fpu/libm-test-ulps b/sysdeps/arc/fpu/libm-test-ulps index 41c8ef16d7..d1665b9b72 100644 --- a/sysdeps/arc/fpu/libm-test-ulps +++ b/sysdeps/arc/fpu/libm-test-ulps @@ -787,6 +787,22 @@ Function: "exp10_upward": double: 4 float: 1 +Function: "exp10m1": +double: 4 +float: 2 + +Function: "exp10m1_downward": +double: 3 +float: 3 + +Function: "exp10m1_towardzero": +double: 2 +float: 3 + +Function: "exp10m1_upward": +double: 5 +float: 3 + Function: "exp2": double: 1 float: 1 @@ -801,6 +817,22 @@ Function: "exp2_upward": double: 2 float: 1 +Function: "exp2m1": +double: 2 +float: 2 + +Function: "exp2m1_downward": +double: 3 +float: 3 + +Function: "exp2m1_towardzero": +double: 3 +float: 2 + +Function: "exp2m1_upward": +double: 3 +float: 3 + Function: "exp_downward": double: 1 float: 1 @@ -943,6 +975,22 @@ Function: "log10_upward": 
double: 3 float: 4 +Function: "log10p1": +double: 2 +float: 2 + +Function: "log10p1_downward": +double: 2 +float: 3 + +Function: "log10p1_towardzero": +double: 3 +float: 2 + +Function: "log10p1_upward": +double: 2 +float: 3 + Function: "log1p": double: 1 float: 1 @@ -971,6 +1019,22 @@ Function: "log2_upward": double: 1 float: 1 +Function: "log2p1": +double: 2 +float: 2 + +Function: "log2p1_downward": +double: 2 +float: 2 + +Function: "log2p1_towardzero": +double: 2 +float: 2 + +Function: "log2p1_upward": +double: 2 +float: 2 + Function: "log_towardzero": double: 1 float: 1 @@ -1093,19 +1157,15 @@ float: 3 Function: "tgamma": double: 9 -float: 9 Function: "tgamma_downward": double: 9 -float: 9 Function: "tgamma_towardzero": double: 9 -float: 8 Function: "tgamma_upward": double: 9 -float: 9 Function: "y0": double: 3 diff --git a/sysdeps/arc/nofpu/libm-test-ulps b/sysdeps/arc/nofpu/libm-test-ulps index d3f45957d4..e768024eb5 100644 --- a/sysdeps/arc/nofpu/libm-test-ulps +++ b/sysdeps/arc/nofpu/libm-test-ulps @@ -192,9 +192,17 @@ Function: "exp10": double: 2 float: 1 +Function: "exp10m1": +double: 3 +float: 2 + Function: "exp2": double: 1 +Function: "exp2m1": +double: 2 +float: 2 + Function: "expm1": double: 1 float: 1 @@ -205,6 +213,7 @@ float: 7 Function: "hypot": double: 1 +float: 1 Function: "j0": double: 2 @@ -226,6 +235,10 @@ Function: "log10": double: 2 float: 2 +Function: "log10p1": +double: 1 +float: 2 + Function: "log1p": double: 1 float: 1 @@ -234,6 +247,10 @@ Function: "log2": double: 2 float: 1 +Function: "log2p1": +double: 1 +float: 2 + Function: "logp1": double: 1 float: 1 @@ -262,7 +279,6 @@ float: 2 Function: "tgamma": double: 9 -float: 8 Function: "y0": double: 3 diff --git a/sysdeps/arm/libm-test-ulps b/sysdeps/arm/libm-test-ulps index 6480353d39..21fe98a7bd 100644 --- a/sysdeps/arm/libm-test-ulps +++ b/sysdeps/arm/libm-test-ulps @@ -778,20 +778,20 @@ double: 2 float: 1 Function: "exp10m1": -double: 2 -float: 1 +double: 3 +float: 2 Function: 
"exp10m1_downward": -double: 1 -float: 1 +double: 3 +float: 3 Function: "exp10m1_towardzero": -double: 1 -float: 1 +double: 2 +float: 3 Function: "exp10m1_upward": -double: 3 -float: 1 +double: 4 +float: 4 Function: "exp2": double: 1 @@ -810,20 +810,20 @@ double: 1 float: 1 Function: "exp2m1": -double: 1 -float: 1 +double: 2 +float: 2 Function: "exp2m1_downward": -double: 2 -float: 1 +double: 3 +float: 3 Function: "exp2m1_towardzero": -double: 2 -float: 1 +double: 3 +float: 2 Function: "exp2m1_upward": -double: 1 -float: 1 +double: 3 +float: 3 Function: "exp_downward": double: 1 @@ -967,19 +967,19 @@ float: 2 Function: "log10p1": double: 1 -float: 1 +float: 2 Function: "log10p1_downward": double: 2 -float: 1 +float: 3 Function: "log10p1_towardzero": -double: 2 +double: 3 float: 2 Function: "log10p1_upward": double: 2 -float: 1 +float: 3 Function: "log1p": double: 1 @@ -1152,19 +1152,15 @@ float: 3 Function: "tgamma": double: 9 -float: 8 Function: "tgamma_downward": double: 9 -float: 7 Function: "tgamma_towardzero": double: 9 -float: 7 Function: "tgamma_upward": double: 9 -float: 8 Function: "y0": double: 3 diff --git a/sysdeps/csky/fpu/libm-test-ulps b/sysdeps/csky/fpu/libm-test-ulps index fc634f89ca..151df00638 100644 --- a/sysdeps/csky/fpu/libm-test-ulps +++ b/sysdeps/csky/fpu/libm-test-ulps @@ -1061,19 +1061,15 @@ float: 3 Function: "tgamma": double: 9 -float: 8 Function: "tgamma_downward": double: 8 -float: 7 Function: "tgamma_towardzero": double: 9 -float: 7 Function: "tgamma_upward": double: 9 -float: 8 Function: "y0": double: 3 diff --git a/sysdeps/csky/nofpu/libm-test-ulps b/sysdeps/csky/nofpu/libm-test-ulps index a1e28c8ee0..48697a7b09 100644 --- a/sysdeps/csky/nofpu/libm-test-ulps +++ b/sysdeps/csky/nofpu/libm-test-ulps @@ -1092,19 +1092,15 @@ float: 3 Function: "tgamma": double: 9 -float: 8 Function: "tgamma_downward": double: 5 -float: 5 Function: "tgamma_towardzero": double: 5 -float: 4 Function: "tgamma_upward": double: 4 -float: 4 Function: "y0": 
double: 3 diff --git a/sysdeps/generic/internal-signals.h b/sysdeps/generic/internal-signals.h index 3db100be10..e031a96bac 100644 --- a/sysdeps/generic/internal-signals.h +++ b/sysdeps/generic/internal-signals.h @@ -20,6 +20,7 @@ # define __INTERNAL_SIGNALS_H #include <signal.h> +#include <internal-sigset.h> #include <sigsetops.h> #include <stdbool.h> #include <stddef.h> @@ -39,10 +40,32 @@ clear_internal_signals (sigset_t *set) { } -typedef sigset_t internal_sigset_t; - #define internal_sigemptyset(__s) __sigemptyset (__s) +#define internal_sigfillset(__s) __sigfillset (__s) #define internal_sigaddset(__s, __i) __sigaddset (__s, __i) #define internal_sigprocmask(__h, __s, __o) __sigprocmask (__h, __s, __o) +static inline void +internal_signal_block_all (internal_sigset_t *oset) +{ + internal_sigset_t set; + internal_sigfillset (&set); + internal_sigprocmask (SIG_BLOCK, &set, oset); +} + +static inline void +internal_signal_restore_set (const internal_sigset_t *set) +{ + internal_sigprocmask (SIG_SETMASK, set, NULL); +} + +static inline void +internal_signal_unblock_signal (int sig) +{ + internal_sigset_t set; + internal_sigemptyset (&set); + internal_sigaddset (&set, sig); + internal_sigprocmask (SIG_UNBLOCK, &set, NULL); +} + #endif /* __INTERNAL_SIGNALS_H */ diff --git a/sysdeps/aarch64/memset-reg.h b/sysdeps/generic/internal-sigset.h index 6c7f60b37e..80279ffc47 100644 --- a/sysdeps/aarch64/memset-reg.h +++ b/sysdeps/generic/internal-sigset.h @@ -1,5 +1,5 @@ -/* Register aliases for memset to be used across implementations. - Copyright (C) 2017-2024 Free Software Foundation, Inc. +/* Internal sigset_t definition. + Copyright (C) 2022-2023 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,15 +16,11 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. 
*/ -#define dstin x0 -#define val x1 -#define valw w1 -#define count x2 -#define dst x3 -#define dstend x4 -#define tmp1 x5 -#define tmp1w w5 -#define tmp2 x6 -#define tmp2w w6 -#define zva_len x7 -#define zva_lenw w7 +#ifndef _INTERNAL_SIGSET_H +#define _INTERNAL_SIGSET_H + +#include <signal.h> + +typedef sigset_t internal_sigset_t; + +#endif diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h index 656e8a3fa0..259ce2e7d6 100644 --- a/sysdeps/generic/ldsodefs.h +++ b/sysdeps/generic/ldsodefs.h @@ -1200,10 +1200,8 @@ extern void _dl_get_tls_static_info (size_t *sizep, size_t *alignp); extern void _dl_allocate_static_tls (struct link_map *map) attribute_hidden; -/* These are internal entry points to the two halves of _dl_allocate_tls, - only used within rtld.c itself at startup time. */ extern void *_dl_allocate_tls_storage (void) attribute_hidden; -extern void *_dl_allocate_tls_init (void *, bool); +extern void *_dl_allocate_tls_init (void *result, bool main_thread); rtld_hidden_proto (_dl_allocate_tls_init) /* True if the TCB has been set up. */ @@ -1241,13 +1239,7 @@ extern void *_dl_open (const char *name, int mode, const void *caller, extern int _dl_scope_free (void *) attribute_hidden; -/* Add module to slot information data. If DO_ADD is false, only the - required memory is allocated. Must be called with GL - (dl_load_tls_lock) acquired. If the function has already been called - for the link map L with !do_add, then this function will not raise - an exception, otherwise it is possible that it encounters a memory - allocation failure. 
*/ -extern void _dl_add_to_slotinfo (struct link_map *l, bool do_add) +extern bool _dl_add_to_slotinfo (struct link_map *l, bool do_add) attribute_hidden; /* Update slot information data for at least the generation of the diff --git a/sysdeps/generic/mremap-failure.h b/sysdeps/generic/mremap-failure.h new file mode 100644 index 0000000000..bc0d476368 --- /dev/null +++ b/sysdeps/generic/mremap-failure.h @@ -0,0 +1,25 @@ +/* mremap failure handling. Generic version. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +/* Return exit value on mremap failure with errno ERR. */ + +static int +mremap_failure_exit (int err) +{ + return EXIT_FAILURE; +} diff --git a/sysdeps/generic/syscall_types.h b/sysdeps/generic/syscall_types.h new file mode 100644 index 0000000000..2ddeaa2b5f --- /dev/null +++ b/sysdeps/generic/syscall_types.h @@ -0,0 +1,25 @@ +/* Types and macros used for syscall issuing. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _SYSCALL_TYPES_H +#define _SYSCALL_TYPES_H + +typedef long int __syscall_arg_t; +#define __SSC(__x) ((__syscall_arg_t) (__x)) + +#endif diff --git a/sysdeps/gnu/Makefile b/sysdeps/gnu/Makefile index c89eb8ed1c..d23ded40a0 100644 --- a/sysdeps/gnu/Makefile +++ b/sysdeps/gnu/Makefile @@ -16,37 +16,67 @@ # <https://www.gnu.org/licenses/>. ifeq ($(subdir),login) -sysdep_routines += setutxent getutxent endutxent getutxid getutxline \ - pututxline utmpxname updwtmpx getutmpx getutmp +sysdep_routines += \ + endutxent \ + getutmp \ + getutmpx \ + getutxent \ + getutxid \ + getutxline \ + pututxline \ + setutxent \ + updwtmpx \ + utmpxname \ + # sysdep_routines -sysdep_headers += utmpx.h bits/utmpx.h +sysdep_headers += \ + bits/utmpx.h \ + utmpx.h \ + # sysdep_headers endif ifeq ($(subdir),inet) -sysdep_headers += netinet/udp.h netinet/ip_icmp.h +sysdep_headers += \ + netinet/ip_icmp.h \ + netinet/udp.h \ + # sysdep_headers endif ifeq ($(subdir),misc) -sysdep_headers += sys/mtio.h +sysdep_headers += \ + sys/mtio.h \ + # sysdep_headers endif ifeq ($(subdir),csu) -routines += unwind-resume -shared-only-routines += unwind-resume +routines += \ + unwind-resume \ + # routines +shared-only-routines += \ + unwind-resume \ + # shared-only-routines CFLAGS-unwind-resume.c += -fexceptions -fasynchronous-unwind-tables endif + ifeq ($(subdir),rt) -librt-sysdep_routines += rt-unwind-resume -librt-shared-only-routines += rt-unwind-resume +librt-sysdep_routines += \ + rt-unwind-resume \ + # librt-sysdep_routines +librt-shared-only-routines += \ + rt-unwind-resume \ + # 
librt-shared-only-routines CFLAGS-rt-unwind-resume.c += -fexceptions -fasynchronous-unwind-tables endif + ifeq ($(subdir),sysvipc) -sysdep_headers += bits/ipc-perm.h \ - bits/types/struct_msqid_ds.h \ - bits/types/struct_shmid_ds.h +sysdep_headers += \ + bits/ipc-perm.h \ + bits/types/struct_msqid_ds.h \ + bits/types/struct_shmid_ds.h + # sysdep_headers endif diff --git a/sysdeps/hppa/dl-fptr.h b/sysdeps/hppa/dl-fptr.h index 7c5eb0bd30..b7fd9cf71a 100644 --- a/sysdeps/hppa/dl-fptr.h +++ b/sysdeps/hppa/dl-fptr.h @@ -19,7 +19,28 @@ #ifndef dl_hppa_fptr_h #define dl_hppa_fptr_h 1 -#include <sysdeps/generic/dl-fptr.h> +/* An FDESC is a function descriptor. */ + +struct fdesc + { + ElfW(Addr) ip; /* code entry point */ + ElfW(Addr) gp; /* global pointer */ + }; + +struct fdesc_table + { + struct fdesc_table *next; + unsigned int len; /* # of entries in fdesc table */ + volatile unsigned int first_unused; /* index of first available entry */ + struct fdesc fdesc[0]; + }; + +struct link_map; + +extern ElfW(Addr) _dl_boot_fptr_table []; + +extern ElfW(Addr) _dl_make_fptr (struct link_map *, const ElfW(Sym) *, + ElfW(Addr)); /* Initialize function pointer code. Call before relocation processing. 
*/ extern void _dl_fptr_init (void); diff --git a/sysdeps/hppa/fpu/libm-test-ulps b/sysdeps/hppa/fpu/libm-test-ulps index ea5101f6b6..57c1155e8b 100644 --- a/sysdeps/hppa/fpu/libm-test-ulps +++ b/sysdeps/hppa/fpu/libm-test-ulps @@ -797,20 +797,20 @@ double: 2 float: 1 Function: "exp10m1": -double: 2 -float: 1 +double: 4 +float: 2 Function: "exp10m1_downward": -double: 1 -float: 1 +double: 3 +float: 3 Function: "exp10m1_towardzero": -double: 1 -float: 1 +double: 2 +float: 3 Function: "exp10m1_upward": -double: 3 -float: 1 +double: 5 +float: 4 Function: "exp2": double: 1 @@ -829,20 +829,20 @@ double: 1 float: 1 Function: "exp2m1": -double: 1 -float: 1 +double: 2 +float: 2 Function: "exp2m1_downward": -double: 2 -float: 1 +double: 3 +float: 3 Function: "exp2m1_towardzero": -double: 2 -float: 1 +double: 3 +float: 2 Function: "exp2m1_upward": -double: 1 -float: 1 +double: 3 +float: 3 Function: "exp_downward": double: 1 @@ -992,20 +992,20 @@ double: 2 float: 2 Function: "log10p1": -double: 1 -float: 1 +double: 2 +float: 2 Function: "log10p1_downward": double: 2 -float: 1 +float: 3 Function: "log10p1_towardzero": -double: 2 +double: 3 float: 2 Function: "log10p1_upward": double: 2 -float: 1 +float: 3 Function: "log1p": double: 1 @@ -1181,20 +1181,16 @@ float: 3 Function: "tgamma": double: 9 -float: 8 ldouble: 1 Function: "tgamma_downward": double: 9 -float: 7 Function: "tgamma_towardzero": double: 9 -float: 7 Function: "tgamma_upward": double: 9 -float: 8 Function: "y0": double: 3 diff --git a/sysdeps/htl/pt-attr-getdetachstate.c b/sysdeps/htl/pt-attr-getdetachstate.c index 8b077e38f3..da27398260 100644 --- a/sysdeps/htl/pt-attr-getdetachstate.c +++ b/sysdeps/htl/pt-attr-getdetachstate.c @@ -17,6 +17,7 @@ <https://www.gnu.org/licenses/>. 
*/ #include <pthread.h> +#include <shlib-compat.h> #include <pt-internal.h> int @@ -26,4 +27,8 @@ __pthread_attr_getdetachstate (const pthread_attr_t *attr, int *detachstate) return 0; } -weak_alias (__pthread_attr_getdetachstate, pthread_attr_getdetachstate); +versioned_symbol (libc, __pthread_attr_getdetachstate, pthread_attr_getdetachstate, GLIBC_2_21); + +#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_21) +compat_symbol (libc, __pthread_attr_getdetachstate, pthread_attr_getdetachstate, GLIBC_2_12); +#endif diff --git a/sysdeps/htl/pt-attr-getinheritsched.c b/sysdeps/htl/pt-attr-getinheritsched.c index 078face3eb..ba02e202c3 100644 --- a/sysdeps/htl/pt-attr-getinheritsched.c +++ b/sysdeps/htl/pt-attr-getinheritsched.c @@ -17,6 +17,7 @@ <https://www.gnu.org/licenses/>. */ #include <pthread.h> +#include <shlib-compat.h> #include <pt-internal.h> int @@ -26,4 +27,8 @@ __pthread_attr_getinheritsched (const pthread_attr_t *attr, int *inheritsched) return 0; } -weak_alias (__pthread_attr_getinheritsched, pthread_attr_getinheritsched); +versioned_symbol (libc, __pthread_attr_getinheritsched, pthread_attr_getinheritsched, GLIBC_2_21); + +#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_21) +compat_symbol (libc, __pthread_attr_getinheritsched, pthread_attr_getinheritsched, GLIBC_2_12); +#endif diff --git a/sysdeps/htl/pt-attr-getschedparam.c b/sysdeps/htl/pt-attr-getschedparam.c index 3fa001871e..35eeedc442 100644 --- a/sysdeps/htl/pt-attr-getschedparam.c +++ b/sysdeps/htl/pt-attr-getschedparam.c @@ -19,6 +19,7 @@ #include <pthread.h> #include <sched.h> #include <string.h> +#include <shlib-compat.h> #include <pt-internal.h> @@ -30,4 +31,8 @@ __pthread_attr_getschedparam (const pthread_attr_t *attr, return 0; } -weak_alias (__pthread_attr_getschedparam, pthread_attr_getschedparam); +versioned_symbol (libc, __pthread_attr_getschedparam, pthread_attr_getschedparam, GLIBC_2_21); + +#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_21) +compat_symbol 
(libc, __pthread_attr_getschedparam, pthread_attr_getschedparam, GLIBC_2_12); +#endif diff --git a/sysdeps/htl/pt-attr-getschedpolicy.c b/sysdeps/htl/pt-attr-getschedpolicy.c index 8b4cc7ce48..5deaf01c5e 100644 --- a/sysdeps/htl/pt-attr-getschedpolicy.c +++ b/sysdeps/htl/pt-attr-getschedpolicy.c @@ -17,6 +17,7 @@ <https://www.gnu.org/licenses/>. */ #include <pthread.h> +#include <shlib-compat.h> #include <pt-internal.h> int @@ -26,4 +27,8 @@ __pthread_attr_getschedpolicy (const pthread_attr_t *attr, int *policy) return 0; } -weak_alias (__pthread_attr_getschedpolicy, pthread_attr_getschedpolicy); +versioned_symbol (libc, __pthread_attr_getschedpolicy, pthread_attr_getschedpolicy, GLIBC_2_21); + +#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_21) +compat_symbol (libc, __pthread_attr_getschedpolicy, pthread_attr_getschedpolicy, GLIBC_2_12); +#endif diff --git a/sysdeps/htl/pt-attr-setdetachstate.c b/sysdeps/htl/pt-attr-setdetachstate.c index b40557f816..26b6f91dc6 100644 --- a/sysdeps/htl/pt-attr-setdetachstate.c +++ b/sysdeps/htl/pt-attr-setdetachstate.c @@ -17,6 +17,7 @@ <https://www.gnu.org/licenses/>. */ #include <pthread.h> +#include <shlib-compat.h> #include <pt-internal.h> int @@ -35,4 +36,8 @@ __pthread_attr_setdetachstate (pthread_attr_t *attr, int detachstate) return 0; } -weak_alias (__pthread_attr_setdetachstate, pthread_attr_setdetachstate); +versioned_symbol (libc, __pthread_attr_setdetachstate, pthread_attr_setdetachstate, GLIBC_2_21); + +#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_21) +compat_symbol (libc, __pthread_attr_setdetachstate, pthread_attr_setdetachstate, GLIBC_2_12); +#endif diff --git a/sysdeps/htl/pt-attr-setinheritsched.c b/sysdeps/htl/pt-attr-setinheritsched.c index 533b805b12..4ad0bb1dec 100644 --- a/sysdeps/htl/pt-attr-setinheritsched.c +++ b/sysdeps/htl/pt-attr-setinheritsched.c @@ -17,6 +17,7 @@ <https://www.gnu.org/licenses/>. 
*/ #include <pthread.h> +#include <shlib-compat.h> #include <pt-internal.h> int @@ -35,4 +36,8 @@ __pthread_attr_setinheritsched (pthread_attr_t *attr, int inheritsched) return 0; } -weak_alias (__pthread_attr_setinheritsched, pthread_attr_setinheritsched); +versioned_symbol (libc, __pthread_attr_setinheritsched, pthread_attr_setinheritsched, GLIBC_2_21); + +#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_21) +compat_symbol (libc, __pthread_attr_setinheritsched, pthread_attr_setinheritsched, GLIBC_2_12); +#endif diff --git a/sysdeps/htl/pt-attr-setschedpolicy.c b/sysdeps/htl/pt-attr-setschedpolicy.c index d8086e8316..9648c774d0 100644 --- a/sysdeps/htl/pt-attr-setschedpolicy.c +++ b/sysdeps/htl/pt-attr-setschedpolicy.c @@ -17,6 +17,7 @@ <https://www.gnu.org/licenses/>. */ #include <pthread.h> +#include <shlib-compat.h> #include <pt-internal.h> int @@ -39,4 +40,8 @@ __pthread_attr_setschedpolicy (pthread_attr_t *attr, int policy) return 0; } -weak_alias (__pthread_attr_setschedpolicy, pthread_attr_setschedpolicy); +versioned_symbol (libc, __pthread_attr_setschedpolicy, pthread_attr_setschedpolicy, GLIBC_2_21); + +#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_21) +compat_symbol (libc, __pthread_attr_setschedpolicy, pthread_attr_setschedpolicy, GLIBC_2_12); +#endif diff --git a/sysdeps/htl/pt-equal.c b/sysdeps/htl/pt-equal.c index 6411110608..7727cca370 100644 --- a/sysdeps/htl/pt-equal.c +++ b/sysdeps/htl/pt-equal.c @@ -17,6 +17,7 @@ <https://www.gnu.org/licenses/>. */ #include <pthread.h> +#include <shlib-compat.h> #include <pt-internal.h> /* Return true if __T1 and __T2 both name the same thread. 
Otherwise, @@ -27,4 +28,8 @@ __pthread_equal (pthread_t __t1, pthread_t __t2) return __t1 == __t2; } -weak_alias (__pthread_equal, pthread_equal); +versioned_symbol (libc, __pthread_equal, pthread_equal, GLIBC_2_21); + +#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_21) +compat_symbol (libc, __pthread_equal, pthread_equal, GLIBC_2_12); +#endif diff --git a/sysdeps/htl/pt-getschedparam.c b/sysdeps/htl/pt-getschedparam.c index 8ec3d54bd4..0c6a2d0ab3 100644 --- a/sysdeps/htl/pt-getschedparam.c +++ b/sysdeps/htl/pt-getschedparam.c @@ -17,6 +17,7 @@ <https://www.gnu.org/licenses/>. */ #include <pthread.h> +#include <shlib-compat.h> #include <pt-internal.h> int @@ -28,4 +29,8 @@ __pthread_getschedparam (pthread_t thread, int *policy, return 0; } -weak_alias (__pthread_getschedparam, pthread_getschedparam); +versioned_symbol (libc, __pthread_getschedparam, pthread_getschedparam, GLIBC_2_21); + +#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_21) +compat_symbol (libc, __pthread_getschedparam, pthread_getschedparam, GLIBC_2_12); +#endif diff --git a/sysdeps/htl/pt-setschedparam.c b/sysdeps/htl/pt-setschedparam.c index aea4e7b0e4..5594c9f390 100644 --- a/sysdeps/htl/pt-setschedparam.c +++ b/sysdeps/htl/pt-setschedparam.c @@ -17,6 +17,7 @@ <https://www.gnu.org/licenses/>. 
*/ #include <pthread.h> +#include <shlib-compat.h> #include <pt-internal.h> int @@ -26,5 +27,9 @@ __pthread_setschedparam (pthread_t thread, int policy, return ENOSYS; } -weak_alias (__pthread_setschedparam, pthread_setschedparam); +versioned_symbol (libc, __pthread_setschedparam, pthread_setschedparam, GLIBC_2_21); stub_warning (pthread_setschedparam) + +#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_21) +compat_symbol (libc, __pthread_setschedparam, pthread_setschedparam, GLIBC_2_12); +#endif diff --git a/sysdeps/htl/pthreadP.h b/sysdeps/htl/pthreadP.h index cf8a2efe86..b0c9ceb23a 100644 --- a/sysdeps/htl/pthreadP.h +++ b/sysdeps/htl/pthreadP.h @@ -92,6 +92,8 @@ int __pthread_attr_setstack (pthread_attr_t *__attr, void *__stackaddr, int __pthread_attr_getstack (const pthread_attr_t *, void **, size_t *); void __pthread_testcancel (void); +#define __pthread_raise_internal(__sig) raise (__sig) + libc_hidden_proto (__pthread_self) #if IS_IN (libpthread) diff --git a/sysdeps/i386/fpu/libm-test-ulps b/sysdeps/i386/fpu/libm-test-ulps index 03297e6527..d13b05a220 100644 --- a/sysdeps/i386/fpu/libm-test-ulps +++ b/sysdeps/i386/fpu/libm-test-ulps @@ -1154,28 +1154,28 @@ float128: 3 ldouble: 2 Function: "exp10m1": -double: 1 +double: 3 float: 1 -float128: 1 -ldouble: 1 +float128: 3 +ldouble: 4 Function: "exp10m1_downward": -double: 1 -float: 1 -float128: 3 -ldouble: 2 +double: 3 +float: 3 +float128: 6 +ldouble: 6 Function: "exp10m1_towardzero": -double: 1 -float: 1 -float128: 3 -ldouble: 2 +double: 2 +float: 2 +float128: 6 +ldouble: 5 Function: "exp10m1_upward": -double: 3 -float: 1 -float128: 3 -ldouble: 2 +double: 5 +float: 3 +float128: 6 +ldouble: 6 Function: "exp2": double: 1 @@ -1197,28 +1197,28 @@ float128: 2 ldouble: 1 Function: "exp2m1": -double: 1 -float: 1 -float128: 1 -ldouble: 1 - -Function: "exp2m1_downward": double: 2 float: 1 float128: 2 -ldouble: 2 +ldouble: 3 + +Function: "exp2m1_downward": +double: 3 +float: 3 +float128: 3 +ldouble: 6 Function: 
"exp2m1_towardzero": -double: 2 -float: 1 -float128: 2 -ldouble: 2 +double: 3 +float: 2 +float128: 4 +ldouble: 5 Function: "exp2m1_upward": -double: 1 -float: 1 -float128: 2 -ldouble: 3 +double: 3 +float: 3 +float128: 5 +ldouble: 6 Function: "exp_downward": double: 1 @@ -1427,25 +1427,25 @@ Function: "log10p1": double: 1 float: 2 float128: 3 -ldouble: 2 +ldouble: 4 Function: "log10p1_downward": double: 2 -float: 1 -float128: 2 -ldouble: 4 +float: 3 +float128: 4 +ldouble: 8 Function: "log10p1_towardzero": double: 2 float: 1 -float128: 2 -ldouble: 4 +float128: 3 +ldouble: 8 Function: "log10p1_upward": double: 2 -float: 2 -float128: 3 -ldouble: 3 +float: 3 +float128: 4 +ldouble: 6 Function: "log1p": double: 1 @@ -1512,7 +1512,7 @@ float128: 2 ldouble: 3 Function: "log2p1_upward": -double: 1 +double: 2 float: 2 float128: 3 ldouble: 5 @@ -1699,25 +1699,21 @@ ldouble: 4 Function: "tgamma": double: 9 -float: 8 float128: 4 ldouble: 5 Function: "tgamma_downward": double: 9 -float: 7 float128: 5 ldouble: 6 Function: "tgamma_towardzero": double: 9 -float: 7 float128: 5 ldouble: 6 Function: "tgamma_upward": double: 9 -float: 8 float128: 4 ldouble: 5 diff --git a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps index 85a2456971..0a813aa8dd 100644 --- a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps +++ b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps @@ -1157,28 +1157,28 @@ float128: 3 ldouble: 2 Function: "exp10m1": -double: 1 +double: 3 float: 1 -float128: 1 -ldouble: 1 +float128: 3 +ldouble: 4 Function: "exp10m1_downward": -double: 1 -float: 1 -float128: 3 -ldouble: 2 +double: 3 +float: 3 +float128: 6 +ldouble: 6 Function: "exp10m1_towardzero": -double: 1 -float: 1 -float128: 3 -ldouble: 2 +double: 2 +float: 2 +float128: 6 +ldouble: 5 Function: "exp10m1_upward": -double: 3 -float: 1 -float128: 3 -ldouble: 2 +double: 5 +float: 3 +float128: 6 +ldouble: 6 Function: "exp2": double: 1 @@ -1200,28 +1200,28 @@ float128: 2 ldouble: 1 
Function: "exp2m1": -double: 1 -float: 1 -float128: 1 -ldouble: 1 - -Function: "exp2m1_downward": double: 2 float: 1 float128: 2 ldouble: 3 +Function: "exp2m1_downward": +double: 3 +float: 3 +float128: 3 +ldouble: 6 + Function: "exp2m1_towardzero": -double: 2 -float: 1 -float128: 2 -ldouble: 3 +double: 3 +float: 2 +float128: 4 +ldouble: 6 Function: "exp2m1_upward": -double: 1 -float: 1 -float128: 2 -ldouble: 3 +double: 3 +float: 3 +float128: 5 +ldouble: 6 Function: "exp_downward": double: 1 @@ -1430,25 +1430,25 @@ Function: "log10p1": double: 1 float: 2 float128: 3 -ldouble: 2 +ldouble: 4 Function: "log10p1_downward": double: 2 float: 1 -float128: 2 -ldouble: 4 +float128: 4 +ldouble: 8 Function: "log10p1_towardzero": double: 2 float: 1 -float128: 2 -ldouble: 4 +float128: 3 +ldouble: 8 Function: "log10p1_upward": double: 2 float: 2 -float128: 3 -ldouble: 3 +float128: 4 +ldouble: 6 Function: "log1p": double: 1 @@ -1701,25 +1701,21 @@ ldouble: 4 Function: "tgamma": double: 9 -float: 8 float128: 4 ldouble: 5 Function: "tgamma_downward": double: 9 -float: 7 float128: 5 ldouble: 6 Function: "tgamma_towardzero": double: 9 -float: 7 float128: 5 ldouble: 6 Function: "tgamma_upward": double: 8 -float: 8 float128: 4 ldouble: 5 diff --git a/sysdeps/i386/i686/multiarch/s_fma-fma.c b/sysdeps/i386/i686/multiarch/s_fma-fma.c index 9d17f67a10..6c3fbc0290 100644 --- a/sysdeps/i386/i686/multiarch/s_fma-fma.c +++ b/sysdeps/i386/i686/multiarch/s_fma-fma.c @@ -9,7 +9,7 @@ The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public diff --git a/sysdeps/i386/i686/multiarch/s_fma.c b/sysdeps/i386/i686/multiarch/s_fma.c index 978345bda5..3ae111aac5 100644 --- a/sysdeps/i386/i686/multiarch/s_fma.c +++ b/sysdeps/i386/i686/multiarch/s_fma.c @@ -9,7 +9,7 @@ The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public diff --git a/sysdeps/i386/i686/multiarch/s_fmaf-fma.c b/sysdeps/i386/i686/multiarch/s_fmaf-fma.c index f049a60f57..aff92fa5b6 100644 --- a/sysdeps/i386/i686/multiarch/s_fmaf-fma.c +++ b/sysdeps/i386/i686/multiarch/s_fmaf-fma.c @@ -9,7 +9,7 @@ The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public diff --git a/sysdeps/i386/i686/multiarch/s_fmaf.c b/sysdeps/i386/i686/multiarch/s_fmaf.c index e92ebb19a7..c9be68257f 100644 --- a/sysdeps/i386/i686/multiarch/s_fmaf.c +++ b/sysdeps/i386/i686/multiarch/s_fmaf.c @@ -9,7 +9,7 @@ The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public diff --git a/sysdeps/ieee754/dbl-64/gamma_productf.c b/sysdeps/ieee754/dbl-64/gamma_productf.c index f3596eeae4..1cc8931700 100644 --- a/sysdeps/ieee754/dbl-64/gamma_productf.c +++ b/sysdeps/ieee754/dbl-64/gamma_productf.c @@ -1,44 +1 @@ -/* Compute a product of X, X+1, ..., with an error estimate. - Copyright (C) 2013-2024 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. */ - -#include <math.h> -#include <math-narrow-eval.h> -#include <math_private.h> -#include <float.h> - -/* Compute the product of X + X_EPS, X + X_EPS + 1, ..., X + X_EPS + N - - 1, in the form R * (1 + *EPS) where the return value R is an - approximation to the product and *EPS is set to indicate the - approximate error in the return value. X is such that all the - values X + 1, ..., X + N - 1 are exactly representable, and X_EPS / - X is small enough that factors quadratic in it can be - neglected. */ - -float -__gamma_productf (float x, float x_eps, int n, float *eps) -{ - double x_full = (double) x + (double) x_eps; - double ret = x_full; - for (int i = 1; i < n; i++) - ret *= x_full + i; - - float fret = math_narrow_eval ((float) ret); - *eps = (ret - fret) / fret; - - return fret; -} +/* Not needed. 
*/ diff --git a/sysdeps/ieee754/flt-32/e_gammaf_r.c b/sysdeps/ieee754/flt-32/e_gammaf_r.c index a9730d61c1..90ed3b4890 100644 --- a/sysdeps/ieee754/flt-32/e_gammaf_r.c +++ b/sysdeps/ieee754/flt-32/e_gammaf_r.c @@ -1,215 +1,150 @@ -/* Implementation of gamma function according to ISO C. - Copyright (C) 1997-2024 Free Software Foundation, Inc. - This file is part of the GNU C Library. +/* Implementation of the gamma function for binary32. - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. +Copyright (c) 2023-2024 Alexei Sibidanov. - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. +The original version of this file was copied from the CORE-MATH +project (file src/binary32/tgamma/tgammaf.c, revision a48e352). - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. 
*/ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: -#include <math.h> -#include <math-narrow-eval.h> -#include <math_private.h> -#include <fenv_private.h> -#include <math-underflow.h> -#include <float.h> -#include <libm-alias-finite.h> - -/* Coefficients B_2k / 2k(2k-1) of x^-(2k-1) inside exp in Stirling's - approximation to gamma function. */ +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. -static const float gamma_coeff[] = - { - 0x1.555556p-4f, - -0xb.60b61p-12f, - 0x3.403404p-12f, - }; +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + */ -#define NCOEFF (sizeof (gamma_coeff) / sizeof (gamma_coeff[0])) +/* Changes with respect to the original CORE-MATH code: + - removed the dealing with errno + (this is done in the wrapper math/w_tgammaf_compat.c) + - usage of math_narrow_eval to deal with underflow/overflow + - deal with signgamp + */ -/* Return gamma (X), for positive X less than 42, in the form R * - 2^(*EXP2_ADJ), where R is the return value and *EXP2_ADJ is set to - avoid overflow or underflow in intermediate calculations. 
*/ +#include <math.h> +#include <float.h> +#include <stdint.h> +#include <stddef.h> +#include <libm-alias-finite.h> +#include <math-narrow-eval.h> -static float -gammaf_positive (float x, int *exp2_adj) -{ - int local_signgam; - if (x < 0.5f) - { - *exp2_adj = 0; - return __ieee754_expf (__ieee754_lgammaf_r (x + 1, &local_signgam)) / x; - } - else if (x <= 1.5f) - { - *exp2_adj = 0; - return __ieee754_expf (__ieee754_lgammaf_r (x, &local_signgam)); - } - else if (x < 2.5f) - { - *exp2_adj = 0; - float x_adj = x - 1; - return (__ieee754_expf (__ieee754_lgammaf_r (x_adj, &local_signgam)) - * x_adj); - } - else - { - float eps = 0; - float x_eps = 0; - float x_adj = x; - float prod = 1; - if (x < 4.0f) - { - /* Adjust into the range for applying Stirling's - approximation. */ - float n = ceilf (4.0f - x); - x_adj = math_narrow_eval (x + n); - x_eps = (x - (x_adj - n)); - prod = __gamma_productf (x_adj - n, x_eps, n, &eps); - } - /* The result is now gamma (X_ADJ + X_EPS) / (PROD * (1 + EPS)). - Compute gamma (X_ADJ + X_EPS) using Stirling's approximation, - starting by computing pow (X_ADJ, X_ADJ) with a power of 2 - factored out. 
*/ - float exp_adj = -eps; - float x_adj_int = roundf (x_adj); - float x_adj_frac = x_adj - x_adj_int; - int x_adj_log2; - float x_adj_mant = __frexpf (x_adj, &x_adj_log2); - if (x_adj_mant < M_SQRT1_2f) - { - x_adj_log2--; - x_adj_mant *= 2.0f; - } - *exp2_adj = x_adj_log2 * (int) x_adj_int; - float ret = (__ieee754_powf (x_adj_mant, x_adj) - * __ieee754_exp2f (x_adj_log2 * x_adj_frac) - * __ieee754_expf (-x_adj) - * sqrtf (2 * M_PIf / x_adj) - / prod); - exp_adj += x_eps * __ieee754_logf (x_adj); - float bsum = gamma_coeff[NCOEFF - 1]; - float x_adj2 = x_adj * x_adj; - for (size_t i = 1; i <= NCOEFF - 1; i++) - bsum = bsum / x_adj2 + gamma_coeff[NCOEFF - 1 - i]; - exp_adj += bsum / x_adj; - return ret + ret * __expm1f (exp_adj); - } -} +typedef union {float f; uint32_t u;} b32u32_u; +typedef union {double f; uint64_t u;} b64u64_u; float __ieee754_gammaf_r (float x, int *signgamp) { - int32_t hx; - float ret; + /* The wrapper in math/w_tgamma_template.c expects *signgamp to be set to a + non-negative value if the returned value is gamma(x), and to a negative + value if it is -gamma(x). + Since the code here directly computes gamma(x), we set it to 1. + */ + if (signgamp != NULL) + *signgamp = 1; - GET_FLOAT_WORD (hx, x); + /* List of exceptional cases. Each entry contains the 32-bit encoding u of x, + a binary32 approximation f of gamma(x), and a correction term df. 
*/ + static const struct {uint32_t u; float f, df;} tb[] = { + {0x27de86a9u, 0x1.268266p+47f, 0x1p22f}, // x = 0x1.bd0d52p-48 + {0x27e05475u, 0x1.242422p+47f, 0x1p22f}, // x = 0x1.c0a8eap-48 + {0xb63befb3u, -0x1.5cb6e4p+18f, 0x1p-7f}, // x = -0x1.77df66p-19 + {0x3c7bb570u, 0x1.021d9p+6f, 0x1p-19f}, // x = 0x1.f76aep-7 + {0x41e886d1u, 0x1.33136ap+98f, 0x1p73f}, // x = 0x1.d10da2p+4 + {0xc067d177u, 0x1.f6850cp-3f, 0x1p-28f}, // x = -0x1.cfa2eep+1 + {0xbd99da31u, -0x1.befe66p+3, -0x1p-22f}, // x = -0x1.33b462p-4 + {0xbf54c45au, -0x1.a6b4ecp+2, +0x1p-23f}, // x = -0x1.a988b4p-1 + {0x41ee77feu, 0x1.d3631cp+101, -0x1p-76f}, // x = 0x1.dceffcp+4 + {0x3f843a64u, 0x1.f6c638p-1, 0x1p-26f}, // x = 0x1.0874c8p+0 + }; - if (__glibc_unlikely ((hx & 0x7fffffff) == 0)) - { - /* Return value for x == 0 is Inf with divide by zero exception. */ - *signgamp = 0; - return 1.0 / x; + b32u32_u t = {.f = x}; + uint32_t ax = t.u<<1; + if(__builtin_expect(ax>=(0xffu<<24), 0)){ /* x=NaN or +/-Inf */ + if(ax==(0xffu<<24)){ /* x=+/-Inf */ + if(t.u>>31){ /* x=-Inf */ + return x / x; /* will raise the "Invalid operation" exception */ + } + return x; /* x=+Inf */ } - if (__builtin_expect (hx < 0, 0) - && (uint32_t) hx < 0xff800000 && rintf (x) == x) - { - /* Return value for integer x < 0 is NaN with invalid exception. */ - *signgamp = 0; - return (x - x) / (x - x); + return x + x; /* x=NaN, where x+x ensures the "Invalid operation" + exception is set if x is sNaN */ + } + double z = x; + if(__builtin_expect(ax<0x6d000000u, 0)){ /* |x| < 0x1p-18 */ + volatile double d = (0x1.fa658c23b1578p-1 - 0x1.d0a118f324b63p-1*z)*z - 0x1.2788cfc6fb619p-1; + double f = 1.0/z + d; + float r = f; + b64u64_u rt = {.f = f}; + if(((rt.u+2)&0xfffffff) < 4){ + for(unsigned i=0;i<sizeof(tb)/sizeof(tb[0]);i++) + if(t.u==tb[i].u) return tb[i].f + tb[i].df; } - if (__glibc_unlikely (hx == 0xff800000)) - { - /* x == -Inf. According to ISO this is NaN. 
*/ - *signgamp = 0; - return x - x; + return r; + } + float fx = __builtin_floorf(x); + if(__builtin_expect(x >= 0x1.18522p+5f, 0)){ + /* Overflow case. The original CORE-MATH code returns 0x1p127f * 0x1p127f, + but apparently some compilers replace this by +Inf. */ + return math_narrow_eval (x * 0x1p127f); + } + /* compute k only after the overflow check, otherwise the cast to integer + might overflow */ + int k = fx; + if(__builtin_expect(fx==x, 0)){ /* x is integer */ + if(x == 0.0f){ + return 1.0f/x; } - if (__glibc_unlikely ((hx & 0x7f800000) == 0x7f800000)) - { - /* Positive infinity (return positive infinity) or NaN (return - NaN). */ - *signgamp = 0; - return x + x; + if(x < 0.0f){ + return 0.0f / 0.0f; /* should raise the "Invalid operation" exception */ } + double t0 = 1, x0 = 1; + for(int i=1; i<k; i++, x0 += 1.0) t0 *= x0; + return t0; + } + if(__builtin_expect(x<-42.0f, 0)){ /* negative non-integer */ + /* For x < -42, x non-integer, |gamma(x)| < 2^-151. */ + static const float sgn[2] = {0x1p-127f, -0x1p-127f}; + /* Underflow always happens */ + return math_narrow_eval (0x1p-127f * sgn[k&1]); + } + /* The array c[] stores a degree-15 polynomial approximation for gamma(x). */ + static const double c[] = + {0x1.c9a76be577123p+0, 0x1.8f2754ddcf90dp+0, 0x1.0d1191949419bp+0, 0x1.e1f42cf0ae4a1p-2, + 0x1.82b358a3ab638p-3, 0x1.e1f2b30cd907bp-5, 0x1.240f6d4071bd8p-6, 0x1.1522c9f3cd012p-8, + 0x1.1fd0051a0525bp-10, 0x1.9808a8b96c37ep-13, 0x1.b3f78e01152b5p-15, 0x1.49c85a7e1fd04p-18, + 0x1.471ca49184475p-19, -0x1.368f0b7ed9e36p-23, 0x1.882222f9049efp-23, -0x1.a69ed2042842cp-25}; - if (x >= 36.0f) - { - /* Overflow.
*/ - *signgamp = 0; - ret = math_narrow_eval (FLT_MAX * FLT_MAX); - return ret; - } - else - { - SET_RESTORE_ROUNDF (FE_TONEAREST); - if (x > 0.0f) - { - *signgamp = 0; - int exp2_adj; - float tret = gammaf_positive (x, &exp2_adj); - ret = __scalbnf (tret, exp2_adj); - } - else if (x >= -FLT_EPSILON / 4.0f) - { - *signgamp = 0; - ret = 1.0f / x; - } - else - { - float tx = truncf (x); - *signgamp = (tx == 2.0f * truncf (tx / 2.0f)) ? -1 : 1; - if (x <= -42.0f) - /* Underflow. */ - ret = FLT_MIN * FLT_MIN; - else - { - float frac = tx - x; - if (frac > 0.5f) - frac = 1.0f - frac; - float sinpix = (frac <= 0.25f - ? __sinf (M_PIf * frac) - : __cosf (M_PIf * (0.5f - frac))); - int exp2_adj; - float tret = M_PIf / (-x * sinpix - * gammaf_positive (-x, &exp2_adj)); - ret = __scalbnf (tret, -exp2_adj); - math_check_force_underflow_nonneg (ret); - } - } - ret = math_narrow_eval (ret); - } - if (isinf (ret) && x != 0) - { - if (*signgamp < 0) - { - ret = math_narrow_eval (-copysignf (FLT_MAX, ret) * FLT_MAX); - ret = -ret; - } - else - ret = math_narrow_eval (copysignf (FLT_MAX, ret) * FLT_MAX); - return ret; - } - else if (ret == 0) - { - if (*signgamp < 0) - { - ret = math_narrow_eval (-copysignf (FLT_MIN, ret) * FLT_MIN); - ret = -ret; - } - else - ret = math_narrow_eval (copysignf (FLT_MIN, ret) * FLT_MIN); - return ret; + double m = z - 0x1.7p+1, i = __builtin_roundeven(m), step = __builtin_copysign(1.0,i); + double d = m - i, d2 = d*d, d4 = d2*d2, d8 = d4*d4; + double f = (c[0] + d*c[1]) + d2*(c[2] + d*c[3]) + d4*((c[4] + d*c[5]) + d2*(c[6] + d*c[7])) + + d8*((c[8] + d*c[9]) + d2*(c[10] + d*c[11]) + d4*((c[12] + d*c[13]) + d2*(c[14] + d*c[15]))); + int jm = __builtin_fabs(i); + double w = 1; + if(jm){ + z -= 0.5 + step*0.5; + w = z; + for(int j=jm-1; j; j--) {z -= step; w *= z;} + } + if(i<=-0.5) w = 1/w; + f *= w; + b64u64_u rt = {.f = f}; + float r = f; + /* Deal with exceptional cases. 
*/ + if(__builtin_expect(((rt.u+2)&0xfffffff) < 8, 0)){ + for(unsigned j=0;j<sizeof(tb)/sizeof(tb[0]);j++) { + if(t.u==tb[j].u) return tb[j].f + tb[j].df; } - else - return ret; + } + return r; } libm_alias_finite (__ieee754_gammaf_r, __gammaf_r) diff --git a/sysdeps/ieee754/flt-32/math_config.h b/sysdeps/ieee754/flt-32/math_config.h index 729f22cd4f..dc07ebd459 100644 --- a/sysdeps/ieee754/flt-32/math_config.h +++ b/sysdeps/ieee754/flt-32/math_config.h @@ -166,9 +166,9 @@ extern const struct exp2f_data uint64_t tab[1 << EXP2F_TABLE_BITS]; double shift_scaled; double poly[EXP2F_POLY_ORDER]; - double shift; double invln2_scaled; double poly_scaled[EXP2F_POLY_ORDER]; + double shift; } __exp2f_data attribute_hidden; #define LOGF_TABLE_BITS 4 diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h index ab6f1da7c0..a15d8e0ab6 100644 --- a/sysdeps/loongarch/dl-machine.h +++ b/sysdeps/loongarch/dl-machine.h @@ -223,6 +223,13 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], { td->arg = _dl_make_tlsdesc_dynamic (sym_map, sym->st_value + reloc->r_addend); +# ifndef __loongarch_soft_float + if (RTLD_SUPPORT_LASX) + td->entry = _dl_tlsdesc_dynamic_lasx; + else if (RTLD_SUPPORT_LSX) + td->entry = _dl_tlsdesc_dynamic_lsx; + else +# endif td->entry = _dl_tlsdesc_dynamic; } else diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h new file mode 100644 index 0000000000..3daf991001 --- /dev/null +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h @@ -0,0 +1,222 @@ +/* Thread-local storage handling in the ELF dynamic linker. + LoongArch version. + Copyright (C) 2024 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define FRAME_SIZE (-((-14 * SZREG) & ALMASK)) + + /* Handler for dynamic TLS symbols. + Prototype: + _dl_tlsdesc_dynamic (tlsdesc *) ; + + The second word of the descriptor points to a + tlsdesc_dynamic_arg structure. + + Returns the offset between the thread pointer and the + object referenced by the argument. + + ptrdiff_t + _dl_tlsdesc_dynamic (struct tlsdesc *tdp) + { + struct tlsdesc_dynamic_arg *td = tdp->arg; + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - SIZE_OF_TCB); + if (__glibc_likely (td->gen_count <= dtv[0].counter + && (dtv[td->tlsinfo.ti_module].pointer.val + != TLS_DTV_UNALLOCATED), + 1)) + return dtv[td->tlsinfo.ti_module].pointer.val + + td->tlsinfo.ti_offset + - __thread_pointer; + + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; + } */ + .hidden _dl_tlsdesc_dynamic + .global _dl_tlsdesc_dynamic + .type _dl_tlsdesc_dynamic,%function + cfi_startproc + .align 2 +_dl_tlsdesc_dynamic: + /* Save just enough registers to support fast path, if we fall + into slow path we will save additional registers. */ + ADDI sp, sp, -32 + cfi_adjust_cfa_offset (32) + REG_S t0, sp, 0 + REG_S t1, sp, 8 + REG_S t2, sp, 16 + cfi_rel_offset (12, 0) + cfi_rel_offset (13, 8) + cfi_rel_offset (14, 16) + +/* Runtime Storage Layout of Thread-Local Storage + TP point to the start of TLS block. 
+ + dtv +Low address TCB ----------------> dtv0(counter) + TP --> static_block0 <----- dtv1 + static_block1 <----- dtv2 + static_block2 <----- dtv3 + dynamic_block0 <----- dtv4 +High address dynamic_block1 <----- dtv5 */ + + REG_L t0, tp, -SIZE_OF_TCB /* t0 = dtv */ + REG_L a0, a0, TLSDESC_ARG /* a0(td) = tdp->arg */ + REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */ + REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */ + /* If dtv[0].counter < td->gen_count, goto slow path. */ + bltu t2, t1, .Lslow + + REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */ + /* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */ + slli.d t1, t1, 4 + add.d t1, t1, t0 /* t1 = dtv[td->tlsinfo.ti_module] */ + REG_L t1, t1, 0 /* t1 = dtv[td->tlsinfo.ti_module].pointer.val */ + li.d t2, TLS_DTV_UNALLOCATED + /* If dtv[td->tlsinfo.ti_module].pointer.val is TLS_DTV_UNALLOCATED, + goto slow path. */ + beq t1, t2, .Lslow + + cfi_remember_state + REG_L t2, a0, TLSDESC_MODOFF /* t2 = td->tlsinfo.ti_offset */ + /* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */ + add.d a0, t1, t2 +.Lret: + sub.d a0, a0, tp + REG_L t0, sp, 0 + REG_L t1, sp, 8 + REG_L t2, sp, 16 + ADDI sp, sp, 32 + cfi_adjust_cfa_offset (-32) + RET + +.Lslow: + /* This is the slow path. We need to call __tls_get_addr() which + means we need to save and restore all the registers that the + callee will trash. */ + + /* Save the remaining registers that we must treat as caller save.
*/ + cfi_restore_state + ADDI sp, sp, -FRAME_SIZE + cfi_adjust_cfa_offset (FRAME_SIZE) + REG_S ra, sp, 0 * SZREG + REG_S a1, sp, 1 * SZREG + REG_S a2, sp, 2 * SZREG + REG_S a3, sp, 3 * SZREG + REG_S a4, sp, 4 * SZREG + REG_S a5, sp, 5 * SZREG + REG_S a6, sp, 6 * SZREG + REG_S a7, sp, 7 * SZREG + REG_S t3, sp, 8 * SZREG + REG_S t4, sp, 9 * SZREG + REG_S t5, sp, 10 * SZREG + REG_S t6, sp, 11 * SZREG + REG_S t7, sp, 12 * SZREG + REG_S t8, sp, 13 * SZREG + cfi_rel_offset (1, 0 * SZREG) + cfi_rel_offset (5, 1 * SZREG) + cfi_rel_offset (6, 2 * SZREG) + cfi_rel_offset (7, 3 * SZREG) + cfi_rel_offset (8, 4 * SZREG) + cfi_rel_offset (9, 5 * SZREG) + cfi_rel_offset (10, 6 * SZREG) + cfi_rel_offset (11, 7 * SZREG) + cfi_rel_offset (15, 8 * SZREG) + cfi_rel_offset (16, 9 * SZREG) + cfi_rel_offset (17, 10 * SZREG) + cfi_rel_offset (18, 11 * SZREG) + cfi_rel_offset (19, 12 * SZREG) + cfi_rel_offset (20, 13 * SZREG) + +#ifndef __loongarch_soft_float + + /* Save fcsr0 register. + Only one physical fcsr0 register, fcsr1-fcsr3 are aliases + of some fields in fcsr0. */ + movfcsr2gr t0, fcsr0 + st.w t0, sp, FRAME_SIZE + 24 /* Use the spare slot above t2. */ + +#ifdef USE_LASX + #define V_REG_S xvst + #define V_REG_L xvld + #define V_SPACE (-((-32 * SZXREG) & ALMASK)) /* Space for LASX registers. */ + #define V_REG(n) $xr##n + #define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, \ + 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + #define V_REGSZ SZXREG +#elif defined USE_LSX + #define V_REG_S vst + #define V_REG_L vld + #define V_SPACE (-((-32 * SZVREG) & ALMASK)) /* Space for LSX registers. */ + #define V_REG(n) $vr##n + #define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, \ + 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + #define V_REGSZ SZVREG +#else + #define V_REG_S fst.d + #define V_REG_L fld.d + #define V_SPACE (-((-24 * SZFREG) & ALMASK)) /* Space for FLOAT registers. 
*/ + #define V_REG(n) $f##n + #define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23 + #define V_REGSZ SZFREG +#endif + + ADDI sp, sp, -V_SPACE + cfi_adjust_cfa_offset (V_SPACE) + .irp i,V_REGS + V_REG_S V_REG(\i), sp, \i * V_REGSZ + .endr + +#endif /* #ifndef __loongarch_soft_float */ + + bl HIDDEN_JUMPTARGET(__tls_get_addr) + ADDI a0, a0, -TLS_DTV_OFFSET + +#ifndef __loongarch_soft_float + + .irp i,V_REGS + V_REG_L V_REG(\i), sp, \i * V_REGSZ + .endr + ADDI sp, sp, V_SPACE + cfi_adjust_cfa_offset (-V_SPACE) + + /* Restore fcsr0 register. */ + ld.w t0, sp, FRAME_SIZE + 24 + movgr2fcsr fcsr0, t0 + +#endif /* #ifndef __loongarch_soft_float */ + + REG_L ra, sp, 0 * SZREG + REG_L a1, sp, 1 * SZREG + REG_L a2, sp, 2 * SZREG + REG_L a3, sp, 3 * SZREG + REG_L a4, sp, 4 * SZREG + REG_L a5, sp, 5 * SZREG + REG_L a6, sp, 6 * SZREG + REG_L a7, sp, 7 * SZREG + REG_L t3, sp, 8 * SZREG + REG_L t4, sp, 9 * SZREG + REG_L t5, sp, 10 * SZREG + REG_L t6, sp, 11 * SZREG + REG_L t7, sp, 12 * SZREG + REG_L t8, sp, 13 * SZREG + ADDI sp, sp, FRAME_SIZE + cfi_adjust_cfa_offset (-FRAME_SIZE) + + b .Lret + cfi_endproc + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic + .hidden HIDDEN_JUMPTARGET(__tls_get_addr) diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S index a6627cc754..be76c07cce 100644 --- a/sysdeps/loongarch/dl-tlsdesc.S +++ b/sysdeps/loongarch/dl-tlsdesc.S @@ -59,376 +59,46 @@ _dl_tlsdesc_undefweak: cfi_endproc .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak - #ifdef SHARED -#define FRAME_SIZE (-((-14 * SZREG) & ALMASK)) -#define FRAME_SIZE_LSX (-((-32 * SZVREG) & ALMASK)) -#define FRAME_SIZE_LASX (-((-32 * SZXREG) & ALMASK)) -#define FRAME_SIZE_FLOAT (-((-24 * SZFREG) & ALMASK)) - - /* Handler for dynamic TLS symbols. - Prototype: - _dl_tlsdesc_dynamic (tlsdesc *) ; - - The second word of the descriptor points to a - tlsdesc_dynamic_arg structure. 
- - Returns the offset between the thread pointer and the - object referenced by the argument. - - ptrdiff_t - _dl_tlsdesc_dynamic (struct tlsdesc *tdp) - { - struct tlsdesc_dynamic_arg *td = tdp->arg; - dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - SIZE_OF_TCB); - if (__glibc_likely (td->gen_count <= dtv[0].counter - && (dtv[td->tlsinfo.ti_module].pointer.val - != TLS_DTV_UNALLOCATED), - 1)) - return dtv[td->tlsinfo.ti_module].pointer.val - + td->tlsinfo.ti_offset - - __thread_pointer; - - return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; - } */ - .hidden _dl_tlsdesc_dynamic - .global _dl_tlsdesc_dynamic - .type _dl_tlsdesc_dynamic,%function - cfi_startproc - .align 2 -_dl_tlsdesc_dynamic: - /* Save just enough registers to support fast path, if we fall - into slow path we will save additional registers. */ - ADDI sp, sp, -32 - REG_S t0, sp, 0 - REG_S t1, sp, 8 - REG_S t2, sp, 16 - -/* Runtime Storage Layout of Thread-Local Storage - TP point to the start of TLS block. - - dtv -Low address TCB ----------------> dtv0(counter) - TP --> static_block0 <----- dtv1 - static_block1 <----- dtv2 - static_block2 <----- dtv3 - dynamic_block0 <----- dtv4 -Hign address dynamic_block1 <----- dtv5 */ - - REG_L t0, tp, -SIZE_OF_TCB /* t0 = dtv */ - REG_L a0, a0, TLSDESC_ARG /* a0(td) = tdp->arg */ - REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */ - REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */ - /* If dtv[0].counter < td->gen_count, goto slow path. */ - bltu t2, t1, .Lslow - - REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */ - /* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */ - slli.d t1, t1, 4 - add.d t1, t1, t0 /* t1 = dtv[td->tlsinfo.ti_module] */ - REG_L t1, t1, 0 /* t1 = dtv[td->tlsinfo.ti_module].pointer.val */ - li.d t2, TLS_DTV_UNALLOCATED - /* If dtv[td->tlsinfo.ti_module].pointer.val is TLS_DTV_UNALLOCATED, - goto slow path. 
*/ - beq t1, t2, .Lslow - - REG_L t2, a0, TLSDESC_MODOFF /* t2 = td->tlsinfo.ti_offset */ - /* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */ - add.d a0, t1, t2 -.Lret: - sub.d a0, a0, tp - REG_L t0, sp, 0 - REG_L t1, sp, 8 - REG_L t2, sp, 16 - ADDI sp, sp, 32 - RET - -.Lslow: - /* This is the slow path. We need to call __tls_get_addr() which - means we need to save and restore all the register that the - callee will trash. */ - - /* Save the remaining registers that we must treat as caller save. */ - ADDI sp, sp, -FRAME_SIZE - REG_S ra, sp, 0 * SZREG - REG_S a1, sp, 1 * SZREG - REG_S a2, sp, 2 * SZREG - REG_S a3, sp, 3 * SZREG - REG_S a4, sp, 4 * SZREG - REG_S a5, sp, 5 * SZREG - REG_S a6, sp, 6 * SZREG - REG_S a7, sp, 7 * SZREG - REG_S t3, sp, 8 * SZREG - REG_S t4, sp, 9 * SZREG - REG_S t5, sp, 10 * SZREG - REG_S t6, sp, 11 * SZREG - REG_S t7, sp, 12 * SZREG - REG_S t8, sp, 13 * SZREG - #ifndef __loongarch_soft_float - /* Save fcsr0 register. - Only one physical fcsr0 register, fcsr1-fcsr3 are aliases - of some fields in fcsr0. */ - movfcsr2gr t0, fcsr0 - st.w t0, sp, FRAME_SIZE + 24 /* Use the spare slot above t2 */ - - /* Whether support LASX. */ - la.global t0, _rtld_global_ro - REG_L t0, t0, GLRO_DL_HWCAP_OFFSET - andi t1, t0, HWCAP_LOONGARCH_LASX - beqz t1, .Llsx - - /* Save 256-bit vector registers. - FIXME: Without vector ABI, save all vector registers. 
*/ - ADDI sp, sp, -FRAME_SIZE_LASX - xvst xr0, sp, 0*SZXREG - xvst xr1, sp, 1*SZXREG - xvst xr2, sp, 2*SZXREG - xvst xr3, sp, 3*SZXREG - xvst xr4, sp, 4*SZXREG - xvst xr5, sp, 5*SZXREG - xvst xr6, sp, 6*SZXREG - xvst xr7, sp, 7*SZXREG - xvst xr8, sp, 8*SZXREG - xvst xr9, sp, 9*SZXREG - xvst xr10, sp, 10*SZXREG - xvst xr11, sp, 11*SZXREG - xvst xr12, sp, 12*SZXREG - xvst xr13, sp, 13*SZXREG - xvst xr14, sp, 14*SZXREG - xvst xr15, sp, 15*SZXREG - xvst xr16, sp, 16*SZXREG - xvst xr17, sp, 17*SZXREG - xvst xr18, sp, 18*SZXREG - xvst xr19, sp, 19*SZXREG - xvst xr20, sp, 20*SZXREG - xvst xr21, sp, 21*SZXREG - xvst xr22, sp, 22*SZXREG - xvst xr23, sp, 23*SZXREG - xvst xr24, sp, 24*SZXREG - xvst xr25, sp, 25*SZXREG - xvst xr26, sp, 26*SZXREG - xvst xr27, sp, 27*SZXREG - xvst xr28, sp, 28*SZXREG - xvst xr29, sp, 29*SZXREG - xvst xr30, sp, 30*SZXREG - xvst xr31, sp, 31*SZXREG - b .Ltga - -.Llsx: - /* Whether support LSX. */ - andi t1, t0, HWCAP_LOONGARCH_LSX - beqz t1, .Lfloat - - /* Save 128-bit vector registers. */ - ADDI sp, sp, -FRAME_SIZE_LSX - vst vr0, sp, 0*SZVREG - vst vr1, sp, 1*SZVREG - vst vr2, sp, 2*SZVREG - vst vr3, sp, 3*SZVREG - vst vr4, sp, 4*SZVREG - vst vr5, sp, 5*SZVREG - vst vr6, sp, 6*SZVREG - vst vr7, sp, 7*SZVREG - vst vr8, sp, 8*SZVREG - vst vr9, sp, 9*SZVREG - vst vr10, sp, 10*SZVREG - vst vr11, sp, 11*SZVREG - vst vr12, sp, 12*SZVREG - vst vr13, sp, 13*SZVREG - vst vr14, sp, 14*SZVREG - vst vr15, sp, 15*SZVREG - vst vr16, sp, 16*SZVREG - vst vr17, sp, 17*SZVREG - vst vr18, sp, 18*SZVREG - vst vr19, sp, 19*SZVREG - vst vr20, sp, 20*SZVREG - vst vr21, sp, 21*SZVREG - vst vr22, sp, 22*SZVREG - vst vr23, sp, 23*SZVREG - vst vr24, sp, 24*SZVREG - vst vr25, sp, 25*SZVREG - vst vr26, sp, 26*SZVREG - vst vr27, sp, 27*SZVREG - vst vr28, sp, 28*SZVREG - vst vr29, sp, 29*SZVREG - vst vr30, sp, 30*SZVREG - vst vr31, sp, 31*SZVREG - b .Ltga - -.Lfloat: - /* Save float registers. 
*/ - ADDI sp, sp, -FRAME_SIZE_FLOAT - FREG_S fa0, sp, 0*SZFREG - FREG_S fa1, sp, 1*SZFREG - FREG_S fa2, sp, 2*SZFREG - FREG_S fa3, sp, 3*SZFREG - FREG_S fa4, sp, 4*SZFREG - FREG_S fa5, sp, 5*SZFREG - FREG_S fa6, sp, 6*SZFREG - FREG_S fa7, sp, 7*SZFREG - FREG_S ft0, sp, 8*SZFREG - FREG_S ft1, sp, 9*SZFREG - FREG_S ft2, sp, 10*SZFREG - FREG_S ft3, sp, 11*SZFREG - FREG_S ft4, sp, 12*SZFREG - FREG_S ft5, sp, 13*SZFREG - FREG_S ft6, sp, 14*SZFREG - FREG_S ft7, sp, 15*SZFREG - FREG_S ft8, sp, 16*SZFREG - FREG_S ft9, sp, 17*SZFREG - FREG_S ft10, sp, 18*SZFREG - FREG_S ft11, sp, 19*SZFREG - FREG_S ft12, sp, 20*SZFREG - FREG_S ft13, sp, 21*SZFREG - FREG_S ft14, sp, 22*SZFREG - FREG_S ft15, sp, 23*SZFREG - -#endif /* #ifndef __loongarch_soft_float */ - -.Ltga: - bl HIDDEN_JUMPTARGET(__tls_get_addr) - ADDI a0, a0, -TLS_DTV_OFFSET - -#ifndef __loongarch_soft_float - - la.global t0, _rtld_global_ro - REG_L t0, t0, GLRO_DL_HWCAP_OFFSET - andi t1, t0, HWCAP_LOONGARCH_LASX - beqz t1, .Llsx1 - - /* Restore 256-bit vector registers. 
*/ - xvld xr0, sp, 0*SZXREG - xvld xr1, sp, 1*SZXREG - xvld xr2, sp, 2*SZXREG - xvld xr3, sp, 3*SZXREG - xvld xr4, sp, 4*SZXREG - xvld xr5, sp, 5*SZXREG - xvld xr6, sp, 6*SZXREG - xvld xr7, sp, 7*SZXREG - xvld xr8, sp, 8*SZXREG - xvld xr9, sp, 9*SZXREG - xvld xr10, sp, 10*SZXREG - xvld xr11, sp, 11*SZXREG - xvld xr12, sp, 12*SZXREG - xvld xr13, sp, 13*SZXREG - xvld xr14, sp, 14*SZXREG - xvld xr15, sp, 15*SZXREG - xvld xr16, sp, 16*SZXREG - xvld xr17, sp, 17*SZXREG - xvld xr18, sp, 18*SZXREG - xvld xr19, sp, 19*SZXREG - xvld xr20, sp, 20*SZXREG - xvld xr21, sp, 21*SZXREG - xvld xr22, sp, 22*SZXREG - xvld xr23, sp, 23*SZXREG - xvld xr24, sp, 24*SZXREG - xvld xr25, sp, 25*SZXREG - xvld xr26, sp, 26*SZXREG - xvld xr27, sp, 27*SZXREG - xvld xr28, sp, 28*SZXREG - xvld xr29, sp, 29*SZXREG - xvld xr30, sp, 30*SZXREG - xvld xr31, sp, 31*SZXREG - ADDI sp, sp, FRAME_SIZE_LASX - b .Lfcsr - -.Llsx1: - andi t1, t0, HWCAP_LOONGARCH_LSX - beqz t1, .Lfloat1 - - /* Restore 128-bit vector registers. */ - vld vr0, sp, 0*SZVREG - vld vr1, sp, 1*SZVREG - vld vr2, sp, 2*SZVREG - vld vr3, sp, 3*SZVREG - vld vr4, sp, 4*SZVREG - vld vr5, sp, 5*SZVREG - vld vr6, sp, 6*SZVREG - vld vr7, sp, 7*SZVREG - vld vr8, sp, 8*SZVREG - vld vr9, sp, 9*SZVREG - vld vr10, sp, 10*SZVREG - vld vr11, sp, 11*SZVREG - vld vr12, sp, 12*SZVREG - vld vr13, sp, 13*SZVREG - vld vr14, sp, 14*SZVREG - vld vr15, sp, 15*SZVREG - vld vr16, sp, 16*SZVREG - vld vr17, sp, 17*SZVREG - vld vr18, sp, 18*SZVREG - vld vr19, sp, 19*SZVREG - vld vr20, sp, 20*SZVREG - vld vr21, sp, 21*SZVREG - vld vr22, sp, 22*SZVREG - vld vr23, sp, 23*SZVREG - vld vr24, sp, 24*SZVREG - vld vr25, sp, 25*SZVREG - vld vr26, sp, 26*SZVREG - vld vr27, sp, 27*SZVREG - vld vr28, sp, 28*SZVREG - vld vr29, sp, 29*SZVREG - vld vr30, sp, 30*SZVREG - vld vr31, sp, 31*SZVREG - ADDI sp, sp, FRAME_SIZE_LSX - b .Lfcsr - -.Lfloat1: - /* Restore float registers. 
*/ - FREG_L fa0, sp, 0*SZFREG - FREG_L fa1, sp, 1*SZFREG - FREG_L fa2, sp, 2*SZFREG - FREG_L fa3, sp, 3*SZFREG - FREG_L fa4, sp, 4*SZFREG - FREG_L fa5, sp, 5*SZFREG - FREG_L fa6, sp, 6*SZFREG - FREG_L fa7, sp, 7*SZFREG - FREG_L ft0, sp, 8*SZFREG - FREG_L ft1, sp, 9*SZFREG - FREG_L ft2, sp, 10*SZFREG - FREG_L ft3, sp, 11*SZFREG - FREG_L ft4, sp, 12*SZFREG - FREG_L ft5, sp, 13*SZFREG - FREG_L ft6, sp, 14*SZFREG - FREG_L ft7, sp, 15*SZFREG - FREG_L ft8, sp, 16*SZFREG - FREG_L ft9, sp, 17*SZFREG - FREG_L ft10, sp, 18*SZFREG - FREG_L ft11, sp, 19*SZFREG - FREG_L ft12, sp, 20*SZFREG - FREG_L ft13, sp, 21*SZFREG - FREG_L ft14, sp, 22*SZFREG - FREG_L ft15, sp, 23*SZFREG - ADDI sp, sp, FRAME_SIZE_FLOAT - -.Lfcsr: - /* Restore fcsr0 register. */ - ld.w t0, sp, FRAME_SIZE + 24 - movgr2fcsr fcsr0, t0 +#define USE_LASX +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx +#define Lret Lret_lasx +#define Lslow Lslow_lasx +#include "dl-tlsdesc-dynamic.h" +#undef FRAME_SIZE +#undef V_REG_S +#undef V_REG_L +#undef V_SPACE +#undef V_REG +#undef V_REGS +#undef V_REGSZ +#undef USE_LASX +#undef _dl_tlsdesc_dynamic +#undef Lret +#undef Lslow + +#define USE_LSX +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx +#define Lret Lret_lsx +#define Lslow Lslow_lsx +#include "dl-tlsdesc-dynamic.h" +#undef FRAME_SIZE +#undef V_REG_S +#undef V_REG_L +#undef V_SPACE +#undef V_REG +#undef V_REGS +#undef V_REGSZ +#undef USE_LSX +#undef _dl_tlsdesc_dynamic +#undef Lret +#undef Lslow #endif /* #ifndef __loongarch_soft_float */ - REG_L ra, sp, 0 * SZREG - REG_L a1, sp, 1 * SZREG - REG_L a2, sp, 2 * SZREG - REG_L a3, sp, 3 * SZREG - REG_L a4, sp, 4 * SZREG - REG_L a5, sp, 5 * SZREG - REG_L a6, sp, 6 * SZREG - REG_L a7, sp, 7 * SZREG - REG_L t3, sp, 8 * SZREG - REG_L t4, sp, 9 * SZREG - REG_L t5, sp, 10 * SZREG - REG_L t6, sp, 11 * SZREG - REG_L t7, sp, 12 * SZREG - REG_L t8, sp, 13 * SZREG - ADDI sp, sp, FRAME_SIZE - - b .Lret - cfi_endproc - .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic - .hidden 
HIDDEN_JUMPTARGET(__tls_get_addr) +#include "dl-tlsdesc-dynamic.h" #endif /* #ifdef SHARED */ diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h index ff8c69cb93..45c43a5b52 100644 --- a/sysdeps/loongarch/dl-tlsdesc.h +++ b/sysdeps/loongarch/dl-tlsdesc.h @@ -43,6 +43,10 @@ extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *); #ifdef SHARED extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t); +#ifndef __loongarch_soft_float +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *); +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *); +#endif extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *); #endif diff --git a/sysdeps/loongarch/lp64/libm-test-ulps b/sysdeps/loongarch/lp64/libm-test-ulps index bdfd683454..75e21ee2a8 100644 --- a/sysdeps/loongarch/lp64/libm-test-ulps +++ b/sysdeps/loongarch/lp64/libm-test-ulps @@ -976,24 +976,24 @@ float: 1 ldouble: 3 Function: "exp10m1": -double: 2 -float: 1 -ldouble: 1 +double: 4 +float: 2 +ldouble: 3 Function: "exp10m1_downward": -double: 1 -float: 1 -ldouble: 3 +double: 3 +float: 3 +ldouble: 6 Function: "exp10m1_towardzero": -double: 1 -float: 1 -ldouble: 3 +double: 2 +float: 3 +ldouble: 6 Function: "exp10m1_upward": -double: 3 -float: 1 -ldouble: 3 +double: 5 +float: 3 +ldouble: 6 Function: "exp2": double: 1 @@ -1013,24 +1013,24 @@ float: 1 ldouble: 2 Function: "exp2m1": -double: 1 -float: 1 -ldouble: 1 +double: 2 +float: 2 +ldouble: 2 Function: "exp2m1_downward": -double: 1 -float: 1 -ldouble: 2 +double: 3 +float: 3 +ldouble: 3 Function: "exp2m1_towardzero": -double: 2 -float: 1 -ldouble: 2 +double: 3 +float: 2 +ldouble: 4 Function: "exp2m1_upward": -double: 1 -float: 1 -ldouble: 2 +double: 3 +float: 3 +ldouble: 5 Function: "exp_downward": double: 1 @@ -1206,24 +1206,24 @@ float: 2 ldouble: 1 Function: "log10p1": -double: 1 -float: 1 +double: 2 +float: 2 ldouble: 3 Function: "log10p1_downward": 
double: 2 -float: 1 -ldouble: 2 +float: 3 +ldouble: 4 Function: "log10p1_towardzero": -double: 2 +double: 3 float: 2 -ldouble: 2 +ldouble: 3 Function: "log10p1_upward": double: 2 -float: 1 -ldouble: 3 +float: 3 +ldouble: 4 Function: "log1p": double: 1 @@ -1263,8 +1263,8 @@ double: 3 ldouble: 1 Function: "log2p1": -double: 1 -float: 1 +double: 2 +float: 2 ldouble: 3 Function: "log2p1_downward": @@ -1278,9 +1278,9 @@ float: 2 ldouble: 2 Function: "log2p1_upward": -double: 1 +double: 2 float: 2 -ldouble: 2 +ldouble: 3 Function: "log_downward": ldouble: 1 @@ -1432,22 +1432,18 @@ ldouble: 3 Function: "tgamma": double: 9 -float: 8 ldouble: 4 Function: "tgamma_downward": double: 9 -float: 7 ldouble: 5 Function: "tgamma_towardzero": double: 9 -float: 7 ldouble: 5 Function: "tgamma_upward": double: 9 -float: 8 ldouble: 4 Function: "y0": diff --git a/sysdeps/loongarch/tlsdesc.sym b/sysdeps/loongarch/tlsdesc.sym index 213d0b3074..9f80fceca6 100644 --- a/sysdeps/loongarch/tlsdesc.sym +++ b/sysdeps/loongarch/tlsdesc.sym @@ -4,12 +4,6 @@ #include <link.h> #include <dl-tlsdesc.h> -#define SHARED 1 - -#include <ldsodefs.h> - -#define GLRO_offsetof(name) offsetof (struct rtld_global_ro, _##name) - -- -- Abuse tls.h macros to derive offsets relative to the thread register. 
@@ -23,6 +17,3 @@ DTV_COUNTER offsetof(dtv_t, counter) TLS_DTV_UNALLOCATED TLS_DTV_UNALLOCATED TLS_DTV_OFFSET TLS_DTV_OFFSET SIZE_OF_TCB sizeof(tcbhead_t) -GLRO_DL_HWCAP_OFFSET GLRO_offsetof (dl_hwcap) -HWCAP_LOONGARCH_LSX HWCAP_LOONGARCH_LSX -HWCAP_LOONGARCH_LASX HWCAP_LOONGARCH_LASX diff --git a/sysdeps/m68k/coldfire/fpu/libm-test-ulps b/sysdeps/m68k/coldfire/fpu/libm-test-ulps index 1b25a70e3f..ae05498719 100644 --- a/sysdeps/m68k/coldfire/fpu/libm-test-ulps +++ b/sysdeps/m68k/coldfire/fpu/libm-test-ulps @@ -146,7 +146,6 @@ double: 1 Function: "tgamma": double: 1 -float: 1 Function: "y0": double: 2 diff --git a/sysdeps/m68k/m680x0/fpu/libm-test-ulps b/sysdeps/m68k/m680x0/fpu/libm-test-ulps index 6eacfb6b6f..73e98eb479 100644 --- a/sysdeps/m68k/m680x0/fpu/libm-test-ulps +++ b/sysdeps/m68k/m680x0/fpu/libm-test-ulps @@ -1208,22 +1208,18 @@ float: 1 Function: "tgamma": double: 3 -float: 9 ldouble: 9 Function: "tgamma_downward": double: 3 -float: 9 ldouble: 9 Function: "tgamma_towardzero": double: 3 -float: 9 ldouble: 9 Function: "tgamma_upward": double: 2 -float: 9 ldouble: 9 Function: "y0": diff --git a/sysdeps/mach/hurd/faccessat.c b/sysdeps/mach/hurd/faccessat.c index 998e31962f..6d3d123abb 100644 --- a/sysdeps/mach/hurd/faccessat.c +++ b/sysdeps/mach/hurd/faccessat.c @@ -185,6 +185,15 @@ __faccessat_common (int fd, const char *file, int type, int at_flags, return errfunc (err); } + /* If all we wanted was to check for a file existing at the path, + then we already got our answer, and we don't need to call + file_check_access (). */ + if (type == F_OK) + { + __mach_port_deallocate (__mach_task_self (), io); + return 0; + } + /* Find out what types of access we are allowed to this file. 
*/ err = __file_check_access (io, &allowed); __mach_port_deallocate (__mach_task_self (), io); diff --git a/sysdeps/mach/hurd/fcntl.c b/sysdeps/mach/hurd/fcntl.c index 9451ed5649..2583f54ef2 100644 --- a/sysdeps/mach/hurd/fcntl.c +++ b/sysdeps/mach/hurd/fcntl.c @@ -148,6 +148,7 @@ __libc_fcntl (int fd, int cmd, ...) cmd = F_SETLKW64; break; default: + va_end (ap); return __hurd_fail (EINVAL); } @@ -204,7 +205,10 @@ __libc_fcntl (int fd, int cmd, ...) && fl->l_start != fl64.l_start) || (sizeof fl->l_len != sizeof fl64.l_len && fl->l_len != fl64.l_len)) - return __hurd_fail (EOVERFLOW); + { + va_end (ap); + return __hurd_fail (EOVERFLOW); + } } result = err ? __hurd_dfail (fd, err) : 0; diff --git a/sysdeps/mach/hurd/i386/libc.abilist b/sysdeps/mach/hurd/i386/libc.abilist index 2adf98e71d..dc2c6b505b 100644 --- a/sysdeps/mach/hurd/i386/libc.abilist +++ b/sysdeps/mach/hurd/i386/libc.abilist @@ -29,7 +29,17 @@ GLIBC_2.11 mkostemps64 F GLIBC_2.11 mkstemps F GLIBC_2.11 mkstemps64 F GLIBC_2.12 __pthread_self F +GLIBC_2.12 pthread_attr_getdetachstate F +GLIBC_2.12 pthread_attr_getinheritsched F +GLIBC_2.12 pthread_attr_getschedparam F +GLIBC_2.12 pthread_attr_getschedpolicy F +GLIBC_2.12 pthread_attr_setdetachstate F +GLIBC_2.12 pthread_attr_setinheritsched F +GLIBC_2.12 pthread_attr_setschedpolicy F +GLIBC_2.12 pthread_equal F +GLIBC_2.12 pthread_getschedparam F GLIBC_2.12 pthread_self F +GLIBC_2.12 pthread_setschedparam F GLIBC_2.13 __fentry__ F GLIBC_2.14 syncfs F GLIBC_2.15 __fdelt_chk F diff --git a/sysdeps/mach/hurd/x86_64/vm_param.h b/sysdeps/mach/hurd/x86_64/vm_param.h index 460445f49e..a6e63a3a1a 100644 --- a/sysdeps/mach/hurd/x86_64/vm_param.h +++ b/sysdeps/mach/hurd/x86_64/vm_param.h @@ -15,7 +15,7 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#ifndef _X86_86_VM_PARAM_H +#ifndef _X86_64_VM_PARAM_H #define _X86_64_VM_PARAM_H /* Arbitrary start of the brk. This is after usual binary and library mappings. 
*/ diff --git a/sysdeps/mach/sleep.c b/sysdeps/mach/sleep.c index 572be150bc..aa750c13c5 100644 --- a/sysdeps/mach/sleep.c +++ b/sysdeps/mach/sleep.c @@ -43,6 +43,9 @@ __sleep (unsigned int seconds) after = time_now (); __mach_port_destroy (__mach_task_self (), recv); + if (after - before > seconds) + return 0; + return seconds - (after - before); } weak_alias (__sleep, sleep) diff --git a/sysdeps/microblaze/libm-test-ulps b/sysdeps/microblaze/libm-test-ulps index d3666eb7d4..98ea940ccc 100644 --- a/sysdeps/microblaze/libm-test-ulps +++ b/sysdeps/microblaze/libm-test-ulps @@ -257,7 +257,6 @@ float: 2 Function: "tgamma": double: 5 -float: 4 Function: "y0": double: 2 diff --git a/sysdeps/mips/mips32/libm-test-ulps b/sysdeps/mips/mips32/libm-test-ulps index 1c27b6615b..cc1600c2ab 100644 --- a/sysdeps/mips/mips32/libm-test-ulps +++ b/sysdeps/mips/mips32/libm-test-ulps @@ -778,20 +778,20 @@ double: 2 float: 1 Function: "exp10m1": -double: 2 -float: 1 +double: 3 +float: 2 Function: "exp10m1_downward": -double: 1 -float: 1 +double: 3 +float: 3 Function: "exp10m1_towardzero": -double: 1 -float: 1 +double: 2 +float: 3 Function: "exp10m1_upward": -double: 3 -float: 1 +double: 4 +float: 4 Function: "exp2": double: 1 @@ -810,20 +810,20 @@ double: 1 float: 1 Function: "exp2m1": -double: 1 -float: 1 +double: 2 +float: 2 Function: "exp2m1_downward": -double: 2 -float: 1 +double: 3 +float: 3 Function: "exp2m1_towardzero": -double: 2 -float: 1 +double: 3 +float: 2 Function: "exp2m1_upward": -double: 1 -float: 1 +double: 3 +float: 3 Function: "exp_downward": double: 1 @@ -967,19 +967,19 @@ float: 2 Function: "log10p1": double: 1 -float: 1 +float: 2 Function: "log10p1_downward": double: 2 -float: 1 +float: 3 Function: "log10p1_towardzero": -double: 2 +double: 3 float: 2 Function: "log10p1_upward": double: 2 -float: 1 +float: 3 Function: "log1p": double: 1 @@ -1015,7 +1015,7 @@ float: 3 Function: "log2p1": double: 1 -float: 1 +float: 2 ldouble: 1 Function: "log2p1_downward": @@ 
-1029,7 +1029,7 @@ float: 2 ldouble: 2 Function: "log2p1_upward": -double: 1 +double: 2 float: 2 ldouble: 1 @@ -1156,19 +1156,15 @@ float: 3 Function: "tgamma": double: 9 -float: 8 Function: "tgamma_downward": double: 9 -float: 7 Function: "tgamma_towardzero": double: 9 -float: 7 Function: "tgamma_upward": double: 9 -float: 8 Function: "y0": double: 3 diff --git a/sysdeps/mips/mips64/libm-test-ulps b/sysdeps/mips/mips64/libm-test-ulps index 902551b448..88dbb20401 100644 --- a/sysdeps/mips/mips64/libm-test-ulps +++ b/sysdeps/mips/mips64/libm-test-ulps @@ -977,24 +977,24 @@ float: 1 ldouble: 3 Function: "exp10m1": -double: 2 -float: 1 -ldouble: 1 +double: 3 +float: 2 +ldouble: 3 Function: "exp10m1_downward": -double: 1 -float: 1 -ldouble: 3 +double: 3 +float: 3 +ldouble: 6 Function: "exp10m1_towardzero": -double: 1 -float: 1 -ldouble: 3 +double: 2 +float: 3 +ldouble: 6 Function: "exp10m1_upward": -double: 3 -float: 1 -ldouble: 3 +double: 4 +float: 4 +ldouble: 6 Function: "exp2": double: 1 @@ -1017,24 +1017,24 @@ float: 1 ldouble: 2 Function: "exp2m1": -double: 1 -float: 1 -ldouble: 1 - -Function: "exp2m1_downward": double: 2 -float: 1 +float: 2 ldouble: 2 +Function: "exp2m1_downward": +double: 3 +float: 3 +ldouble: 3 + Function: "exp2m1_towardzero": -double: 2 -float: 1 -ldouble: 2 +double: 3 +float: 2 +ldouble: 4 Function: "exp2m1_upward": -double: 1 -float: 1 -ldouble: 2 +double: 3 +float: 3 +ldouble: 5 Function: "exp_downward": double: 1 @@ -1212,23 +1212,23 @@ ldouble: 1 Function: "log10p1": double: 1 -float: 1 +float: 2 ldouble: 3 Function: "log10p1_downward": double: 2 -float: 1 -ldouble: 2 +float: 3 +ldouble: 4 Function: "log10p1_towardzero": -double: 2 +double: 3 float: 2 -ldouble: 2 +ldouble: 3 Function: "log10p1_upward": double: 2 -float: 1 -ldouble: 3 +float: 3 +ldouble: 4 Function: "log1p": double: 1 @@ -1272,7 +1272,7 @@ ldouble: 1 Function: "log2p1": double: 1 -float: 1 +float: 2 ldouble: 3 Function: "log2p1_downward": @@ -1286,9 +1286,9 @@ float: 2 
ldouble: 2 Function: "log2p1_upward": -double: 1 +double: 2 float: 2 -ldouble: 2 +ldouble: 3 Function: "log_downward": float: 2 @@ -1444,22 +1444,18 @@ ldouble: 3 Function: "tgamma": double: 9 -float: 8 ldouble: 4 Function: "tgamma_downward": double: 9 -float: 7 ldouble: 5 Function: "tgamma_towardzero": double: 9 -float: 7 ldouble: 5 Function: "tgamma_upward": double: 9 -float: 8 ldouble: 4 Function: "y0": diff --git a/sysdeps/nios2/libm-test-ulps b/sysdeps/nios2/libm-test-ulps index c8d1a722f7..df6e932cb6 100644 --- a/sysdeps/nios2/libm-test-ulps +++ b/sysdeps/nios2/libm-test-ulps @@ -266,7 +266,6 @@ float: 2 Function: "tgamma": double: 9 -float: 8 Function: "y0": double: 3 diff --git a/sysdeps/nptl/_Fork.c b/sysdeps/nptl/_Fork.c index ef199ddbc3..52c90e61e3 100644 --- a/sysdeps/nptl/_Fork.c +++ b/sysdeps/nptl/_Fork.c @@ -17,11 +17,18 @@ <https://www.gnu.org/licenses/>. */ #include <arch-fork.h> +#include <libc-lock.h> #include <pthreadP.h> pid_t _Fork (void) { + /* Block all signals to avoid revealing the inconsistent TCB state + to a signal handler after fork. The abort lock should be AS-safe + to avoid deadlock if _Fork is called from a signal handler. */ + internal_sigset_t original_sigmask; + __abort_lock_rdlock (&original_sigmask); + pid_t pid = arch_fork (&THREAD_SELF->tid); if (pid == 0) { @@ -44,6 +51,8 @@ _Fork (void) INTERNAL_SYSCALL_CALL (set_robust_list, &self->robust_head, sizeof (struct robust_list_head)); } + + __abort_lock_unlock (&original_sigmask); return pid; } libc_hidden_def (_Fork) diff --git a/sysdeps/nptl/cancellation-pc-check.h b/sysdeps/nptl/cancellation-pc-check.h new file mode 100644 index 0000000000..cb38ad6819 --- /dev/null +++ b/sysdeps/nptl/cancellation-pc-check.h @@ -0,0 +1,54 @@ +/* Architecture specific code for pthread cancellation handling. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _NPTL_CANCELLATION_PC_CHECK +#define _NPTL_CANCELLATION_PC_CHECK + +#include <sigcontextinfo.h> + +/* For syscalls with side-effects (e.g. read that might return partial read), + the kernel cannot restart the syscall when interrupted by a signal, it must + return from the call with whatever partial result. In this case, the saved + program counter is set just after the syscall instruction, so the SIGCANCEL + handler should not act on cancellation. + + The __syscall_cancel_arch function, used for all cancellable syscalls, + contains two extra markers, __syscall_cancel_arch_start and + __syscall_cancel_arch_end. The former points to just before the initial + conditional branch that checks if the thread has received a cancellation + request, while the latter points to the instruction after the one + responsible for issuing the syscall. + + This function checks whether the program counter (PC) from ucontext_t CTX + is within the start and end boundaries of the __syscall_cancel_arch + bridge. Return TRUE if the PC is within the boundary, meaning the + syscall does not have any side effects; or FALSE otherwise. */ + +static __always_inline bool +cancellation_pc_check (void *ctx) +{ + /* Both are defined in syscall_cancel.S. 
*/ + extern const char __syscall_cancel_arch_start[1]; + extern const char __syscall_cancel_arch_end[1]; + + uintptr_t pc = sigcontext_get_pc (ctx); + return pc >= (uintptr_t) __syscall_cancel_arch_start + && pc < (uintptr_t) __syscall_cancel_arch_end; +} + +#endif diff --git a/sysdeps/nptl/lowlevellock-futex.h b/sysdeps/nptl/lowlevellock-futex.h index 278213a37b..c205806300 100644 --- a/sysdeps/nptl/lowlevellock-futex.h +++ b/sysdeps/nptl/lowlevellock-futex.h @@ -21,7 +21,6 @@ #ifndef __ASSEMBLER__ # include <sysdep.h> -# include <sysdep-cancel.h> # include <kernel-features.h> #endif @@ -120,21 +119,10 @@ nr_wake, nr_move, mutex, val) /* Like lll_futex_wait, but acting as a cancellable entrypoint. */ -# define lll_futex_wait_cancel(futexp, val, private) \ - ({ \ - int __oldtype = LIBC_CANCEL_ASYNC (); \ - long int __err = lll_futex_wait (futexp, val, LLL_SHARED); \ - LIBC_CANCEL_RESET (__oldtype); \ - __err; \ - }) - -/* Like lll_futex_timed_wait, but acting as a cancellable entrypoint. */ -# define lll_futex_timed_wait_cancel(futexp, val, timeout, private) \ - ({ \ - int __oldtype = LIBC_CANCEL_ASYNC (); \ - long int __err = lll_futex_timed_wait (futexp, val, timeout, private); \ - LIBC_CANCEL_RESET (__oldtype); \ - __err; \ +# define lll_futex_wait_cancel(futexp, val, private) \ + ({ \ + int __op = __lll_private_flag (FUTEX_WAIT, private); \ + INTERNAL_SYSCALL_CANCEL (futex, futexp, __op, val, NULL); \ }) #endif /* !__ASSEMBLER__ */ diff --git a/sysdeps/nptl/pthreadP.h b/sysdeps/nptl/pthreadP.h index 30e8a2d177..c2db165052 100644 --- a/sysdeps/nptl/pthreadP.h +++ b/sysdeps/nptl/pthreadP.h @@ -261,10 +261,12 @@ libc_hidden_proto (__pthread_unregister_cancel) /* Called when a thread reacts on a cancellation request. */ static inline void __attribute ((noreturn, always_inline)) -__do_cancel (void) +__do_cancel (void *result) { struct pthread *self = THREAD_SELF; + self->result = result; + /* Make sure we get no more cancellations. 
*/ atomic_fetch_or_relaxed (&self->cancelhandling, EXITING_BITMASK); @@ -272,6 +274,13 @@ __do_cancel (void) THREAD_GETMEM (self, cleanup_jmp_buf)); } +extern long int __syscall_cancel_arch (volatile int *, __syscall_arg_t nr, + __syscall_arg_t arg1, __syscall_arg_t arg2, __syscall_arg_t arg3, + __syscall_arg_t arg4, __syscall_arg_t arg5, __syscall_arg_t arg6 + __SYSCALL_CANCEL7_ARCH_ARG_DEF) attribute_hidden; + +extern _Noreturn void __syscall_do_cancel (void) attribute_hidden; + /* Internal prototypes. */ @@ -508,6 +517,7 @@ libc_hidden_proto (__pthread_kill) extern int __pthread_cancel (pthread_t th); extern int __pthread_kill_internal (pthread_t threadid, int signo) attribute_hidden; +extern int __pthread_raise_internal (int signo) attribute_hidden; extern void __pthread_exit (void *value) __attribute__ ((__noreturn__)); libc_hidden_proto (__pthread_exit) extern int __pthread_join (pthread_t threadid, void **thread_return); diff --git a/sysdeps/or1k/fpu/libm-test-ulps b/sysdeps/or1k/fpu/libm-test-ulps index 59b9f072f5..4ccb136298 100644 --- a/sysdeps/or1k/fpu/libm-test-ulps +++ b/sysdeps/or1k/fpu/libm-test-ulps @@ -1066,19 +1066,15 @@ float: 3 Function: "tgamma": double: 9 -float: 8 Function: "tgamma_downward": double: 9 -float: 9 Function: "tgamma_towardzero": double: 9 -float: 8 Function: "tgamma_upward": double: 9 -float: 8 Function: "y0": double: 3 diff --git a/sysdeps/or1k/nofpu/libm-test-ulps b/sysdeps/or1k/nofpu/libm-test-ulps index 726855faaa..7087cf9add 100644 --- a/sysdeps/or1k/nofpu/libm-test-ulps +++ b/sysdeps/or1k/nofpu/libm-test-ulps @@ -1064,19 +1064,15 @@ float: 3 Function: "tgamma": double: 9 -float: 8 Function: "tgamma_downward": double: 9 -float: 9 Function: "tgamma_towardzero": double: 9 -float: 8 Function: "tgamma_upward": double: 9 -float: 8 Function: "y0": double: 3 diff --git a/sysdeps/posix/profil.c b/sysdeps/posix/profil.c index 3b3c87e25a..6f4aa00eeb 100644 --- a/sysdeps/posix/profil.c +++ b/sysdeps/posix/profil.c @@ -82,7 +82,7 @@ 
__profil (u_short *sample_buffer, size_t size, size_t offset, u_int scale) if (__setitimer (ITIMER_PROF, &otimer, NULL) < 0) return -1; samples = NULL; - return __sigaction (SIGPROF, &oact, NULL); + return __libc_sigaction (SIGPROF, &oact, NULL); } if (samples) @@ -90,7 +90,7 @@ __profil (u_short *sample_buffer, size_t size, size_t offset, u_int scale) /* Was already turned on. Restore old timer and signal handler first. */ if (__setitimer (ITIMER_PROF, &otimer, NULL) < 0 - || __sigaction (SIGPROF, &oact, NULL) < 0) + || __libc_sigaction (SIGPROF, &oact, NULL) < 0) return -1; } #else @@ -114,7 +114,7 @@ __profil (u_short *sample_buffer, size_t size, size_t offset, u_int scale) #endif act.sa_flags |= SA_RESTART; __sigfillset (&act.sa_mask); - if (__sigaction (SIGPROF, &act, oact_ptr) < 0) + if (__libc_sigaction (SIGPROF, &act, oact_ptr) < 0) return -1; timer.it_value.tv_sec = 0; diff --git a/sysdeps/posix/tempname.c b/sysdeps/posix/tempname.c index c00fe0c181..fc30958a0c 100644 --- a/sysdeps/posix/tempname.c +++ b/sysdeps/posix/tempname.c @@ -117,6 +117,8 @@ random_bits (random_value *r, random_value s) succeed. 
*/ #if !_LIBC *r = mix_random_values (v, clock ()); +#else + *r = v; #endif return false; } diff --git a/sysdeps/powerpc/fpu/libm-test-ulps b/sysdeps/powerpc/fpu/libm-test-ulps index 2e038492cd..dcefbcefcf 100644 --- a/sysdeps/powerpc/fpu/libm-test-ulps +++ b/sysdeps/powerpc/fpu/libm-test-ulps @@ -1185,28 +1185,28 @@ float128: 3 ldouble: 4 Function: "exp10m1": -double: 2 -float: 1 -float128: 1 -ldouble: 2 +double: 4 +float: 2 +float128: 3 +ldouble: 4 Function: "exp10m1_downward": -double: 1 -float: 1 -float128: 3 +double: 3 +float: 3 +float128: 6 ldouble: 9 Function: "exp10m1_towardzero": -double: 1 -float: 1 -float128: 3 +double: 2 +float: 3 +float128: 6 ldouble: 9 Function: "exp10m1_upward": -double: 3 -float: 1 -float128: 3 -ldouble: 4 +double: 5 +float: 3 +float128: 6 +ldouble: 8 Function: "exp2": double: 1 @@ -1230,28 +1230,28 @@ float128: 2 ldouble: 2 Function: "exp2m1": -double: 1 -float: 1 -float128: 1 -ldouble: 2 - -Function: "exp2m1_downward": -double: 1 -float: 1 +double: 2 +float: 2 float128: 2 ldouble: 3 +Function: "exp2m1_downward": +double: 3 +float: 3 +float128: 3 +ldouble: 7 + Function: "exp2m1_towardzero": -double: 2 -float: 1 -float128: 2 -ldouble: 3 +double: 3 +float: 2 +float128: 4 +ldouble: 8 Function: "exp2m1_upward": -double: 1 -float: 1 -float128: 2 -ldouble: 3 +double: 3 +float: 3 +float128: 5 +ldouble: 9 Function: "exp_downward": double: 1 @@ -1507,28 +1507,28 @@ float128: 1 ldouble: 2 Function: "log10p1": -double: 1 -float: 1 +double: 2 +float: 2 float128: 3 ldouble: 3 Function: "log10p1_downward": double: 2 -float: 1 -float128: 2 -ldouble: 4 +float: 3 +float128: 4 +ldouble: 6 Function: "log10p1_towardzero": -double: 2 +double: 3 float: 2 -float128: 2 -ldouble: 4 +float128: 3 +ldouble: 6 Function: "log10p1_upward": double: 2 -float: 1 -float128: 3 -ldouble: 4 +float: 3 +float128: 4 +ldouble: 6 Function: "log1p": double: 1 @@ -1828,25 +1828,21 @@ ldouble: 6 Function: "tgamma": double: 9 -float: 8 float128: 4 ldouble: 5 Function: 
"tgamma_downward": double: 9 -float: 7 float128: 5 ldouble: 6 Function: "tgamma_towardzero": double: 9 -float: 7 float128: 5 ldouble: 5 Function: "tgamma_upward": double: 9 -float: 8 float128: 4 ldouble: 5 diff --git a/sysdeps/powerpc/nofpu/libm-test-ulps b/sysdeps/powerpc/nofpu/libm-test-ulps index dc9b499cc4..14be978dc3 100644 --- a/sysdeps/powerpc/nofpu/libm-test-ulps +++ b/sysdeps/powerpc/nofpu/libm-test-ulps @@ -986,24 +986,24 @@ float: 1 ldouble: 4 Function: "exp10m1": -double: 2 -float: 1 +double: 3 +float: 2 ldouble: 3 Function: "exp10m1_downward": -double: 1 -float: 1 +double: 3 +float: 3 ldouble: 9 Function: "exp10m1_towardzero": -double: 1 -float: 1 +double: 2 +float: 3 ldouble: 9 Function: "exp10m1_upward": -double: 3 -float: 1 -ldouble: 4 +double: 4 +float: 4 +ldouble: 10 Function: "exp2": double: 1 @@ -1026,24 +1026,24 @@ float: 1 ldouble: 2 Function: "exp2m1": -double: 1 -float: 1 +double: 2 +float: 2 ldouble: 2 Function: "exp2m1_downward": -double: 2 -float: 1 -ldouble: 3 +double: 3 +float: 3 +ldouble: 7 Function: "exp2m1_towardzero": -double: 2 -float: 1 -ldouble: 3 +double: 3 +float: 2 +ldouble: 8 Function: "exp2m1_upward": -double: 1 -float: 1 -ldouble: 3 +double: 3 +float: 3 +ldouble: 9 Function: "exp_downward": double: 1 @@ -1269,23 +1269,23 @@ ldouble: 2 Function: "log10p1": double: 1 -float: 1 +float: 2 ldouble: 3 Function: "log10p1_downward": double: 2 -float: 1 -ldouble: 4 +float: 3 +ldouble: 6 Function: "log10p1_towardzero": -double: 2 +double: 3 float: 2 -ldouble: 4 +ldouble: 6 Function: "log10p1_upward": double: 2 -float: 1 -ldouble: 3 +float: 3 +ldouble: 6 Function: "log1p": double: 1 @@ -1560,22 +1560,18 @@ ldouble: 6 Function: "tgamma": double: 9 -float: 8 ldouble: 5 Function: "tgamma_downward": double: 9 -float: 7 ldouble: 5 Function: "tgamma_towardzero": double: 9 -float: 7 ldouble: 5 Function: "tgamma_upward": double: 9 -float: 8 ldouble: 4 Function: "y0": diff --git a/sysdeps/powerpc/powerpc32/sysdep.h 
b/sysdeps/powerpc/powerpc32/sysdep.h index 62de4ca2e5..852a755c7c 100644 --- a/sysdeps/powerpc/powerpc32/sysdep.h +++ b/sysdeps/powerpc/powerpc32/sysdep.h @@ -104,6 +104,9 @@ GOT_LABEL: ; \ # define JUMPTARGET(name) name #endif +#define TAIL_CALL_NO_RETURN(__func) \ + b __func@local + #if defined SHARED && defined PIC && !defined NO_HIDDEN # undef HIDDEN_JUMPTARGET # define HIDDEN_JUMPTARGET(name) __GI_##name##@local diff --git a/sysdeps/powerpc/powerpc64/le/Makefile b/sysdeps/powerpc/powerpc64/le/Makefile index 9d568d4f44..b77775cf95 100644 --- a/sysdeps/powerpc/powerpc64/le/Makefile +++ b/sysdeps/powerpc/powerpc64/le/Makefile @@ -129,6 +129,10 @@ CFLAGS-tst-strtod-round.c += $(type-float128-CFLAGS) CFLAGS-tst-wcstod-round.c += $(type-float128-CFLAGS) CFLAGS-tst-strtod-nan-locale.c += $(type-float128-CFLAGS) CFLAGS-tst-wcstod-nan-locale.c += $(type-float128-CFLAGS) +CFLAGS-tst-strtod1i.c += $(type-float128-CFLAGS) +CFLAGS-tst-strtod3.c += $(type-float128-CFLAGS) +CFLAGS-tst-strtod4.c += $(type-float128-CFLAGS) +CFLAGS-tst-strtod5i.c += $(type-float128-CFLAGS) CFLAGS-tst-strtod6.c += $(type-float128-CFLAGS) CFLAGS-tst-strfrom.c += $(type-float128-CFLAGS) CFLAGS-tst-strfrom-locale.c += $(type-float128-CFLAGS) diff --git a/sysdeps/powerpc/powerpc64/le/power9/strcpy.S b/sysdeps/powerpc/powerpc64/le/power9/strcpy.S index 603bde1e39..2f50625a19 100644 --- a/sysdeps/powerpc/powerpc64/le/power9/strcpy.S +++ b/sysdeps/powerpc/powerpc64/le/power9/strcpy.S @@ -42,22 +42,48 @@ if USE_AS_STPCPY is defined. - The implementation can load bytes past a null terminator, but only - up to the next 16B boundary, so it never crosses a page. */ + This implementation never reads across a page boundary, but may + read beyond the NUL terminator. */ -/* Load quadword at addr+offset to vreg, check for null bytes, +/* Load 4 quadwords, merge into one VR for speed and check for NUL + and branch to label if NUL is found. 
*/ +#define CHECK_64B(offset,addr,label) \ + lxv 32+v4,(offset+0)(addr); \ + lxv 32+v5,(offset+16)(addr); \ + lxv 32+v6,(offset+32)(addr); \ + lxv 32+v7,(offset+48)(addr); \ + vminub v14,v4,v5; \ + vminub v15,v6,v7; \ + vminub v16,v14,v15; \ + vcmpequb. v0,v16,v18; \ + beq cr6,$+12; \ + li r7,offset; \ + b L(label); \ + stxv 32+v4,(offset+0)(r11); \ + stxv 32+v5,(offset+16)(r11); \ + stxv 32+v6,(offset+32)(r11); \ + stxv 32+v7,(offset+48)(r11) + +/* Load quadword at addr+offset to vreg, check for NUL bytes, and branch to label if any are found. */ -#define CHECK16(vreg,offset,addr,label) \ - lxv vreg+32,offset(addr); \ - vcmpequb. v6,vreg,v18; \ +#define CHECK_16B(vreg,offset,addr,label) \ + lxv vreg+32,offset(addr); \ + vcmpequb. v15,vreg,v18; \ bne cr6,L(label); -.machine power9 +/* Store vreg2 with length if NUL is found. */ +#define STORE_WITH_LEN(vreg1,vreg2,reg) \ + vctzlsbb r8,vreg1; \ + addi r9,r8,1; \ + sldi r9,r9,56; \ + stxvl 32+vreg2,reg,r9; + +.machine power9 ENTRY_TOCLESS (FUNC_NAME, 4) CALL_MCOUNT 2 - vspltisb v18,0 /* Zeroes in v18 */ - vspltisb v19,-1 /* 0xFF bytes in v19 */ + vspltisb v18,0 /* Zeroes in v18. */ + vspltisb v19,-1 /* 0xFF bytes in v19. */ /* Next 16B-aligned address. Prepare address for L(loop). */ addi r5,r4,16 @@ -70,14 +96,11 @@ ENTRY_TOCLESS (FUNC_NAME, 4) lvsr v1,0,r4 vperm v0,v19,v0,v1 - vcmpequb. v6,v0,v18 /* 0xff if byte is NULL, 0x00 otherwise */ + vcmpequb. v6,v0,v18 /* 0xff if byte is NUL, 0x00 otherwise. */ beq cr6,L(no_null) - /* There's a null byte. */ - vctzlsbb r8,v6 /* Number of trailing zeroes */ - addi r9,r8,1 /* Add null byte. */ - sldi r10,r9,56 /* stxvl wants size in top 8 bits. */ - stxvl 32+v0,r3,r10 /* Partial store */ + /* There's a NUL byte. 
*/ + STORE_WITH_LEN(v6,v0,r3) #ifdef USE_AS_STPCPY /* stpcpy returns the dest address plus the size not counting the @@ -87,17 +110,22 @@ ENTRY_TOCLESS (FUNC_NAME, 4) blr L(no_null): - sldi r10,r8,56 /* stxvl wants size in top 8 bits */ - stxvl 32+v0,r3,r10 /* Partial store */ + sldi r10,r8,56 /* stxvl wants size in top 8 bits. */ + stxvl 32+v0,r3,r10 /* Partial store. */ +/* The main loop is optimized for longer strings(> 512 bytes), + so checking the first bytes in 16B chunks benefits shorter + strings a lot. */ .p2align 4 -L(loop): - CHECK16(v0,0,r5,tail1) - CHECK16(v1,16,r5,tail2) - CHECK16(v2,32,r5,tail3) - CHECK16(v3,48,r5,tail4) - CHECK16(v4,64,r5,tail5) - CHECK16(v5,80,r5,tail6) +L(aligned): + CHECK_16B(v0,0,r5,tail1) + CHECK_16B(v1,16,r5,tail2) + CHECK_16B(v2,32,r5,tail3) + CHECK_16B(v3,48,r5,tail4) + CHECK_16B(v4,64,r5,tail5) + CHECK_16B(v5,80,r5,tail6) + CHECK_16B(v6,96,r5,tail7) + CHECK_16B(v7,112,r5,tail8) stxv 32+v0,0(r11) stxv 32+v1,16(r11) @@ -105,21 +133,146 @@ L(loop): stxv 32+v3,48(r11) stxv 32+v4,64(r11) stxv 32+v5,80(r11) + stxv 32+v6,96(r11) + stxv 32+v7,112(r11) - addi r5,r5,96 - addi r11,r11,96 + addi r11,r11,128 + + CHECK_16B(v0,128,r5,tail1) + CHECK_16B(v1,128+16,r5,tail2) + CHECK_16B(v2,128+32,r5,tail3) + CHECK_16B(v3,128+48,r5,tail4) + CHECK_16B(v4,128+64,r5,tail5) + CHECK_16B(v5,128+80,r5,tail6) + CHECK_16B(v6,128+96,r5,tail7) + CHECK_16B(v7,128+112,r5,tail8) + + stxv 32+v0,0(r11) + stxv 32+v1,16(r11) + stxv 32+v2,32(r11) + stxv 32+v3,48(r11) + stxv 32+v4,64(r11) + stxv 32+v5,80(r11) + stxv 32+v6,96(r11) + stxv 32+v7,112(r11) + + addi r11,r11,128 + + CHECK_16B(v0,256,r5,tail1) + CHECK_16B(v1,256+16,r5,tail2) + CHECK_16B(v2,256+32,r5,tail3) + CHECK_16B(v3,256+48,r5,tail4) + CHECK_16B(v4,256+64,r5,tail5) + CHECK_16B(v5,256+80,r5,tail6) + CHECK_16B(v6,256+96,r5,tail7) + CHECK_16B(v7,256+112,r5,tail8) + + stxv 32+v0,0(r11) + stxv 32+v1,16(r11) + stxv 32+v2,32(r11) + stxv 32+v3,48(r11) + stxv 32+v4,64(r11) + stxv 32+v5,80(r11) + stxv 
32+v6,96(r11) + stxv 32+v7,112(r11) + + addi r11,r11,128 + + CHECK_16B(v0,384,r5,tail1) + CHECK_16B(v1,384+16,r5,tail2) + CHECK_16B(v2,384+32,r5,tail3) + CHECK_16B(v3,384+48,r5,tail4) + CHECK_16B(v4,384+64,r5,tail5) + CHECK_16B(v5,384+80,r5,tail6) + CHECK_16B(v6,384+96,r5,tail7) + CHECK_16B(v7,384+112,r5,tail8) + + stxv 32+v0,0(r11) + stxv 32+v1,16(r11) + stxv 32+v2,32(r11) + stxv 32+v3,48(r11) + stxv 32+v4,64(r11) + stxv 32+v5,80(r11) + stxv 32+v6,96(r11) + stxv 32+v7,112(r11) + + /* Align src pointer down to a 64B boundary. */ + addi r5,r4,512 + clrrdi r5,r5,6 + subf r7,r4,r5 + add r11,r3,r7 + +/* Switch to a more aggressive approach checking 64B each time. */ + .p2align 5 +L(strcpy_loop): + CHECK_64B(0,r5,tail_64b) + CHECK_64B(64,r5,tail_64b) + CHECK_64B(128,r5,tail_64b) + CHECK_64B(192,r5,tail_64b) + + CHECK_64B(256,r5,tail_64b) + CHECK_64B(256+64,r5,tail_64b) + CHECK_64B(256+128,r5,tail_64b) + CHECK_64B(256+192,r5,tail_64b) + addi r5,r5,512 + addi r11,r11,512 + + b L(strcpy_loop) + + .p2align 5 +L(tail_64b): + /* OK, we found a NUL byte. Let's look for it in the current 64-byte + block and mark it in its corresponding VR. */ + add r11,r11,r7 + vcmpequb. v8,v4,v18 + beq cr6,L(no_null_16B) + /* There's a NUL byte. */ + STORE_WITH_LEN(v8,v4,r11) +#ifdef USE_AS_STPCPY + add r3,r11,r8 +#endif + blr + +L(no_null_16B): + stxv 32+v4,0(r11) + vcmpequb. v8,v5,v18 + beq cr6,L(no_null_32B) + /* There's a NUL byte. */ + addi r11,r11,16 + STORE_WITH_LEN(v8,v5,r11) +#ifdef USE_AS_STPCPY + add r3,r11,r8 +#endif + blr - b L(loop) +L(no_null_32B): + stxv 32+v5,16(r11) + vcmpequb. v8,v6,v18 + beq cr6,L(no_null_48B) + /* There's a NUL byte. */ + addi r11,r11,32 + STORE_WITH_LEN(v8,v6,r11) +#ifdef USE_AS_STPCPY + add r3,r11,r8 +#endif + blr + +L(no_null_48B): + stxv 32+v6,32(r11) + vcmpequb. v8,v7,v18; + /* There's a NUL byte. 
*/ + addi r11,r11,48 + STORE_WITH_LEN(v8,v7,r11) +#ifdef USE_AS_STPCPY + add r3,r11,r8 +#endif + blr .p2align 4 L(tail1): - vctzlsbb r8,v6 /* Number of trailing zeroes */ - addi r9,r8,1 /* Add null terminator */ - sldi r9,r9,56 /* stxvl wants size in top 8 bits */ - stxvl 32+v0,r11,r9 /* Partial store */ + /* There's a NUL byte. */ + STORE_WITH_LEN(v15,v0,r11) #ifdef USE_AS_STPCPY - /* stpcpy returns the dest address plus the size not counting the - final '\0'. */ add r3,r11,r8 #endif blr @@ -127,11 +280,9 @@ L(tail1): .p2align 4 L(tail2): stxv 32+v0,0(r11) - vctzlsbb r8,v6 - addi r9,r8,1 - sldi r9,r9,56 + /* There's a NUL byte. */ addi r11,r11,16 - stxvl 32+v1,r11,r9 + STORE_WITH_LEN(v15,v1,r11) #ifdef USE_AS_STPCPY add r3,r11,r8 #endif @@ -141,11 +292,8 @@ L(tail2): L(tail3): stxv 32+v0,0(r11) stxv 32+v1,16(r11) - vctzlsbb r8,v6 - addi r9,r8,1 - sldi r9,r9,56 addi r11,r11,32 - stxvl 32+v2,r11,r9 + STORE_WITH_LEN(v15,v2,r11) #ifdef USE_AS_STPCPY add r3,r11,r8 #endif @@ -156,11 +304,8 @@ L(tail4): stxv 32+v0,0(r11) stxv 32+v1,16(r11) stxv 32+v2,32(r11) - vctzlsbb r8,v6 - addi r9,r8,1 - sldi r9,r9,56 addi r11,r11,48 - stxvl 32+v3,r11,r9 + STORE_WITH_LEN(v15,v3,r11) #ifdef USE_AS_STPCPY add r3,r11,r8 #endif @@ -172,11 +317,8 @@ L(tail5): stxv 32+v1,16(r11) stxv 32+v2,32(r11) stxv 32+v3,48(r11) - vctzlsbb r8,v6 - addi r9,r8,1 - sldi r9,r9,56 addi r11,r11,64 - stxvl 32+v4,r11,r9 + STORE_WITH_LEN(v15,v4,r11) #ifdef USE_AS_STPCPY add r3,r11,r8 #endif @@ -189,11 +331,39 @@ L(tail6): stxv 32+v2,32(r11) stxv 32+v3,48(r11) stxv 32+v4,64(r11) - vctzlsbb r8,v6 - addi r9,r8,1 - sldi r9,r9,56 addi r11,r11,80 - stxvl 32+v5,r11,r9 + STORE_WITH_LEN(v15,v5,r11) +#ifdef USE_AS_STPCPY + add r3,r11,r8 +#endif + blr + + .p2align 4 +L(tail7): + stxv 32+v0,0(r11) + stxv 32+v1,16(r11) + stxv 32+v2,32(r11) + stxv 32+v3,48(r11) + stxv 32+v4,64(r11) + stxv 32+v5,80(r11) + addi r11,r11,96 + STORE_WITH_LEN(v15,v6,r11) +#ifdef USE_AS_STPCPY + add r3,r11,r8 +#endif + blr + + .p2align 4 +L(tail8): 
+ stxv 32+v0,0(r11) + stxv 32+v1,16(r11) + stxv 32+v2,32(r11) + stxv 32+v3,48(r11) + stxv 32+v4,64(r11) + stxv 32+v5,80(r11) + stxv 32+v6,96(r11) + addi r11,r11,112 + STORE_WITH_LEN(v15,v7,r11) #ifdef USE_AS_STPCPY add r3,r11,r8 #endif diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcat-power7.c b/sysdeps/powerpc/powerpc64/multiarch/strcat-power7.c index d453b6535d..6eda080fe2 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strcat-power7.c +++ b/sysdeps/powerpc/powerpc64/multiarch/strcat-power7.c @@ -8,12 +8,12 @@ The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/ >. */ + <https://www.gnu.org/licenses/>. */ #include <string.h> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcat-power8.c b/sysdeps/powerpc/powerpc64/multiarch/strcat-power8.c index 0a77812b38..b9cbce2d92 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strcat-power8.c +++ b/sysdeps/powerpc/powerpc64/multiarch/strcat-power8.c @@ -8,12 +8,12 @@ The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/ >. */ + <https://www.gnu.org/licenses/>. 
*/ #include <string.h> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcat-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strcat-ppc64.c index fd3b08d7e7..67f7d4052c 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strcat-ppc64.c +++ b/sysdeps/powerpc/powerpc64/multiarch/strcat-ppc64.c @@ -8,12 +8,12 @@ The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/ >. */ + <https://www.gnu.org/licenses/>. */ #include <string.h> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncat-power7.c b/sysdeps/powerpc/powerpc64/multiarch/strncat-power7.c index a848fcd8b4..f3bccf297c 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strncat-power7.c +++ b/sysdeps/powerpc/powerpc64/multiarch/strncat-power7.c @@ -8,12 +8,12 @@ The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/ >. */ + <https://www.gnu.org/licenses/>. 
*/ #include <string.h> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncat-power8.c b/sysdeps/powerpc/powerpc64/multiarch/strncat-power8.c index a31b918879..3b0ea87ec2 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strncat-power8.c +++ b/sysdeps/powerpc/powerpc64/multiarch/strncat-power8.c @@ -13,7 +13,7 @@ You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/ >. */ + <https://www.gnu.org/licenses/>. */ #include <string.h> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncat-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strncat-ppc64.c index 7f4c9f1d4b..b467fdecc1 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strncat-ppc64.c +++ b/sysdeps/powerpc/powerpc64/multiarch/strncat-ppc64.c @@ -8,12 +8,12 @@ The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/ >. */ + <https://www.gnu.org/licenses/>. */ #include <string.h> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncpy.c b/sysdeps/powerpc/powerpc64/multiarch/strncpy.c index af855e6a5c..6e4e2ea14b 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strncpy.c +++ b/sysdeps/powerpc/powerpc64/multiarch/strncpy.c @@ -9,12 +9,12 @@ The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/ >. */ + <https://www.gnu.org/licenses/>. */ /* Define multiple versions only for definition in libc. */ #if IS_IN (libc) diff --git a/sysdeps/powerpc/powerpc64/sysdep.h b/sysdeps/powerpc/powerpc64/sysdep.h index c363939e1a..c439b06121 100644 --- a/sysdeps/powerpc/powerpc64/sysdep.h +++ b/sysdeps/powerpc/powerpc64/sysdep.h @@ -353,6 +353,25 @@ LT_LABELSUFFIX(name,_name_end): ; \ DO_CALL (SYS_ify (syscall_name)) #ifdef SHARED +# define TAIL_CALL_NO_RETURN(__func) \ + b JUMPTARGET (NOTOC (__func)) +#else +# define TAIL_CALL_NO_RETURN(__func) \ + .ifdef .Local ## __func; \ + b .Local ## __func; \ + .else; \ +.Local ## __func: \ + mflr 0; \ + std 0,FRAME_LR_SAVE(1); \ + stdu 1,-FRAME_MIN_SIZE(1); \ + cfi_adjust_cfa_offset(FRAME_MIN_SIZE); \ + cfi_offset(lr,FRAME_LR_SAVE); \ + bl JUMPTARGET(__func); \ + nop; \ + .endif +#endif + +#ifdef SHARED #define TAIL_CALL_SYSCALL_ERROR \ b JUMPTARGET (NOTOC (__syscall_error)) #else diff --git a/sysdeps/pthread/tst-cancel2.c b/sysdeps/pthread/tst-cancel2.c index ac38b50115..b4f7098235 100644 --- a/sysdeps/pthread/tst-cancel2.c +++ b/sysdeps/pthread/tst-cancel2.c @@ -32,6 +32,10 @@ tf (void *arg) char buf[100000]; while (write (fd[1], buf, sizeof (buf)) > 0); + /* The write can return -1/EPIPE if the pipe was closed before the + thread calls write, which signals a side-effect that must be + signaled to the thread. 
*/ + pthread_testcancel (); return (void *) 42l; } diff --git a/sysdeps/riscv/nofpu/libm-test-ulps b/sysdeps/riscv/nofpu/libm-test-ulps index 9ad64d1d85..4adad138b0 100644 --- a/sysdeps/riscv/nofpu/libm-test-ulps +++ b/sysdeps/riscv/nofpu/libm-test-ulps @@ -1361,22 +1361,18 @@ ldouble: 3 Function: "tgamma": double: 9 -float: 8 ldouble: 4 Function: "tgamma_downward": double: 5 -float: 5 ldouble: 5 Function: "tgamma_towardzero": double: 5 -float: 4 ldouble: 5 Function: "tgamma_upward": double: 4 -float: 4 ldouble: 4 Function: "y0": diff --git a/sysdeps/riscv/rvd/libm-test-ulps b/sysdeps/riscv/rvd/libm-test-ulps index 1e6c092361..07a8794e48 100644 --- a/sysdeps/riscv/rvd/libm-test-ulps +++ b/sysdeps/riscv/rvd/libm-test-ulps @@ -976,24 +976,24 @@ float: 1 ldouble: 3 Function: "exp10m1": -double: 2 -float: 1 -ldouble: 1 +double: 4 +float: 2 +ldouble: 3 Function: "exp10m1_downward": -double: 1 -float: 1 -ldouble: 3 +double: 3 +float: 3 +ldouble: 6 Function: "exp10m1_towardzero": -double: 1 -float: 1 -ldouble: 3 +double: 2 +float: 3 +ldouble: 6 Function: "exp10m1_upward": -double: 3 -float: 1 -ldouble: 3 +double: 5 +float: 3 +ldouble: 6 Function: "exp2": double: 1 @@ -1013,24 +1013,24 @@ float: 1 ldouble: 2 Function: "exp2m1": -double: 1 -float: 1 -ldouble: 1 +double: 2 +float: 2 +ldouble: 2 Function: "exp2m1_downward": -double: 1 -float: 1 -ldouble: 2 +double: 3 +float: 3 +ldouble: 3 Function: "exp2m1_towardzero": -double: 2 -float: 1 -ldouble: 2 +double: 3 +float: 2 +ldouble: 4 Function: "exp2m1_upward": -double: 1 -float: 1 -ldouble: 2 +double: 3 +float: 3 +ldouble: 5 Function: "exp_downward": double: 1 @@ -1206,24 +1206,24 @@ float: 2 ldouble: 1 Function: "log10p1": -double: 1 -float: 1 +double: 2 +float: 2 ldouble: 3 Function: "log10p1_downward": double: 2 -float: 1 -ldouble: 2 +float: 3 +ldouble: 4 Function: "log10p1_towardzero": -double: 2 +double: 3 float: 2 -ldouble: 2 +ldouble: 3 Function: "log10p1_upward": double: 2 -float: 1 -ldouble: 3 +float: 3 +ldouble: 4 
Function: "log1p": double: 1 @@ -1431,22 +1431,18 @@ ldouble: 3 Function: "tgamma": double: 9 -float: 8 ldouble: 4 Function: "tgamma_downward": double: 9 -float: 7 ldouble: 5 Function: "tgamma_towardzero": double: 9 -float: 7 ldouble: 5 Function: "tgamma_upward": double: 8 -float: 8 ldouble: 4 Function: "y0": diff --git a/sysdeps/riscv/start.S b/sysdeps/riscv/start.S index ede186ef23..fd6f1ca038 100644 --- a/sysdeps/riscv/start.S +++ b/sysdeps/riscv/start.S @@ -87,6 +87,7 @@ load_gp: ret .section .preinit_array,"aw" + .align PTRLOG .dc.a load_gp /* Define a symbol for the first piece of initialized data. */ diff --git a/sysdeps/s390/fpu/libm-test-ulps b/sysdeps/s390/fpu/libm-test-ulps index 9ac3db4fa5..6e4084ae37 100644 --- a/sysdeps/s390/fpu/libm-test-ulps +++ b/sysdeps/s390/fpu/libm-test-ulps @@ -977,24 +977,24 @@ float: 1 ldouble: 3 Function: "exp10m1": -double: 2 -float: 1 -ldouble: 1 +double: 4 +float: 2 +ldouble: 3 Function: "exp10m1_downward": -double: 1 -float: 1 -ldouble: 3 +double: 3 +float: 3 +ldouble: 6 Function: "exp10m1_towardzero": -double: 1 -float: 1 -ldouble: 3 +double: 2 +float: 3 +ldouble: 6 Function: "exp10m1_upward": -double: 3 -float: 1 -ldouble: 3 +double: 5 +float: 3 +ldouble: 6 Function: "exp2": double: 1 @@ -1014,24 +1014,24 @@ float: 1 ldouble: 2 Function: "exp2m1": -double: 1 -float: 1 -ldouble: 1 +double: 2 +float: 2 +ldouble: 2 Function: "exp2m1_downward": -double: 1 -float: 1 -ldouble: 2 +double: 3 +float: 3 +ldouble: 3 Function: "exp2m1_towardzero": -double: 2 -float: 1 -ldouble: 2 +double: 3 +float: 2 +ldouble: 4 Function: "exp2m1_upward": -double: 1 -float: 1 -ldouble: 2 +double: 3 +float: 3 +ldouble: 5 Function: "exp_downward": double: 1 @@ -1207,24 +1207,24 @@ float: 2 ldouble: 1 Function: "log10p1": -double: 1 -float: 1 +double: 2 +float: 2 ldouble: 3 Function: "log10p1_downward": double: 2 -float: 1 -ldouble: 2 +float: 3 +ldouble: 4 Function: "log10p1_towardzero": -double: 2 +double: 3 float: 2 -ldouble: 2 +ldouble: 3 
Function: "log10p1_upward": double: 2 -float: 1 -ldouble: 3 +float: 3 +ldouble: 4 Function: "log1p": double: 1 @@ -1429,22 +1429,18 @@ ldouble: 3 Function: "tgamma": double: 9 -float: 8 ldouble: 4 Function: "tgamma_downward": double: 9 -float: 7 ldouble: 5 Function: "tgamma_towardzero": double: 9 -float: 7 ldouble: 5 Function: "tgamma_upward": double: 9 -float: 8 ldouble: 4 Function: "y0": diff --git a/sysdeps/s390/utf16-utf32-z9.c b/sysdeps/s390/utf16-utf32-z9.c index 51dd979607..9ac8a82df8 100644 --- a/sysdeps/s390/utf16-utf32-z9.c +++ b/sysdeps/s390/utf16-utf32-z9.c @@ -397,8 +397,8 @@ gconv_end (struct __gconv_step *data) { \ register const unsigned char* pInput asm ("8") = inptr; \ register size_t inlen asm ("9") = inend - inptr; \ - register unsigned char* pOutput asm ("10") = outptr; \ - register size_t outlen asm ("11") = outend - outptr; \ + register unsigned char* pOutput asm ("6") = outptr; \ + register size_t outlen asm ("7") = outend - outptr; \ unsigned long tmp, tmp2, tmp3; \ asm volatile (".machine push\n\t" \ ".machine \"z13\"\n\t" \ @@ -707,8 +707,8 @@ gconv_end (struct __gconv_step *data) { \ register const unsigned char* pInput asm ("8") = inptr; \ register size_t inlen asm ("9") = inend - inptr; \ - register unsigned char* pOutput asm ("10") = outptr; \ - register size_t outlen asm ("11") = outend - outptr; \ + register unsigned char* pOutput asm ("6") = outptr; \ + register size_t outlen asm ("7") = outend - outptr; \ unsigned long tmp, tmp2, tmp3; \ asm volatile (".machine push\n\t" \ ".machine \"z13\"\n\t" \ diff --git a/sysdeps/s390/utf8-utf16-z9.c b/sysdeps/s390/utf8-utf16-z9.c index 421747f058..d19fc3c97b 100644 --- a/sysdeps/s390/utf8-utf16-z9.c +++ b/sysdeps/s390/utf8-utf16-z9.c @@ -170,8 +170,8 @@ gconv_end (struct __gconv_step *data) { \ register const unsigned char* pInput __asm__ ("8") = inptr; \ register size_t inlen __asm__ ("9") = inend - inptr; \ - register unsigned char* pOutput __asm__ ("10") = outptr; \ - register size_t 
outlen __asm__("11") = outend - outptr; \ + register unsigned char* pOutput __asm__ ("6") = outptr; \ + register size_t outlen __asm__("7") = outend - outptr; \ unsigned long cc = 0; \ \ __asm__ __volatile__ (".machine push \n\t" \ @@ -242,8 +242,8 @@ gconv_end (struct __gconv_step *data) { \ register const unsigned char* pInput asm ("8") = inptr; \ register size_t inlen asm ("9") = inend - inptr; \ - register unsigned char* pOutput asm ("10") = outptr; \ - register size_t outlen asm("11") = outend - outptr; \ + register unsigned char* pOutput asm ("6") = outptr; \ + register size_t outlen asm("7") = outend - outptr; \ unsigned long tmp, tmp2, tmp3; \ asm volatile (".machine push\n\t" \ ".machine \"z13\"\n\t" \ @@ -818,8 +818,8 @@ gconv_end (struct __gconv_step *data) { \ register const unsigned char* pInput asm ("8") = inptr; \ register size_t inlen asm ("9") = inend - inptr; \ - register unsigned char* pOutput asm ("10") = outptr; \ - register size_t outlen asm ("11") = outend - outptr; \ + register unsigned char* pOutput asm ("6") = outptr; \ + register size_t outlen asm ("7") = outend - outptr; \ unsigned long tmp, tmp2, tmp3; \ asm volatile (".machine push\n\t" \ ".machine \"z13\"\n\t" \ diff --git a/sysdeps/s390/utf8-utf32-z9.c b/sysdeps/s390/utf8-utf32-z9.c index 9c4189a5f0..e4323d634c 100644 --- a/sysdeps/s390/utf8-utf32-z9.c +++ b/sysdeps/s390/utf8-utf32-z9.c @@ -170,8 +170,8 @@ gconv_end (struct __gconv_step *data) { \ register const unsigned char* pInput __asm__ ("8") = inptr; \ register size_t inlen __asm__ ("9") = inend - inptr; \ - register unsigned char* pOutput __asm__ ("10") = outptr; \ - register size_t outlen __asm__("11") = outend - outptr; \ + register unsigned char* pOutput __asm__ ("6") = outptr; \ + register size_t outlen __asm__("7") = outend - outptr; \ unsigned long cc = 0; \ \ __asm__ __volatile__ (".machine push \n\t" \ @@ -475,8 +475,8 @@ gconv_end (struct __gconv_step *data) { \ register const unsigned char* pInput asm ("8") = inptr; 
\ register size_t inlen asm ("9") = inend - inptr; \ - register unsigned char* pOutput asm ("10") = outptr; \ - register size_t outlen asm("11") = outend - outptr; \ + register unsigned char* pOutput asm ("6") = outptr; \ + register size_t outlen asm("7") = outend - outptr; \ unsigned long tmp, tmp2, tmp3; \ asm volatile (".machine push\n\t" \ ".machine \"z13\"\n\t" \ @@ -864,8 +864,8 @@ gconv_end (struct __gconv_step *data) { \ register const unsigned char* pInput asm ("8") = inptr; \ register size_t inlen asm ("9") = inend - inptr; \ - register unsigned char* pOutput asm ("10") = outptr; \ - register size_t outlen asm ("11") = outend - outptr; \ + register unsigned char* pOutput asm ("6") = outptr; \ + register size_t outlen asm ("7") = outend - outptr; \ unsigned long tmp, tmp2; \ asm volatile (".machine push\n\t" \ ".machine \"z13\"\n\t" \ diff --git a/sysdeps/sh/libm-test-ulps b/sysdeps/sh/libm-test-ulps index 3c84259941..36f21ed395 100644 --- a/sysdeps/sh/libm-test-ulps +++ b/sysdeps/sh/libm-test-ulps @@ -532,11 +532,9 @@ float: 2 Function: "tgamma": double: 9 -float: 8 Function: "tgamma_towardzero": double: 9 -float: 7 Function: "y0": double: 3 diff --git a/sysdeps/sh/sysdep.h b/sysdeps/sh/sysdep.h index 0c9e5626e9..377d29b950 100644 --- a/sysdeps/sh/sysdep.h +++ b/sysdeps/sh/sysdep.h @@ -24,6 +24,7 @@ #define ALIGNARG(log2) log2 #define ASM_SIZE_DIRECTIVE(name) .size name,.-name +#define L(label) .L##label #ifdef SHARED #define PLTJMP(_x) _x##@PLT diff --git a/sysdeps/sparc/fpu/libm-test-ulps b/sysdeps/sparc/fpu/libm-test-ulps index 0142357b3f..ccb6c794e7 100644 --- a/sysdeps/sparc/fpu/libm-test-ulps +++ b/sysdeps/sparc/fpu/libm-test-ulps @@ -977,24 +977,24 @@ float: 1 ldouble: 3 Function: "exp10m1": -double: 2 -float: 1 -ldouble: 1 +double: 3 +float: 2 +ldouble: 3 Function: "exp10m1_downward": -double: 1 -float: 1 -ldouble: 3 +double: 3 +float: 3 +ldouble: 6 Function: "exp10m1_towardzero": -double: 1 -float: 1 -ldouble: 3 +double: 2 +float: 3 +ldouble: 6 
Function: "exp10m1_upward": -double: 3 -float: 1 -ldouble: 3 +double: 4 +float: 4 +ldouble: 6 Function: "exp2": double: 1 @@ -1017,24 +1017,24 @@ float: 1 ldouble: 2 Function: "exp2m1": -double: 1 -float: 1 -ldouble: 1 - -Function: "exp2m1_downward": double: 2 -float: 1 +float: 2 ldouble: 2 +Function: "exp2m1_downward": +double: 3 +float: 3 +ldouble: 3 + Function: "exp2m1_towardzero": -double: 2 -float: 1 -ldouble: 2 +double: 3 +float: 2 +ldouble: 4 Function: "exp2m1_upward": -double: 1 -float: 1 -ldouble: 2 +double: 3 +float: 3 +ldouble: 5 Function: "exp_downward": double: 1 @@ -1212,23 +1212,23 @@ ldouble: 1 Function: "log10p1": double: 1 -float: 1 +float: 2 ldouble: 3 Function: "log10p1_downward": double: 2 -float: 1 -ldouble: 2 +float: 3 +ldouble: 4 Function: "log10p1_towardzero": -double: 2 +double: 3 float: 2 -ldouble: 2 +ldouble: 3 Function: "log10p1_upward": double: 2 -float: 1 -ldouble: 3 +float: 3 +ldouble: 4 Function: "log1p": double: 1 @@ -1444,22 +1444,18 @@ ldouble: 3 Function: "tgamma": double: 9 -float: 8 ldouble: 4 Function: "tgamma_downward": double: 9 -float: 7 ldouble: 5 Function: "tgamma_towardzero": double: 9 -float: 7 ldouble: 5 Function: "tgamma_upward": double: 9 -float: 8 ldouble: 4 Function: "y0": diff --git a/sysdeps/unix/sysdep.h b/sysdeps/unix/sysdep.h index a19e84165b..adc8d71f49 100644 --- a/sysdeps/unix/sysdep.h +++ b/sysdeps/unix/sysdep.h @@ -24,6 +24,9 @@ #define SYSCALL__(name, args) PSEUDO (__##name, name, args) #define SYSCALL(name, args) PSEUDO (name, name, args) +#ifndef __ASSEMBLER__ +# include <errno.h> + #define __SYSCALL_CONCAT_X(a,b) a##b #define __SYSCALL_CONCAT(a,b) __SYSCALL_CONCAT_X (a, b) @@ -108,42 +111,148 @@ #define INLINE_SYSCALL_CALL(...) \ __INLINE_SYSCALL_DISP (__INLINE_SYSCALL, __VA_ARGS__) -#if IS_IN (rtld) -/* All cancellation points are compiled out in the dynamic loader. 
*/ -# define NO_SYSCALL_CANCEL_CHECKING 1 +#define __INTERNAL_SYSCALL_NCS0(name) \ + INTERNAL_SYSCALL_NCS (name, 0) +#define __INTERNAL_SYSCALL_NCS1(name, a1) \ + INTERNAL_SYSCALL_NCS (name, 1, a1) +#define __INTERNAL_SYSCALL_NCS2(name, a1, a2) \ + INTERNAL_SYSCALL_NCS (name, 2, a1, a2) +#define __INTERNAL_SYSCALL_NCS3(name, a1, a2, a3) \ + INTERNAL_SYSCALL_NCS (name, 3, a1, a2, a3) +#define __INTERNAL_SYSCALL_NCS4(name, a1, a2, a3, a4) \ + INTERNAL_SYSCALL_NCS (name, 4, a1, a2, a3, a4) +#define __INTERNAL_SYSCALL_NCS5(name, a1, a2, a3, a4, a5) \ + INTERNAL_SYSCALL_NCS (name, 5, a1, a2, a3, a4, a5) +#define __INTERNAL_SYSCALL_NCS6(name, a1, a2, a3, a4, a5, a6) \ + INTERNAL_SYSCALL_NCS (name, 6, a1, a2, a3, a4, a5, a6) +#define __INTERNAL_SYSCALL_NCS7(name, a1, a2, a3, a4, a5, a6, a7) \ + INTERNAL_SYSCALL_NCS (name, 7, a1, a2, a3, a4, a5, a6, a7) + +/* Issue a syscall defined by syscall number plus any other argument required. + It is similar to the INTERNAL_SYSCALL_NCS macro, but without the need to pass + the expected argument number as the second parameter. */ +#define INTERNAL_SYSCALL_NCS_CALL(...) \ + __INTERNAL_SYSCALL_DISP (__INTERNAL_SYSCALL_NCS, __VA_ARGS__) + +/* Cancellation macros. */ +#include <syscall_types.h> + +/* Adjust both the __syscall_cancel and the SYSCALL_CANCEL macro to support + 7 arguments instead of the default 6 (currently only mips32). This avoids + requiring each architecture to support 7-argument + {INTERNAL,INLINE}_SYSCALL macros.
*/ +#ifdef HAVE_CANCELABLE_SYSCALL_WITH_7_ARGS +# define __SYSCALL_CANCEL7_ARG_DEF __syscall_arg_t a7, +# define __SYSCALL_CANCEL7_ARCH_ARG_DEF ,__syscall_arg_t a7 +# define __SYSCALL_CANCEL7_ARG 0, +# define __SYSCALL_CANCEL7_ARG7 a7, +# define __SYSCALL_CANCEL7_ARCH_ARG7 , a7 #else -# define NO_SYSCALL_CANCEL_CHECKING SINGLE_THREAD_P +# define __SYSCALL_CANCEL7_ARG_DEF +# define __SYSCALL_CANCEL7_ARCH_ARG_DEF +# define __SYSCALL_CANCEL7_ARG +# define __SYSCALL_CANCEL7_ARG7 +# define __SYSCALL_CANCEL7_ARCH_ARG7 #endif +long int __internal_syscall_cancel (__syscall_arg_t a1, __syscall_arg_t a2, + __syscall_arg_t a3, __syscall_arg_t a4, + __syscall_arg_t a5, __syscall_arg_t a6, + __SYSCALL_CANCEL7_ARG_DEF + __syscall_arg_t nr) attribute_hidden; -#define SYSCALL_CANCEL(...) \ - ({ \ - long int sc_ret; \ - if (NO_SYSCALL_CANCEL_CHECKING) \ - sc_ret = INLINE_SYSCALL_CALL (__VA_ARGS__); \ - else \ - { \ - int sc_cancel_oldtype = LIBC_CANCEL_ASYNC (); \ - sc_ret = INLINE_SYSCALL_CALL (__VA_ARGS__); \ - LIBC_CANCEL_RESET (sc_cancel_oldtype); \ - } \ - sc_ret; \ - }) +long int __syscall_cancel (__syscall_arg_t arg1, __syscall_arg_t arg2, + __syscall_arg_t arg3, __syscall_arg_t arg4, + __syscall_arg_t arg5, __syscall_arg_t arg6, + __SYSCALL_CANCEL7_ARG_DEF + __syscall_arg_t nr) attribute_hidden; -/* Issue a syscall defined by syscall number plus any other argument - required. Any error will be returned unmodified (including errno). */ -#define INTERNAL_SYSCALL_CANCEL(...) 
\ - ({ \ - long int sc_ret; \ - if (NO_SYSCALL_CANCEL_CHECKING) \ - sc_ret = INTERNAL_SYSCALL_CALL (__VA_ARGS__); \ - else \ - { \ - int sc_cancel_oldtype = LIBC_CANCEL_ASYNC (); \ - sc_ret = INTERNAL_SYSCALL_CALL (__VA_ARGS__); \ - LIBC_CANCEL_RESET (sc_cancel_oldtype); \ - } \ - sc_ret; \ - }) +#define __SYSCALL_CANCEL0(name) \ + __syscall_cancel (0, 0, 0, 0, 0, 0, __SYSCALL_CANCEL7_ARG __NR_##name) +#define __SYSCALL_CANCEL1(name, a1) \ + __syscall_cancel (__SSC (a1), 0, 0, 0, 0, 0, \ + __SYSCALL_CANCEL7_ARG __NR_##name) +#define __SYSCALL_CANCEL2(name, a1, a2) \ + __syscall_cancel (__SSC (a1), __SSC (a2), 0, 0, 0, 0, \ + __SYSCALL_CANCEL7_ARG __NR_##name) +#define __SYSCALL_CANCEL3(name, a1, a2, a3) \ + __syscall_cancel (__SSC (a1), __SSC (a2), __SSC (a3), 0, 0, 0, \ + __SYSCALL_CANCEL7_ARG __NR_##name) +#define __SYSCALL_CANCEL4(name, a1, a2, a3, a4) \ + __syscall_cancel (__SSC (a1), __SSC (a2), __SSC (a3), \ + __SSC(a4), 0, 0, __SYSCALL_CANCEL7_ARG __NR_##name) +#define __SYSCALL_CANCEL5(name, a1, a2, a3, a4, a5) \ + __syscall_cancel (__SSC (a1), __SSC (a2), __SSC (a3), __SSC(a4), \ + __SSC (a5), 0, __SYSCALL_CANCEL7_ARG __NR_##name) +#define __SYSCALL_CANCEL6(name, a1, a2, a3, a4, a5, a6) \ + __syscall_cancel (__SSC (a1), __SSC (a2), __SSC (a3), __SSC (a4), \ + __SSC (a5), __SSC (a6), __SYSCALL_CANCEL7_ARG \ + __NR_##name) +#define __SYSCALL_CANCEL7(name, a1, a2, a3, a4, a5, a6, a7) \ + __syscall_cancel (__SSC (a1), __SSC (a2), __SSC (a3), __SSC (a4), \ + __SSC (a5), __SSC (a6), __SSC (a7), __NR_##name) + +#define __SYSCALL_CANCEL_NARGS_X(a,b,c,d,e,f,g,h,n,...) n +#define __SYSCALL_CANCEL_NARGS(...) \ + __SYSCALL_CANCEL_NARGS_X (__VA_ARGS__,7,6,5,4,3,2,1,0,) +#define __SYSCALL_CANCEL_CONCAT_X(a,b) a##b +#define __SYSCALL_CANCEL_CONCAT(a,b) __SYSCALL_CANCEL_CONCAT_X (a, b) +#define __SYSCALL_CANCEL_DISP(b,...) 
\ + __SYSCALL_CANCEL_CONCAT (b,__SYSCALL_CANCEL_NARGS(__VA_ARGS__))(__VA_ARGS__) + +/* Issue a cancellable syscall defined by the first argument plus any other + argument required. If an error occurs, the macro returns -1 and sets + errno accordingly. */ +#define __SYSCALL_CANCEL_CALL(...) \ + __SYSCALL_CANCEL_DISP (__SYSCALL_CANCEL, __VA_ARGS__) + +#define __INTERNAL_SYSCALL_CANCEL0(name) \ + __internal_syscall_cancel (0, 0, 0, 0, 0, 0, __SYSCALL_CANCEL7_ARG \ + __NR_##name) +#define __INTERNAL_SYSCALL_CANCEL1(name, a1) \ + __internal_syscall_cancel (__SSC (a1), 0, 0, 0, 0, 0, \ + __SYSCALL_CANCEL7_ARG __NR_##name) +#define __INTERNAL_SYSCALL_CANCEL2(name, a1, a2) \ + __internal_syscall_cancel (__SSC (a1), __SSC (a2), 0, 0, 0, 0, \ + __SYSCALL_CANCEL7_ARG __NR_##name) +#define __INTERNAL_SYSCALL_CANCEL3(name, a1, a2, a3) \ + __internal_syscall_cancel (__SSC (a1), __SSC (a2), __SSC (a3), 0, \ + 0, 0, __SYSCALL_CANCEL7_ARG __NR_##name) +#define __INTERNAL_SYSCALL_CANCEL4(name, a1, a2, a3, a4) \ + __internal_syscall_cancel (__SSC (a1), __SSC (a2), __SSC (a3), \ + __SSC(a4), 0, 0, \ + __SYSCALL_CANCEL7_ARG __NR_##name) +#define __INTERNAL_SYSCALL_CANCEL5(name, a1, a2, a3, a4, a5) \ + __internal_syscall_cancel (__SSC (a1), __SSC (a2), __SSC (a3), \ + __SSC(a4), __SSC (a5), 0, \ + __SYSCALL_CANCEL7_ARG __NR_##name) +#define __INTERNAL_SYSCALL_CANCEL6(name, a1, a2, a3, a4, a5, a6) \ + __internal_syscall_cancel (__SSC (a1), __SSC (a2), __SSC (a3), \ + __SSC (a4), __SSC (a5), __SSC (a6), \ + __SYSCALL_CANCEL7_ARG __NR_##name) +#define __INTERNAL_SYSCALL_CANCEL7(name, a1, a2, a3, a4, a5, a6, a7) \ + __internal_syscall_cancel (__SSC (a1), __SSC (a2), __SSC (a3), \ + __SSC (a4), __SSC (a5), __SSC (a6), \ + __SSC (a7), __NR_##name) + +/* Issue a cancellable syscall defined by syscall number NAME plus any other + argument required. If an error occurs its value is returned as a negative + number unmodified and errno is not set.
*/ +#define __INTERNAL_SYSCALL_CANCEL_CALL(...) \ + __SYSCALL_CANCEL_DISP (__INTERNAL_SYSCALL_CANCEL, __VA_ARGS__) + +#if IS_IN (rtld) +/* The loader does not need to handle thread cancellation, use direct + syscall instead. */ +# define INTERNAL_SYSCALL_CANCEL(...) INTERNAL_SYSCALL_CALL(__VA_ARGS__) +# define SYSCALL_CANCEL(...) INLINE_SYSCALL_CALL (__VA_ARGS__) +#else +# define INTERNAL_SYSCALL_CANCEL(...) \ + __INTERNAL_SYSCALL_CANCEL_CALL (__VA_ARGS__) +# define SYSCALL_CANCEL(...) \ + __SYSCALL_CANCEL_CALL (__VA_ARGS__) +#endif + +#endif /* __ASSEMBLER__ */ /* Machine-dependent sysdep.h files are expected to define the macro PSEUDO (function_name, syscall_name) to emit assembly code to define the diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile index 097b5a26fc..527c7a5ae8 100644 --- a/sysdeps/unix/sysv/linux/Makefile +++ b/sysdeps/unix/sysv/linux/Makefile @@ -95,6 +95,8 @@ sysdep_routines += \ process_vm_writev \ pselect32 \ readahead \ + sched_getattr \ + sched_setattr \ setfsgid \ setfsuid \ setvmaname \ @@ -206,6 +208,7 @@ tests += \ tst-getauxval \ tst-gettid \ tst-gettid-kill \ + tst-linux-mremap1 \ tst-memfd_create \ tst-misalign-clone \ tst-mlock2 \ @@ -223,9 +226,11 @@ tests += \ tst-process_mrelease \ tst-quota \ tst-rlimit-infinity \ + tst-sched_setattr \ tst-scm_rights \ tst-sigtimedwait \ tst-sync_file_range \ + tst-syscall-restart \ tst-sysconf-iov_max \ tst-sysvmsg-linux \ tst-sysvsem-linux \ @@ -394,6 +399,16 @@ $(objpfx)tst-mount-compile.out: ../sysdeps/unix/sysv/linux/tst-mount-compile.py < /dev/null > $@ 2>&1; $(evaluate-test) $(objpfx)tst-mount-compile.out: $(sysdeps-linux-python-deps) +tests-special += \ + $(objpfx)tst-sched-consts.out \ + # tests-special +$(objpfx)tst-sched-consts.out: ../sysdeps/unix/sysv/linux/tst-sched-consts.py + $(sysdeps-linux-python) \ + ../sysdeps/unix/sysv/linux/tst-sched-consts.py \ + $(sysdeps-linux-python-cc) \ + < /dev/null > $@ 2>&1; $(evaluate-test) 
+$(objpfx)tst-sched-consts.out: $(sysdeps-linux-python-deps) + tst-rseq-disable-ENV = GLIBC_TUNABLES=glibc.pthread.rseq=0 endif # $(subdir) == misc diff --git a/sysdeps/unix/sysv/linux/Versions b/sysdeps/unix/sysv/linux/Versions index 268ba1b6ac..213ff5f1fe 100644 --- a/sysdeps/unix/sysv/linux/Versions +++ b/sysdeps/unix/sysv/linux/Versions @@ -328,6 +328,10 @@ libc { posix_spawnattr_getcgroup_np; posix_spawnattr_setcgroup_np; } + GLIBC_2.41 { + sched_getattr; + sched_setattr; + } GLIBC_PRIVATE { # functions used in other libraries __syscall_rt_sigqueueinfo; diff --git a/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h b/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h index 7ee8a2167a..19b6316cb6 100644 --- a/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h @@ -158,6 +158,7 @@ #define __NR_mq_timedsend 182 #define __NR_mq_unlink 181 #define __NR_mremap 216 +#define __NR_mseal 462 #define __NR_msgctl 187 #define __NR_msgget 186 #define __NR_msgrcv 188 diff --git a/sysdeps/unix/sysv/linux/aarch64/libc.abilist b/sysdeps/unix/sysv/linux/aarch64/libc.abilist index 68eeca1c08..38db77e4f7 100644 --- a/sysdeps/unix/sysv/linux/aarch64/libc.abilist +++ b/sysdeps/unix/sysv/linux/aarch64/libc.abilist @@ -2748,3 +2748,5 @@ GLIBC_2.39 stdc_trailing_zeros_ui F GLIBC_2.39 stdc_trailing_zeros_ul F GLIBC_2.39 stdc_trailing_zeros_ull F GLIBC_2.39 stdc_trailing_zeros_us F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F diff --git a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist index b685106954..98687cae0d 100644 --- a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist +++ b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist @@ -128,3 +128,8 @@ GLIBC_2.40 _ZGVsMxvv_hypot F GLIBC_2.40 _ZGVsMxvv_hypotf F GLIBC_2.40 _ZGVsMxvv_pow F GLIBC_2.40 _ZGVsMxvv_powf F +GLIBC_2.41 _ZGVnN2v_logp1 F +GLIBC_2.41 _ZGVnN2v_logp1f F +GLIBC_2.41 _ZGVnN4v_logp1f F +GLIBC_2.41 _ZGVsMxv_logp1 F +GLIBC_2.41 
_ZGVsMxv_logp1f F diff --git a/sysdeps/unix/sysv/linux/aarch64/syscall_cancel.S b/sysdeps/unix/sysv/linux/aarch64/syscall_cancel.S new file mode 100644 index 0000000000..e91a431b36 --- /dev/null +++ b/sysdeps/unix/sysv/linux/aarch64/syscall_cancel.S @@ -0,0 +1,59 @@ +/* Cancellable syscall wrapper. Linux/AArch64 version. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <descr-const.h> + +/* long int [x0] __syscall_cancel_arch (int *cancelhandling [x0], + long int nr [x1], + long int arg1 [x2], + long int arg2 [x3], + long int arg3 [x4], + long int arg4 [x5], + long int arg5 [x6], + long int arg6 [x7]) */ + +ENTRY (__syscall_cancel_arch) + + .globl __syscall_cancel_arch_start +__syscall_cancel_arch_start: + + /* if (*cancelhandling & CANCELED_BITMASK) + __syscall_do_cancel() */ + ldr w0, [x0] + tbnz w0, TCB_CANCELED_BIT, 1f + + /* Issue a 6 argument syscall, the nr [x1] being the syscall + number. 
*/ + mov x8, x1 + mov x0, x2 + mov x1, x3 + mov x2, x4 + mov x3, x5 + mov x4, x6 + mov x5, x7 + svc 0x0 + + .globl __syscall_cancel_arch_end +__syscall_cancel_arch_end: + ret + +1: + b __syscall_do_cancel + +END (__syscall_cancel_arch) diff --git a/sysdeps/unix/sysv/linux/alpha/arch-syscall.h b/sysdeps/unix/sysv/linux/alpha/arch-syscall.h index 0f4ea7670b..216a5575c6 100644 --- a/sysdeps/unix/sysv/linux/alpha/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/alpha/arch-syscall.h @@ -24,6 +24,7 @@ #define __NR_clock_nanosleep 422 #define __NR_clock_settime 419 #define __NR_clone 312 +#define __NR_clone3 545 #define __NR_close 6 #define __NR_close_range 546 #define __NR_connect 98 @@ -189,6 +190,7 @@ #define __NR_mq_timedsend 434 #define __NR_mq_unlink 433 #define __NR_mremap 341 +#define __NR_mseal 572 #define __NR_msgctl 200 #define __NR_msgget 201 #define __NR_msgrcv 202 diff --git a/sysdeps/unix/sysv/linux/alpha/libc.abilist b/sysdeps/unix/sysv/linux/alpha/libc.abilist index 34c187b721..637bfce9fb 100644 --- a/sysdeps/unix/sysv/linux/alpha/libc.abilist +++ b/sysdeps/unix/sysv/linux/alpha/libc.abilist @@ -3095,6 +3095,8 @@ GLIBC_2.4 wcstold F GLIBC_2.4 wcstold_l F GLIBC_2.4 wprintf F GLIBC_2.4 wscanf F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/alpha/syscall_cancel.S b/sysdeps/unix/sysv/linux/alpha/syscall_cancel.S new file mode 100644 index 0000000000..377eef48be --- /dev/null +++ b/sysdeps/unix/sysv/linux/alpha/syscall_cancel.S @@ -0,0 +1,80 @@ +/* Cancellable syscall wrapper. Linux/alpha version. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <descr-const.h> + +/* long int __syscall_cancel_arch (int *ch, + __syscall_arg_t nr, + __syscall_arg_t arg1, + __syscall_arg_t arg2, + __syscall_arg_t arg3, + __syscall_arg_t arg4, + __syscall_arg_t arg5, + __syscall_arg_t arg6) */ + + .set noreorder + .set noat + .set nomacro +ENTRY (__syscall_cancel_arch) + .frame sp, 16, ra, 0 + .mask 0x4000000,-16 + cfi_startproc + ldah gp, 0(t12) + lda gp, 0(gp) + lda sp, -16(sp) + cfi_def_cfa_offset (16) + mov a1, v0 + stq ra, 0(sp) + cfi_offset (26, -16) + .prologue 1 + + .global __syscall_cancel_arch_start +__syscall_cancel_arch_start: + ldl t0, 0(a0) + addl zero, t0, t0 + /* if (*ch & CANCELED_BITMASK) */ + and t0, TCB_CANCELED_BITMASK, t0 + bne t0, 1f + mov a2, a0 + mov a3, a1 + mov a4, a2 + ldq a4, 16(sp) + mov a5, a3 + ldq a5, 24(sp) + .set macro + callsys + .set nomacro + + .global __syscall_cancel_arch_end +__syscall_cancel_arch_end: + subq zero, v0, t0 + ldq ra, 0(sp) + cmovne a3, t0, v0 + lda sp, 16(sp) + cfi_remember_state + cfi_restore (26) + cfi_def_cfa_offset (0) + ret zero, (ra), 1 + .align 4 +1: + cfi_restore_state + ldq t12, __syscall_do_cancel(gp) !literal!2 + jsr ra, (t12), __syscall_do_cancel !lituse_jsr!2 + cfi_endproc +END (__syscall_cancel_arch) diff --git a/sysdeps/unix/sysv/linux/arc/Makefile 
b/sysdeps/unix/sysv/linux/arc/Makefile index eca9a9fd3a..dd3da52c39 100644 --- a/sysdeps/unix/sysv/linux/arc/Makefile +++ b/sysdeps/unix/sysv/linux/arc/Makefile @@ -11,12 +11,3 @@ ifeq ($(subdir),misc) sysdep_headers += sys/cachectl.h sysdep_routines += cacheflush endif - -abi-variants := arcle arcbe - -ifeq (,$(filter $(default-abi),$(abi-variants))) -$(error Unknown ABI $(default-abi), must be one of $(abi-variants)) -endif - -abi-arcle-condition := !defined __BIG_ENDIAN__ -abi-arcbe-condition := defined __BIG_ENDIAN__ diff --git a/sysdeps/unix/sysv/linux/arc/arch-syscall.h b/sysdeps/unix/sysv/linux/arc/arch-syscall.h index 90359482a8..ea581b0a6d 100644 --- a/sysdeps/unix/sysv/linux/arc/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/arc/arch-syscall.h @@ -161,6 +161,7 @@ #define __NR_mq_timedsend_time64 418 #define __NR_mq_unlink 181 #define __NR_mremap 216 +#define __NR_mseal 462 #define __NR_msgctl 187 #define __NR_msgget 186 #define __NR_msgrcv 188 diff --git a/sysdeps/unix/sysv/linux/arc/libc.abilist b/sysdeps/unix/sysv/linux/arc/libc.abilist index 916c18ea94..4a305cf730 100644 --- a/sysdeps/unix/sysv/linux/arc/libc.abilist +++ b/sysdeps/unix/sysv/linux/arc/libc.abilist @@ -2509,3 +2509,5 @@ GLIBC_2.39 stdc_trailing_zeros_ui F GLIBC_2.39 stdc_trailing_zeros_ul F GLIBC_2.39 stdc_trailing_zeros_ull F GLIBC_2.39 stdc_trailing_zeros_us F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F diff --git a/sysdeps/unix/sysv/linux/arc/shlib-versions b/sysdeps/unix/sysv/linux/arc/shlib-versions index 343c0a0450..62c1bd57b3 100644 --- a/sysdeps/unix/sysv/linux/arc/shlib-versions +++ b/sysdeps/unix/sysv/linux/arc/shlib-versions @@ -1,7 +1,3 @@ DEFAULT GLIBC_2.32 -%ifdef HAVE_ARC_BE -ld=ld-linux-arceb.so.2 -%else ld=ld-linux-arc.so.2 -%endif diff --git a/sysdeps/unix/sysv/linux/arc/syscall_cancel.S b/sysdeps/unix/sysv/linux/arc/syscall_cancel.S new file mode 100644 index 0000000000..fa02af4163 --- /dev/null +++ b/sysdeps/unix/sysv/linux/arc/syscall_cancel.S @@ -0,0 +1,56 
@@ +/* Cancellable syscall wrapper. Linux/ARC version. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <descr-const.h> + +/* long int __syscall_cancel_arch (int *cancelhandling, + __syscall_arg_t nr, + __syscall_arg_t arg1, + __syscall_arg_t arg2, + __syscall_arg_t arg3, + __syscall_arg_t arg4, + __syscall_arg_t arg5, + __syscall_arg_t arg6) */ + +ENTRY (__syscall_cancel_arch) + + .globl __syscall_cancel_arch_start +__syscall_cancel_arch_start: + ld_s r12,[r0] + bbit1 r12, TCB_CANCELED_BITMASK, 1f + mov_s r8, r1 + mov_s r0, r2 + mov_s r1, r3 + mov_s r2, r4 + mov_s r3, r5 + mov_s r4, r6 + mov_s r5, r7 + trap_s 0 + + .globl __syscall_cancel_arch_end +__syscall_cancel_arch_end: + j_s [blink] + + .align 4 +1: push_s blink + cfi_def_cfa_offset (4) + cfi_offset (31, -4) + bl @__syscall_do_cancel + +END (__syscall_cancel_arch) diff --git a/sysdeps/unix/sysv/linux/arm/arch-syscall.h b/sysdeps/unix/sysv/linux/arm/arch-syscall.h index 4930167a03..2809f52f94 100644 --- a/sysdeps/unix/sysv/linux/arm/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/arm/arch-syscall.h @@ -205,6 +205,7 @@ #define __NR_mq_timedsend_time64 418 #define __NR_mq_unlink 275 #define __NR_mremap 163 +#define __NR_mseal 462 #define __NR_msgctl 304 #define __NR_msgget 
303 #define __NR_msgrcv 302 diff --git a/sysdeps/unix/sysv/linux/arm/be/libc.abilist b/sysdeps/unix/sysv/linux/arm/be/libc.abilist index ea95de282a..1d54f71b14 100644 --- a/sysdeps/unix/sysv/linux/arm/be/libc.abilist +++ b/sysdeps/unix/sysv/linux/arm/be/libc.abilist @@ -2801,6 +2801,8 @@ GLIBC_2.4 xdrstdio_create F GLIBC_2.4 xencrypt F GLIBC_2.4 xprt_register F GLIBC_2.4 xprt_unregister F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/arm/le/libc.abilist b/sysdeps/unix/sysv/linux/arm/le/libc.abilist index 1cdbc983e1..ff7e8bc40b 100644 --- a/sysdeps/unix/sysv/linux/arm/le/libc.abilist +++ b/sysdeps/unix/sysv/linux/arm/le/libc.abilist @@ -2798,6 +2798,8 @@ GLIBC_2.4 xdrstdio_create F GLIBC_2.4 xencrypt F GLIBC_2.4 xprt_register F GLIBC_2.4 xprt_unregister F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/arm/syscall_cancel.S b/sysdeps/unix/sysv/linux/arm/syscall_cancel.S new file mode 100644 index 0000000000..6b899306e3 --- /dev/null +++ b/sysdeps/unix/sysv/linux/arm/syscall_cancel.S @@ -0,0 +1,78 @@ +/* Cancellable syscall wrapper. Linux/arm version. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <descr-const.h> + +/* long int [r0] __syscall_cancel_arch (int *cancelhandling [r0], + long int nr [r1], + long int arg1 [r2], + long int arg2 [r3], + long int arg3 [SP], + long int arg4 [SP+4], + long int arg5 [SP+8], + long int arg6 [SP+12]) */ + + .syntax unified + +ENTRY (__syscall_cancel_arch) + .fnstart + mov ip, sp + stmfd sp!, {r4, r5, r6, r7, lr} + .save {r4, r5, r6, r7, lr} + + cfi_adjust_cfa_offset (20) + cfi_rel_offset (r4, 0) + cfi_rel_offset (r5, 4) + cfi_rel_offset (r6, 8) + cfi_rel_offset (r7, 12) + cfi_rel_offset (lr, 16) + + .globl __syscall_cancel_arch_start +__syscall_cancel_arch_start: + + /* if (*cancelhandling & CANCELED_BITMASK) + __syscall_do_cancel() */ + ldr r0, [r0] + tst r0, #TCB_CANCELED_BITMASK + bne 1f + + /* Issue a 6 argument syscall, the nr [r1] being the syscall + number. */ + mov r7, r1 + mov r0, r2 + mov r1, r3 + ldmfd ip, {r2, r3, r4, r5, r6} + svc 0x0 + + .globl __syscall_cancel_arch_end +__syscall_cancel_arch_end: + ldmfd sp!, {r4, r5, r6, r7, lr} + cfi_adjust_cfa_offset (-20) + cfi_restore (r4) + cfi_restore (r5) + cfi_restore (r6) + cfi_restore (r7) + cfi_restore (lr) + BX (lr) + +1: + ldmfd sp!, {r4, r5, r6, r7, lr} + b __syscall_do_cancel + .fnend +END (__syscall_cancel_arch) diff --git a/sysdeps/unix/sysv/linux/bits/fcntl-linux.h b/sysdeps/unix/sysv/linux/bits/fcntl-linux.h index 628612b885..61c100d984 100644 --- a/sysdeps/unix/sysv/linux/bits/fcntl-linux.h +++ b/sysdeps/unix/sysv/linux/bits/fcntl-linux.h @@ -200,6 +200,7 @@ # define F_SETLEASE 1024 /* Set a lease. */ # define F_GETLEASE 1025 /* Enquire what lease is active. */ # define F_NOTIFY 1026 /* Request notifications on a directory. */ +# define F_DUPFD_QUERY 1027 /* Compare two file descriptors for sameness. */ # define F_SETPIPE_SZ 1031 /* Set pipe page size array. 
*/ # define F_GETPIPE_SZ 1032 /* Set pipe page size array. */ # define F_ADD_SEALS 1033 /* Add seals to file. */ diff --git a/sysdeps/unix/sysv/linux/bits/mman-linux.h b/sysdeps/unix/sysv/linux/bits/mman-linux.h index 522333c50a..161a885096 100644 --- a/sysdeps/unix/sysv/linux/bits/mman-linux.h +++ b/sysdeps/unix/sysv/linux/bits/mman-linux.h @@ -43,6 +43,7 @@ #define MAP_PRIVATE 0x02 /* Changes are private. */ #define MAP_SHARED_VALIDATE 0x03 /* Share changes and validate extension flags. */ +#define MAP_DROPPABLE 0x08 /* Zero memory under memory pressure. */ #define MAP_TYPE 0x0f /* Mask for type of mapping. */ /* Other flags. */ diff --git a/sysdeps/unix/sysv/linux/bits/sched.h b/sysdeps/unix/sysv/linux/bits/sched.h index a53e1362a0..a02cb69de7 100644 --- a/sysdeps/unix/sysv/linux/bits/sched.h +++ b/sysdeps/unix/sysv/linux/bits/sched.h @@ -29,15 +29,58 @@ #define SCHED_FIFO 1 #define SCHED_RR 2 #ifdef __USE_GNU +# define SCHED_NORMAL SCHED_OTHER # define SCHED_BATCH 3 # define SCHED_ISO 4 # define SCHED_IDLE 5 # define SCHED_DEADLINE 6 +/* Flags that can be used in policy values. */ # define SCHED_RESET_ON_FORK 0x40000000 -#endif -#ifdef __USE_GNU +/* Flags for the sched_flags field in struct sched_attr. */ +#define SCHED_FLAG_RESET_ON_FORK 0x01 +#define SCHED_FLAG_RECLAIM 0x02 +#define SCHED_FLAG_DL_OVERRUN 0x04 +#define SCHED_FLAG_KEEP_POLICY 0x08 +#define SCHED_FLAG_KEEP_PARAMS 0x10 +#define SCHED_FLAG_UTIL_CLAMP_MIN 0x20 +#define SCHED_FLAG_UTIL_CLAMP_MAX 0x40 + +/* Combinations of sched_flags fields. */ +#define SCHED_FLAG_KEEP_ALL 0x18 +#define SCHED_FLAG_UTIL_CLAMP 0x60 + +/* Use "" to work around incorrect macro expansion of the + __has_include argument (GCC PR 80005). */ +# ifdef __has_include +# if __has_include ("linux/sched/types.h") +/* Some older Linux versions defined sched_param in <linux/sched/types.h>. 
*/ +# define sched_param __glibc_mask_sched_param +# include <linux/sched/types.h> +# undef sched_param +# endif +# endif +# ifndef SCHED_ATTR_SIZE_VER0 +# include <linux/types.h> +# define SCHED_ATTR_SIZE_VER0 48 +# define SCHED_ATTR_SIZE_VER1 56 +struct sched_attr +{ + __u32 size; + __u32 sched_policy; + __u64 sched_flags; + __s32 sched_nice; + __u32 sched_priority; + __u64 sched_runtime; + __u64 sched_deadline; + __u64 sched_period; + __u32 sched_util_min; + __u32 sched_util_max; + /* Additional fields may be added at the end. */ +}; +# endif /* !SCHED_ATTR_SIZE_VER0 */ + /* Cloning flags. */ # define CSIGNAL 0x000000ff /* Signal mask to be sent at exit. */ # define CLONE_VM 0x00000100 /* Set if VM shared between processes. */ @@ -97,6 +140,17 @@ extern int getcpu (unsigned int *, unsigned int *) __THROW; /* Switch process to namespace of type NSTYPE indicated by FD. */ extern int setns (int __fd, int __nstype) __THROW; + +/* Apply the scheduling attributes from *ATTR to the process or thread TID. */ +int sched_setattr (pid_t tid, struct sched_attr *attr, unsigned int flags) + __THROW __nonnull ((2)); + +/* Obtain the scheduling attributes of the process or thread TID and + store it in *ATTR. */ +int sched_getattr (pid_t tid, struct sched_attr *attr, unsigned int size, + unsigned int flags) + __THROW __nonnull ((2)) __attr_access ((__write_only__, 2, 3)); + #endif __END_DECLS diff --git a/sysdeps/unix/sysv/linux/bits/uio-ext.h b/sysdeps/unix/sysv/linux/bits/uio-ext.h index ead7a09156..85ed21bac5 100644 --- a/sysdeps/unix/sysv/linux/bits/uio-ext.h +++ b/sysdeps/unix/sysv/linux/bits/uio-ext.h @@ -48,6 +48,8 @@ extern ssize_t process_vm_writev (pid_t __pid, const struct iovec *__lvec, #define RWF_NOWAIT 0x00000008 /* per-IO nonblocking mode. */ #define RWF_APPEND 0x00000010 /* per-IO O_APPEND. */ #define RWF_NOAPPEND 0x00000020 /* per-IO negation of O_APPEND */ +#define RWF_ATOMIC 0x00000040 /* Write is to be issued with torn-write + prevention. 
*/ __END_DECLS diff --git a/sysdeps/unix/sysv/linux/csky/arch-syscall.h b/sysdeps/unix/sysv/linux/csky/arch-syscall.h index 3f16a29f57..ede3551a00 100644 --- a/sysdeps/unix/sysv/linux/csky/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/csky/arch-syscall.h @@ -168,6 +168,7 @@ #define __NR_mq_timedsend_time64 418 #define __NR_mq_unlink 181 #define __NR_mremap 216 +#define __NR_mseal 462 #define __NR_msgctl 187 #define __NR_msgget 186 #define __NR_msgrcv 188 @@ -301,7 +302,7 @@ #define __NR_swapon 224 #define __NR_symlinkat 36 #define __NR_sync 81 -#define __NR_sync_file_range 84 +#define __NR_sync_file_range2 84 #define __NR_syncfs 267 #define __NR_sysinfo 179 #define __NR_syslog 116 diff --git a/sysdeps/unix/sysv/linux/csky/libc.abilist b/sysdeps/unix/sysv/linux/csky/libc.abilist index 96d45961e2..c3ed65467d 100644 --- a/sysdeps/unix/sysv/linux/csky/libc.abilist +++ b/sysdeps/unix/sysv/linux/csky/libc.abilist @@ -2785,3 +2785,5 @@ GLIBC_2.39 stdc_trailing_zeros_ui F GLIBC_2.39 stdc_trailing_zeros_ul F GLIBC_2.39 stdc_trailing_zeros_ull F GLIBC_2.39 stdc_trailing_zeros_us F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F diff --git a/sysdeps/unix/sysv/linux/csky/syscall_cancel.S b/sysdeps/unix/sysv/linux/csky/syscall_cancel.S new file mode 100644 index 0000000000..2989765f8c --- /dev/null +++ b/sysdeps/unix/sysv/linux/csky/syscall_cancel.S @@ -0,0 +1,114 @@ +/* Cancellable syscall wrapper. Linux/csky version. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <descr-const.h> + +/* long int __syscall_cancel_arch (int *cancelhandling, + __syscall_arg_t nr, + __syscall_arg_t arg1, + __syscall_arg_t arg2, + __syscall_arg_t arg3, + __syscall_arg_t arg4, + __syscall_arg_t arg5, + __syscall_arg_t arg6) */ + +#ifdef SHARED +# define STACK_ADJ 4 +#else +# define STACK_ADJ 0 +#endif + +ENTRY (__syscall_cancel_arch) + subi sp, sp, 16 + STACK_ADJ + cfi_def_cfa_offset (16 + STACK_ADJ) +#ifdef SHARED + st.w gb, (sp, 16) + lrw t1, 1f@GOTPC + cfi_offset (gb, -4) + grs gb, 1f +1: +#endif + st.w lr, (sp, 12) + st.w l3, (sp, 8) + st.w l1, (sp, 4) + st.w l0, (sp, 0) +#ifdef SHARED + addu gb, gb, t1 +#endif + subi sp, sp, 16 + cfi_def_cfa_offset (32 + STACK_ADJ) + cfi_offset (lr, -( 4 + STACK_ADJ)) + cfi_offset (l3, -( 8 + STACK_ADJ)) + cfi_offset (l1, -(12 + STACK_ADJ)) + cfi_offset (l0, -(16 + STACK_ADJ)) + + mov l3, a1 + mov a1, a3 + ld.w a3, (sp, 32 + STACK_ADJ) + st.w a3, (sp, 0) + ld.w a3, (sp, 36 + STACK_ADJ) + st.w a3, (sp, 4) + ld.w a3, (sp, 40 + STACK_ADJ) + st.w a3, (sp, 8) + ld.w a3, (sp, 44 + STACK_ADJ) + st.w a3, (sp, 12) + + .globl __syscall_cancel_arch_start +__syscall_cancel_arch_start: + ld.w t0, (a0, 0) + andi t0, t0, TCB_CANCELED_BITMASK + jbnez t0, 2f + mov a0, a2 + ld.w a3, (sp, 4) + ld.w a2, (sp, 0) + ld.w l0, (sp, 8) + ld.w l1, (sp, 12) + trap 0 + + .globl __syscall_cancel_arch_end +__syscall_cancel_arch_end: + addi sp, sp, 16 + cfi_remember_state + cfi_def_cfa_offset (16 + STACK_ADJ) +#ifdef SHARED + ld.w gb, (sp, 16) + cfi_restore (gb) +#endif + ld.w lr, (sp, 12) + cfi_restore (lr) + ld.w l3, (sp, 8) + cfi_restore (l3) + ld.w l1, (sp, 4) + cfi_restore (l1) + ld.w l0, (sp, 0) + cfi_restore (l0) + addi sp, sp, 16 + cfi_def_cfa_offset (0) + rts + +2: + cfi_restore_state 
+#ifdef SHARED + lrw a3, __syscall_do_cancel@GOTOFF + addu a3, a3, gb + jsr a3 +#else + jbsr __syscall_do_cancel +#endif +END (__syscall_cancel_arch) diff --git a/sysdeps/unix/sysv/linux/getdents64.c b/sysdeps/unix/sysv/linux/getdents64.c index 227fbf21ae..795bd935f0 100644 --- a/sysdeps/unix/sysv/linux/getdents64.c +++ b/sysdeps/unix/sysv/linux/getdents64.c @@ -33,100 +33,3 @@ __getdents64 (int fd, void *buf, size_t nbytes) } libc_hidden_def (__getdents64) weak_alias (__getdents64, getdents64) - -#if _DIRENT_MATCHES_DIRENT64 -strong_alias (__getdents64, __getdents) -#else -# include <shlib-compat.h> - -# if SHLIB_COMPAT(libc, GLIBC_2_1, GLIBC_2_2) -# include <olddirent.h> -# include <unistd.h> - -static ssize_t -handle_overflow (int fd, __off64_t offset, ssize_t count) -{ - /* If this is the first entry in the buffer, we can report the - error. */ - if (offset == 0) - { - __set_errno (EOVERFLOW); - return -1; - } - - /* Otherwise, seek to the overflowing entry, so that the next call - will report the error, and return the data read so far. */ - if (__lseek64 (fd, offset, SEEK_SET) != 0) - return -1; - return count; -} - -ssize_t -__old_getdents64 (int fd, char *buf, size_t nbytes) -{ - /* We do not move the individual directory entries. This is only - possible if the target type (struct __old_dirent64) is smaller - than the source type. */ - _Static_assert (offsetof (struct __old_dirent64, d_name) - <= offsetof (struct dirent64, d_name), - "__old_dirent64 is larger than dirent64"); - _Static_assert (__alignof__ (struct __old_dirent64) - <= __alignof__ (struct dirent64), - "alignment of __old_dirent64 is larger than dirent64"); - - ssize_t retval = INLINE_SYSCALL_CALL (getdents64, fd, buf, nbytes); - if (retval > 0) - { - /* This is the marker for the first entry. Offset 0 is reserved - for the first entry (see rewinddir). Here, we use it as a - marker for the first entry in the buffer. 
We never actually - seek to offset 0 because handle_overflow reports the error - directly, so it does not matter that the offset is incorrect - if entries have been read from the descriptor before (so that - the descriptor is not actually at offset 0). */ - __off64_t previous_offset = 0; - - char *p = buf; - char *end = buf + retval; - while (p < end) - { - struct dirent64 *source = (struct dirent64 *) p; - - /* Copy out the fixed-size data. */ - __ino_t ino = source->d_ino; - __off64_t offset = source->d_off; - unsigned int reclen = source->d_reclen; - unsigned char type = source->d_type; - - /* Check for ino_t overflow. */ - if (__glibc_unlikely (ino != source->d_ino)) - return handle_overflow (fd, previous_offset, p - buf); - - /* Convert to the target layout. Use a separate struct and - memcpy to side-step aliasing issues. */ - struct __old_dirent64 result; - result.d_ino = ino; - result.d_off = offset; - result.d_reclen = reclen; - result.d_type = type; - - /* Write the fixed-sized part of the result to the - buffer. */ - size_t result_name_offset = offsetof (struct __old_dirent64, d_name); - memcpy (p, &result, result_name_offset); - - /* Adjust the position of the name if necessary. Copy - everything until the end of the record, including the - terminating NUL byte. 
*/ - if (result_name_offset != offsetof (struct dirent64, d_name)) - memmove (p + result_name_offset, source->d_name, - reclen - offsetof (struct dirent64, d_name)); - - p += reclen; - previous_offset = offset; - } - } - return retval; -} -# endif /* SHLIB_COMPAT(libc, GLIBC_2_1, GLIBC_2_2) */ -#endif /* _DIRENT_MATCHES_DIRENT64 */ diff --git a/sysdeps/unix/sysv/linux/gettimeofday.c b/sysdeps/unix/sysv/linux/gettimeofday.c index 7ab147c614..5ca8916611 100644 --- a/sysdeps/unix/sysv/linux/gettimeofday.c +++ b/sysdeps/unix/sysv/linux/gettimeofday.c @@ -37,11 +37,11 @@ __gettimeofday_syscall (struct timeval *restrict tv, void *restrict tz) } # undef INIT_ARCH -# define INIT_ARCH() \ - void *vdso_gettimeofday = dl_vdso_vsym (HAVE_GETTIMEOFDAY_VSYSCALL) +# define INIT_ARCH() libc_ifunc (__gettimeofday, - vdso_gettimeofday ? VDSO_IFUNC_RET (vdso_gettimeofday) - : (void *) __gettimeofday_syscall) + GLRO(dl_vdso_gettimeofday) != NULL + ? VDSO_IFUNC_RET (GLRO(dl_vdso_gettimeofday)) + : (void *) __gettimeofday_syscall) # else int diff --git a/sysdeps/unix/sysv/linux/hppa/arch-syscall.h b/sysdeps/unix/sysv/linux/hppa/arch-syscall.h index a1b2c819d6..08b153f2cc 100644 --- a/sysdeps/unix/sysv/linux/hppa/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/hppa/arch-syscall.h @@ -197,6 +197,7 @@ #define __NR_mq_timedsend_time64 418 #define __NR_mq_unlink 230 #define __NR_mremap 163 +#define __NR_mseal 462 #define __NR_msgctl 191 #define __NR_msgget 190 #define __NR_msgrcv 189 diff --git a/sysdeps/unix/sysv/linux/hppa/libc.abilist b/sysdeps/unix/sysv/linux/hppa/libc.abilist index fbcd60c2b3..8de7644a59 100644 --- a/sysdeps/unix/sysv/linux/hppa/libc.abilist +++ b/sysdeps/unix/sysv/linux/hppa/libc.abilist @@ -2821,6 +2821,8 @@ GLIBC_2.4 sys_errlist D 0x400 GLIBC_2.4 sys_nerr D 0x4 GLIBC_2.4 unlinkat F GLIBC_2.4 unshare F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git 
a/sysdeps/unix/sysv/linux/hppa/syscall_cancel.S b/sysdeps/unix/sysv/linux/hppa/syscall_cancel.S new file mode 100644 index 0000000000..b9c19747ea --- /dev/null +++ b/sysdeps/unix/sysv/linux/hppa/syscall_cancel.S @@ -0,0 +1,81 @@ +/* Cancellable syscall wrapper. Linux/hppa version. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <descr-const.h> + +/* long int __syscall_cancel_arch (int *cancelhandling, + long int nr, + long int arg1, + long int arg2, + long int arg3, + long int arg4, + long int arg5, + long int arg6) */ + + .text +ENTRY(__syscall_cancel_arch) + stw %r2,-20(%r30) + ldo 128(%r30),%r30 + cfi_def_cfa_offset (-128) + cfi_offset (2, -20) + ldw -180(%r30),%r28 + copy %r26,%r20 + stw %r28,-108(%r30) + ldw -184(%r30),%r28 + copy %r24,%r26 + stw %r28,-112(%r30) + ldw -188(%r30),%r28 + stw %r28,-116(%r30) + ldw -192(%r30),%r28 + stw %r4,-104(%r30) + stw %r28,-120(%r30) + copy %r25,%r28 + copy %r23,%r25 +#ifdef __PIC__ + stw %r19,-32(%r30) +#endif + cfi_offset (4, 24) + + .global __syscall_cancel_arch_start +__syscall_cancel_arch_start: + ldw 0(%r20),%r20 + bb,< %r20,31-TCB_CANCELED_BIT,1f + ldw -120(%r30),%r21 + ldw -116(%r30),%r22 + ldw -112(%r30),%r23 + ldw -108(%r30),%r24 + copy %r19, %r4 + ble 0x100(%sr2, %r0) + + .global __syscall_cancel_arch_end +__syscall_cancel_arch_end: + + copy %r28,%r20 + copy %r4,%r19 + + ldw -148(%r30),%r2 + ldw -104(%r30),%r4 + bv %r0(%r2) + ldo -128(%r30),%r30 +1: + bl __syscall_do_cancel,%r2 + nop + nop + +END(__syscall_cancel_arch) diff --git a/sysdeps/unix/sysv/linux/i386/arch-syscall.h b/sysdeps/unix/sysv/linux/i386/arch-syscall.h index cc775432d6..500ca1ec70 100644 --- a/sysdeps/unix/sysv/linux/i386/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/i386/arch-syscall.h @@ -222,6 +222,7 @@ #define __NR_mq_timedsend_time64 418 #define __NR_mq_unlink 278 #define __NR_mremap 163 +#define __NR_mseal 462 #define __NR_msgctl 402 #define __NR_msgget 399 #define __NR_msgrcv 401 diff --git a/sysdeps/unix/sysv/linux/i386/libc.abilist b/sysdeps/unix/sysv/linux/i386/libc.abilist index c989b433c0..4fedf775d4 100644 --- a/sysdeps/unix/sysv/linux/i386/libc.abilist +++ b/sysdeps/unix/sysv/linux/i386/libc.abilist @@ -3005,6 +3005,8 @@ GLIBC_2.4 sys_errlist D 0x210 GLIBC_2.4 sys_nerr D 0x4 GLIBC_2.4 unlinkat F GLIBC_2.4 unshare F 
+GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/i386/syscall_cancel.S b/sysdeps/unix/sysv/linux/i386/syscall_cancel.S new file mode 100644 index 0000000000..46fb746da0 --- /dev/null +++ b/sysdeps/unix/sysv/linux/i386/syscall_cancel.S @@ -0,0 +1,104 @@ +/* Cancellable syscall wrapper. Linux/i686 version. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <descr-const.h> + +/* long int [eax] __syscall_cancel_arch (int *cancelhandling [SP], + long int nr [SP+4], + long int arg1 [SP+8], + long int arg2 [SP+12], + long int arg3 [SP+16], + long int arg4 [SP+20], + long int arg5 [SP+24], + long int arg6 [SP+28]) */ + +ENTRY (__syscall_cancel_arch) + pushl %ebp + cfi_def_cfa_offset (8) + cfi_offset (ebp, -8) + pushl %edi + cfi_def_cfa_offset (12) + cfi_offset (edi, -12) + pushl %esi + cfi_def_cfa_offset (16) + cfi_offset (esi, -16) + pushl %ebx + cfi_def_cfa_offset (20) + cfi_offset (ebx, -20) + + .global __syscall_cancel_arch_start +__syscall_cancel_arch_start: + + /* if (*cancelhandling & CANCELED_BITMASK) + __syscall_do_cancel() */ + movl 20(%esp), %eax + testb $TCB_CANCELED_BITMASK, (%eax) + jne 1f + + /* Issue a 6 argument syscall, the nr [%eax] being the syscall + number. */ + movl 24(%esp), %eax + movl 28(%esp), %ebx + movl 32(%esp), %ecx + movl 36(%esp), %edx + movl 40(%esp), %esi + movl 44(%esp), %edi + movl 48(%esp), %ebp + + /* We can not use the vDSO helper for syscall (__kernel_vsyscall) + because the returned PC from kernel will point to the vDSO page + instead of the expected __syscall_cancel_arch_{start,end} + marks. */ + int $0x80 + + .global __syscall_cancel_arch_end +__syscall_cancel_arch_end: + + popl %ebx + cfi_restore (ebx) + cfi_def_cfa_offset (16) + popl %esi + cfi_restore (esi) + cfi_def_cfa_offset (12) + popl %edi + cfi_restore (edi) + cfi_def_cfa_offset (8) + popl %ebp + cfi_restore (ebp) + cfi_def_cfa_offset (4) + ret + +1: + /* Although the __syscall_do_cancel do not return, we need to stack + being set correctly for unwind. 
*/ + popl %ebx + cfi_restore (ebx) + cfi_def_cfa_offset (16) + popl %esi + cfi_restore (esi) + cfi_def_cfa_offset (12) + popl %edi + cfi_restore (edi) + cfi_def_cfa_offset (8) + popl %ebp + cfi_restore (ebp) + cfi_def_cfa_offset (4) + jmp __syscall_do_cancel + +END (__syscall_cancel_arch) diff --git a/sysdeps/unix/sysv/linux/include/bits/mman-shared.h b/sysdeps/unix/sysv/linux/include/bits/mman-shared.h new file mode 100644 index 0000000000..7c14b5df3c --- /dev/null +++ b/sysdeps/unix/sysv/linux/include/bits/mman-shared.h @@ -0,0 +1,16 @@ +#include <sysdeps/unix/sysv/linux/bits/mman-shared.h> + +#ifndef _ISOMAC + +extern __typeof (pkey_alloc) __pkey_alloc; +libc_hidden_proto (__pkey_alloc) +extern __typeof (pkey_free) __pkey_free; +libc_hidden_proto (__pkey_free) +extern __typeof (pkey_mprotect) __pkey_mprotect; +libc_hidden_proto (__pkey_mprotect) +extern __typeof (pkey_get) __pkey_get; +libc_hidden_proto (__pkey_get) +extern __typeof (pkey_set) __pkey_set; +libc_hidden_proto (__pkey_set) + +#endif diff --git a/sysdeps/unix/sysv/linux/internal-signals.h b/sysdeps/unix/sysv/linux/internal-signals.h index a6fae59aaa..6e3a3d7692 100644 --- a/sysdeps/unix/sysv/linux/internal-signals.h +++ b/sysdeps/unix/sysv/linux/internal-signals.h @@ -90,6 +90,15 @@ internal_signal_restore_set (const internal_sigset_t *set) __NSIG_BYTES); } +static inline void +internal_signal_unblock_signal (int sig) +{ + internal_sigset_t set; + internal_sigemptyset (&set); + internal_sigaddset (&set, sig); + INTERNAL_SYSCALL_CALL (rt_sigprocmask, SIG_UNBLOCK, &set, NULL, + __NSIG_BYTES); +} /* It is used on timer_create code directly on sigwaitinfo call, so it can not use the internal_sigset_t definitions. 
*/ diff --git a/sysdeps/unix/sysv/linux/internal-sigset.h b/sysdeps/unix/sysv/linux/internal-sigset.h index 5d7020b42d..4b19affd75 100644 --- a/sysdeps/unix/sysv/linux/internal-sigset.h +++ b/sysdeps/unix/sysv/linux/internal-sigset.h @@ -21,7 +21,7 @@ #include <sigsetops.h> -typedef struct +typedef struct _internal_sigset_t { unsigned long int __val[__NSIG_WORDS]; } internal_sigset_t; diff --git a/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h b/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h index 56bb08718a..8bb82448a7 100644 --- a/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h @@ -155,6 +155,7 @@ #define __NR_mq_timedsend 182 #define __NR_mq_unlink 181 #define __NR_mremap 216 +#define __NR_mseal 462 #define __NR_msgctl 187 #define __NR_msgget 186 #define __NR_msgrcv 188 diff --git a/sysdeps/unix/sysv/linux/loongarch/fixup-asm-unistd.h b/sysdeps/unix/sysv/linux/loongarch/fixup-asm-unistd.h new file mode 100644 index 0000000000..0062756b5c --- /dev/null +++ b/sysdeps/unix/sysv/linux/loongarch/fixup-asm-unistd.h @@ -0,0 +1,21 @@ +/* Regularize <asm/unistd.h> definitions. LoongArch version. + Copyright (C) 2024 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* To avoid the messy usage of the fstat, newfstatat, and statx system calls, we +only use statx. 
*/ +#undef __NR_fstat +#undef __NR_newfstatat diff --git a/sysdeps/unix/sysv/linux/loongarch/lp64/libc.abilist b/sysdeps/unix/sysv/linux/loongarch/lp64/libc.abilist index 0023ec1fa1..0024282289 100644 --- a/sysdeps/unix/sysv/linux/loongarch/lp64/libc.abilist +++ b/sysdeps/unix/sysv/linux/loongarch/lp64/libc.abilist @@ -2269,3 +2269,5 @@ GLIBC_2.39 stdc_trailing_zeros_ui F GLIBC_2.39 stdc_trailing_zeros_ul F GLIBC_2.39 stdc_trailing_zeros_ull F GLIBC_2.39 stdc_trailing_zeros_us F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F diff --git a/sysdeps/unix/sysv/linux/loongarch/syscall_cancel.S b/sysdeps/unix/sysv/linux/loongarch/syscall_cancel.S new file mode 100644 index 0000000000..edea9632ff --- /dev/null +++ b/sysdeps/unix/sysv/linux/loongarch/syscall_cancel.S @@ -0,0 +1,50 @@ +/* Cancellable syscall wrapper. Linux/loongarch version. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <descr-const.h> + +ENTRY (__syscall_cancel_arch) + + .global __syscall_cancel_arch_start +__syscall_cancel_arch_start: + + /* if (*cancelhandling & CANCELED_BITMASK) + __syscall_do_cancel() */ + ld.w t0, a0, 0 + andi t0, t0, TCB_CANCELED_BITMASK + bnez t0, 1f + + /* Issue a 6 argument syscall. 
*/ + move t1, a1 + move a0, a2 + move a1, a3 + move a2, a4 + move a3, a5 + move a4, a6 + move a5, a7 + move a7, t1 + syscall 0 + + .global __syscall_cancel_arch_end +__syscall_cancel_arch_end: + jr ra +1: + b __syscall_do_cancel + +END (__syscall_cancel_arch) diff --git a/sysdeps/unix/sysv/linux/m68k/arch-syscall.h b/sysdeps/unix/sysv/linux/m68k/arch-syscall.h index 79f277dd5b..4ab34f6228 100644 --- a/sysdeps/unix/sysv/linux/m68k/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/m68k/arch-syscall.h @@ -213,6 +213,7 @@ #define __NR_mq_timedsend_time64 418 #define __NR_mq_unlink 272 #define __NR_mremap 163 +#define __NR_mseal 462 #define __NR_msgctl 402 #define __NR_msgget 399 #define __NR_msgrcv 401 diff --git a/sysdeps/unix/sysv/linux/m68k/coldfire/libc.abilist b/sysdeps/unix/sysv/linux/m68k/coldfire/libc.abilist index d9bd6a9b56..142595eb3e 100644 --- a/sysdeps/unix/sysv/linux/m68k/coldfire/libc.abilist +++ b/sysdeps/unix/sysv/linux/m68k/coldfire/libc.abilist @@ -2781,6 +2781,8 @@ GLIBC_2.4 xdrstdio_create F GLIBC_2.4 xencrypt F GLIBC_2.4 xprt_register F GLIBC_2.4 xprt_unregister F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist b/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist index 439796d693..85e7746c10 100644 --- a/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist +++ b/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist @@ -2948,6 +2948,8 @@ GLIBC_2.4 sys_errlist D 0x210 GLIBC_2.4 sys_nerr D 0x4 GLIBC_2.4 unlinkat F GLIBC_2.4 unshare F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/m68k/syscall_cancel.S b/sysdeps/unix/sysv/linux/m68k/syscall_cancel.S new file mode 100644 index 0000000000..8923bcc71c --- /dev/null +++ b/sysdeps/unix/sysv/linux/m68k/syscall_cancel.S @@ -0,0 +1,84 @@ +/* 
Cancellable syscall wrapper. Linux/m68k version. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <descr-const.h> + +/* long int __syscall_cancel_arch (int *cancelhandling, + __syscall_arg_t nr, + __syscall_arg_t arg1, + __syscall_arg_t arg2, + __syscall_arg_t arg3, + __syscall_arg_t arg4, + __syscall_arg_t arg5, + __syscall_arg_t arg6) */ + + +ENTRY (__syscall_cancel_arch) +#ifdef __mcoldfire__ + lea (-16,%sp),%sp + movem.l %d2-%d5,(%sp) +#else + movem.l %d2-%d5,-(%sp) +#endif + cfi_def_cfa_offset (20) + cfi_offset (2, -20) + cfi_offset (3, -16) + cfi_offset (4, -12) + cfi_offset (5, -8) + + .global __syscall_cancel_arch_start +__syscall_cancel_arch_start: + + move.l 20(%sp),%a0 + move.l (%a0),%d0 +#ifdef __mcoldfire__ + move.w %d0,%ccr + jeq 1f +#else + btst #TCB_CANCELED_BIT,%d0 + jne 1f +#endif + + move.l 48(%sp),%a0 + move.l 44(%sp),%d5 + move.l 40(%sp),%d4 + move.l 36(%sp),%d3 + move.l 32(%sp),%d2 + move.l 28(%sp),%d1 + move.l 24(%sp),%d0 + trap #0 + + .global __syscall_cancel_arch_end +__syscall_cancel_arch_end: + +#ifdef __mcoldfire__ + movem.l (%sp),%d2-%d5 + lea (16,%sp),%sp +#else + movem.l (%sp)+,%d2-%d5 +#endif + rts + +1: +#ifdef PIC + bsr.l __syscall_do_cancel +#else + jsr __syscall_do_cancel +#endif +END 
(__syscall_cancel_arch) diff --git a/sysdeps/unix/sysv/linux/microblaze/arch-syscall.h b/sysdeps/unix/sysv/linux/microblaze/arch-syscall.h index 779d5d5d70..79e225e50c 100644 --- a/sysdeps/unix/sysv/linux/microblaze/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/microblaze/arch-syscall.h @@ -221,6 +221,7 @@ #define __NR_mq_timedsend_time64 418 #define __NR_mq_unlink 278 #define __NR_mremap 163 +#define __NR_mseal 462 #define __NR_msgctl 331 #define __NR_msgget 332 #define __NR_msgrcv 333 diff --git a/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist b/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist index 1069d3252c..91dc1b8378 100644 --- a/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist +++ b/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist @@ -2834,3 +2834,5 @@ GLIBC_2.39 stdc_trailing_zeros_ui F GLIBC_2.39 stdc_trailing_zeros_ul F GLIBC_2.39 stdc_trailing_zeros_ull F GLIBC_2.39 stdc_trailing_zeros_us F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F diff --git a/sysdeps/unix/sysv/linux/microblaze/le/libc.abilist b/sysdeps/unix/sysv/linux/microblaze/le/libc.abilist index 17abe08c8b..3440e90f6f 100644 --- a/sysdeps/unix/sysv/linux/microblaze/le/libc.abilist +++ b/sysdeps/unix/sysv/linux/microblaze/le/libc.abilist @@ -2831,3 +2831,5 @@ GLIBC_2.39 stdc_trailing_zeros_ui F GLIBC_2.39 stdc_trailing_zeros_ul F GLIBC_2.39 stdc_trailing_zeros_ull F GLIBC_2.39 stdc_trailing_zeros_us F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F diff --git a/sysdeps/unix/sysv/linux/microblaze/syscall_cancel.S b/sysdeps/unix/sysv/linux/microblaze/syscall_cancel.S new file mode 100644 index 0000000000..1f9d202bf5 --- /dev/null +++ b/sysdeps/unix/sysv/linux/microblaze/syscall_cancel.S @@ -0,0 +1,61 @@ +/* Cancellable syscall wrapper. Linux/microblaze version. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <descr-const.h> + +/* long int __syscall_cancel_arch (int *cancelhandling, + long int nr, + long int arg1, + long int arg2, + long int arg3, + long int arg4, + long int arg5, + long int arg6) */ + +ENTRY (__syscall_cancel_arch) + + .globl __syscall_cancel_arch_start +__syscall_cancel_arch_start: + + lwi r3,r5,0 + andi r3,r3,TCB_CANCELED_BITMASK + bneid r3,1f + addk r12,r6,r0 + + addk r5,r7,r0 + addk r6,r8,r0 + addk r7,r9,r0 + addk r8,r10,r0 + lwi r9,r1,56 + lwi r10,r1,60 + brki r14,8 + + .globl __syscall_cancel_arch_end +__syscall_cancel_arch_end: + + nop + lwi r15,r1,0 + rtsd r15,8 + addik r1,r1,28 + +1: + brlid r15, __syscall_do_cancel + nop + +END (__syscall_cancel_arch) diff --git a/sysdeps/unix/sysv/linux/mips/mips32/arch-syscall.h b/sysdeps/unix/sysv/linux/mips/mips32/arch-syscall.h index 86ffd5ce84..dadd7f3130 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/mips/mips32/arch-syscall.h @@ -211,6 +211,7 @@ #define __NR_mq_timedsend_time64 4418 #define __NR_mq_unlink 4272 #define __NR_mremap 4167 +#define __NR_mseal 4462 #define __NR_msgctl 4402 #define __NR_msgget 4399 #define __NR_msgrcv 4401 diff --git a/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist index 
799e508950..5ee7b8c52f 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist +++ b/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist @@ -2909,6 +2909,8 @@ GLIBC_2.4 renameat F GLIBC_2.4 symlinkat F GLIBC_2.4 unlinkat F GLIBC_2.4 unshare F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/mips/mips32/nofpu/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips32/nofpu/libc.abilist index 1c10996cbc..6cb6328e7c 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/nofpu/libc.abilist +++ b/sysdeps/unix/sysv/linux/mips/mips32/nofpu/libc.abilist @@ -2907,6 +2907,8 @@ GLIBC_2.4 renameat F GLIBC_2.4 symlinkat F GLIBC_2.4 unlinkat F GLIBC_2.4 unshare F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/mips/mips32/syscall_cancel.S b/sysdeps/unix/sysv/linux/mips/mips32/syscall_cancel.S new file mode 100644 index 0000000000..eb3b2ed005 --- /dev/null +++ b/sysdeps/unix/sysv/linux/mips/mips32/syscall_cancel.S @@ -0,0 +1,128 @@ +/* Cancellable syscall wrapper. Linux/mips32 version. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <sys/asm.h> +#include <descr-const.h> + +/* long int __syscall_cancel_arch (int *cancelhandling, + __syscall_arg_t nr, + __syscall_arg_t arg1, + __syscall_arg_t arg2, + __syscall_arg_t arg3, + __syscall_arg_t arg4, + __syscall_arg_t arg5, + __syscall_arg_t arg6, + __syscall_arg_t arg7) */ + +#define FRAME_SIZE 56 + +NESTED (__syscall_cancel_arch, FRAME_SIZE, fp) + .mask 0xc0070000,-SZREG + .fmask 0x00000000,0 + + PTR_ADDIU sp, -FRAME_SIZE + cfi_def_cfa_offset (FRAME_SIZE) + + sw fp, 48(sp) + sw ra, 52(sp) + sw s2, 44(sp) + sw s1, 40(sp) + sw s0, 36(sp) +#ifdef __PIC__ + .cprestore 16 +#endif + cfi_offset (ra, -4) + cfi_offset (fp, -8) + cfi_offset (s2, -12) + cfi_offset (s1, -16) + cfi_offset (s0, -20) + + move fp ,sp + cfi_def_cfa_register (fp) + + .globl __syscall_cancel_arch_start +__syscall_cancel_arch_start: + + lw v0, 0(a0) + andi v0, v0, TCB_CANCELED_BITMASK + bne v0, zero, 2f + + addiu sp, sp, -16 + addiu v0, sp, 16 + sw v0, 24(fp) + + move s0, a1 + move a0, a2 + move a1, a3 + lw a2, 72(fp) + lw a3, 76(fp) + lw v0, 84(fp) + lw s1, 80(fp) + lw s2, 88(fp) + + .set noreorder + subu sp, 32 + sw s1, 16(sp) + sw v0, 20(sp) + sw s2, 24(sp) + move v0, s0 + syscall + + .globl __syscall_cancel_arch_end +__syscall_cancel_arch_end: + addiu sp, sp, 32 + .set reorder + + beq a3, zero, 1f + subu v0, zero, v0 +1: + move sp, fp + cfi_remember_state + cfi_def_cfa_register (sp) + lw ra, 52(fp) + lw fp, 48(sp) + lw s2, 44(sp) + lw s1, 40(sp) + lw s0, 36(sp) + + .set noreorder + .set nomacro + jr ra + addiu sp,sp,FRAME_SIZE + + .set macro + .set reorder + + cfi_def_cfa_offset (0) + cfi_restore (s0) + cfi_restore (s1) + cfi_restore (s2) + cfi_restore (fp) + cfi_restore (ra) + +2: + cfi_restore_state +#ifdef __PIC__ + PTR_LA t9, __syscall_do_cancel + jalr t9 +#else + jal __syscall_do_cancel 
+#endif + +END (__syscall_cancel_arch) diff --git a/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h b/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h index 1827caf595..47a1b97351 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h +++ b/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h @@ -18,6 +18,10 @@ #ifndef _LINUX_MIPS_MIPS32_SYSDEP_H #define _LINUX_MIPS_MIPS32_SYSDEP_H 1 +/* mips32 has cancelable syscalls with 7 arguments (currently only + sync_file_range). */ +#define HAVE_CANCELABLE_SYSCALL_WITH_7_ARGS 1 + /* There is some commonality. */ #include <sysdeps/unix/sysv/linux/mips/sysdep.h> #include <sysdeps/unix/sysv/linux/sysdep.h> diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n32/arch-syscall.h b/sysdeps/unix/sysv/linux/mips/mips64/n32/arch-syscall.h index 5d37a686e5..db6b2d4609 100644 --- a/sysdeps/unix/sysv/linux/mips/mips64/n32/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/mips/mips64/n32/arch-syscall.h @@ -194,6 +194,7 @@ #define __NR_mq_timedsend_time64 6418 #define __NR_mq_unlink 6235 #define __NR_mremap 6024 +#define __NR_mseal 6462 #define __NR_msgctl 6069 #define __NR_msgget 6066 #define __NR_msgrcv 6068 diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist index 03d9655f26..ae7474c0f0 100644 --- a/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist +++ b/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist @@ -2915,6 +2915,8 @@ GLIBC_2.4 renameat F GLIBC_2.4 symlinkat F GLIBC_2.4 unlinkat F GLIBC_2.4 unshare F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n32/syscall_types.h b/sysdeps/unix/sysv/linux/mips/mips64/n32/syscall_types.h new file mode 100644 index 0000000000..b3a8b0b634 --- /dev/null +++ b/sysdeps/unix/sysv/linux/mips/mips64/n32/syscall_types.h @@ -0,0 +1,28 @@ +/* Types and macros used for syscall issuing. MIPS64n32 version. 
+ Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _SYSCALL_TYPES_H +#define _SYSCALL_TYPES_H + +typedef long long int __syscall_arg_t; + +/* Convert X to a long long, without losing any bits if it is one + already or warning if it is a 32-bit pointer. */ +#define __SSC(__x) ((__syscall_arg_t) (__typeof__ ((__x) - (__x))) (__x)) + +#endif diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n64/arch-syscall.h b/sysdeps/unix/sysv/linux/mips/mips64/n64/arch-syscall.h index 9b1e846e76..b4129a4dbd 100644 --- a/sysdeps/unix/sysv/linux/mips/mips64/n64/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/mips/mips64/n64/arch-syscall.h @@ -183,6 +183,7 @@ #define __NR_mq_timedsend 5232 #define __NR_mq_unlink 5231 #define __NR_mremap 5024 +#define __NR_mseal 5462 #define __NR_msgctl 5069 #define __NR_msgget 5066 #define __NR_msgrcv 5068 diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist index 05e402ed30..cdf040dec2 100644 --- a/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist +++ b/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist @@ -2817,6 +2817,8 @@ GLIBC_2.4 renameat F GLIBC_2.4 symlinkat F GLIBC_2.4 unlinkat F GLIBC_2.4 unshare F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F 
GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/mips/mips64/syscall_cancel.S b/sysdeps/unix/sysv/linux/mips/mips64/syscall_cancel.S new file mode 100644 index 0000000000..cfc0596b6a --- /dev/null +++ b/sysdeps/unix/sysv/linux/mips/mips64/syscall_cancel.S @@ -0,0 +1,112 @@ +/* Cancellable syscall wrapper. Linux/mips64 version. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <sys/asm.h> +#include <descr-const.h> + +/* long int __syscall_cancel_arch (int *cancelhandling, + __syscall_arg_t nr, + __syscall_arg_t arg1, + __syscall_arg_t arg2, + __syscall_arg_t arg3, + __syscall_arg_t arg4, + __syscall_arg_t arg5, + __syscall_arg_t arg6, + __syscall_arg_t arg7) */ + +#define FRAME_SIZE 32 + + .text +NESTED (__syscall_cancel_arch, FRAME_SIZE, ra) + .mask 0x90010000, -SZREG + .fmask 0x00000000, 0 + LONG_ADDIU sp, sp, -FRAME_SIZE + cfi_def_cfa_offset (FRAME_SIZE) + sd gp, 16(sp) + cfi_offset (gp, -16) + lui gp, %hi(%neg(%gp_rel(__syscall_cancel_arch))) + LONG_ADDU gp, gp, t9 + sd ra, 24(sp) + sd s0, 8(sp) + cfi_offset (ra, -8) + cfi_offset (s0, -24) + LONG_ADDIU gp, gp, %lo(%neg(%gp_rel(__syscall_cancel_arch))) + + .global __syscall_cancel_arch_start +__syscall_cancel_arch_start: + + lw v0, 0(a0) + andi v0, v0, TCB_CANCELED_BITMASK + .set noreorder + .set nomacro + bne v0, zero, 2f + move s0, a1 + .set macro + .set reorder + + move a0, a2 + move a1, a3 + move a2, a4 + move a3, a5 + move a4, a6 + move a5, a7 + + .set noreorder + move v0, s0 + syscall + .set reorder + + .global __syscall_cancel_arch_end +__syscall_cancel_arch_end: + + .set noreorder + .set nomacro +#if __mips_isa_rev >= 6 + beqzc $7, 1f +#else + bnel a3, zero, 1f +#endif + SUBU v0, zero, v0 + .set macro + .set reorder + +1: + ld ra, 24(sp) + ld gp, 16(sp) + ld s0, 8(sp) + + .set noreorder + .set nomacro + jr ra + LONG_ADDIU sp, sp, FRAME_SIZE + .set macro + .set reorder + + cfi_remember_state + cfi_def_cfa_offset (0) + cfi_restore (s0) + cfi_restore (gp) + cfi_restore (ra) + .align 3 +2: + cfi_restore_state + LONG_L t9, %got_disp(__syscall_do_cancel)(gp) + .reloc 3f, R_MIPS_JALR, __syscall_do_cancel +3: jalr t9 +END (__syscall_cancel_arch) diff --git a/sysdeps/unix/sysv/linux/mips/mips64/sysdep.h b/sysdeps/unix/sysv/linux/mips/mips64/sysdep.h index 0a1711dad2..0438bed23d 100644 --- a/sysdeps/unix/sysv/linux/mips/mips64/sysdep.h +++ 
b/sysdeps/unix/sysv/linux/mips/mips64/sysdep.h @@ -44,15 +44,7 @@ #undef HAVE_INTERNAL_BRK_ADDR_SYMBOL #define HAVE_INTERNAL_BRK_ADDR_SYMBOL 1 -#if _MIPS_SIM == _ABIN32 -/* Convert X to a long long, without losing any bits if it is one - already or warning if it is a 32-bit pointer. */ -# define ARGIFY(X) ((long long int) (__typeof__ ((X) - (X))) (X)) -typedef long long int __syscall_arg_t; -#else -# define ARGIFY(X) ((long int) (X)) -typedef long int __syscall_arg_t; -#endif +#include <syscall_types.h> /* Note that the original Linux syscall restart convention required the instruction immediately preceding SYSCALL to initialize $v0 with the @@ -120,7 +112,7 @@ typedef long int __syscall_arg_t; long int _sys_result; \ \ { \ - __syscall_arg_t _arg1 = ARGIFY (arg1); \ + __syscall_arg_t _arg1 = __SSC (arg1); \ register __syscall_arg_t __s0 asm ("$16") __attribute__ ((unused))\ = (number); \ register __syscall_arg_t __v0 asm ("$2"); \ @@ -144,8 +136,8 @@ typedef long int __syscall_arg_t; long int _sys_result; \ \ { \ - __syscall_arg_t _arg1 = ARGIFY (arg1); \ - __syscall_arg_t _arg2 = ARGIFY (arg2); \ + __syscall_arg_t _arg1 = __SSC (arg1); \ + __syscall_arg_t _arg2 = __SSC (arg2); \ register __syscall_arg_t __s0 asm ("$16") __attribute__ ((unused))\ = (number); \ register __syscall_arg_t __v0 asm ("$2"); \ @@ -170,9 +162,9 @@ typedef long int __syscall_arg_t; long int _sys_result; \ \ { \ - __syscall_arg_t _arg1 = ARGIFY (arg1); \ - __syscall_arg_t _arg2 = ARGIFY (arg2); \ - __syscall_arg_t _arg3 = ARGIFY (arg3); \ + __syscall_arg_t _arg1 = __SSC (arg1); \ + __syscall_arg_t _arg2 = __SSC (arg2); \ + __syscall_arg_t _arg3 = __SSC (arg3); \ register __syscall_arg_t __s0 asm ("$16") __attribute__ ((unused))\ = (number); \ register __syscall_arg_t __v0 asm ("$2"); \ @@ -199,10 +191,10 @@ typedef long int __syscall_arg_t; long int _sys_result; \ \ { \ - __syscall_arg_t _arg1 = ARGIFY (arg1); \ - __syscall_arg_t _arg2 = ARGIFY (arg2); \ - __syscall_arg_t _arg3 = ARGIFY 
(arg3); \ - __syscall_arg_t _arg4 = ARGIFY (arg4); \ + __syscall_arg_t _arg1 = __SSC (arg1); \ + __syscall_arg_t _arg2 = __SSC (arg2); \ + __syscall_arg_t _arg3 = __SSC (arg3); \ + __syscall_arg_t _arg4 = __SSC (arg4); \ register __syscall_arg_t __s0 asm ("$16") __attribute__ ((unused))\ = (number); \ register __syscall_arg_t __v0 asm ("$2"); \ @@ -229,11 +221,11 @@ typedef long int __syscall_arg_t; long int _sys_result; \ \ { \ - __syscall_arg_t _arg1 = ARGIFY (arg1); \ - __syscall_arg_t _arg2 = ARGIFY (arg2); \ - __syscall_arg_t _arg3 = ARGIFY (arg3); \ - __syscall_arg_t _arg4 = ARGIFY (arg4); \ - __syscall_arg_t _arg5 = ARGIFY (arg5); \ + __syscall_arg_t _arg1 = __SSC (arg1); \ + __syscall_arg_t _arg2 = __SSC (arg2); \ + __syscall_arg_t _arg3 = __SSC (arg3); \ + __syscall_arg_t _arg4 = __SSC (arg4); \ + __syscall_arg_t _arg5 = __SSC (arg5); \ register __syscall_arg_t __s0 asm ("$16") __attribute__ ((unused))\ = (number); \ register __syscall_arg_t __v0 asm ("$2"); \ @@ -261,12 +253,12 @@ typedef long int __syscall_arg_t; long int _sys_result; \ \ { \ - __syscall_arg_t _arg1 = ARGIFY (arg1); \ - __syscall_arg_t _arg2 = ARGIFY (arg2); \ - __syscall_arg_t _arg3 = ARGIFY (arg3); \ - __syscall_arg_t _arg4 = ARGIFY (arg4); \ - __syscall_arg_t _arg5 = ARGIFY (arg5); \ - __syscall_arg_t _arg6 = ARGIFY (arg6); \ + __syscall_arg_t _arg1 = __SSC (arg1); \ + __syscall_arg_t _arg2 = __SSC (arg2); \ + __syscall_arg_t _arg3 = __SSC (arg3); \ + __syscall_arg_t _arg4 = __SSC (arg4); \ + __syscall_arg_t _arg5 = __SSC (arg5); \ + __syscall_arg_t _arg6 = __SSC (arg6); \ register __syscall_arg_t __s0 asm ("$16") __attribute__ ((unused))\ = (number); \ register __syscall_arg_t __v0 asm ("$2"); \ diff --git a/sysdeps/unix/sysv/linux/mremap-failure.h b/sysdeps/unix/sysv/linux/mremap-failure.h new file mode 100644 index 0000000000..c99ab30ca9 --- /dev/null +++ b/sysdeps/unix/sysv/linux/mremap-failure.h @@ -0,0 +1,30 @@ +/* mremap failure handling. Linux version. 
+ Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <support/check.h> + +/* Return exit value on mremap failure with errno ERR. */ + +static int +mremap_failure_exit (int err) +{ + if (err != EINVAL) + return EXIT_FAILURE; + + return EXIT_UNSUPPORTED; +} diff --git a/sysdeps/unix/sysv/linux/mremap.c b/sysdeps/unix/sysv/linux/mremap.c index 4f770799c4..1ada5c1f40 100644 --- a/sysdeps/unix/sysv/linux/mremap.c +++ b/sysdeps/unix/sysv/linux/mremap.c @@ -20,6 +20,12 @@ #include <sysdep.h> #include <stdarg.h> #include <stddef.h> +#include <errno.h> + +#define MREMAP_KNOWN_BITS \ + (MREMAP_MAYMOVE \ + | MREMAP_FIXED \ + | MREMAP_DONTUNMAP) void * __mremap (void *addr, size_t old_len, size_t new_len, int flags, ...) @@ -27,7 +33,13 @@ __mremap (void *addr, size_t old_len, size_t new_len, int flags, ...) 
va_list va; void *new_addr = NULL; - if (flags & MREMAP_FIXED) + if (flags & ~(MREMAP_KNOWN_BITS)) + { + __set_errno (EINVAL); + return MAP_FAILED; + } + + if (flags & (MREMAP_FIXED | MREMAP_DONTUNMAP)) { va_start (va, flags); new_addr = va_arg (va, void *); diff --git a/sysdeps/unix/sysv/linux/nios2/arch-syscall.h b/sysdeps/unix/sysv/linux/nios2/arch-syscall.h index abbc9ab6b0..b1d0fb6f6e 100644 --- a/sysdeps/unix/sysv/linux/nios2/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/nios2/arch-syscall.h @@ -24,6 +24,7 @@ #define __NR_clock_settime 112 #define __NR_clock_settime64 404 #define __NR_clone 220 +#define __NR_clone3 435 #define __NR_close 57 #define __NR_close_range 436 #define __NR_connect 203 @@ -167,6 +168,7 @@ #define __NR_mq_timedsend_time64 418 #define __NR_mq_unlink 181 #define __NR_mremap 216 +#define __NR_mseal 462 #define __NR_msgctl 187 #define __NR_msgget 186 #define __NR_msgrcv 188 diff --git a/sysdeps/unix/sysv/linux/nios2/libc.abilist b/sysdeps/unix/sysv/linux/nios2/libc.abilist index 3aa81766aa..773d4c5873 100644 --- a/sysdeps/unix/sysv/linux/nios2/libc.abilist +++ b/sysdeps/unix/sysv/linux/nios2/libc.abilist @@ -2873,3 +2873,5 @@ GLIBC_2.39 stdc_trailing_zeros_ui F GLIBC_2.39 stdc_trailing_zeros_ul F GLIBC_2.39 stdc_trailing_zeros_ull F GLIBC_2.39 stdc_trailing_zeros_us F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F diff --git a/sysdeps/unix/sysv/linux/nios2/syscall_cancel.S b/sysdeps/unix/sysv/linux/nios2/syscall_cancel.S new file mode 100644 index 0000000000..19d0795886 --- /dev/null +++ b/sysdeps/unix/sysv/linux/nios2/syscall_cancel.S @@ -0,0 +1,95 @@ +/* Cancellable syscall wrapper. Linux/nios2 version. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <descr-const.h> + +/* long int __syscall_cancel_arch (int *cancelhandling, + __syscall_arg_t nr, + __syscall_arg_t arg1, + __syscall_arg_t arg2, + __syscall_arg_t arg3, + __syscall_arg_t arg4, + __syscall_arg_t arg5, + __syscall_arg_t arg6) */ + +ENTRY (__syscall_cancel_arch) +#ifdef SHARED + addi sp, sp, -8 + stw r22, 0(sp) + nextpc r22 +1: + movhi r8, %hiadj(_gp_got - 1b) + addi r8, r8, %lo(_gp_got - 1b) + stw ra, 4(sp) + add r22, r22, r8 +#else + addi sp, sp, -4 + cfi_def_cfa_offset (4) + stw ra, 0(sp) + cfi_offset (31, -4) +#endif + + .globl __syscall_cancel_arch_start +__syscall_cancel_arch_start: + ldw r3, 0(r4) + andi r3, r3, TCB_CANCELED_BITMASK + bne r3, zero, 3f + mov r10, r6 + mov r2, r5 +#ifdef SHARED +# define STACK_ADJ 4 +#else +# define STACK_ADJ 0 +#endif + ldw r9, (16 + STACK_ADJ)(sp) + mov r5, r7 + ldw r8, (12 + STACK_ADJ)(sp) + ldw r7, (8 + STACK_ADJ)(sp) + ldw r6, (4 + STACK_ADJ)(sp) + mov r4, r10 + trap + + .globl __syscall_cancel_arch_end +__syscall_cancel_arch_end: + beq r7, zero, 2f + sub r2, zero, r2 +2: +#ifdef SHARED + ldw ra, 4(sp) + ldw r22, 0(sp) + addi sp, sp, 8 +#else + ldw ra, (0 + STACK_ADJ)(sp) + cfi_remember_state + cfi_restore (31) + addi sp, sp, 4 + cfi_def_cfa_offset (0) +#endif + ret + +3: +#ifdef SHARED + ldw r2, 
%call(__syscall_do_cancel)(r22) + callr r2 +#else + cfi_restore_state + call __syscall_do_cancel +#endif + +END (__syscall_cancel_arch) diff --git a/sysdeps/unix/sysv/linux/olddirent.h b/sysdeps/unix/sysv/linux/olddirent.h index 239f790648..065ca41a6e 100644 --- a/sysdeps/unix/sysv/linux/olddirent.h +++ b/sysdeps/unix/sysv/linux/olddirent.h @@ -34,8 +34,6 @@ extern struct __old_dirent64 *__old_readdir64 (DIR *__dirp); libc_hidden_proto (__old_readdir64); extern int __old_readdir64_r (DIR *__dirp, struct __old_dirent64 *__entry, struct __old_dirent64 **__result); -extern __ssize_t __old_getdents64 (int __fd, char *__buf, size_t __nbytes) - attribute_hidden; int __old_scandir64 (const char * __dir, struct __old_dirent64 *** __namelist, int (*__selector) (const struct __old_dirent64 *), diff --git a/sysdeps/unix/sysv/linux/or1k/arch-syscall.h b/sysdeps/unix/sysv/linux/or1k/arch-syscall.h index 7223a93673..2d21fa2085 100644 --- a/sysdeps/unix/sysv/linux/or1k/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/or1k/arch-syscall.h @@ -167,6 +167,7 @@ #define __NR_mq_timedsend_time64 418 #define __NR_mq_unlink 181 #define __NR_mremap 216 +#define __NR_mseal 462 #define __NR_msgctl 187 #define __NR_msgget 186 #define __NR_msgrcv 188 diff --git a/sysdeps/unix/sysv/linux/or1k/libc.abilist b/sysdeps/unix/sysv/linux/or1k/libc.abilist index 959e59e7e7..c356a11b1c 100644 --- a/sysdeps/unix/sysv/linux/or1k/libc.abilist +++ b/sysdeps/unix/sysv/linux/or1k/libc.abilist @@ -2259,3 +2259,5 @@ GLIBC_2.40 getcontext F GLIBC_2.40 makecontext F GLIBC_2.40 setcontext F GLIBC_2.40 swapcontext F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F diff --git a/sysdeps/unix/sysv/linux/or1k/syscall_cancel.S b/sysdeps/unix/sysv/linux/or1k/syscall_cancel.S new file mode 100644 index 0000000000..876f5e05ab --- /dev/null +++ b/sysdeps/unix/sysv/linux/or1k/syscall_cancel.S @@ -0,0 +1,63 @@ +/* Cancellable syscall wrapper. Linux/or1k version. + Copyright (C) 2023 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <descr-const.h> + +ENTRY (__syscall_cancel_arch) + l.addi r1, r1, -4 + cfi_def_cfa_offset (4) + l.sw 0(r1), r9 + cfi_offset (9, -4) + + .global __syscall_cancel_arch_start +__syscall_cancel_arch_start: + + /* if (*cancelhandling & CANCELED_BITMASK) + __syscall_do_cancel() */ + l.movhi r19, hi(0) + l.lwz r17, 0(r3) + l.andi r17, r17, TCB_CANCELED_BITMASK + l.sfeq r17, r19 + l.bnf 1f + + /* Issue a 6 argument syscall. */ + l.or r11, r4, r4 + l.or r3, r5, r5 + l.or r4, r6, r6 + l.or r5, r7, r7 + l.or r6, r8, r8 + l.lwz r7, 4(r1) + l.lwz r8, 8(r1) + l.sys 1 + l.nop + + .global __syscall_cancel_arch_end +__syscall_cancel_arch_end: + + l.lwz r9, 0(r1) + l.jr r9 + l.addi r1, r1, 4 + cfi_remember_state + cfi_def_cfa_offset (0) + cfi_restore (9) +1: + cfi_restore_state + l.jal __syscall_do_cancel + l.nop +END (__syscall_cancel_arch) diff --git a/sysdeps/unix/sysv/linux/pkey_get.c b/sysdeps/unix/sysv/linux/pkey_get.c index 743bd09268..ed9b6297d1 100644 --- a/sysdeps/unix/sysv/linux/pkey_get.c +++ b/sysdeps/unix/sysv/linux/pkey_get.c @@ -17,10 +17,13 @@ <https://www.gnu.org/licenses/>. 
*/ #include <errno.h> +#include <sys/mman.h> int -pkey_get (int key) +__pkey_get (int key) { __set_errno (ENOSYS); return -1; } +libc_hidden_def (__pkey_get) +weak_alias (__pkey_get, pkey_get) diff --git a/sysdeps/unix/sysv/linux/pkey_mprotect.c b/sysdeps/unix/sysv/linux/pkey_mprotect.c index b7afa7946d..e321a7eb1b 100644 --- a/sysdeps/unix/sysv/linux/pkey_mprotect.c +++ b/sysdeps/unix/sysv/linux/pkey_mprotect.c @@ -22,7 +22,7 @@ #include <sysdep.h> int -pkey_mprotect (void *addr, size_t len, int prot, int pkey) +__pkey_mprotect (void *addr, size_t len, int prot, int pkey) { if (pkey == -1) /* If the key is -1, the system call is precisely equivalent to @@ -30,3 +30,5 @@ pkey_mprotect (void *addr, size_t len, int prot, int pkey) return __mprotect (addr, len, prot); return INLINE_SYSCALL_CALL (pkey_mprotect, addr, len, prot, pkey); } +libc_hidden_def (__pkey_mprotect) +weak_alias (__pkey_mprotect, pkey_mprotect) diff --git a/sysdeps/unix/sysv/linux/pkey_set.c b/sysdeps/unix/sysv/linux/pkey_set.c index d5d59e3031..30463ef89b 100644 --- a/sysdeps/unix/sysv/linux/pkey_set.c +++ b/sysdeps/unix/sysv/linux/pkey_set.c @@ -17,10 +17,13 @@ <https://www.gnu.org/licenses/>. */ #include <errno.h> +#include <sys/mman.h> int -pkey_set (int key, unsigned int access_rights) +__pkey_set (int key, unsigned int access_rights) { __set_errno (ENOSYS); return -1; } +libc_hidden_def (__pkey_set) +weak_alias (__pkey_set, pkey_set) diff --git a/sysdeps/unix/sysv/linux/powerpc/cancellation-pc-check.h b/sysdeps/unix/sysv/linux/powerpc/cancellation-pc-check.h new file mode 100644 index 0000000000..1175e1a070 --- /dev/null +++ b/sysdeps/unix/sysv/linux/powerpc/cancellation-pc-check.h @@ -0,0 +1,65 @@ +/* Architecture specific code for pthread cancellation handling. + Linux/PowerPC version. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _NPTL_CANCELLATION_PC_CHECK +#define _NPTL_CANCELLATION_PC_CHECK + +#include <sigcontextinfo.h> + +/* For syscalls with side-effects (e.g. read that might return partial read), + the kernel cannot restart the syscall when interrupted by a signal, it must + return from the call with whatever partial result. In this case, the saved + program counter is set just after the syscall instruction, so the SIGCANCEL + handler should not act on cancellation. + + The __syscall_cancel_arch function, used for all cancellable syscalls, + contains two extra markers, __syscall_cancel_arch_start and + __syscall_cancel_arch_end. The former points to just before the initial + conditional branch that checks if the thread has received a cancellation + request, while the latter points to the instruction after the one responsible + for issuing the syscall. + + The function checks whether the program counter (PC) from ucontext_t CTX is + within the start and end boundaries of the __syscall_cancel_arch + bridge. Return TRUE if the PC is within the boundary, meaning the + syscall does not have any side effects; or FALSE otherwise. */ + +static __always_inline bool +cancellation_pc_check (void *ctx) +{ + /* Both are defined in syscall_cancel.S. 
*/ + extern const char __syscall_cancel_arch_start[1]; + extern const char __syscall_cancel_arch_end_sc[1]; +#if defined(USE_PPC_SVC) && defined(__powerpc64__) + extern const char __syscall_cancel_arch_end_svc[1]; +#endif + + uintptr_t pc = sigcontext_get_pc (ctx); + + return pc >= (uintptr_t) __syscall_cancel_arch_start +#if defined(USE_PPC_SVC) && defined(__powerpc64__) + && ((THREAD_GET_HWCAP() & PPC_FEATURE2_SCV) + ? pc < (uintptr_t) __syscall_cancel_arch_end_sc + : pc < (uintptr_t) __syscall_cancel_arch_end_svc); +#else + && pc < (uintptr_t) __syscall_cancel_arch_end_sc; +#endif +} + +#endif diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/arch-syscall.h b/sysdeps/unix/sysv/linux/powerpc/powerpc32/arch-syscall.h index af0d2b121e..206d9fd656 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/arch-syscall.h @@ -211,6 +211,7 @@ #define __NR_mq_timedsend_time64 418 #define __NR_mq_unlink 263 #define __NR_mremap 163 +#define __NR_mseal 462 #define __NR_msgctl 402 #define __NR_msgget 399 #define __NR_msgrcv 401 diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist index 9714305608..7937f94cf0 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist @@ -3138,6 +3138,8 @@ GLIBC_2.4 wcstold F GLIBC_2.4 wcstold_l F GLIBC_2.4 wprintf F GLIBC_2.4 wscanf F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libc.abilist index 0beb52c542..d6e35f31d2 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libc.abilist +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libc.abilist @@ -3183,6 +3183,8 @@ GLIBC_2.4 wcstold F GLIBC_2.4 
wcstold_l F GLIBC_2.4 wprintf F GLIBC_2.4 wscanf F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/arch-syscall.h b/sysdeps/unix/sysv/linux/powerpc/powerpc64/arch-syscall.h index a4c70aa7fe..19f72a7f69 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/arch-syscall.h @@ -195,6 +195,7 @@ #define __NR_mq_timedsend 264 #define __NR_mq_unlink 263 #define __NR_mremap 163 +#define __NR_mseal 462 #define __NR_msgctl 402 #define __NR_msgget 399 #define __NR_msgrcv 401 diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist index cfc2ebd3ec..2268d6890d 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist @@ -2892,6 +2892,8 @@ GLIBC_2.4 wcstold F GLIBC_2.4 wcstold_l F GLIBC_2.4 wprintf F GLIBC_2.4 wscanf F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist index 8c9efc5a16..7f61b14bc8 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist @@ -2968,3 +2968,5 @@ GLIBC_2.39 stdc_trailing_zeros_ui F GLIBC_2.39 stdc_trailing_zeros_ul F GLIBC_2.39 stdc_trailing_zeros_ull F GLIBC_2.39 stdc_trailing_zeros_us F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_get.c b/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_get.c index 217ecd004f..ac9ea3a1c1 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_get.c +++ 
b/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_get.c @@ -21,7 +21,7 @@ #include <sys/mman.h> int -pkey_get (int key) +__pkey_get (int key) { if (key < 0 || key > PKEY_MAX) { @@ -40,3 +40,5 @@ pkey_get (int key) return PKEY_DISABLE_WRITE; return 0; } +libc_hidden_def (__pkey_get) +weak_alias (__pkey_get, pkey_get) diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_set.c b/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_set.c index cc67b0acff..ac10b5de22 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_set.c +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_set.c @@ -21,7 +21,7 @@ #include <sys/mman.h> int -pkey_set (int key, unsigned int rights) +__pkey_set (int key, unsigned int rights) { if (key < 0 || key > PKEY_MAX || rights > 3) { @@ -46,3 +46,5 @@ pkey_set (int key, unsigned int rights) pkey_write (amr); return 0; } +libc_hidden_def (__pkey_set) +weak_alias (__pkey_set, pkey_set) diff --git a/sysdeps/unix/sysv/linux/powerpc/syscall_cancel.S b/sysdeps/unix/sysv/linux/powerpc/syscall_cancel.S new file mode 100644 index 0000000000..1f119d0889 --- /dev/null +++ b/sysdeps/unix/sysv/linux/powerpc/syscall_cancel.S @@ -0,0 +1,86 @@ +/* Cancellable syscall wrapper. Linux/powerpc version. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <descr-const.h> + +/* long int [r3] __syscall_cancel_arch (int *cancelhandling [r3], + long int nr [r4], + long int arg1 [r5], + long int arg2 [r6], + long int arg3 [r7], + long int arg4 [r8], + long int arg5 [r9], + long int arg6 [r10]) */ + +ENTRY (__syscall_cancel_arch) + + .globl __syscall_cancel_arch_start +__syscall_cancel_arch_start: + + /* if (*cancelhandling & CANCELED_BITMASK) + __syscall_do_cancel() */ + lwz r0,0(r3) + andi. r0,r0,TCB_CANCELED_BITMASK + bne 1f + + /* Issue a 6 argument syscall, the nr [r4] being the syscall + number. */ + mr r0,r4 + mr r3,r5 + mr r4,r6 + mr r5,r7 + mr r6,r8 + mr r7,r9 + mr r8,r10 + +#if defined(USE_PPC_SVC) && defined(__powerpc64__) + CHECK_SCV_SUPPORT r9 0f + + stdu r1, -SCV_FRAME_SIZE(r1) + cfi_adjust_cfa_offset (SCV_FRAME_SIZE) + .machine "push" + .machine "power9" + scv 0 + .machine "pop" + .globl __syscall_cancel_arch_end_svc +__syscall_cancel_arch_end_svc: + ld r9, SCV_FRAME_SIZE + FRAME_LR_SAVE(r1) + mtlr r9 + addi r1, r1, SCV_FRAME_SIZE + cfi_restore (lr) + li r9, -4095 + cmpld r3, r9 + bnslr+ + neg r3,r3 + blr +0: +#endif + sc + .globl __syscall_cancel_arch_end_sc +__syscall_cancel_arch_end_sc: + bnslr+ + neg r3,r3 + blr + + /* Although __syscall_do_cancel does not return, we need the stack + to be set up correctly for unwinding. */ +1: + TAIL_CALL_NO_RETURN (__syscall_do_cancel) + +END (__syscall_cancel_arch) diff --git a/sysdeps/unix/sysv/linux/readdir64.c b/sysdeps/unix/sysv/linux/readdir64.c index e6f5108c0a..e6b8867b7a 100644 --- a/sysdeps/unix/sysv/linux/readdir64.c +++ b/sysdeps/unix/sysv/linux/readdir64.c @@ -26,17 +26,13 @@ #undef __readdir #undef readdir -/* Read a directory entry from DIRP. */ -struct dirent64 * -__readdir64 (DIR *dirp) +/* Read a directory entry from DIRP. No locking.
*/ +static struct dirent64 * +__readdir64_unlocked (DIR *dirp) { struct dirent64 *dp; int saved_errno = errno; -#if IS_IN (libc) - __libc_lock_lock (dirp->lock); -#endif - if (dirp->offset >= dirp->size) { /* We've emptied out our buffer. Refill it. */ @@ -53,9 +49,6 @@ __readdir64 (DIR *dirp) do not set errno in that case, to indicate success. */ if (bytes == 0 || errno == ENOENT) __set_errno (saved_errno); -#if IS_IN (libc) - __libc_lock_unlock (dirp->lock); -#endif return NULL; } dirp->size = (size_t) bytes; @@ -68,10 +61,16 @@ __readdir64 (DIR *dirp) dirp->offset += dp->d_reclen; dirp->filepos = dp->d_off; -#if IS_IN (libc) - __libc_lock_unlock (dirp->lock); -#endif + return dp; +} +/* Read a directory entry from DIRP. */ +struct dirent64 * +__readdir64 (DIR *dirp) +{ + __libc_lock_lock (dirp->lock); + struct dirent64 *dp = __readdir64_unlocked (dirp); + __libc_lock_unlock (dirp->lock); return dp; } libc_hidden_def (__readdir64) @@ -99,45 +98,54 @@ __old_readdir64 (DIR *dirp) struct __old_dirent64 *dp; int saved_errno = errno; -#if IS_IN (libc) __libc_lock_lock (dirp->lock); -#endif - if (dirp->offset >= dirp->size) + while (1) { - /* We've emptied out our buffer. Refill it. */ + errno = 0; + struct dirent64 *newdp = __readdir64_unlocked (dirp); + if (newdp == NULL) + { + if (errno == 0 && dirp->errcode != 0) + __set_errno (dirp->errcode); + else if (errno == 0) + __set_errno (saved_errno); + dp = NULL; + break; + } - size_t maxread = dirp->allocation; - ssize_t bytes; + /* Convert to the target layout. Use a separate struct and + memcpy to side-step aliasing issues. */ + struct __old_dirent64 result; + result.d_ino = newdp->d_ino; + result.d_off = newdp->d_off; + result.d_reclen = newdp->d_reclen; + result.d_type = newdp->d_type; - bytes = __old_getdents64 (dirp->fd, dirp->data, maxread); - if (bytes <= 0) + /* Check for ino_t overflow. 
*/ + if (__glibc_unlikely (result.d_ino != newdp->d_ino)) { - /* Linux may fail with ENOENT on some file systems if the - directory inode is marked as dead (deleted). POSIX - treats this as a regular end-of-directory condition, so - do not set errno in that case, to indicate success. */ - if (bytes == 0 || errno == ENOENT) - __set_errno (saved_errno); -#if IS_IN (libc) - __libc_lock_unlock (dirp->lock); -#endif - return NULL; + dirp->errcode = ENAMETOOLONG; + continue; } - dirp->size = (size_t) bytes; - /* Reset the offset into the buffer. */ - dirp->offset = 0; - } + /* Overwrite the fixed-sized part. */ + dp = (struct __old_dirent64 *) newdp; + memcpy (dp, &result, offsetof (struct __old_dirent64, d_name)); - dp = (struct __old_dirent64 *) &dirp->data[dirp->offset]; - dirp->offset += dp->d_reclen; - dirp->filepos = dp->d_off; + /* Move the name. */ + _Static_assert (offsetof (struct __old_dirent64, d_name) + <= offsetof (struct dirent64, d_name), + "old struct must be smaller"); + if (offsetof (struct __old_dirent64, d_name) + != offsetof (struct dirent64, d_name)) + memmove (dp->d_name, newdp->d_name, strlen (newdp->d_name) + 1); -#if IS_IN (libc) - __libc_lock_unlock (dirp->lock); -#endif + __set_errno (saved_errno); + break; + } + __libc_lock_unlock (dirp->lock); return dp; } libc_hidden_def (__old_readdir64) diff --git a/sysdeps/unix/sysv/linux/readdir64_r.c b/sysdeps/unix/sysv/linux/readdir64_r.c index e87882ee06..c42a161ffc 100644 --- a/sysdeps/unix/sysv/linux/readdir64_r.c +++ b/sysdeps/unix/sysv/linux/readdir64_r.c @@ -37,7 +37,7 @@ __readdir64_r (DIR *dirp, struct dirent64 *entry, struct dirent64 **result) __libc_lock_lock (dirp->lock); - do + while (1) { if (dirp->offset >= dirp->size) { @@ -79,26 +79,21 @@ __readdir64_r (DIR *dirp, struct dirent64 *entry, struct dirent64 **result) dirp->filepos = dp->d_off; - if (reclen > offsetof (struct dirent64, d_name) + NAME_MAX + 1) + if (reclen <= offsetof (struct dirent64, d_name) + NAME_MAX + 1) + break; + + 
/* The record is very long. It could still fit into the + caller-supplied buffer if we can skip padding at the end. */ + size_t namelen = _D_EXACT_NAMLEN (dp); + if (namelen <= NAME_MAX) { - /* The record is very long. It could still fit into the - caller-supplied buffer if we can skip padding at the - end. */ - size_t namelen = _D_EXACT_NAMLEN (dp); - if (namelen <= NAME_MAX) - reclen = offsetof (struct dirent64, d_name) + namelen + 1; - else - { - /* The name is too long. Ignore this file. */ - dirp->errcode = ENAMETOOLONG; - dp->d_ino = 0; - continue; - } + reclen = offsetof (struct dirent64, d_name) + namelen + 1; + break; } - /* Skip deleted and ignored files. */ + /* The name is too long. Ignore this file. */ + dirp->errcode = ENAMETOOLONG; } - while (dp->d_ino == 0); if (dp != NULL) { @@ -135,91 +130,37 @@ attribute_compat_text_section __old_readdir64_r (DIR *dirp, struct __old_dirent64 *entry, struct __old_dirent64 **result) { - struct __old_dirent64 *dp; - size_t reclen; - const int saved_errno = errno; - int ret; - - __libc_lock_lock (dirp->lock); - - do + while (1) { - if (dirp->offset >= dirp->size) - { - /* We've emptied out our buffer. Refill it. */ - - size_t maxread = dirp->allocation; - ssize_t bytes; - - maxread = dirp->allocation; - - bytes = __old_getdents64 (dirp->fd, dirp->data, maxread); - if (bytes <= 0) - { - /* On some systems getdents fails with ENOENT when the - open directory has been rmdir'd already. POSIX.1 - requires that we treat this condition like normal EOF. */ - if (bytes < 0 && errno == ENOENT) - { - bytes = 0; - __set_errno (saved_errno); - } - if (bytes < 0) - dirp->errcode = errno; - - dp = NULL; - break; - } - dirp->size = (size_t) bytes; + struct dirent64 new_entry; + struct dirent64 *newp; + int ret = __readdir64_r (dirp, &new_entry, &newp); - /* Reset the offset into the buffer. 
*/ - dirp->offset = 0; + if (ret != 0) + return ret; + else if (newp == NULL) + { + *result = NULL; + return 0; } - - dp = (struct __old_dirent64 *) &dirp->data[dirp->offset]; - - reclen = dp->d_reclen; - - dirp->offset += reclen; - - dirp->filepos = dp->d_off; - - if (reclen > offsetof (struct __old_dirent64, d_name) + NAME_MAX + 1) + else { - /* The record is very long. It could still fit into the - caller-supplied buffer if we can skip padding at the - end. */ - size_t namelen = _D_EXACT_NAMLEN (dp); - if (namelen <= NAME_MAX) - reclen = offsetof (struct __old_dirent64, d_name) + namelen + 1; - else + entry->d_ino = newp->d_ino; + if (entry->d_ino != newp->d_ino) { - /* The name is too long. Ignore this file. */ - dirp->errcode = ENAMETOOLONG; - dp->d_ino = 0; + dirp->errcode = EOVERFLOW; continue; } + size_t namelen = strlen (newp->d_name); + entry->d_off = newp->d_off; + entry->d_reclen = (offsetof (struct __old_dirent64, d_name) + + namelen + 1); + entry->d_type = newp->d_type; + memcpy (entry->d_name, newp->d_name, namelen + 1); + *result = entry; + return 0; } - - /* Skip deleted and ignored files. */ } - while (dp->d_ino == 0); - - if (dp != NULL) - { - *result = memcpy (entry, dp, reclen); - entry->d_reclen = reclen; - ret = 0; - } - else - { - *result = NULL; - ret = dirp->errcode; - } - - __libc_lock_unlock (dirp->lock); - - return ret; } compat_symbol (libc, __old_readdir64_r, readdir64_r, GLIBC_2_1); diff --git a/sysdeps/unix/sysv/linux/readdir_r.c b/sysdeps/unix/sysv/linux/readdir_r.c index ffd5262cf5..1d595688f7 100644 --- a/sysdeps/unix/sysv/linux/readdir_r.c +++ b/sysdeps/unix/sysv/linux/readdir_r.c @@ -25,14 +25,22 @@ __readdir_r (DIR *dirp, struct dirent *entry, struct dirent **result) { struct dirent *dp; size_t reclen; + int saved_errno = errno; __libc_lock_lock (dirp->lock); while (1) { + /* If errno is changed from 0, the NULL return value indicates + an actual error. It overrides a pending ENAMETOOLONG error. 
*/ + __set_errno (0); dp = __readdir_unlocked (dirp); if (dp == NULL) - break; + { + if (errno != 0) + dirp->errcode = errno; + break; + } reclen = dp->d_reclen; if (reclen <= offsetof (struct dirent, d_name) + NAME_MAX + 1) @@ -61,6 +69,7 @@ __readdir_r (DIR *dirp, struct dirent *entry, struct dirent **result) __libc_lock_unlock (dirp->lock); + __set_errno (saved_errno); return dp != NULL ? 0 : dirp->errcode; } diff --git a/sysdeps/unix/sysv/linux/riscv/rv32/arch-syscall.h b/sysdeps/unix/sysv/linux/riscv/rv32/arch-syscall.h index 7315d164d6..eb9e57b028 100644 --- a/sysdeps/unix/sysv/linux/riscv/rv32/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/riscv/rv32/arch-syscall.h @@ -153,6 +153,7 @@ #define __NR_mq_timedsend_time64 418 #define __NR_mq_unlink 181 #define __NR_mremap 216 +#define __NR_mseal 462 #define __NR_msgctl 187 #define __NR_msgget 186 #define __NR_msgrcv 188 diff --git a/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist b/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist index 6397a9cb91..4187241f50 100644 --- a/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist +++ b/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist @@ -2512,3 +2512,5 @@ GLIBC_2.39 stdc_trailing_zeros_ul F GLIBC_2.39 stdc_trailing_zeros_ull F GLIBC_2.39 stdc_trailing_zeros_us F GLIBC_2.40 __riscv_hwprobe F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F diff --git a/sysdeps/unix/sysv/linux/riscv/rv64/arch-syscall.h b/sysdeps/unix/sysv/linux/riscv/rv64/arch-syscall.h index 31a1130db9..1eac18e582 100644 --- a/sysdeps/unix/sysv/linux/riscv/rv64/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/riscv/rv64/arch-syscall.h @@ -158,6 +158,7 @@ #define __NR_mq_timedsend 182 #define __NR_mq_unlink 181 #define __NR_mremap 216 +#define __NR_mseal 462 #define __NR_msgctl 187 #define __NR_msgget 186 #define __NR_msgrcv 188 diff --git a/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist b/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist index 71bbf94f66..8935beccac 100644 --- 
a/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist +++ b/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist @@ -2712,3 +2712,5 @@ GLIBC_2.39 stdc_trailing_zeros_ul F GLIBC_2.39 stdc_trailing_zeros_ull F GLIBC_2.39 stdc_trailing_zeros_us F GLIBC_2.40 __riscv_hwprobe F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F diff --git a/sysdeps/unix/sysv/linux/riscv/syscall_cancel.S b/sysdeps/unix/sysv/linux/riscv/syscall_cancel.S new file mode 100644 index 0000000000..742c748d09 --- /dev/null +++ b/sysdeps/unix/sysv/linux/riscv/syscall_cancel.S @@ -0,0 +1,67 @@ +/* Cancellable syscall wrapper. Linux/riscv version. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <descr-const.h> + +/* long int __syscall_cancel_arch (int *cancelhandling, + __syscall_arg_t nr, + __syscall_arg_t arg1, + __syscall_arg_t arg2, + __syscall_arg_t arg3, + __syscall_arg_t arg4, + __syscall_arg_t arg5, + __syscall_arg_t arg6) */ + +#ifdef SHARED + .option pic +#else + .option nopic +#endif + +ENTRY (__syscall_cancel_arch) + + .globl __syscall_cancel_arch_start +__syscall_cancel_arch_start: + lw t1, 0(a0) + /* if (*ch & CANCELED_BITMASK) */ + andi t1, t1, TCB_CANCELED_BITMASK + bne t1, zero, 1f + + mv t3, a1 + mv a0, a2 + mv a1, a3 + mv a2, a4 + mv a3, a5 + mv a4, a6 + mv a5, a7 + mv a7, t3 + scall + + .globl __syscall_cancel_arch_end +__syscall_cancel_arch_end: + ret + +1: + addi sp, sp, -16 + cfi_def_cfa_offset (16) + REG_S ra, (16-SZREG)(sp) + cfi_offset (ra, -SZREG) + call __syscall_do_cancel + +END (__syscall_cancel_arch) diff --git a/sysdeps/unix/sysv/linux/s390/s390-32/arch-syscall.h b/sysdeps/unix/sysv/linux/s390/s390-32/arch-syscall.h index cf8569304d..464eca58b2 100644 --- a/sysdeps/unix/sysv/linux/s390/s390-32/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/s390/s390-32/arch-syscall.h @@ -214,6 +214,7 @@ #define __NR_mq_timedsend_time64 418 #define __NR_mq_unlink 272 #define __NR_mremap 163 +#define __NR_mseal 462 #define __NR_msgctl 402 #define __NR_msgget 399 #define __NR_msgrcv 401 diff --git a/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist b/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist index a7467e2850..e69dc7ccf6 100644 --- a/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist +++ b/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist @@ -3136,6 +3136,8 @@ GLIBC_2.4 wcstold F GLIBC_2.4 wcstold_l F GLIBC_2.4 wprintf F GLIBC_2.4 wscanf F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/s390/s390-32/syscall_cancel.S b/sysdeps/unix/sysv/linux/s390/s390-32/syscall_cancel.S new 
file mode 100644 index 0000000000..9e0ad2a635 --- /dev/null +++ b/sysdeps/unix/sysv/linux/s390/s390-32/syscall_cancel.S @@ -0,0 +1,62 @@ +/* Cancellable syscall wrapper. Linux/s390 version. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <descr-const.h> + +/* long int __syscall_cancel_arch (int *cancelhandling, + __syscall_arg_t nr, + __syscall_arg_t arg1, + __syscall_arg_t arg2, + __syscall_arg_t arg3, + __syscall_arg_t arg4, + __syscall_arg_t arg5, + __syscall_arg_t arg6) */ + +ENTRY (__syscall_cancel_arch) + stm %r6,%r7,24(%r15) + cfi_offset (%r6, -72) + cfi_offset (%r7, -68) + + .globl __syscall_cancel_arch_start +__syscall_cancel_arch_start: + /* if (*cancelhandling & CANCELED_BITMASK) + __syscall_do_cancel() */ + tm 3(%r2),TCB_CANCELED_BITMASK + jne 1f + + /* Issue a 6 argument syscall, the nr [%r1] being the syscall + number. 
*/ + lr %r1,%r3 + lr %r2,%r4 + lr %r3,%r5 + lr %r4,%r6 + lm %r5,%r7,96(%r15) + svc 0 + + .globl __syscall_cancel_arch_end +__syscall_cancel_arch_end: + lm %r6,%r7,24(%r15) + cfi_remember_state + cfi_restore (%r7) + cfi_restore (%r6) + br %r14 +1: + cfi_restore_state + jg __syscall_do_cancel +END (__syscall_cancel_arch) diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/arch-syscall.h b/sysdeps/unix/sysv/linux/s390/s390-64/arch-syscall.h index f3536ed03f..57842702fd 100644 --- a/sysdeps/unix/sysv/linux/s390/s390-64/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/s390/s390-64/arch-syscall.h @@ -185,6 +185,7 @@ #define __NR_mq_timedsend 273 #define __NR_mq_unlink 272 #define __NR_mremap 163 +#define __NR_mseal 462 #define __NR_msgctl 402 #define __NR_msgget 399 #define __NR_msgrcv 401 diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist b/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist index fd1cb2972d..7d860001d8 100644 --- a/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist +++ b/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist @@ -2929,6 +2929,8 @@ GLIBC_2.4 wcstold F GLIBC_2.4 wcstold_l F GLIBC_2.4 wprintf F GLIBC_2.4 wscanf F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/syscall_cancel.S b/sysdeps/unix/sysv/linux/s390/s390-64/syscall_cancel.S new file mode 100644 index 0000000000..e1620add6a --- /dev/null +++ b/sysdeps/unix/sysv/linux/s390/s390-64/syscall_cancel.S @@ -0,0 +1,62 @@ +/* Cancellable syscall wrapper. Linux/s390x version. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <descr-const.h> + +/* long int __syscall_cancel_arch (int *cancelhandling, + __syscall_arg_t nr, + __syscall_arg_t arg1, + __syscall_arg_t arg2, + __syscall_arg_t arg3, + __syscall_arg_t arg4, + __syscall_arg_t arg5, + __syscall_arg_t arg6) */ + +ENTRY (__syscall_cancel_arch) + stmg %r6,%r7,48(%r15) + cfi_offset (%r6, -112) + cfi_offset (%r7, -104) + + .globl __syscall_cancel_arch_start +__syscall_cancel_arch_start: + /* if (*cancelhandling & CANCELED_BITMASK) + __syscall_do_cancel() */ + tm 3(%r2),TCB_CANCELED_BITMASK + jne 1f + + /* Issue a 6 argument syscall, the nr [%r1] being the syscall + number. */ + lgr %r1,%r3 + lgr %r2,%r4 + lgr %r3,%r5 + lgr %r4,%r6 + lmg %r5,%r7,160(%r15) + svc 0 + + .globl __syscall_cancel_arch_end +__syscall_cancel_arch_end: + lmg %r6,%r7,48(%r15) + cfi_remember_state + cfi_restore (%r7) + cfi_restore (%r6) + br %r14 +1: + cfi_restore_state + jg __syscall_do_cancel +END (__syscall_cancel_arch) diff --git a/sysdeps/unix/sysv/linux/sched_getattr.c b/sysdeps/unix/sysv/linux/sched_getattr.c new file mode 100644 index 0000000000..e6b9970fcb --- /dev/null +++ b/sysdeps/unix/sysv/linux/sched_getattr.c @@ -0,0 +1,27 @@ +/* Reading scheduling policy and attributes. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <sched.h> +#include <sysdep.h> + +int +sched_getattr (pid_t pid, struct sched_attr *attr, unsigned int size, + unsigned int flags) +{ + return INLINE_SYSCALL_CALL (sched_getattr, pid, attr, size, flags); +} diff --git a/sysdeps/unix/sysv/linux/sched_setattr.c b/sysdeps/unix/sysv/linux/sched_setattr.c new file mode 100644 index 0000000000..25403fb454 --- /dev/null +++ b/sysdeps/unix/sysv/linux/sched_setattr.c @@ -0,0 +1,26 @@ +/* Setting scheduling policy and attributes. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <sched.h> +#include <sysdep.h> + +int +sched_setattr (pid_t pid, struct sched_attr *attr, unsigned int flags) +{ + return INLINE_SYSCALL_CALL (sched_setattr, pid, attr, flags); +} diff --git a/sysdeps/unix/sysv/linux/sh/arch-syscall.h b/sysdeps/unix/sysv/linux/sh/arch-syscall.h index 0c88bf10c7..165ba017c7 100644 --- a/sysdeps/unix/sysv/linux/sh/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/sh/arch-syscall.h @@ -206,6 +206,7 @@ #define __NR_mq_timedsend_time64 418 #define __NR_mq_unlink 278 #define __NR_mremap 163 +#define __NR_mseal 462 #define __NR_msgctl 402 #define __NR_msgget 399 #define __NR_msgrcv 401 @@ -378,6 +379,7 @@ #define __NR_symlinkat 304 #define __NR_sync 36 #define __NR_sync_file_range 314 +#define __NR_sync_file_range2 388 #define __NR_syncfs 362 #define __NR_sysfs 135 #define __NR_sysinfo 116 diff --git a/sysdeps/unix/sysv/linux/sh/be/libc.abilist b/sysdeps/unix/sysv/linux/sh/be/libc.abilist index ff6e6b1a13..fcb8161841 100644 --- a/sysdeps/unix/sysv/linux/sh/be/libc.abilist +++ b/sysdeps/unix/sysv/linux/sh/be/libc.abilist @@ -2828,6 +2828,8 @@ GLIBC_2.4 sys_errlist D 0x210 GLIBC_2.4 sys_nerr D 0x4 GLIBC_2.4 unlinkat F GLIBC_2.4 unshare F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/sh/le/libc.abilist b/sysdeps/unix/sysv/linux/sh/le/libc.abilist index 449d92bbc5..3fd078d125 100644 --- a/sysdeps/unix/sysv/linux/sh/le/libc.abilist +++ b/sysdeps/unix/sysv/linux/sh/le/libc.abilist @@ -2825,6 +2825,8 @@ GLIBC_2.4 sys_errlist D 0x210 GLIBC_2.4 sys_nerr D 0x4 GLIBC_2.4 unlinkat F GLIBC_2.4 unshare F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/sh/syscall_cancel.S b/sysdeps/unix/sysv/linux/sh/syscall_cancel.S new file mode 100644 index 0000000000..2afd23928d --- /dev/null +++ 
b/sysdeps/unix/sysv/linux/sh/syscall_cancel.S @@ -0,0 +1,126 @@ +/* Cancellable syscall wrapper. Linux/sh version. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <descr-const.h> + +/* long int __syscall_cancel_arch (int *cancelhandling, + long int nr, + long int arg1, + long int arg2, + long int arg3, + long int arg4, + long int arg5, + long int arg6) */ + +ENTRY (__syscall_cancel_arch) + +#ifdef SHARED + mov.l r12,@-r15 + cfi_def_cfa_offset (4) + cfi_offset (12, -4) + mova L(GT),r0 + mov.l L(GT),r12 + sts.l pr,@-r15 + cfi_def_cfa_offset (8) + cfi_offset (17, -8) + add r0,r12 +#else + sts.l pr,@-r15 + cfi_def_cfa_offset (4) + cfi_offset (17, -4) +#endif + + .globl __syscall_cancel_arch_start +__syscall_cancel_arch_start: + + /* if (*cancelhandling & CANCELED_BITMASK) + __syscall_do_cancel() */ + mov.l @r4,r0 + tst #TCB_CANCELED_BITMASK,r0 + bf/s 1f + + /* Issue a 6 argument syscall. 
*/ + mov r5,r3 + mov r6,r4 + mov r7,r5 +#ifdef SHARED + mov.l @(8,r15),r6 + mov.l @(12,r15),r7 + mov.l @(16,r15),r0 + mov.l @(20,r15),r1 +#else + mov.l @(4,r15),r6 + mov.l @(8,r15),r7 + mov.l @(12,r15),r0 + mov.l @(16,r15),r1 +#endif + trapa #0x16 + + .globl __syscall_cancel_arch_end +__syscall_cancel_arch_end: + + /* The additional or is a workaround for a hardware issue: + http://documentation.renesas.com/eng/products/mpumcu/tu/tnsh7456ae.pdf + */ + or r0,r0 + or r0,r0 + or r0,r0 + or r0,r0 + or r0,r0 + + lds.l @r15+,pr + cfi_remember_state + cfi_restore (17) +#ifdef SHARED + cfi_def_cfa_offset (4) + rts + mov.l @r15+,r12 + cfi_def_cfa_offset (0) + cfi_restore (12) + .align 1 +1: + cfi_restore_state + mov.l L(SC),r1 + bsrf r1 +L(M): + nop + + .align 2 +L(GT): + .long _GLOBAL_OFFSET_TABLE_ +L(SC): + .long __syscall_do_cancel-(L(M)+2) +#else + cfi_def_cfa_offset (0) + rts + nop + + .align 1 +1: + cfi_restore_state + mov.l 2f,r1 + jsr @r1 + nop + + .align 2 +2: + .long __syscall_do_cancel +#endif + +END (__syscall_cancel_arch) diff --git a/sysdeps/unix/sysv/linux/socketcall.h b/sysdeps/unix/sysv/linux/socketcall.h index 537fa43678..0efa5ee9e4 100644 --- a/sysdeps/unix/sysv/linux/socketcall.h +++ b/sysdeps/unix/sysv/linux/socketcall.h @@ -88,14 +88,33 @@ sc_ret; \ }) - -#define SOCKETCALL_CANCEL(name, args...) 
\ - ({ \ - int oldtype = LIBC_CANCEL_ASYNC (); \ - long int sc_ret = __SOCKETCALL (SOCKOP_##name, args); \ - LIBC_CANCEL_RESET (oldtype); \ - sc_ret; \ - }) +#define __SOCKETCALL_CANCEL1(__name, __a1) \ + SYSCALL_CANCEL (socketcall, __name, \ + ((long int [1]) { (long int) __a1 })) +#define __SOCKETCALL_CANCEL2(__name, __a1, __a2) \ + SYSCALL_CANCEL (socketcall, __name, \ + ((long int [2]) { (long int) __a1, (long int) __a2 })) +#define __SOCKETCALL_CANCEL3(__name, __a1, __a2, __a3) \ + SYSCALL_CANCEL (socketcall, __name, \ + ((long int [3]) { (long int) __a1, (long int) __a2, (long int) __a3 })) +#define __SOCKETCALL_CANCEL4(__name, __a1, __a2, __a3, __a4) \ + SYSCALL_CANCEL (socketcall, __name, \ + ((long int [4]) { (long int) __a1, (long int) __a2, (long int) __a3, \ + (long int) __a4 })) +#define __SOCKETCALL_CANCEL5(__name, __a1, __a2, __a3, __a4, __a5) \ + SYSCALL_CANCEL (socketcall, __name, \ + ((long int [5]) { (long int) __a1, (long int) __a2, (long int) __a3, \ + (long int) __a4, (long int) __a5 })) +#define __SOCKETCALL_CANCEL6(__name, __a1, __a2, __a3, __a4, __a5, __a6) \ + SYSCALL_CANCEL (socketcall, __name, \ + ((long int [6]) { (long int) __a1, (long int) __a2, (long int) __a3, \ + (long int) __a4, (long int) __a5, (long int) __a6 })) + +#define __SOCKETCALL_CANCEL(...) __SOCKETCALL_DISP (__SOCKETCALL_CANCEL,\ + __VA_ARGS__) + +#define SOCKETCALL_CANCEL(name, args...) 
\ + __SOCKETCALL_CANCEL (SOCKOP_##name, args) #endif /* sys/socketcall.h */ diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/arch-syscall.h b/sysdeps/unix/sysv/linux/sparc/sparc32/arch-syscall.h index 19fa614624..3bad6f102f 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc32/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/arch-syscall.h @@ -211,6 +211,7 @@ #define __NR_mq_timedsend_time64 418 #define __NR_mq_unlink 274 #define __NR_mremap 250 +#define __NR_mseal 462 #define __NR_msgctl 402 #define __NR_msgget 399 #define __NR_msgrcv 401 diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S b/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S index 748d25fcfe..c84244f56b 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S @@ -28,6 +28,13 @@ .text ENTRY (__clone) save %sp,-96,%sp + save %sp,-96,%sp +#ifdef __sparcv9 + flushw +#else + ta 3 +#endif + restore cfi_def_cfa_register(%fp) cfi_window_save cfi_register(%o7, %i7) diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist b/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist index e615be759a..1ce1fe9da7 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist @@ -3157,6 +3157,8 @@ GLIBC_2.4 wcstold F GLIBC_2.4 wcstold_l F GLIBC_2.4 wprintf F GLIBC_2.4 wscanf F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/syscall_cancel.S b/sysdeps/unix/sysv/linux/sparc/sparc32/syscall_cancel.S new file mode 100644 index 0000000000..45c6ae7a86 --- /dev/null +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/syscall_cancel.S @@ -0,0 +1,80 @@ +/* Cancellable syscall wrapper. Linux/sparc32 version. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <descr-const.h> + +/* long int __syscall_cancel_arch (int *cancelhandling, + long int nr, + long int arg1, + long int arg2, + long int arg3, + long int arg4, + long int arg5, + long int arg6) */ + +ENTRY (__syscall_cancel_arch) + save %sp, -96, %sp + + cfi_window_save + cfi_register (%o7, %i7) + cfi_def_cfa_register (%fp) + + .globl __syscall_cancel_arch_start +__syscall_cancel_arch_start: + + /* if (*cancelhandling & CANCELED_BITMASK) + __syscall_do_cancel() */ + ld [%i0], %g2 + andcc %g2, TCB_CANCELED_BITMASK, %g0 +#ifdef __sparcv9 + bne,pn %icc, 2f +#else + bne 2f +#endif + /* Issue a 6 argument syscall. 
*/ + mov %i2, %o0 + mov %i3, %o1 + mov %i4, %o2 + mov %i5, %o3 + ld [%fp+92], %o4 + ld [%fp+96], %o5 + mov %i1, %g1 + ta 0x10 + + .globl __syscall_cancel_arch_end +__syscall_cancel_arch_end: + bcc 1f + nop + sub %g0, %o0, %o0 +1: + mov %o0, %i0 +#ifdef __sparcv9 + return %i7+8 + nop +#else + jmp %i7+8 + restore +#endif + +2: + call __syscall_do_cancel, 0 + nop + nop + +END (__syscall_cancel_arch) diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h b/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h index d2d68f5312..c2ffbb5c8f 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h @@ -107,6 +107,7 @@ ENTRY(name); \ #else /* __ASSEMBLER__ */ #define __SYSCALL_STRING \ + "mov %[scn], %%g1;" \ "ta 0x10;" \ "bcc 1f;" \ " nop;" \ @@ -114,7 +115,7 @@ ENTRY(name); \ "1:" #define __SYSCALL_CLOBBERS \ - "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ + "g1", "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", \ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", \ diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/arch-syscall.h b/sysdeps/unix/sysv/linux/sparc/sparc64/arch-syscall.h index 18516f20cb..98e1437920 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc64/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/arch-syscall.h @@ -192,6 +192,7 @@ #define __NR_mq_timedsend 275 #define __NR_mq_unlink 274 #define __NR_mremap 250 +#define __NR_mseal 462 #define __NR_msgctl 402 #define __NR_msgget 399 #define __NR_msgrcv 401 diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/clone.S b/sysdeps/unix/sysv/linux/sparc/sparc64/clone.S index e5ff2cf1a0..370d51fda2 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc64/clone.S +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/clone.S @@ -32,6 +32,9 @@ ENTRY (__clone) save %sp, -192, %sp + save %sp, -192, %sp + flushw + restore cfi_def_cfa_register(%fp) 
cfi_window_save cfi_register(%o7, %i7) diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist b/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist index bd36431dd7..07507b86f6 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist @@ -2793,6 +2793,8 @@ GLIBC_2.4 sys_errlist D 0x430 GLIBC_2.4 sys_nerr D 0x4 GLIBC_2.4 unlinkat F GLIBC_2.4 unshare F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/syscall_cancel.S b/sysdeps/unix/sysv/linux/sparc/sparc64/syscall_cancel.S new file mode 100644 index 0000000000..6c8d1330cb --- /dev/null +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/syscall_cancel.S @@ -0,0 +1,74 @@ +/* Cancellable syscall wrapper. Linux/sparc64 version. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <descr-const.h> + + .register %g2, #scratch + +/* long int __syscall_cancel_arch (int *cancelhandling, + long int nr, + long int arg1, + long int arg2, + long int arg3, + long int arg4, + long int arg5, + long int arg6) */ + +ENTRY (__syscall_cancel_arch) + save %sp, -176, %sp + + cfi_window_save + cfi_register (%o7, %i7) + cfi_def_cfa_register (%fp) + + .globl __syscall_cancel_arch_start +__syscall_cancel_arch_start: + + /* if (*cancelhandling & CANCELED_BITMASK) + __syscall_do_cancel() */ + lduw [%i0], %g2 + andcc %g2, TCB_CANCELED_BITMASK, %g0 + bne,pn %xcc, 2f + /* Issue a 6 argument syscall. */ + mov %i2, %o0 + mov %i3, %o1 + mov %i4, %o2 + mov %i5, %o3 + ldx [%fp + STACK_BIAS + 176], %o4 + ldx [%fp + STACK_BIAS + 184], %o5 + mov %i1, %g1 + ta 0x6d + + .global __syscall_cancel_arch_end +__syscall_cancel_arch_end: + + bcc,pt %xcc, 1f + nop + sub %g0, %o0, %o0 +1: + mov %o0, %i0 + return %i7+8 + nop + +2: + call __syscall_do_cancel, 0 + nop + nop + +END (__syscall_cancel_arch) diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h b/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h index 96047424e9..5598fab08a 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h @@ -106,6 +106,7 @@ ENTRY(name); \ #else /* __ASSEMBLER__ */ #define __SYSCALL_STRING \ + "mov %[scn], %%g1;" \ "ta 0x6d;" \ "bcc,pt %%xcc, 1f;" \ " nop;" \ @@ -113,7 +114,7 @@ ENTRY(name); \ "1:" #define __SYSCALL_CLOBBERS \ - "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ + "g1", "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", \ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", \ diff --git a/sysdeps/unix/sysv/linux/sparc/sysdep.h b/sysdeps/unix/sysv/linux/sparc/sysdep.h index dcabb57fe2..c287740a8c 100644 --- a/sysdeps/unix/sysv/linux/sparc/sysdep.h +++ 
b/sysdeps/unix/sysv/linux/sparc/sysdep.h @@ -50,97 +50,109 @@ #undef INTERNAL_SYSCALL_NCS #define INTERNAL_SYSCALL_NCS(name, nr, args...) \ - internal_syscall##nr(__SYSCALL_STRING, name, args) + _internal_syscall##nr(__SYSCALL_STRING, "p", name, args) -#define internal_syscall0(string,name,dummy...) \ +#define _internal_syscall0(string,nc,name,dummy...) \ ({ \ - register long int __g1 __asm__ ("g1") = (name); \ register long __o0 __asm__ ("o0"); \ + long int _name = (long int) (name); \ __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1) : \ + [scn] nc (_name) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall0(string,name,args...) \ + _internal_syscall0(string, "i", name, args) -#define internal_syscall1(string,name,arg1) \ +#define _internal_syscall1(string,nc,name,arg1) \ ({ \ long int _arg1 = (long int) (arg1); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0) : \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall1(string,name,args...) \ + _internal_syscall1(string, "i", name, args) -#define internal_syscall2(string,name,arg1,arg2) \ +#define _internal_syscall2(string,nc,name,arg1,arg2) \ ({ \ long int _arg1 = (long int) (arg1); \ long int _arg2 = (long int) (arg2); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0), "r" (__o1) : \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name), "r" (__o1) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall2(string,name,args...) 
\ + _internal_syscall2(string, "i", name, args) -#define internal_syscall3(string,name,arg1,arg2,arg3) \ +#define _internal_syscall3(string,nc,name,arg1,arg2,arg3) \ ({ \ long int _arg1 = (long int) (arg1); \ long int _arg2 = (long int) (arg2); \ long int _arg3 = (long int) (arg3); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ register long int __o2 __asm__ ("o2") = _arg3; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0), "r" (__o1), \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name), "r" (__o1), \ "r" (__o2) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall3(string,name,args...) \ + _internal_syscall3(string, "i", name, args) -#define internal_syscall4(string,name,arg1,arg2,arg3,arg4) \ +#define _internal_syscall4(string,nc,name,arg1,arg2,arg3,arg4) \ ({ \ long int _arg1 = (long int) (arg1); \ long int _arg2 = (long int) (arg2); \ long int _arg3 = (long int) (arg3); \ long int _arg4 = (long int) (arg4); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ register long int __o2 __asm__ ("o2") = _arg3; \ register long int __o3 __asm__ ("o3") = _arg4; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0), "r" (__o1), \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name), "r" (__o1), \ "r" (__o2), "r" (__o3) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall4(string,name,args...) 
\ + _internal_syscall4(string, "i", name, args) -#define internal_syscall5(string,name,arg1,arg2,arg3,arg4,arg5) \ +#define _internal_syscall5(string,nc,name,arg1,arg2,arg3,arg4,arg5) \ ({ \ long int _arg1 = (long int) (arg1); \ long int _arg2 = (long int) (arg2); \ long int _arg3 = (long int) (arg3); \ long int _arg4 = (long int) (arg4); \ long int _arg5 = (long int) (arg5); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ register long int __o2 __asm__ ("o2") = _arg3; \ register long int __o3 __asm__ ("o3") = _arg4; \ register long int __o4 __asm__ ("o4") = _arg5; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0), "r" (__o1), \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name), "r" (__o1), \ "r" (__o2), "r" (__o3), "r" (__o4) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall5(string,name,args...) \ + _internal_syscall5(string, "i", name, args) -#define internal_syscall6(string,name,arg1,arg2,arg3,arg4,arg5,arg6) \ +#define _internal_syscall6(string,nc,name,arg1,arg2,arg3,arg4,arg5,arg6)\ ({ \ long int _arg1 = (long int) (arg1); \ long int _arg2 = (long int) (arg2); \ @@ -148,20 +160,22 @@ long int _arg4 = (long int) (arg4); \ long int _arg5 = (long int) (arg5); \ long int _arg6 = (long int) (arg6); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ register long int __o2 __asm__ ("o2") = _arg3; \ register long int __o3 __asm__ ("o3") = _arg4; \ register long int __o4 __asm__ ("o4") = _arg5; \ register long int __o5 __asm__ ("o5") = _arg6; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0), "r" (__o1), \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name), "r" (__o1), \ "r" (__o2), "r" (__o3), "r" (__o4), 
\ "r" (__o5) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall6(string,name,args...) \ + _internal_syscall6(string, "i", name, args) #define INLINE_CLONE_SYSCALL(arg1,arg2,arg3,arg4,arg5) \ ({ \ @@ -170,15 +184,15 @@ long int _arg3 = (long int) (arg3); \ long int _arg4 = (long int) (arg4); \ long int _arg5 = (long int) (arg5); \ + long int _name = __NR_clone; \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ register long int __o2 __asm__ ("o2") = _arg3; \ register long int __o3 __asm__ ("o3") = _arg4; \ register long int __o4 __asm__ ("o4") = _arg5; \ - register long int __g1 __asm__ ("g1") = __NR_clone; \ __asm __volatile (__SYSCALL_STRING : \ "=r" (__o0), "=r" (__o1) : \ - "r" (__g1), "0" (__o0), "1" (__o1), \ + [scn] "i" (_name), "0" (__o0), "1" (__o1), \ "r" (__o2), "r" (__o3), "r" (__o4) : \ __SYSCALL_CLOBBERS); \ if (__glibc_unlikely ((unsigned long int) (__o0) > -4096UL)) \ diff --git a/sysdeps/unix/sysv/linux/spawni.c b/sysdeps/unix/sysv/linux/spawni.c index f57e92815e..1edf62c6d7 100644 --- a/sysdeps/unix/sysv/linux/spawni.c +++ b/sysdeps/unix/sysv/linux/spawni.c @@ -383,7 +383,11 @@ __spawnix (int *pid, const char *file, args.pidfd = 0; args.xflags = xflags; - internal_signal_block_all (&args.oldmask); + /* Avoid the potential issues if caller sets a SIG_IGN for SIGABRT, calls + abort, and another thread issues posix_spawn just after the sigaction + returns. With default options (not setting POSIX_SPAWN_SETSIGDEF), the + process can still see SIG_DFL for SIGABRT, where it should be SIG_IGN. */ + __abort_lock_rdlock (&args.oldmask); /* The clone flags used will create a new child that will run in the same memory space (CLONE_VM) and the execution of calling thread will be @@ -474,7 +478,7 @@ __spawnix (int *pid, const char *file, if ((ec == 0) && (pid != NULL)) *pid = use_pidfd ? 
args.pidfd : new_pid; - internal_signal_restore_set (&args.oldmask); + __abort_lock_unlock (&args.oldmask); __pthread_setcancelstate (state, NULL); diff --git a/sysdeps/unix/sysv/linux/sys/pidfd.h b/sysdeps/unix/sysv/linux/sys/pidfd.h index 9f88d297e8..85d976939b 100644 --- a/sysdeps/unix/sysv/linux/sys/pidfd.h +++ b/sysdeps/unix/sysv/linux/sys/pidfd.h @@ -20,6 +20,7 @@ #include <fcntl.h> #include <bits/types/siginfo_t.h> +#include <sys/ioctl.h> #define PIDFD_NONBLOCK O_NONBLOCK #define PIDFD_THREAD O_EXCL @@ -28,6 +29,19 @@ #define PIDFD_SIGNAL_THREAD_GROUP (1UL << 1) #define PIDFD_SIGNAL_PROCESS_GROUP (1UL << 2) +#define PIDFS_IOCTL_MAGIC 0xFF + +#define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1) +#define PIDFD_GET_IPC_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 2) +#define PIDFD_GET_MNT_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 3) +#define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4) +#define PIDFD_GET_PID_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 5) +#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 6) +#define PIDFD_GET_TIME_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 7) +#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8) +#define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9) +#define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10) + /* Returns a file descriptor that refers to the process PID. The close-on-exec is set on the file descriptor. */ extern int pidfd_open (__pid_t __pid, unsigned int __flags) __THROW; diff --git a/sysdeps/unix/sysv/linux/syscall-names.list b/sysdeps/unix/sysv/linux/syscall-names.list index 672d39eaad..aa5b479e2a 100644 --- a/sysdeps/unix/sysv/linux/syscall-names.list +++ b/sysdeps/unix/sysv/linux/syscall-names.list @@ -21,8 +21,8 @@ # This file can list all potential system calls. The names are only # used if the installed kernel headers also provide them. -# The list of system calls is current as of Linux 6.9. -kernel 6.9 +# The list of system calls is current as of Linux 6.11. 
+kernel 6.11 FAST_atomic_update FAST_cmpxchg @@ -287,6 +287,7 @@ mq_timedsend mq_timedsend_time64 mq_unlink mremap +mseal msgctl msgget msgrcv @@ -652,6 +653,7 @@ uname unlink unlinkat unshare +uretprobe uselib userfaultfd usr26 diff --git a/sysdeps/unix/sysv/linux/syscall_cancel.c b/sysdeps/unix/sysv/linux/syscall_cancel.c new file mode 100644 index 0000000000..5fa0706486 --- /dev/null +++ b/sysdeps/unix/sysv/linux/syscall_cancel.c @@ -0,0 +1,73 @@ +/* Pthread cancellation syscall bridge. Default Linux version. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <pthreadP.h> + +#warning "This implementation should be use just as reference or for bootstrapping" + +/* This is the generic version of the cancellable syscall code which + adds the label guards (__syscall_cancel_arch_{start,end}) used by the SIGCANCEL + handler to check if the cancelled syscall has side-effects that need to be + returned to the caller. + + This implementation should be used as a reference one to document the + implementation constraints: + + 1. The __syscall_cancel_arch_start should point just before the test + that thread is already cancelled, + 2. The __syscall_cancel_arch_end should point to the immediate next + instruction after the syscall one.
+ 3. It should return the syscall value or a negative result if it has + failed, similar to INTERNAL_SYSCALL_CALL. + + The __syscall_cancel_arch_end one is because the kernel will signal + interrupted syscall with side effects by setting the signal frame program + counter (on the ucontext_t third argument from SA_SIGINFO signal handler) + right after the syscall instruction. + + For some architectures, the INTERNAL_SYSCALL_NCS macro uses more instructions + to get the error condition from kernel (as for powerpc and sparc that + checks for the conditional register), or uses an out of the line helper + (ARM thumb), or uses a kernel helper gate (i686 or ia64). In this case + the architecture should either adjust the macro or provide a custom + __syscall_cancel_arch implementation. */ + +long int +__syscall_cancel_arch (volatile int *ch, __syscall_arg_t nr, + __syscall_arg_t a1, __syscall_arg_t a2, + __syscall_arg_t a3, __syscall_arg_t a4, + __syscall_arg_t a5, __syscall_arg_t a6 + __SYSCALL_CANCEL7_ARG_DEF) +{ +#define ADD_LABEL(__label) \ + asm volatile ( \ + ".global " __label "\t\n" \ + __label ":\n"); + + ADD_LABEL ("__syscall_cancel_arch_start"); + if (__glibc_unlikely (*ch & CANCELED_BITMASK)) + __syscall_do_cancel(); + + long int result = INTERNAL_SYSCALL_NCS_CALL (nr, a1, a2, a3, a4, a5, a6 + __SYSCALL_CANCEL7_ARG7); + ADD_LABEL ("__syscall_cancel_arch_end"); + if (__glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (result))) + return -INTERNAL_SYSCALL_ERRNO (result); + return result; +} diff --git a/sysdeps/unix/sysv/linux/syscalls.list b/sysdeps/unix/sysv/linux/syscalls.list index 9ac42c3436..f1cfe8dc13 100644 --- a/sysdeps/unix/sysv/linux/syscalls.list +++ b/sysdeps/unix/sysv/linux/syscalls.list @@ -102,8 +102,8 @@ name_to_handle_at EXTRA name_to_handle_at i:isppi name_to_handle_at setns EXTRA setns i:ii setns memfd_create EXTRA memfd_create i:si memfd_create -pkey_alloc EXTRA pkey_alloc i:ii pkey_alloc -pkey_free EXTRA pkey_free i:i pkey_free +pkey_alloc EXTRA
pkey_alloc i:ii __pkey_alloc pkey_alloc +pkey_free EXTRA pkey_free i:i __pkey_free pkey_free gettid EXTRA gettid Ei: __gettid gettid tgkill EXTRA tgkill i:iii __tgkill tgkill close_range - close_range i:iii __close_range close_range diff --git a/sysdeps/unix/sysv/linux/sysdep-cancel.h b/sysdeps/unix/sysv/linux/sysdep-cancel.h index c48a50fa88..3f1543fec2 100644 --- a/sysdeps/unix/sysv/linux/sysdep-cancel.h +++ b/sysdeps/unix/sysv/linux/sysdep-cancel.h @@ -21,17 +21,5 @@ #define _SYSDEP_CANCEL_H #include <sysdep.h> -#include <tls.h> -#include <errno.h> - -/* Set cancellation mode to asynchronous. */ -extern int __pthread_enable_asynccancel (void); -libc_hidden_proto (__pthread_enable_asynccancel) -#define LIBC_CANCEL_ASYNC() __pthread_enable_asynccancel () - -/* Reset to previous cancellation mode. */ -extern void __pthread_disable_asynccancel (int oldtype); -libc_hidden_proto (__pthread_disable_asynccancel) -#define LIBC_CANCEL_RESET(oldtype) __pthread_disable_asynccancel (oldtype) #endif diff --git a/sysdeps/unix/sysv/linux/time.c b/sysdeps/unix/sysv/linux/time.c index f8b0cee41c..a56ef6fa30 100644 --- a/sysdeps/unix/sysv/linux/time.c +++ b/sysdeps/unix/sysv/linux/time.c @@ -33,11 +33,10 @@ time_syscall (time_t *t) } # undef INIT_ARCH -# define INIT_ARCH() \ - void *vdso_time = dl_vdso_vsym (HAVE_TIME_VSYSCALL); +# define INIT_ARCH() libc_ifunc (time, - vdso_time ? VDSO_IFUNC_RET (vdso_time) - : (void *) time_syscall); + GLRO(dl_vdso_time) != NULL ? VDSO_IFUNC_RET (GLRO(dl_vdso_time)) + : (void *) time_syscall); # else time_t diff --git a/sysdeps/unix/sysv/linux/tst-linux-mremap1.c b/sysdeps/unix/sysv/linux/tst-linux-mremap1.c new file mode 100644 index 0000000000..408e8af2ab --- /dev/null +++ b/sysdeps/unix/sysv/linux/tst-linux-mremap1.c @@ -0,0 +1,63 @@ +/* Test mremap with MREMAP_DONTUNMAP. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <errno.h> +#include <sys/mman.h> +#include <support/xstdlib.h> +#include <support/xunistd.h> +#include <support/check.h> +#include <support/test-driver.h> +#include <mremap-failure.h> + +static int +do_test (void) +{ + size_t old_size = getpagesize (); + size_t new_size = old_size; + char *old_addr = xmmap (NULL, old_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1); + old_addr[0] = 1; + old_addr[old_size - 1] = 2; + + /* Create an available 64-page mmap region. */ + size_t fixed_size = old_size * 64; + char *fixed_addr = xmmap (NULL, fixed_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1); + xmunmap (fixed_addr, fixed_size); + + /* Add 3 * pagesize. */ + fixed_size += 3 * old_size; + + /* Test MREMAP_DONTUNMAP. It should return FIXED_ADDR created above. 
*/ + char *new_addr = mremap (old_addr, old_size, new_size, + MREMAP_DONTUNMAP | MREMAP_MAYMOVE, + fixed_addr); + if (new_addr == MAP_FAILED) + return mremap_failure_exit (errno); + TEST_VERIFY_EXIT (fixed_addr == new_addr); + old_addr[0] = 3; + old_addr[old_size - 1] = 4; + new_addr[0] = 1; + new_addr[new_size - 1] = 2; + xmunmap (new_addr, new_size); + xmunmap (old_addr, old_size); + + return 0; +} + +#include <support/test-driver.c> diff --git a/sysdeps/unix/sysv/linux/tst-mman-consts.py b/sysdeps/unix/sysv/linux/tst-mman-consts.py index 441261c945..2191bea36f 100644 --- a/sysdeps/unix/sysv/linux/tst-mman-consts.py +++ b/sysdeps/unix/sysv/linux/tst-mman-consts.py @@ -33,7 +33,7 @@ def main(): help='C compiler (including options) to use') args = parser.parse_args() linux_version_headers = glibcsyscalls.linux_kernel_version(args.cc) - linux_version_glibc = (6, 9) + linux_version_glibc = (6, 11) sys.exit(glibcextract.compare_macro_consts( '#define _GNU_SOURCE 1\n' '#include <sys/mman.h>\n', diff --git a/sysdeps/unix/sysv/linux/tst-mount-consts.py b/sysdeps/unix/sysv/linux/tst-mount-consts.py index c4a67221c1..b71d8a489d 100755 --- a/sysdeps/unix/sysv/linux/tst-mount-consts.py +++ b/sysdeps/unix/sysv/linux/tst-mount-consts.py @@ -39,10 +39,10 @@ def main(): sys.exit (77) linux_version_headers = glibcsyscalls.linux_kernel_version(args.cc) - # Constants in glibc were updated to match Linux v6.9. When glibc + # Constants in glibc were updated to match Linux v6.11. When glibc # constants are updated this value should be updated to match the # released kernel version from which the constants were taken.
- linux_version_glibc = (6, 9) + linux_version_glibc = (6, 11) def check(cte, exclude=None): return glibcextract.compare_macro_consts( '#include <sys/mount.h>\n', diff --git a/sysdeps/unix/sysv/linux/tst-pidfd-consts.py b/sysdeps/unix/sysv/linux/tst-pidfd-consts.py index 6f05291949..9d53102cbd 100644 --- a/sysdeps/unix/sysv/linux/tst-pidfd-consts.py +++ b/sysdeps/unix/sysv/linux/tst-pidfd-consts.py @@ -39,7 +39,7 @@ def main(): sys.exit (77) linux_version_headers = glibcsyscalls.linux_kernel_version(args.cc) - linux_version_glibc = (6, 9) + linux_version_glibc = (6, 11) sys.exit(glibcextract.compare_macro_consts( '#include <sys/pidfd.h>\n', '#include <asm/fcntl.h>\n' diff --git a/sysdeps/unix/sysv/linux/tst-rseq-nptl.c b/sysdeps/unix/sysv/linux/tst-rseq-nptl.c index b9f498473d..2a246c4593 100644 --- a/sysdeps/unix/sysv/linux/tst-rseq-nptl.c +++ b/sysdeps/unix/sysv/linux/tst-rseq-nptl.c @@ -28,6 +28,11 @@ #include <sys/rseq.h> #include <unistd.h> +/* Set this in 'do_test' only so as to invoke the destructor test in + the test process only and not 'support_test_main' parent. Otherwise + the test harness may hang in the destructor if something goes wrong. */ +static int run_destructor_test; + #ifdef RSEQ_SIG # include <array_length.h> # include <errno.h> @@ -236,6 +241,9 @@ do_rseq_test (void) static void __attribute__ ((destructor)) do_rseq_destructor_test (void) { + if (!run_destructor_test) + return; + /* Cannot use deferred failure reporting after main returns. */ if (do_rseq_test ()) FAIL_EXIT1 ("rseq not registered within destructor"); @@ -254,6 +262,7 @@ do_rseq_test (void) static int do_test (void) { + run_destructor_test = 1; return do_rseq_test (); } diff --git a/sysdeps/unix/sysv/linux/tst-sched-consts.py b/sysdeps/unix/sysv/linux/tst-sched-consts.py new file mode 100644 index 0000000000..f06ac400bd --- /dev/null +++ b/sysdeps/unix/sysv/linux/tst-sched-consts.py @@ -0,0 +1,56 @@ +#!/usr/bin/python3 +# Test that glibc's sched.h constants match the kernel's. 
+# Copyright (C) 2018-2024 Free Software Foundation, Inc. +# This file is part of the GNU C Library. +# +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# <https://www.gnu.org/licenses/>. + +import argparse +import sys + +import glibcextract +import glibcsyscalls + + +def main(): + """The main entry point.""" + parser = argparse.ArgumentParser( + description="Test that glibc's sched.h constants " + "match the kernel's.") + parser.add_argument('--cc', metavar='CC', + help='C compiler (including options) to use') + args = parser.parse_args() + linux_version_headers = glibcsyscalls.linux_kernel_version(args.cc) + linux_version_glibc = (6, 11) + sys.exit(glibcextract.compare_macro_consts( + '#define _GNU_SOURCE 1\n' + '#include <sched.h>\n', + '#define _GNU_SOURCE 1\n' + '#include <linux/sched.h>\n' + '#include <linux/sched/types.h>\n', + args.cc, + 'SCHED_.*', + # SCHED_ISO is reserved, but not implemented in the kernel. + # SCHED_OTHER is the standard name for SCHED_NORMAL. + # SCHED_FLAG_ALL will receive more and more flags, so + # exposing it to userspace does not seem useful. 
+ 'SCHED_ISO' + '|SCHED_OTHER' + '|SCHED_FLAG_ALL', + linux_version_glibc > linux_version_headers, + linux_version_headers > linux_version_glibc)) + +if __name__ == '__main__': + main() diff --git a/sysdeps/unix/sysv/linux/tst-sched_setattr.c b/sysdeps/unix/sysv/linux/tst-sched_setattr.c new file mode 100644 index 0000000000..fbb73c31a7 --- /dev/null +++ b/sysdeps/unix/sysv/linux/tst-sched_setattr.c @@ -0,0 +1,107 @@ +/* Tests for sched_setattr and sched_getattr. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <sched.h> + +#include <errno.h> +#include <stddef.h> +#include <string.h> +#include <support/check.h> +#include <sys/resource.h> +#include <unistd.h> + +/* Padding struct to detect unexpected writes. */ +union +{ + struct sched_attr attr; + /* Hopefully the kernel will never need as much. 
*/ + unsigned char padding[4096]; +} u; + +static void +check_unused (void) +{ + TEST_VERIFY (u.attr.size < sizeof (u)); + for (unsigned int i = u.attr.size; i < sizeof (u); ++i) + TEST_COMPARE (u.padding[i], 0xcc); +} + +static int +do_test (void) +{ + _Static_assert (SCHED_OTHER == SCHED_NORMAL, + "SCHED_OTHER, SCHED_NORMAL values"); + TEST_VERIFY (sizeof (struct sched_attr) < sizeof (u)); + + /* Check that reading and re-applying the current policy works. */ + memset (&u, 0xcc, sizeof (u)); + /* Compiler barrier to bypass write access attribute. */ + volatile unsigned int size = sizeof (u); + TEST_COMPARE (sched_getattr (0, (struct sched_attr *) &u, size, 0), 0); + check_unused (); + TEST_COMPARE (sched_setattr (0, &u.attr, 0), 0); /* Apply unchanged. */ + + /* Try to switch to the SCHED_OTHER policy. */ + memset (&u, 0, sizeof (u)); + u.attr.size = sizeof (u); /* With padding, kernel should accept zeroes. */ + u.attr.sched_policy = SCHED_OTHER; /* Should be the default. */ + { + errno = 0; + int prio = getpriority (PRIO_PROCESS, 0); + if (errno != 0) + prio = 0; + u.attr.sched_nice = prio; + } + TEST_COMPARE (sched_setattr (0, &u.attr, 0), 0); + + /* Non-zero values not known to the kernel result in an E2BIG error. */ + memset (&u, 0, sizeof (u)); + TEST_COMPARE (sched_getattr (0, (struct sched_attr *) &u, size, 0), 0); + u.padding[u.attr.size] = 0xcc; + u.attr.size = sizeof (u); + errno = 0; + TEST_COMPARE (sched_setattr (0, &u.attr, 0), -1); + TEST_COMPARE (errno, E2BIG); + + memset (&u, 0xcc, sizeof (u)); + TEST_COMPARE (sched_getattr (0, (struct sched_attr *) &u, size, 0), 0); + TEST_COMPARE (u.attr.sched_policy, SCHED_OTHER); + check_unused (); + + /* Raise the niceness level to 19 and observe its effect. 
*/ + TEST_COMPARE (nice (19), 19); + TEST_COMPARE (sched_getattr (0, &u.attr, sizeof (u.attr), 0), 0); + TEST_COMPARE (u.attr.sched_policy, SCHED_OTHER); + TEST_COMPARE (u.attr.sched_nice, 19); + check_unused (); + + /* Invalid buffer arguments result in EINVAL (not EFAULT). */ + { + errno = 0; + void *volatile null_pointer = NULL; /* compiler barrier. */ + TEST_COMPARE (sched_setattr (0, null_pointer, 0), -1); + TEST_COMPARE (errno, EINVAL); + errno = 0; + TEST_COMPARE (sched_getattr (0, null_pointer, size, 0), -1); + TEST_COMPARE (errno, EINVAL); + } + + return 0; +} + +#include <support/test-driver.c> diff --git a/sysdeps/unix/sysv/linux/tst-skeleton-affinity.c b/sysdeps/unix/sysv/linux/tst-skeleton-affinity.c index 31a15b3ad7..2f921ed397 100644 --- a/sysdeps/unix/sysv/linux/tst-skeleton-affinity.c +++ b/sysdeps/unix/sysv/linux/tst-skeleton-affinity.c @@ -157,7 +157,7 @@ test_size (const struct conf *conf, size_t size) if (setaffinity (kernel_size, initial_set) < 0) { printf ("error: size %zu: setaffinity: %m\n", size); - return true; + return false; } /* Use one-CPU set to test switching between CPUs. */ diff --git a/sysdeps/unix/sysv/linux/tst-syscall-restart.c b/sysdeps/unix/sysv/linux/tst-syscall-restart.c new file mode 100644 index 0000000000..84a8a41b5c --- /dev/null +++ b/sysdeps/unix/sysv/linux/tst-syscall-restart.c @@ -0,0 +1,112 @@ +/* Test if a syscall is correctly restarted. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <support/xsignal.h> +#include <support/check.h> +#include <support/process_state.h> +#include <support/xunistd.h> +#include <support/xthread.h> +#include <sys/wait.h> + +static int +check_pid (pid_t pid) +{ + /* Wait until the child has called pause and is blocking in the kernel. */ + support_process_state_wait (pid, support_process_state_sleeping); + + TEST_COMPARE (kill (pid, SIGSTOP), 0); + + /* Adding process_state_tracing_stop ('t') allows the test to work under + trace programs such as ptrace. */ + support_process_state_wait (pid, support_process_state_stopped + | support_process_state_tracing_stop); + + TEST_COMPARE (kill (pid, SIGCONT), 0); + + enum support_process_state state + = support_process_state_wait (pid, support_process_state_sleeping + | support_process_state_zombie); + + TEST_COMPARE (state, support_process_state_sleeping); + + TEST_COMPARE (kill (pid, SIGTERM), 0); + + siginfo_t info; + TEST_COMPARE (waitid (P_PID, pid, &info, WEXITED), 0); + TEST_COMPARE (info.si_signo, SIGCHLD); + TEST_COMPARE (info.si_code, CLD_KILLED); + TEST_COMPARE (info.si_status, SIGTERM); + TEST_COMPARE (info.si_pid, pid); + + return 0; +} + +static void * +tf (void *) +{ + pause (); + return NULL; +} + +static void +child_mt (void) +{ + /* Let only the created thread handle signals. 
*/ + sigset_t set; + sigfillset (&set); + xpthread_sigmask (SIG_BLOCK, &set, NULL); + + sigdelset (&set, SIGSTOP); + sigdelset (&set, SIGCONT); + sigdelset (&set, SIGTERM); + + pthread_attr_t attr; + xpthread_attr_init (&attr); + TEST_COMPARE (pthread_attr_setsigmask_np (&attr, &set), 0); + + xpthread_join (xpthread_create (&attr, tf, NULL)); +} + +static void +do_test_syscall (bool multithread) +{ + pid_t pid = xfork (); + if (pid == 0) + { + if (multithread) + child_mt (); + else + pause (); + _exit (127); + } + + check_pid (pid); +} + +static int +do_test (void) +{ + /* Check for both single and multi thread, since they use different syscall + mechanisms. */ + do_test_syscall (false); + do_test_syscall (true); + + return 0; +} + +#include <support/test-driver.c> diff --git a/sysdeps/unix/sysv/linux/x86/pkey_get.c b/sysdeps/unix/sysv/linux/x86/pkey_get.c index eb988a5c45..0fb52ba18c 100644 --- a/sysdeps/unix/sysv/linux/x86/pkey_get.c +++ b/sysdeps/unix/sysv/linux/x86/pkey_get.c @@ -18,9 +18,10 @@ #include <arch-pkey.h> #include <errno.h> +#include <sys/mman.h> int -pkey_get (int key) +__pkey_get (int key) { if (key < 0 || key > 15) { @@ -31,3 +32,5 @@ pkey_get (int key) return (pkru >> (2 * key)) & 3; return 0; } +libc_hidden_def (__pkey_get) +weak_alias (__pkey_get, pkey_get) diff --git a/sysdeps/unix/sysv/linux/x86/pkey_set.c b/sysdeps/unix/sysv/linux/x86/pkey_set.c index 3fd2c95580..51e3d68d92 100644 --- a/sysdeps/unix/sysv/linux/x86/pkey_set.c +++ b/sysdeps/unix/sysv/linux/x86/pkey_set.c @@ -18,9 +18,10 @@ #include <arch-pkey.h> #include <errno.h> +#include <sys/mman.h> int -pkey_set (int key, unsigned int rights) +__pkey_set (int key, unsigned int rights) { if (key < 0 || key > 15 || rights > 3) { @@ -33,3 +34,5 @@ pkey_set (int key, unsigned int rights) pkey_write (pkru); return 0; } +libc_hidden_def (__pkey_set) +weak_alias (__pkey_set, pkey_set) diff --git a/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h 
b/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h index b122216013..dfc10d0c7e 100644 --- a/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h @@ -189,6 +189,7 @@ #define __NR_mq_timedsend 242 #define __NR_mq_unlink 241 #define __NR_mremap 25 +#define __NR_mseal 462 #define __NR_msgctl 71 #define __NR_msgget 68 #define __NR_msgrcv 70 @@ -358,6 +359,7 @@ #define __NR_unlink 87 #define __NR_unlinkat 263 #define __NR_unshare 272 +#define __NR_uretprobe 335 #define __NR_uselib 134 #define __NR_userfaultfd 323 #define __NR_ustat 136 diff --git a/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist b/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist index aea7848ed6..5acf49dbe8 100644 --- a/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist +++ b/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist @@ -2744,6 +2744,8 @@ GLIBC_2.4 sys_errlist D 0x420 GLIBC_2.4 sys_nerr D 0x4 GLIBC_2.4 unlinkat F GLIBC_2.4 unshare F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F GLIBC_2.5 __readlinkat_chk F GLIBC_2.5 inet6_opt_append F GLIBC_2.5 inet6_opt_find F diff --git a/sysdeps/unix/sysv/linux/x86_64/syscall_cancel.S b/sysdeps/unix/sysv/linux/x86_64/syscall_cancel.S new file mode 100644 index 0000000000..cda9d20a83 --- /dev/null +++ b/sysdeps/unix/sysv/linux/x86_64/syscall_cancel.S @@ -0,0 +1,57 @@ +/* Cancellable syscall wrapper. Linux/x86_64 version. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <descr-const.h> + +/* long int [rax] __syscall_cancel_arch (volatile int *cancelhandling [%rdi], + __syscall_arg_t nr [%rsi], + __syscall_arg_t arg1 [%rdx], + __syscall_arg_t arg2 [%rcx], + __syscall_arg_t arg3 [%r8], + __syscall_arg_t arg4 [%r9], + __syscall_arg_t arg5 [SP+8], + __syscall_arg_t arg6 [SP+16]) */ + +ENTRY (__syscall_cancel_arch) + .globl __syscall_cancel_arch_start +__syscall_cancel_arch_start: + + /* if (*cancelhandling & CANCELED_BITMASK) + __syscall_do_cancel() */ + mov (%rdi),%eax + testb $TCB_CANCELED_BITMASK, (%rdi) + jne __syscall_do_cancel + + /* Issue a 6 argument syscall, the nr [%rax] being the syscall + number. */ + mov %rdi,%r11 + mov %rsi,%rax + mov %rdx,%rdi + mov %rcx,%rsi + mov %r8,%rdx + mov %r9,%r10 + mov 8(%rsp),%r8 + mov 16(%rsp),%r9 + mov %r11,8(%rsp) + syscall + + .globl __syscall_cancel_arch_end +__syscall_cancel_arch_end: + ret +END (__syscall_cancel_arch) diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h b/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h index df3e22236d..9340daa967 100644 --- a/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h @@ -183,6 +183,7 @@ #define __NR_mq_timedsend 1073742066 #define __NR_mq_unlink 1073742065 #define __NR_mremap 1073741849 +#define __NR_mseal 1073742286 #define __NR_msgctl 1073741895 #define __NR_msgget 1073741892 #define __NR_msgrcv 1073741894 @@ -350,6 +351,7 @@ #define __NR_unlink 1073741911 #define __NR_unlinkat 1073742087 #define __NR_unshare 1073742096 +#define __NR_uretprobe 1073742159 #define __NR_userfaultfd 1073742147 #define __NR_ustat 1073741960 #define __NR_utime 1073741956 diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist 
b/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist index 4ab3681914..02d1bb97dc 100644 --- a/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist +++ b/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist @@ -2763,3 +2763,5 @@ GLIBC_2.39 stdc_trailing_zeros_ui F GLIBC_2.39 stdc_trailing_zeros_ul F GLIBC_2.39 stdc_trailing_zeros_ull F GLIBC_2.39 stdc_trailing_zeros_us F +GLIBC_2.41 sched_getattr F +GLIBC_2.41 sched_setattr F diff --git a/sysdeps/generic/dl-fptr.h b/sysdeps/unix/sysv/linux/x86_64/x32/syscall_types.h index 71cfb62411..ac2019751d 100644 --- a/sysdeps/generic/dl-fptr.h +++ b/sysdeps/unix/sysv/linux/x86_64/x32/syscall_types.h @@ -1,5 +1,5 @@ -/* Function descriptors. Generic version. - Copyright (C) 1995-2024 Free Software Foundation, Inc. +/* Types and macros used for syscall issuing. x86_64/x32 version. + Copyright (C) 2023 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,30 +16,19 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#ifndef dl_fptr_h -#define dl_fptr_h 1 +#ifndef _SYSCALL_TYPES_H +#define _SYSCALL_TYPES_H -/* An FDESC is a function descriptor. */ +#include <libc-diag.h> -struct fdesc - { - ElfW(Addr) ip; /* code entry point */ - ElfW(Addr) gp; /* global pointer */ - }; +typedef long long int __syscall_arg_t; -struct fdesc_table - { - struct fdesc_table *next; - unsigned int len; /* # of entries in fdesc table */ - volatile unsigned int first_unused; /* index of first available entry */ - struct fdesc fdesc[0]; - }; +/* Syscall arguments for x32 follows x86_64 ABI, however pointers are 32 bits + should be zero extended. 
*/ +#define __SSC(__x) \ + ({ \ + TYPEFY (__x, __tmp) = ARGIFY (__x); \ + (__syscall_arg_t) __tmp; \ + }) -struct link_map; - -extern ElfW(Addr) _dl_boot_fptr_table []; - -extern ElfW(Addr) _dl_make_fptr (struct link_map *, const ElfW(Sym) *, - ElfW(Addr)); - -#endif /* !dl_fptr_h */ +#endif diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index c096dd390a..1f30e237f5 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -756,6 +756,12 @@ init_cpu_features (struct cpu_features *cpu_features) unsigned int stepping = 0; enum cpu_features_kind kind; + /* Default is to avoid non-temporal memset for non Intel/AMD/Hygon hardware. This is + because, as of writing this, we only have benchmarks indicating its profitability + on Intel/AMD/Hygon. */ + cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset] + |= bit_arch_Avoid_Non_Temporal_Memset; + cpu_features->cachesize_non_temporal_divisor = 4; #if !HAS_CPUID if (__get_cpuid_max (0, 0) == 0) @@ -781,6 +787,11 @@ init_cpu_features (struct cpu_features *cpu_features) update_active (cpu_features); + /* Benchmarks indicate non-temporal memset can be profitable on Intel + hardware. */ + cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset] + &= ~bit_arch_Avoid_Non_Temporal_Memset; + if (family == 0x06) { model += extended_model; @@ -879,6 +890,7 @@ init_cpu_features (struct cpu_features *cpu_features) non-temporal on all Skylake servers. */ cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset] |= bit_arch_Avoid_Non_Temporal_Memset; + /* fallthrough */ case INTEL_BIGCORE_COMETLAKE: case INTEL_BIGCORE_SKYLAKE: case INTEL_BIGCORE_KABYLAKE: @@ -974,9 +986,8 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht cpu_features->preferred[index_arch_Avoid_Short_Distance_REP_MOVSB] |= bit_arch_Avoid_Short_Distance_REP_MOVSB; } - /* This spells out "AuthenticAMD" or "HygonGenuine". 
*/ - else if ((ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) - || (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e)) + /* This spells out "AuthenticAMD". */ + else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) { unsigned int extended_model; @@ -991,6 +1002,11 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht ecx = cpu_features->features[CPUID_INDEX_1].cpuid.ecx; + /* Benchmarks indicate non-temporal memset can be profitable on AMD + hardware. */ + cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset] + &= ~bit_arch_Avoid_Non_Temporal_Memset; + if (CPU_FEATURE_USABLE_P (cpu_features, AVX)) { /* Since the FMA4 bit is in CPUID_INDEX_80000001 and @@ -1073,6 +1089,7 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht /* Yongfeng and Shijidadao mircoarch tuning. */ case 0x5b: cpu_features->cachesize_non_temporal_divisor = 2; + /* fallthrough */ case 0x6b: cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load] &= ~bit_arch_AVX_Fast_Unaligned_Load; @@ -1086,6 +1103,25 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht } } } + /* This spells out "HygonGenuine". */ + else if (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e) + { + unsigned int extended_model; + + kind = arch_kind_hygon; + + get_common_indices (cpu_features, &family, &model, &extended_model, + &stepping); + + get_extended_indices (cpu_features); + + update_active (cpu_features); + + /* Benchmarks indicate non-temporal memset can be profitable on Hygon + hardware. 
*/ + cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset] + &= ~bit_arch_Avoid_Non_Temporal_Memset; + } else { kind = arch_kind_other; @@ -1101,6 +1137,10 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht if (CPU_FEATURES_CPU_P (cpu_features, CMOV)) cpu_features->preferred[index_arch_I686] |= bit_arch_I686; + /* No ERMS, we want to avoid stosb for memset. */ + if (!CPU_FEATURE_USABLE_P (cpu_features, ERMS)) + cpu_features->preferred[index_arch_Avoid_STOSB] |= bit_arch_Avoid_STOSB; + #if !HAS_CPUID no_cpuid: #endif diff --git a/sysdeps/x86/cpu-tunables.c b/sysdeps/x86/cpu-tunables.c index ae9dcd6180..cd36de2d8b 100644 --- a/sysdeps/x86/cpu-tunables.c +++ b/sysdeps/x86/cpu-tunables.c @@ -193,6 +193,8 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp) 11); CHECK_GLIBC_IFUNC_PREFERRED_BOTH (n, cpu_features, Prefer_FSRM, 11); + CHECK_GLIBC_IFUNC_PREFERRED_BOTH (n, cpu_features, Avoid_STOSB, + 11); CHECK_GLIBC_IFUNC_PREFERRED_NEED_BOTH (n, cpu_features, Slow_SSE4_2, SSE4_2, @@ -248,6 +250,7 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp) CHECK_GLIBC_IFUNC_PREFERRED_BOTH (n, cpu_features, Avoid_Non_Temporal_Memset, 25); } + break; case 26: { CHECK_GLIBC_IFUNC_PREFERRED_NEED_BOTH diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h index a1c03b8903..e9579505a3 100644 --- a/sysdeps/x86/dl-cacheinfo.h +++ b/sysdeps/x86/dl-cacheinfo.h @@ -567,6 +567,48 @@ handle_zhaoxin (int name) return 0; } +static long int __attribute__ ((noinline)) +handle_hygon (int name) +{ + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + unsigned int count = 0x1; + + if (name >= _SC_LEVEL3_CACHE_SIZE) + count = 0x3; + else if (name >= _SC_LEVEL2_CACHE_SIZE) + count = 0x2; + else if (name >= _SC_LEVEL1_DCACHE_SIZE) + count = 0x0; + + /* Use __cpuid__ '0x8000_001D' to compute cache details. 
*/ + __cpuid_count (0x8000001D, count, eax, ebx, ecx, edx); + + switch (name) + { + case _SC_LEVEL1_ICACHE_ASSOC: + case _SC_LEVEL1_DCACHE_ASSOC: + case _SC_LEVEL2_CACHE_ASSOC: + case _SC_LEVEL3_CACHE_ASSOC: + return ((ebx >> 22) & 0x3ff) + 1; + case _SC_LEVEL1_ICACHE_LINESIZE: + case _SC_LEVEL1_DCACHE_LINESIZE: + case _SC_LEVEL2_CACHE_LINESIZE: + case _SC_LEVEL3_CACHE_LINESIZE: + return (ebx & 0xfff) + 1; + case _SC_LEVEL1_ICACHE_SIZE: + case _SC_LEVEL1_DCACHE_SIZE: + case _SC_LEVEL2_CACHE_SIZE: + case _SC_LEVEL3_CACHE_SIZE: + return (((ebx >> 22) & 0x3ff) + 1) * ((ebx & 0xfff) + 1) * (ecx + 1); + default: + __builtin_unreachable (); + } + return -1; +} + static void get_common_cache_info (long int *shared_ptr, long int * shared_per_thread_ptr, unsigned int *threads_ptr, long int core) @@ -889,6 +931,24 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) shared_per_thread = shared; } + else if (cpu_features->basic.kind == arch_kind_hygon) + { + data = handle_hygon (_SC_LEVEL1_DCACHE_SIZE); + shared = handle_hygon (_SC_LEVEL3_CACHE_SIZE); + shared_per_thread = shared; + + level1_icache_size = handle_hygon (_SC_LEVEL1_ICACHE_SIZE); + level1_icache_linesize = handle_hygon (_SC_LEVEL1_ICACHE_LINESIZE); + level1_dcache_size = data; + level1_dcache_assoc = handle_hygon (_SC_LEVEL1_DCACHE_ASSOC); + level1_dcache_linesize = handle_hygon (_SC_LEVEL1_DCACHE_LINESIZE); + level2_cache_size = handle_hygon (_SC_LEVEL2_CACHE_SIZE);; + level2_cache_assoc = handle_hygon (_SC_LEVEL2_CACHE_ASSOC); + level2_cache_linesize = handle_hygon (_SC_LEVEL2_CACHE_LINESIZE); + level3_cache_size = shared; + level3_cache_assoc = handle_hygon (_SC_LEVEL3_CACHE_ASSOC); + level3_cache_linesize = handle_hygon (_SC_LEVEL3_CACHE_LINESIZE); + } cpu_features->level1_icache_size = level1_icache_size; cpu_features->level1_icache_linesize = level1_icache_linesize; @@ -988,14 +1048,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) if (CPU_FEATURE_USABLE_P (cpu_features, FSRM)) 
rep_movsb_threshold = 2112; - /* Non-temporal stores are more performant on Intel and AMD hardware above - non_temporal_threshold. Enable this for both Intel and AMD hardware. */ - unsigned long int memset_non_temporal_threshold = SIZE_MAX; - if (!CPU_FEATURES_ARCH_P (cpu_features, Avoid_Non_Temporal_Memset) - && (cpu_features->basic.kind == arch_kind_intel - || cpu_features->basic.kind == arch_kind_amd)) - memset_non_temporal_threshold = non_temporal_threshold; - /* For AMD CPUs that support ERMS (Zen3+), REP MOVSB is in a lot of cases slower than the vectorized path (and for some alignments, it is really slow, check BZ #30994). */ @@ -1017,6 +1069,13 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) if (tunable_size != 0) shared = tunable_size; + /* Non-temporal stores are more performant on some hardware above + non_temporal_threshold. Currently Avoid_Non_Temporal_Memset is cleared for + Intel, AMD and Hygon hardware. */ + unsigned long int memset_non_temporal_threshold = SIZE_MAX; + if (!CPU_FEATURES_ARCH_P (cpu_features, Avoid_Non_Temporal_Memset)) + memset_non_temporal_threshold = non_temporal_threshold; + tunable_size = TUNABLE_GET (x86_non_temporal_threshold, long int, NULL); if (tunable_size > minimum_non_temporal_threshold && tunable_size <= maximum_non_temporal_threshold) @@ -1042,18 +1101,42 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) slightly better than ERMS. */ rep_stosb_threshold = SIZE_MAX; + /* + For memset, the non-temporal implementation is only accessed through the + stosb code. ie: + ``` + if (size >= rep_stosb_thresh) + { + if (size >= non_temporal_thresh) + { + do_non_temporal (); + } + do_stosb (); + } + do_normal_vec_loop (); + ``` + So if we prefer non-temporal, set `rep_stosb_thresh = non_temporal_thresh` + to enable the implementation. If `rep_stosb_thresh = non_temporal_thresh`, + `rep stosb` will never be used. 
+ */ + TUNABLE_SET_WITH_BOUNDS (x86_memset_non_temporal_threshold, + memset_non_temporal_threshold, + minimum_non_temporal_threshold, SIZE_MAX); + /* Do `rep_stosb_thresh = non_temporal_thresh` after setting/getting the + final value of `x86_memset_non_temporal_threshold`. In some cases this can + be a matter of correctness. */ + if (CPU_FEATURES_ARCH_P (cpu_features, Avoid_STOSB)) + rep_stosb_threshold + = TUNABLE_GET (x86_memset_non_temporal_threshold, long int, NULL); + TUNABLE_SET_WITH_BOUNDS (x86_rep_stosb_threshold, rep_stosb_threshold, 1, + SIZE_MAX); TUNABLE_SET_WITH_BOUNDS (x86_data_cache_size, data, 0, SIZE_MAX); TUNABLE_SET_WITH_BOUNDS (x86_shared_cache_size, shared, 0, SIZE_MAX); TUNABLE_SET_WITH_BOUNDS (x86_non_temporal_threshold, non_temporal_threshold, minimum_non_temporal_threshold, maximum_non_temporal_threshold); - TUNABLE_SET_WITH_BOUNDS (x86_memset_non_temporal_threshold, - memset_non_temporal_threshold, - minimum_non_temporal_threshold, SIZE_MAX); TUNABLE_SET_WITH_BOUNDS (x86_rep_movsb_threshold, rep_movsb_threshold, minimum_rep_movsb_threshold, SIZE_MAX); - TUNABLE_SET_WITH_BOUNDS (x86_rep_stosb_threshold, rep_stosb_threshold, 1, - SIZE_MAX); unsigned long int rep_movsb_stop_threshold; /* Setting the upper bound of ERMS to the computed value of diff --git a/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def b/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def index 61bbbc2e89..2a58000147 100644 --- a/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def +++ b/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def @@ -34,3 +34,4 @@ BIT (MathVec_Prefer_No_AVX512) BIT (Prefer_FSRM) BIT (Avoid_Short_Distance_REP_MOVSB) BIT (Avoid_Non_Temporal_Memset) +BIT (Avoid_STOSB) diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h index aaae44f0e1..b8e790c8fd 100644 --- a/sysdeps/x86/include/cpu-features.h +++ b/sysdeps/x86/include/cpu-features.h @@ -881,6 +881,7 @@ enum 
cpu_features_kind arch_kind_intel, arch_kind_amd, arch_kind_zhaoxin, + arch_kind_hygon, arch_kind_other }; diff --git a/sysdeps/x86/tst-hwcap-tunables.c b/sysdeps/x86/tst-hwcap-tunables.c index 94307283d7..1920f5057e 100644 --- a/sysdeps/x86/tst-hwcap-tunables.c +++ b/sysdeps/x86/tst-hwcap-tunables.c @@ -60,7 +60,8 @@ static const struct test_t /* Disable everything. */ "-Prefer_ERMS,-Prefer_FSRM,-AVX,-AVX2,-AVX512F,-AVX512VL," "-SSE4_1,-SSE4_2,-SSSE3,-Fast_Unaligned_Load,-ERMS," - "-AVX_Fast_Unaligned_Load,-Avoid_Non_Temporal_Memset", + "-AVX_Fast_Unaligned_Load,-Avoid_Non_Temporal_Memset," + "-Avoid_STOSB", test_1, array_length (test_1) }, @@ -68,7 +69,8 @@ static const struct test_t /* Same as before, but with some empty suboptions. */ ",-,-Prefer_ERMS,-Prefer_FSRM,-AVX,-AVX2,-AVX512F,-AVX512VL," "-SSE4_1,-SSE4_2,-SSSE3,-Fast_Unaligned_Load,,-," - "-ERMS,-AVX_Fast_Unaligned_Load,-Avoid_Non_Temporal_Memset,-,", + "-ERMS,-AVX_Fast_Unaligned_Load,-Avoid_Non_Temporal_Memset," + "-Avoid_STOSB,-,", test_1, array_length (test_1) } diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps index 37d8998c71..80d0125642 100644 --- a/sysdeps/x86_64/fpu/libm-test-ulps +++ b/sysdeps/x86_64/fpu/libm-test-ulps @@ -1430,28 +1430,28 @@ Function: "exp10_vlen8_avx2": float: 1 Function: "exp10m1": -double: 2 -float: 1 -float128: 1 -ldouble: 1 +double: 4 +float: 2 +float128: 3 +ldouble: 4 Function: "exp10m1_downward": -double: 1 -float: 1 -float128: 3 -ldouble: 2 +double: 3 +float: 3 +float128: 6 +ldouble: 6 Function: "exp10m1_towardzero": -double: 1 -float: 1 -float128: 3 -ldouble: 2 +double: 2 +float: 3 +float128: 6 +ldouble: 5 Function: "exp10m1_upward": -double: 3 -float: 1 -float128: 3 -ldouble: 2 +double: 5 +float: 4 +float128: 6 +ldouble: 6 Function: "exp2": double: 1 @@ -1498,28 +1498,28 @@ Function: "exp2_vlen8_avx2": float: 1 Function: "exp2m1": -double: 1 -float: 1 -float128: 1 -ldouble: 1 - -Function: "exp2m1_downward": double: 2 -float: 1 +float: 
2 float128: 2 ldouble: 3 +Function: "exp2m1_downward": +double: 3 +float: 3 +float128: 3 +ldouble: 6 + Function: "exp2m1_towardzero": -double: 2 -float: 1 -float128: 2 -ldouble: 3 +double: 3 +float: 2 +float128: 4 +ldouble: 5 Function: "exp2m1_upward": -double: 1 -float: 1 -float128: 2 -ldouble: 3 +double: 3 +float: 3 +float128: 5 +ldouble: 6 Function: "exp_downward": double: 1 @@ -1808,28 +1808,28 @@ Function: "log10_vlen8_avx2": float: 1 Function: "log10p1": -double: 1 -float: 1 +double: 2 +float: 2 float128: 3 -ldouble: 2 +ldouble: 4 Function: "log10p1_downward": double: 2 -float: 1 -float128: 2 -ldouble: 4 +float: 3 +float128: 4 +ldouble: 8 Function: "log10p1_towardzero": -double: 2 +double: 3 float: 2 -float128: 2 -ldouble: 4 +float128: 3 +ldouble: 8 Function: "log10p1_upward": double: 2 -float: 1 -float128: 3 -ldouble: 3 +float: 3 +float128: 4 +ldouble: 6 Function: "log1p": double: 1 @@ -2263,25 +2263,21 @@ double: 1 Function: "tgamma": double: 9 -float: 8 float128: 4 ldouble: 5 Function: "tgamma_downward": double: 9 -float: 7 float128: 5 ldouble: 6 Function: "tgamma_towardzero": double: 9 -float: 7 float128: 5 ldouble: 6 Function: "tgamma_upward": double: 9 -float: 8 float128: 4 ldouble: 5 diff --git a/sysdeps/x86_64/fpu/multiarch/s_fma.c b/sysdeps/x86_64/fpu/multiarch/s_fma.c index 10e6e625b3..92409a2779 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_fma.c +++ b/sysdeps/x86_64/fpu/multiarch/s_fma.c @@ -9,7 +9,7 @@ The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public diff --git a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c index 85725e29c0..b5b63fd125 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c +++ b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c @@ -9,7 +9,7 @@ The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public diff --git a/sysdeps/x86_64/multiarch/ifunc-memset.h b/sysdeps/x86_64/multiarch/ifunc-memset.h index 7a637ef7ca..8dc3d7ab5a 100644 --- a/sysdeps/x86_64/multiarch/ifunc-memset.h +++ b/sysdeps/x86_64/multiarch/ifunc-memset.h @@ -46,6 +46,13 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned_erms) attribute_hidden; +static inline int +prefer_erms_nt_impl (const struct cpu_features *cpu_features) +{ + return CPU_FEATURE_USABLE_P (cpu_features, ERMS) + || !CPU_FEATURES_ARCH_P (cpu_features, Avoid_Non_Temporal_Memset); +} + static inline void * IFUNC_SELECTOR (void) { @@ -61,7 +68,7 @@ IFUNC_SELECTOR (void) && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW) && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)) { - if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) + if (prefer_erms_nt_impl (cpu_features)) return OPTIMIZE (avx512_unaligned_erms); return OPTIMIZE (avx512_unaligned); @@ -76,7 +83,7 @@ IFUNC_SELECTOR (void) && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW) && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)) { - if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) + if (prefer_erms_nt_impl (cpu_features)) return OPTIMIZE (evex_unaligned_erms); return OPTIMIZE (evex_unaligned); @@ -84,7 +91,7 @@ IFUNC_SELECTOR (void) if 
(CPU_FEATURE_USABLE_P (cpu_features, RTM)) { - if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) + if (prefer_erms_nt_impl (cpu_features)) return OPTIMIZE (avx2_unaligned_erms_rtm); return OPTIMIZE (avx2_unaligned_rtm); @@ -93,14 +100,15 @@ IFUNC_SELECTOR (void) if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER, !)) { - if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) + if (prefer_erms_nt_impl (cpu_features)) return OPTIMIZE (avx2_unaligned_erms); return OPTIMIZE (avx2_unaligned); } } - if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) + if (CPU_FEATURE_USABLE_P (cpu_features, ERMS) + || !CPU_FEATURES_ARCH_P (cpu_features, Avoid_Non_Temporal_Memset)) return OPTIMIZE (sse2_unaligned_erms); return OPTIMIZE (sse2_unaligned); diff --git a/sysdeps/x86_64/multiarch/strchr-evex-base.S b/sysdeps/x86_64/multiarch/strchr-evex-base.S index 04e2c0e79e..3a0b7c9d64 100644 --- a/sysdeps/x86_64/multiarch/strchr-evex-base.S +++ b/sysdeps/x86_64/multiarch/strchr-evex-base.S @@ -124,13 +124,13 @@ L(page_cross): VPCMPNE %VMM(1), %VMM(0), %k1 VPTEST %VMM(1), %VMM(1), %k0{%k1} KMOV %k0, %VRAX -# ifdef USE_AS_WCSCHR + sar %cl, %VRAX +#ifdef USE_AS_WCSCHR sub $VEC_MATCH_MASK, %VRAX -# else +#else inc %VRAX -# endif +#endif /* Ignore number of character for alignment adjustment. */ - shr %cl, %VRAX jz L(align_more) bsf %VRAX, %VRAX diff --git a/sysdeps/x86_64/multiarch/strcmp-evex.S b/sysdeps/x86_64/multiarch/strcmp-evex.S index 06730ab2a1..cea034f394 100644 --- a/sysdeps/x86_64/multiarch/strcmp-evex.S +++ b/sysdeps/x86_64/multiarch/strcmp-evex.S @@ -209,7 +209,9 @@ returned. */ .section SECTION(.text), "ax", @progbits - .align 16 + /* Align 64 bytes here. This is to get the L(loop) block ideally + aligned for the DSB. */ + .align 64 .type STRCMP, @function .globl STRCMP # ifdef USE_AS_STRCASECMP_L @@ -509,9 +511,7 @@ L(ret4): ret # endif - /* 32 byte align here ensures the main loop is ideally aligned - for DSB. 
*/ - .p2align 5 + .p2align 4,, 4 L(more_3x_vec): /* Safe to compare 4x vectors. */ VMOVU (VEC_SIZE)(%rdi), %VMM(0) @@ -1426,10 +1426,9 @@ L(less_32_till_page): L(ret_zero_page_cross_slow_case0): xorl %eax, %eax ret -# endif - - +# else .p2align 4,, 10 +# endif L(less_16_till_page): cmpl $((VEC_SIZE - 8) / SIZE_OF_CHAR), %eax ja L(less_8_till_page) @@ -1482,8 +1481,12 @@ L(less_16_till_page): # endif jmp L(prepare_loop_aligned) - - +# ifndef USE_AS_STRNCMP + /* Fits in aligning bytes. */ +L(ret_zero_4_loop): + xorl %eax, %eax + ret +# endif .p2align 4,, 10 L(less_8_till_page): @@ -1554,6 +1557,7 @@ L(ret_less_8_wcs): # ifdef USE_AS_STRNCMP .p2align 4,, 2 +L(ret_zero_4_loop): L(ret_zero_page_cross_slow_case1): xorl %eax, %eax ret @@ -1586,10 +1590,6 @@ L(less_4_loop): subq $-(CHAR_PER_VEC * 4), %rdx # endif jmp L(prepare_loop_aligned) - -L(ret_zero_4_loop): - xorl %eax, %eax - ret L(ret_less_4_loop): xorl %r8d, %eax subl %r8d, %eax diff --git a/sysdeps/x86_64/multiarch/strnlen-evex-base.S b/sysdeps/x86_64/multiarch/strnlen-evex-base.S new file mode 100644 index 0000000000..1c2cfdfe06 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strnlen-evex-base.S @@ -0,0 +1,462 @@ +/* strnlen/wcsnlen optimized with 256/512-bit EVEX instructions. + Copyright (C) 2022-2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + + +#include <isa-level.h> + +#if ISA_SHOULD_BUILD (4) + +# include <sysdep.h> + +#ifdef USE_AS_WCSLEN +# define VPCMPEQ vpcmpeqd +# define VPTESTN vptestnmd +# define VPMINU vpminud +# define CHAR_SIZE 4 +#else +# define VPCMPEQ vpcmpeqb +# define VPTESTN vptestnmb +# define VPMINU vpminub +# define CHAR_SIZE 1 +#endif + +#define XZERO VMM_128(0) +#define VZERO VMM(0) +#define PAGE_SIZE 4096 +#define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE) + +#if CHAR_PER_VEC == 32 +# define SUB_SHORT(imm, reg) subb $(imm), %VGPR_SZ(reg, 8) +#else +# define SUB_SHORT(imm, reg) subl $(imm), %VGPR_SZ(reg, 32) +#endif + +#ifdef USE_AS_WCSLEN +/* For wide-character, we care more about limitting code size + than optimally aligning targets, so just cap nop padding + reasonably low. */ +# define P2ALIGN(...) .p2align 4,, 6 +# define P2ALIGN_CLAMPED(...) P2ALIGN(__VA_ARGS__) +#else +# define P2ALIGN(x) .p2align x +# define P2ALIGN_CLAMPED(x, y) .p2align x,, y +#endif + + .section SECTION(.text), "ax", @progbits + /* Aligning entry point to 64 byte, provides better performance for + one vector length string. */ +ENTRY_P2ALIGN(STRNLEN, 6) + /* rdi is pointer to array, rsi is the upper limit. */ + + /* Check zero length. */ + test %RSI_LP, %RSI_LP + jz L(zero) + +#ifdef __ILP32__ + /* Clear the upper 32 bits. */ + movl %esi, %esi +#endif + + vpxorq %XZERO, %XZERO, %XZERO + + /* Check that we won't cross a page boundary with our first load. */ + movl %edi, %eax + shll $20, %eax + cmpl $((PAGE_SIZE - VEC_SIZE) << 20), %eax + ja L(crosses_page_boundary) + + /* Check the first VEC_SIZE bytes. Each bit in K0 represents a + null byte. */ + VPCMPEQ (%rdi), %VZERO, %k0 + KMOV %k0, %VRCX + + /* If src (rcx) is zero, bsf does not change the result. NB: + Must use 64-bit bsf here so that upper bits of len are not + cleared. 
*/ + movq %rsi, %rax + bsfq %rcx, %rax + + /* If rax > CHAR_PER_VEC then rcx must have been zero (no null + CHAR) and rsi must be > CHAR_PER_VEC. */ + cmpq $CHAR_PER_VEC, %rax + ja L(more_1x_vec) + + /* Check if first match in bounds. */ + cmpq %rax, %rsi + cmovb %esi, %eax + ret + +#if VEC_SIZE == 32 + P2ALIGN_CLAMPED(4, 2) +L(zero): +L(max_0): + movl %esi, %eax + ret +#endif + + P2ALIGN_CLAMPED(4, 10) +L(more_1x_vec): +L(cross_page_continue): + /* After this calculation, rax stores the number of elements + left to be processed The complexity comes from the fact some + elements get read twice due to alignment and we need to be + sure we don't count them twice (else, it would just be rsi - + CHAR_PER_VEC). */ + +#ifdef USE_AS_WCSLEN + /* Need to compute directly for wcslen as CHAR_SIZE * rsi can + overflow. */ + movq %rdi, %rax + andq $(VEC_SIZE * -1), %rdi + subq %rdi, %rax + sarq $2, %rax + leaq -(CHAR_PER_VEC * 1)(%rax, %rsi), %rax +#else + /* Calculate ptr + N - VEC_SIZE, then mask off the low bits, + then subtract ptr to get the new aligned limit value. */ + leaq (VEC_SIZE * -1)(%rsi, %rdi), %rax + andq $(VEC_SIZE * -1), %rdi + subq %rdi, %rax +#endif + + VPCMPEQ VEC_SIZE(%rdi), %VZERO, %k0 + + /* Checking here is faster for 256-bit but not 512-bit */ +#if VEC_SIZE == 0 + KMOV %k0, %VRDX + test %VRDX, %VRDX + jnz L(last_vec_check) +#endif + + cmpq $(CHAR_PER_VEC * 2), %rax + ja L(more_2x_vec) + +L(last_2x_vec_or_less): + + /* Checking here is faster for 512-bit but not 256-bit */ +#if VEC_SIZE != 0 + KMOV %k0, %VRDX + test %VRDX, %VRDX + jnz L(last_vec_check) +#endif + + /* Check for the end of data. */ + SUB_SHORT (CHAR_PER_VEC, rax) + jbe L(max_0) + + /* Check the final remaining vector. 
*/ + VPCMPEQ (VEC_SIZE * 2)(%rdi), %VZERO, %k0 + KMOV %k0, %VRDX + test %VRDX, %VRDX +#if VEC_SIZE == 32 + jz L(max_0) +#else + jnz L(last_vec_check) + P2ALIGN_CLAMPED(4, 2) +L(zero): +L(max_0): + movl %esi, %eax + ret + +#endif + P2ALIGN_CLAMPED(4, 4) +L(last_vec_check): + bsf %VRDX, %VRDX + sub %eax, %edx + lea (%rsi, %rdx), %eax + cmovae %esi, %eax + ret + + +#if VEC_SIZE == 32 + P2ALIGN_CLAMPED(4, 8) +#endif +L(last_4x_vec_or_less): + addl $(CHAR_PER_VEC * -4), %eax + VPCMPEQ (VEC_SIZE * 5)(%rdi), %VZERO, %k0 + +#if VEC_SIZE == 64 + KMOV %k0, %VRDX + test %VRDX, %VRDX + jnz L(last_vec_check) +#endif + + subq $(VEC_SIZE * -4), %rdi + cmpl $(CHAR_PER_VEC * 2), %eax + jbe L(last_2x_vec_or_less) + + P2ALIGN_CLAMPED(4, 6) +L(more_2x_vec): + /* Remaining length >= 2 * CHAR_PER_VEC so do VEC0/VEC1 without + rechecking bounds. */ + + /* Already checked in 256-bit case */ +#if VEC_SIZE != 0 + KMOV %k0, %VRDX + + test %VRDX, %VRDX + jnz L(first_vec_x1) +#endif + + VPCMPEQ (VEC_SIZE * 2)(%rdi), %VZERO, %k0 + KMOV %k0, %VRDX + + test %VRDX, %VRDX + jnz L(first_vec_x2) + + cmpq $(CHAR_PER_VEC * 4), %rax + ja L(more_4x_vec) + + + VPCMPEQ (VEC_SIZE * 3)(%rdi), %VZERO, %k0 + KMOV %k0, %VRDX + addl $(CHAR_PER_VEC * -2), %eax + test %VRDX, %VRDX + jnz L(last_vec_check) + + subb $(CHAR_PER_VEC), %al + jbe L(max_1) + + VPCMPEQ (VEC_SIZE * 4)(%rdi), %VZERO, %k0 + KMOV %k0, %VRDX + + test %VRDX, %VRDX + jnz L(last_vec_check) +L(max_1): + movl %esi, %eax + ret + + + P2ALIGN_CLAMPED(4, 14) +L(first_vec_x2): +#if VEC_SIZE == 64 + /* If VEC_SIZE == 64 we can fit logic for full return label in + spare bytes before next cache line. 
*/ + bsf %VRDX, %VRDX + sub %eax, %esi + leal (CHAR_PER_VEC * 1)(%rsi, %rdx), %eax + ret + P2ALIGN_CLAMPED(4, 6) +#else + addl $CHAR_PER_VEC, %esi +#endif +L(first_vec_x1): + bsf %VRDX, %VRDX + sub %eax, %esi + leal (CHAR_PER_VEC * 0)(%rsi, %rdx), %eax + ret + +#if VEC_SIZE == 64 + P2ALIGN_CLAMPED(4, 6) +L(first_vec_x4): +# if VEC_SIZE == 64 + /* If VEC_SIZE == 64 we can fit logic for full return label in + spare bytes before next cache line. */ + bsf %VRDX, %VRDX + sub %eax, %esi + leal (CHAR_PER_VEC * 3)(%rsi, %rdx), %eax + ret + P2ALIGN_CLAMPED(4, 6) +# else + addl $CHAR_PER_VEC, %esi +# endif +L(first_vec_x3): + bsf %VRDX, %VRDX + sub %eax, %esi + leal (CHAR_PER_VEC * 2)(%rsi, %rdx), %eax + ret +#endif + + P2ALIGN_CLAMPED(6, 20) +L(more_4x_vec): + VPCMPEQ (VEC_SIZE * 3)(%rdi), %VZERO, %k0 + KMOV %k0, %VRDX + test %VRDX, %VRDX + jnz L(first_vec_x3) + + VPCMPEQ (VEC_SIZE * 4)(%rdi), %VZERO, %k0 + KMOV %k0, %VRDX + test %VRDX, %VRDX + jnz L(first_vec_x4) + + /* Check if at last VEC_SIZE * 4 length before aligning for the + loop. */ + cmpq $(CHAR_PER_VEC * 8), %rax + jbe L(last_4x_vec_or_less) + + + /* Compute number of words checked after aligning. */ +#ifdef USE_AS_WCSLEN + /* Need to compute directly for wcslen as CHAR_SIZE * rsi can + overflow. */ + leaq (VEC_SIZE * -3)(%rdi), %rdx +#else + leaq (VEC_SIZE * -3)(%rdi, %rax), %rax +#endif + + subq $(VEC_SIZE * -1), %rdi + + /* Align data to VEC_SIZE * 4. */ +#if VEC_SIZE == 64 + /* Saves code size. No evex512 processor has partial register + stalls. If that change this can be replaced with `andq + $-(VEC_SIZE * 4), %rdi`. */ + xorb %dil, %dil +#else + andq $-(VEC_SIZE * 4), %rdi +#endif + +#ifdef USE_AS_WCSLEN + subq %rdi, %rdx + sarq $2, %rdx + addq %rdx, %rax +#else + subq %rdi, %rax +#endif + + // mov %rdi, %rdx + + P2ALIGN(6) +L(loop): + /* VPMINU and VPCMP combination provide better performance as + compared to alternative combinations. 
*/ + VMOVA (VEC_SIZE * 4)(%rdi), %VMM(1) + VPMINU (VEC_SIZE * 5)(%rdi), %VMM(1), %VMM(2) + VMOVA (VEC_SIZE * 6)(%rdi), %VMM(3) + VPMINU (VEC_SIZE * 7)(%rdi), %VMM(3), %VMM(4) + + VPTESTN %VMM(2), %VMM(2), %k0 + VPTESTN %VMM(4), %VMM(4), %k1 + + subq $-(VEC_SIZE * 4), %rdi + KORTEST %k0, %k1 + + jnz L(loopend) + subq $(CHAR_PER_VEC * 4), %rax + ja L(loop) + mov %rsi, %rax + ret + + +#if VEC_SIZE == 32 + P2ALIGN_CLAMPED(4, 6) +L(first_vec_x4): +# if VEC_SIZE == 64 + /* If VEC_SIZE == 64 we can fit logic for full return label in + spare bytes before next cache line. */ + bsf %VRDX, %VRDX + sub %eax, %esi + leal (CHAR_PER_VEC * 3)(%rsi, %rdx), %eax + ret + P2ALIGN_CLAMPED(4, 6) +# else + addl $CHAR_PER_VEC, %esi +# endif +L(first_vec_x3): + bsf %VRDX, %VRDX + sub %eax, %esi + leal (CHAR_PER_VEC * 2)(%rsi, %rdx), %eax + ret +#endif + + + P2ALIGN_CLAMPED(4, 11) +L(loopend): + /* We found a null terminator in one of the 4 vectors. */ + + /* Check the first vector. */ + movq %rax, %r8 + VPTESTN %VMM(1), %VMM(1), %k2 + KMOV %k2, %VRCX + bsf %rcx, %r8 + + cmpq $(CHAR_PER_VEC), %r8 + jbe L(end_vec) + + /* Check the second vector. */ + subq $(CHAR_PER_VEC), %rax + movq %rax, %r8 + KMOV %k0, %VRCX + bsf %rcx, %r8 + + cmpq $(CHAR_PER_VEC), %r8 + jbe L(end_vec) + + /* Check the third vector. */ + subq $(CHAR_PER_VEC), %rax + movq %rax, %r8 + VPTESTN %VMM(3), %VMM(3), %k2 + KMOV %k2, %VRCX + bsf %rcx, %r8 + + cmpq $(CHAR_PER_VEC), %r8 + jbe L(end_vec) + + /* It is in the fourth vector. */ + subq $(CHAR_PER_VEC), %rax + movq %rax, %r8 + KMOV %k1, %VRCX + bsf %rcx, %r8 + + P2ALIGN_CLAMPED(4, 3) +L(end_vec): + /* Get the number that has been processed. */ + movq %rsi, %rcx + subq %rax, %rcx + + /* Add that to the offset we found the null terminator at. */ + leaq (%r8, %rcx), %rax + + /* Take the min of that and the limit. */ + cmpq %rsi, %rax + cmovnb %rsi, %rax + ret + + P2ALIGN_CLAMPED(4, 11) +L(crosses_page_boundary): + /* Align data backwards to VEC_SIZE. 
*/ + shrl $20, %eax + movq %rdi, %rcx + andq $-VEC_SIZE, %rcx + VPCMPEQ (%rcx), %VZERO, %k0 + + KMOV %k0, %VRCX +#ifdef USE_AS_WCSLEN + shrl $2, %eax + andl $(CHAR_PER_VEC - 1), %eax +#endif + /* By this point rax contains number of bytes we need to skip. */ + shrx %VRAX, %VRCX, %VRCX + + /* Calculates CHAR_PER_VEC - eax and stores in eax. */ + negl %eax + andl $(CHAR_PER_VEC - 1), %eax + + movq %rsi, %rdx + bsf %VRCX, %VRDX + cmpq %rax, %rdx + ja L(cross_page_continue) + + /* The vector had a null terminator or we are at the limit. */ + movl %edx, %eax + cmpq %rdx, %rsi + cmovb %esi, %eax + ret + +END(STRNLEN) +#endif diff --git a/sysdeps/x86_64/multiarch/strnlen-evex.S b/sysdeps/x86_64/multiarch/strnlen-evex.S index 91b16830eb..c41288906c 100644 --- a/sysdeps/x86_64/multiarch/strnlen-evex.S +++ b/sysdeps/x86_64/multiarch/strnlen-evex.S @@ -1,423 +1,7 @@ -/* strnlen/wcsnlen optimized with 256-bit EVEX instructions. - Copyright (C) 2022-2024 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. 
*/ - -#include <isa-level.h> -#include <sysdep.h> - -#if ISA_SHOULD_BUILD (4) - -# ifndef VEC_SIZE -# include "x86-evex256-vecs.h" -# endif - - -# ifndef STRNLEN -# define STRNLEN __strnlen_evex -# endif - -# ifdef USE_AS_WCSLEN -# define VPCMPEQ vpcmpeqd -# define VPCMPNEQ vpcmpneqd -# define VPTESTN vptestnmd -# define VPTEST vptestmd -# define VPMINU vpminud -# define CHAR_SIZE 4 - -# else -# define VPCMPEQ vpcmpeqb -# define VPCMPNEQ vpcmpneqb -# define VPTESTN vptestnmb -# define VPTEST vptestmb -# define VPMINU vpminub -# define CHAR_SIZE 1 - -# define REG_WIDTH VEC_SIZE -# endif - -# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE) - -# include "reg-macros.h" - -# if CHAR_PER_VEC == 32 -# define SUB_SHORT(imm, reg) subb $(imm), %VGPR_SZ(reg, 8) -# else -# define SUB_SHORT(imm, reg) subl $(imm), %VGPR_SZ(reg, 32) -# endif - - - -# if CHAR_PER_VEC == 64 -# define FALLTHROUGH_RETURN_OFFSET (CHAR_PER_VEC * 3) -# else -# define FALLTHROUGH_RETURN_OFFSET (CHAR_PER_VEC * 2) -# endif - - -# define XZERO VMM_128(0) -# define VZERO VMM(0) -# define PAGE_SIZE 4096 - - .section SECTION(.text), "ax", @progbits -ENTRY_P2ALIGN (STRNLEN, 6) - /* Check zero length. */ - test %RSI_LP, %RSI_LP - jz L(zero) -# ifdef __ILP32__ - /* Clear the upper 32 bits. */ - movl %esi, %esi -# endif - - movl %edi, %eax - vpxorq %XZERO, %XZERO, %XZERO - andl $(PAGE_SIZE - 1), %eax - cmpl $(PAGE_SIZE - VEC_SIZE), %eax - ja L(cross_page_boundary) - - /* Check the first VEC_SIZE bytes. Each bit in K0 represents a - null byte. */ - VPCMPEQ (%rdi), %VZERO, %k0 - - KMOV %k0, %VRCX - movq %rsi, %rax - - /* If src (rcx) is zero, bsf does not change the result. NB: - Must use 64-bit bsf here so that upper bits of len are not - cleared. */ - bsfq %rcx, %rax - /* If rax > CHAR_PER_VEC then rcx must have been zero (no null - CHAR) and rsi must be > CHAR_PER_VEC. */ - cmpq $CHAR_PER_VEC, %rax - ja L(more_1x_vec) - /* Check if first match in bounds. 
*/ - cmpq %rax, %rsi - cmovb %esi, %eax - ret - - -# if CHAR_PER_VEC != 32 - .p2align 4,, 2 -L(zero): -L(max_0): - movl %esi, %eax - ret -# endif - - /* Aligned more for strnlen compares remaining length vs 2 * - CHAR_PER_VEC, 4 * CHAR_PER_VEC, and 8 * CHAR_PER_VEC before - going to the loop. */ - .p2align 4,, 10 -L(more_1x_vec): -L(cross_page_continue): - /* Compute number of words checked after aligning. */ -# ifdef USE_AS_WCSLEN - /* Need to compute directly for wcslen as CHAR_SIZE * rsi can - overflow. */ - movq %rdi, %rax - andq $(VEC_SIZE * -1), %rdi - subq %rdi, %rax - sarq $2, %rax - leaq -(CHAR_PER_VEC * 1)(%rax, %rsi), %rax -# else - leaq (VEC_SIZE * -1)(%rsi, %rdi), %rax - andq $(VEC_SIZE * -1), %rdi - subq %rdi, %rax -# endif - - - VPCMPEQ VEC_SIZE(%rdi), %VZERO, %k0 - - cmpq $(CHAR_PER_VEC * 2), %rax - ja L(more_2x_vec) - -L(last_2x_vec_or_less): - KMOV %k0, %VRDX - test %VRDX, %VRDX - jnz L(last_vec_check) - - /* Check the end of data. */ - SUB_SHORT (CHAR_PER_VEC, rax) - jbe L(max_0) - VPCMPEQ (VEC_SIZE * 2)(%rdi), %VZERO, %k0 - KMOV %k0, %VRDX - test %VRDX, %VRDX - jz L(max_0) - /* Best place for LAST_VEC_CHECK if ZMM. */ - .p2align 4,, 8 -L(last_vec_check): - bsf %VRDX, %VRDX - sub %eax, %edx - lea (%rsi, %rdx), %eax - cmovae %esi, %eax - ret - -# if CHAR_PER_VEC == 32 - .p2align 4,, 2 -L(zero): -L(max_0): - movl %esi, %eax - ret -# endif - - .p2align 4,, 8 -L(last_4x_vec_or_less): - addl $(CHAR_PER_VEC * -4), %eax - VPCMPEQ (VEC_SIZE * 5)(%rdi), %VZERO, %k0 - subq $(VEC_SIZE * -4), %rdi - cmpl $(CHAR_PER_VEC * 2), %eax - jbe L(last_2x_vec_or_less) - - .p2align 4,, 6 -L(more_2x_vec): - /* Remaining length >= 2 * CHAR_PER_VEC so do VEC0/VEC1 without - rechecking bounds. 
*/ - - KMOV %k0, %VRDX - - test %VRDX, %VRDX - jnz L(first_vec_x1) - - VPCMPEQ (VEC_SIZE * 2)(%rdi), %VZERO, %k0 - KMOV %k0, %VRDX - test %VRDX, %VRDX - jnz L(first_vec_x2) - - cmpq $(CHAR_PER_VEC * 4), %rax - ja L(more_4x_vec) - - - VPCMPEQ (VEC_SIZE * 3)(%rdi), %VZERO, %k0 - KMOV %k0, %VRDX - addl $(CHAR_PER_VEC * -2), %eax - test %VRDX, %VRDX - jnz L(last_vec_check) - - subl $(CHAR_PER_VEC), %eax - jbe L(max_1) - - VPCMPEQ (VEC_SIZE * 4)(%rdi), %VZERO, %k0 - KMOV %k0, %VRDX - - test %VRDX, %VRDX - jnz L(last_vec_check) -L(max_1): - movl %esi, %eax - ret - - .p2align 4,, 3 -L(first_vec_x2): -# if VEC_SIZE == 64 - /* If VEC_SIZE == 64 we can fit logic for full return label in - spare bytes before next cache line. */ - bsf %VRDX, %VRDX - sub %eax, %esi - leal (CHAR_PER_VEC * 1)(%rsi, %rdx), %eax - ret - .p2align 4,, 6 -# else - addl $CHAR_PER_VEC, %esi -# endif -L(first_vec_x1): - bsf %VRDX, %VRDX - sub %eax, %esi - leal (CHAR_PER_VEC * 0)(%rsi, %rdx), %eax - ret - - - .p2align 4,, 6 -L(first_vec_x4): -# if VEC_SIZE == 64 - /* If VEC_SIZE == 64 we can fit logic for full return label in - spare bytes before next cache line. */ - bsf %VRDX, %VRDX - sub %eax, %esi - leal (CHAR_PER_VEC * 3)(%rsi, %rdx), %eax - ret - .p2align 4,, 6 -# else - addl $CHAR_PER_VEC, %esi -# endif -L(first_vec_x3): - bsf %VRDX, %VRDX - sub %eax, %esi - leal (CHAR_PER_VEC * 2)(%rsi, %rdx), %eax - ret - - .p2align 4,, 5 -L(more_4x_vec): - VPCMPEQ (VEC_SIZE * 3)(%rdi), %VZERO, %k0 - KMOV %k0, %VRDX - test %VRDX, %VRDX - jnz L(first_vec_x3) - - VPCMPEQ (VEC_SIZE * 4)(%rdi), %VZERO, %k0 - KMOV %k0, %VRDX - test %VRDX, %VRDX - jnz L(first_vec_x4) - - /* Check if at last VEC_SIZE * 4 length before aligning for the - loop. */ - cmpq $(CHAR_PER_VEC * 8), %rax - jbe L(last_4x_vec_or_less) - - - /* Compute number of words checked after aligning. */ -# ifdef USE_AS_WCSLEN - /* Need to compute directly for wcslen as CHAR_SIZE * rsi can - overflow. 
*/ - leaq (VEC_SIZE * -3)(%rdi), %rdx -# else - leaq (VEC_SIZE * -3)(%rdi, %rax), %rax -# endif - - subq $(VEC_SIZE * -1), %rdi - - /* Align data to VEC_SIZE * 4. */ -# if VEC_SIZE == 64 - /* Saves code size. No evex512 processor has partial register - stalls. If that change this can be replaced with `andq - $-(VEC_SIZE * 4), %rdi`. */ - xorb %dil, %dil -# else - andq $-(VEC_SIZE * 4), %rdi -# endif - -# ifdef USE_AS_WCSLEN - subq %rdi, %rdx - sarq $2, %rdx - addq %rdx, %rax -# else - subq %rdi, %rax -# endif - /* Compare 4 * VEC at a time forward. */ - .p2align 4,, 11 -L(loop_4x_vec): - VMOVA (VEC_SIZE * 4)(%rdi), %VMM(1) - VPMINU (VEC_SIZE * 5)(%rdi), %VMM(1), %VMM(2) - VMOVA (VEC_SIZE * 6)(%rdi), %VMM(3) - VPMINU (VEC_SIZE * 7)(%rdi), %VMM(3), %VMM(4) - VPTESTN %VMM(2), %VMM(2), %k0 - VPTESTN %VMM(4), %VMM(4), %k2 - subq $-(VEC_SIZE * 4), %rdi - /* Break if at end of length. */ - subq $(CHAR_PER_VEC * 4), %rax - jbe L(loop_len_end) - - - KORTEST %k0, %k2 - jz L(loop_4x_vec) - - -L(loop_last_4x_vec): - movq %rsi, %rcx - subq %rax, %rsi - VPTESTN %VMM(1), %VMM(1), %k1 - KMOV %k1, %VRDX - test %VRDX, %VRDX - jnz L(last_vec_x0) - - KMOV %k0, %VRDX - test %VRDX, %VRDX - jnz L(last_vec_x1) - - VPTESTN %VMM(3), %VMM(3), %k0 - - /* Separate logic for VEC_SIZE == 64 and VEC_SIZE == 32 for - returning last 2x VEC. For VEC_SIZE == 64 we test each VEC - individually, for VEC_SIZE == 32 we combine them in a single - 64-bit GPR. */ -# if CHAR_PER_VEC == 64 - KMOV %k0, %VRDX - test %VRDX, %VRDX - jnz L(last_vec_x2) - KMOV %k2, %VRDX -# else - /* We can only combine last 2x VEC masks if CHAR_PER_VEC <= 32. - */ - kmovd %k2, %edx - kmovd %k0, %eax - salq $CHAR_PER_VEC, %rdx - orq %rax, %rdx -# endif - - /* first_vec_x3 for strlen-ZMM and first_vec_x2 for strlen-YMM. - */ - bsfq %rdx, %rdx - leaq (FALLTHROUGH_RETURN_OFFSET - CHAR_PER_VEC * 4)(%rsi, %rdx), %rax - cmpq %rax, %rcx - cmovb %rcx, %rax - ret - - /* Handle last 4x VEC after loop. All VECs have been loaded. 
*/ - .p2align 4,, 4 -L(loop_len_end): - KORTEST %k0, %k2 - jnz L(loop_last_4x_vec) - movq %rsi, %rax - ret - - -# if CHAR_PER_VEC == 64 - /* Since we can't combine the last 2x VEC for VEC_SIZE == 64 - need return label for it. */ - .p2align 4,, 8 -L(last_vec_x2): - bsf %VRDX, %VRDX - leaq (CHAR_PER_VEC * -2)(%rsi, %rdx), %rax - cmpq %rax, %rcx - cmovb %rcx, %rax - ret -# endif - - - .p2align 4,, 10 -L(last_vec_x1): - addq $CHAR_PER_VEC, %rsi -L(last_vec_x0): - bsf %VRDX, %VRDX - leaq (CHAR_PER_VEC * -4)(%rsi, %rdx), %rax - cmpq %rax, %rcx - cmovb %rcx, %rax - ret - - - .p2align 4,, 8 -L(cross_page_boundary): - /* Align data to VEC_SIZE. */ - movq %rdi, %rcx - andq $-VEC_SIZE, %rcx - VPCMPEQ (%rcx), %VZERO, %k0 - - KMOV %k0, %VRCX -# ifdef USE_AS_WCSLEN - shrl $2, %eax - andl $(CHAR_PER_VEC - 1), %eax -# endif - shrx %VRAX, %VRCX, %VRCX - - negl %eax - andl $(CHAR_PER_VEC - 1), %eax - movq %rsi, %rdx - bsf %VRCX, %VRDX - cmpq %rax, %rdx - ja L(cross_page_continue) - movl %edx, %eax - cmpq %rdx, %rsi - cmovb %esi, %eax - ret -END (STRNLEN) +#ifndef STRNLEN +#define STRNLEN __strnlen_evex #endif + +#include "x86-evex256-vecs.h" +#include "reg-macros.h" +#include "strnlen-evex-base.S" diff --git a/sysdeps/x86_64/multiarch/strnlen-evex512.S b/sysdeps/x86_64/multiarch/strnlen-evex512.S index f8e55883bb..8ef54078f8 100644 --- a/sysdeps/x86_64/multiarch/strnlen-evex512.S +++ b/sysdeps/x86_64/multiarch/strnlen-evex512.S @@ -1,264 +1,7 @@ -/* Placeholder function, not used by any processor at the moment. - Copyright (C) 2022-2024 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. */ - #ifndef STRNLEN #define STRNLEN __strnlen_evex512 #endif #include "x86-evex512-vecs.h" #include "reg-macros.h" - -#include <isa-level.h> - -#if ISA_SHOULD_BUILD (4) - -# include <sysdep.h> - -# ifdef USE_AS_WCSLEN -# define VPCMPEQ vpcmpeqd -# define VPTESTN vptestnmd -# define VPMINU vpminud -# define CHAR_SIZE 4 -# else -# define VPCMPEQ vpcmpeqb -# define VPTESTN vptestnmb -# define VPMINU vpminub -# define CHAR_SIZE 1 -# endif - -# define PAGE_SIZE 4096 -# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE) - - .section SECTION(.text),"ax",@progbits -/* Aligning entry point to 64 byte, provides better performance for - one vector length string. */ -ENTRY_P2ALIGN (STRNLEN, 6) - /* Check zero length. */ - test %RSI_LP, %RSI_LP - jz L(ret_max) -# ifdef __ILP32__ - /* Clear the upper 32 bits. */ - movl %esi, %esi -# endif - - movl %edi, %eax - vpxorq %VMM_128(0), %VMM_128(0), %VMM_128(0) - sall $20, %eax - cmpl $((PAGE_SIZE - VEC_SIZE) << 20), %eax - ja L(page_cross) - - /* Compare [w]char for null, mask bit will be set for match. */ - VPCMPEQ (%rdi), %VMM(0), %k0 - KMOV %k0, %VRCX - /* Store max length in rax. */ - mov %rsi, %rax - /* If rcx is 0, rax will have max length. We can not use VRCX - and VRAX here for evex256 because, upper 32 bits may be - undefined for ecx and eax. */ - bsfq %rcx, %rax - cmp $CHAR_PER_VEC, %rax - ja L(align_more) - cmpq %rax, %rsi - cmovb %esi, %eax - ret - - /* At this point vector max length reached. */ - .p2align 4,,3 -L(ret_max): - movq %rsi, %rax - ret - -L(align_more): - mov %rdi, %rax - /* Align rax to VEC_SIZE. 
*/ - andq $-VEC_SIZE, %rax - movq %rdi, %rdx - subq %rax, %rdx -# ifdef USE_AS_WCSLEN - shr $2, %VRDX -# endif - /* At this point rdx contains [w]chars already compared. */ - leaq -CHAR_PER_VEC(%rsi, %rdx), %rdx - /* At this point rdx contains number of w[char] needs to go. - Now onwards rdx will keep decrementing with each compare. */ - - /* Loop unroll 4 times for 4 vector loop. */ - VPCMPEQ VEC_SIZE(%rax), %VMM(0), %k0 - subq $-VEC_SIZE, %rax - KMOV %k0, %VRCX - test %VRCX, %VRCX - jnz L(ret_vec_x1) - - subq $CHAR_PER_VEC, %rdx - jbe L(ret_max) - - VPCMPEQ VEC_SIZE(%rax), %VMM(0), %k0 - KMOV %k0, %VRCX - test %VRCX, %VRCX - jnz L(ret_vec_x2) - - subq $CHAR_PER_VEC, %rdx - jbe L(ret_max) - - VPCMPEQ (VEC_SIZE * 2)(%rax), %VMM(0), %k0 - KMOV %k0, %VRCX - test %VRCX, %VRCX - jnz L(ret_vec_x3) - - subq $CHAR_PER_VEC, %rdx - jbe L(ret_max) - - VPCMPEQ (VEC_SIZE * 3)(%rax), %VMM(0), %k0 - KMOV %k0, %VRCX - test %VRCX, %VRCX - jnz L(ret_vec_x4) - - subq $CHAR_PER_VEC, %rdx - jbe L(ret_max) - /* Save pointer before 4 x VEC_SIZE alignment. */ - movq %rax, %rcx - - /* Align address to VEC_SIZE * 4 for loop. */ - andq $-(VEC_SIZE * 4), %rax - - subq %rax, %rcx -# ifdef USE_AS_WCSLEN - shr $2, %VRCX -# endif - /* rcx contains number of [w]char will be recompared due to - alignment fixes. rdx must be incremented by rcx to offset - alignment adjustment. */ - addq %rcx, %rdx - /* Need jump as we don't want to add/subtract rdx for first - iteration of 4 x VEC_SIZE aligned loop. */ - - .p2align 4,,11 -L(loop): - /* VPMINU and VPCMP combination provide better performance as - compared to alternative combinations. 
*/ - VMOVA (VEC_SIZE * 4)(%rax), %VMM(1) - VPMINU (VEC_SIZE * 5)(%rax), %VMM(1), %VMM(2) - VMOVA (VEC_SIZE * 6)(%rax), %VMM(3) - VPMINU (VEC_SIZE * 7)(%rax), %VMM(3), %VMM(4) - - VPTESTN %VMM(2), %VMM(2), %k0 - VPTESTN %VMM(4), %VMM(4), %k1 - - subq $-(VEC_SIZE * 4), %rax - KORTEST %k0, %k1 - - jnz L(loopend) - subq $(CHAR_PER_VEC * 4), %rdx - ja L(loop) - mov %rsi, %rax - ret - -L(loopend): - - VPTESTN %VMM(1), %VMM(1), %k2 - KMOV %k2, %VRCX - test %VRCX, %VRCX - jnz L(ret_vec_x1) - - KMOV %k0, %VRCX - /* At this point, if k0 is non zero, null char must be in the - second vector. */ - test %VRCX, %VRCX - jnz L(ret_vec_x2) - - VPTESTN %VMM(3), %VMM(3), %k3 - KMOV %k3, %VRCX - test %VRCX, %VRCX - jnz L(ret_vec_x3) - /* At this point null [w]char must be in the fourth vector so no - need to check. */ - KMOV %k1, %VRCX - - /* Fourth, third, second vector terminating are pretty much - same, implemented this way to avoid branching and reuse code - from pre loop exit condition. */ -L(ret_vec_x4): - bsf %VRCX, %VRCX - subq %rdi, %rax -# ifdef USE_AS_WCSLEN - subq $-(VEC_SIZE * 3), %rax - shrq $2, %rax - addq %rcx, %rax -# else - leaq (VEC_SIZE * 3)(%rcx, %rax), %rax -# endif - - cmpq %rsi, %rax - cmovnb %rsi, %rax - ret - -L(ret_vec_x3): - bsf %VRCX, %VRCX - subq %rdi, %rax -# ifdef USE_AS_WCSLEN - subq $-(VEC_SIZE * 2), %rax - shrq $2, %rax - addq %rcx, %rax -# else - leaq (VEC_SIZE * 2)(%rcx, %rax), %rax -# endif - cmpq %rsi, %rax - cmovnb %rsi, %rax - ret - -L(ret_vec_x2): - subq $-VEC_SIZE, %rax -L(ret_vec_x1): - bsf %VRCX, %VRCX - subq %rdi, %rax -# ifdef USE_AS_WCSLEN - shrq $2, %rax -# endif - addq %rcx, %rax - cmpq %rsi, %rax - cmovnb %rsi, %rax - ret - -L(page_cross): - mov %rdi, %rax - movl %edi, %ecx - andl $(VEC_SIZE - 1), %ecx -# ifdef USE_AS_WCSLEN - sarl $2, %ecx -# endif - /* ecx contains number of w[char] to be skipped as a result - of address alignment. 
*/ - andq $-VEC_SIZE, %rax - VPCMPEQ (%rax), %VMM(0), %k0 - KMOV %k0, %VRDX - /* Ignore number of character for alignment adjustment. */ - shr %cl, %VRDX - jnz L(page_cross_end) - movl $CHAR_PER_VEC, %eax - sub %ecx, %eax - cmp %rax, %rsi - ja L(align_more) - -L(page_cross_end): - bsf %VRDX, %VRAX - cmpq %rsi, %rax - cmovnb %esi, %eax - ret - -END (STRNLEN) -#endif +#include "strnlen-evex-base.S" \ No newline at end of file diff --git a/sysdeps/x86_64/nptl/tcb-offsets.sym b/sysdeps/x86_64/nptl/tcb-offsets.sym index 2bbd563a6c..988a4b8593 100644 --- a/sysdeps/x86_64/nptl/tcb-offsets.sym +++ b/sysdeps/x86_64/nptl/tcb-offsets.sym @@ -13,6 +13,3 @@ MULTIPLE_THREADS_OFFSET offsetof (tcbhead_t, multiple_threads) POINTER_GUARD offsetof (tcbhead_t, pointer_guard) FEATURE_1_OFFSET offsetof (tcbhead_t, feature_1) SSP_BASE_OFFSET offsetof (tcbhead_t, ssp_base) - --- Not strictly offsets, but these values are also used in the TCB. -TCB_CANCELED_BITMASK CANCELED_BITMASK |