279 files changed, 5908 insertions, 3107 deletions
diff --git a/sysdeps/aarch64/fpu/Versions b/sysdeps/aarch64/fpu/Versions
index cc15ce2d1e..015211f5f4 100644
--- a/sysdeps/aarch64/fpu/Versions
+++ b/sysdeps/aarch64/fpu/Versions
@@ -135,4 +135,11 @@ libmvec {
     _ZGVsMxv_tanh;
     _ZGVsMxv_tanhf;
   }
+  GLIBC_2.41 {
+    _ZGVnN2v_logp1;
+    _ZGVnN2v_logp1f;
+    _ZGVnN4v_logp1f;
+    _ZGVsMxv_logp1;
+    _ZGVsMxv_logp1f;
+  }
 }
diff --git a/sysdeps/aarch64/fpu/acoshf_advsimd.c b/sysdeps/aarch64/fpu/acoshf_advsimd.c
index 8916dcbf40..004474acf9 100644
--- a/sysdeps/aarch64/fpu/acoshf_advsimd.c
+++ b/sysdeps/aarch64/fpu/acoshf_advsimd.c
@@ -25,35 +25,32 @@ const static struct data
 {
   struct v_log1pf_data log1pf_consts;
   uint32x4_t one;
-  uint16x4_t thresh;
-} data = {
-  .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE,
-  .one = V4 (0x3f800000),
-  .thresh = V4 (0x2000) /* top(asuint(SquareLim) - asuint(1)).  */
-};
+} data = { .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE, .one = V4 (0x3f800000) };
+
+#define Thresh vdup_n_u16 (0x2000) /* top(asuint(SquareLim) - asuint(1)).  */
 
 static float32x4_t NOINLINE VPCS_ATTR
 special_case (float32x4_t x, float32x4_t y, uint16x4_t special,
-	      const struct v_log1pf_data d)
+	      const struct v_log1pf_data *d)
 {
   return v_call_f32 (acoshf, x, log1pf_inline (y, d), vmovl_u16 (special));
 }
 
 /* Vector approximation for single-precision acosh, based on log1p. Maximum
    error depends on WANT_SIMD_EXCEPT. With SIMD fp exceptions enabled, it
-   is 2.78 ULP:
-   __v_acoshf(0x1.07887p+0) got 0x1.ef9e9cp-3
-			   want 0x1.ef9ea2p-3.
+   is 3.00 ULP:
+   _ZGVnN4v_acoshf(0x1.01df3ap+0) got 0x1.ef0a82p-4
+				 want 0x1.ef0a7cp-4.
    With exceptions disabled, we can compute u with a shorter dependency chain,
-   which gives maximum error of 3.07 ULP:
-  __v_acoshf(0x1.01f83ep+0) got 0x1.fbc7fap-4
-			   want 0x1.fbc7f4p-4.  */
+   which gives maximum error of 3.22 ULP:
+   _ZGVnN4v_acoshf(0x1.007ef2p+0) got 0x1.fdcdccp-5
+				 want 0x1.fdcdd2p-5.  */
 
 VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (acosh) (float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
   uint32x4_t ix = vreinterpretq_u32_f32 (x);
-  uint16x4_t special = vcge_u16 (vsubhn_u32 (ix, d->one), d->thresh);
+  uint16x4_t special = vcge_u16 (vsubhn_u32 (ix, d->one), Thresh);
 
 #if WANT_SIMD_EXCEPT
   /* Mask special lanes with 1 to side-step spurious invalid or overflow. Use
@@ -64,15 +61,16 @@ VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (acosh) (float32x4_t x)
   float32x4_t xm1 = v_zerofy_f32 (vsubq_f32 (x, v_f32 (1)), p);
   float32x4_t u = vfmaq_f32 (vaddq_f32 (xm1, xm1), xm1, xm1);
 #else
-  float32x4_t xm1 = vsubq_f32 (x, v_f32 (1));
-  float32x4_t u = vmulq_f32 (xm1, vaddq_f32 (x, v_f32 (1.0f)));
+  float32x4_t xm1 = vsubq_f32 (x, vreinterpretq_f32_u32 (d->one));
+  float32x4_t u
+      = vmulq_f32 (xm1, vaddq_f32 (x, vreinterpretq_f32_u32 (d->one)));
 #endif
 
   float32x4_t y = vaddq_f32 (xm1, vsqrtq_f32 (u));
 
   if (__glibc_unlikely (v_any_u16h (special)))
-    return special_case (x, y, special, d->log1pf_consts);
-  return log1pf_inline (y, d->log1pf_consts);
+    return special_case (x, y, special, &d->log1pf_consts);
+  return log1pf_inline (y, &d->log1pf_consts);
 }
 libmvec_hidden_def (V_NAME_F1 (acosh))
 HALF_WIDTH_ALIAS_F1 (acosh)
diff --git a/sysdeps/aarch64/fpu/advsimd_f32_protos.h b/sysdeps/aarch64/fpu/advsimd_f32_protos.h
index 097d403ffe..5909bb4ce9 100644
--- a/sysdeps/aarch64/fpu/advsimd_f32_protos.h
+++ b/sysdeps/aarch64/fpu/advsimd_f32_protos.h
@@ -36,6 +36,7 @@ libmvec_hidden_proto (V_NAME_F2(hypot));
 libmvec_hidden_proto (V_NAME_F1(log10));
 libmvec_hidden_proto (V_NAME_F1(log1p));
 libmvec_hidden_proto (V_NAME_F1(log2));
+libmvec_hidden_proto (V_NAME_F1(logp1));
 libmvec_hidden_proto (V_NAME_F1(log));
 libmvec_hidden_proto (V_NAME_F2(pow));
 libmvec_hidden_proto (V_NAME_F1(sin));
diff --git a/sysdeps/aarch64/fpu/asinhf_advsimd.c b/sysdeps/aarch64/fpu/asinhf_advsimd.c
index 09fd8a6143..eb789b91b6 100644
--- a/sysdeps/aarch64/fpu/asinhf_advsimd.c
+++ b/sysdeps/aarch64/fpu/asinhf_advsimd.c
@@ -20,16 +20,16 @@
 #include "v_math.h"
 #include "v_log1pf_inline.h"
 
-#define SignMask v_u32 (0x80000000)
-
 const static struct data
 {
   struct v_log1pf_data log1pf_consts;
+  float32x4_t one;
   uint32x4_t big_bound;
 #if WANT_SIMD_EXCEPT
   uint32x4_t tiny_bound;
 #endif
 } data = {
+  .one = V4 (1),
   .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE,
   .big_bound = V4 (0x5f800000), /* asuint(0x1p64).  */
 #if WANT_SIMD_EXCEPT
@@ -38,20 +38,27 @@ const static struct data
 };
 
 static float32x4_t NOINLINE VPCS_ATTR
-special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+special_case (float32x4_t x, uint32x4_t sign, float32x4_t y,
+	      uint32x4_t special, const struct data *d)
 {
-  return v_call_f32 (asinhf, x, y, special);
+  return v_call_f32 (
+      asinhf, x,
+      vreinterpretq_f32_u32 (veorq_u32 (
+	  sign, vreinterpretq_u32_f32 (log1pf_inline (y, &d->log1pf_consts)))),
+      special);
 }
 
 /* Single-precision implementation of vector asinh(x), using vector log1p.
-   Worst-case error is 2.66 ULP, at roughly +/-0.25:
-   __v_asinhf(0x1.01b04p-2) got 0x1.fe163ep-3 want 0x1.fe1638p-3.  */
+   Worst-case error is 2.59 ULP:
+   _ZGVnN4v_asinhf(0x1.d86124p-3) got 0x1.d449bep-3
+				 want 0x1.d449c4p-3.  */
 VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (asinh) (float32x4_t x)
 {
   const struct data *dat = ptr_barrier (&data);
-  uint32x4_t iax = vbicq_u32 (vreinterpretq_u32_f32 (x), SignMask);
-  float32x4_t ax = vreinterpretq_f32_u32 (iax);
+  float32x4_t ax = vabsq_f32 (x);
+  uint32x4_t iax = vreinterpretq_u32_f32 (ax);
   uint32x4_t special = vcgeq_u32 (iax, dat->big_bound);
+  uint32x4_t sign = veorq_u32 (vreinterpretq_u32_f32 (x), iax);
   float32x4_t special_arg = x;
 
 #if WANT_SIMD_EXCEPT
@@ -68,13 +75,13 @@ VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (asinh) (float32x4_t x)
   /* asinh(x) = log(x + sqrt(x * x + 1)).
      For positive x, asinh(x) = log1p(x + x * x / (1 + sqrt(x * x + 1))).  */
   float32x4_t d
-      = vaddq_f32 (v_f32 (1), vsqrtq_f32 (vfmaq_f32 (v_f32 (1), x, x)));
-  float32x4_t y = log1pf_inline (
-      vaddq_f32 (ax, vdivq_f32 (vmulq_f32 (ax, ax), d)), dat->log1pf_consts);
+      = vaddq_f32 (v_f32 (1), vsqrtq_f32 (vfmaq_f32 (dat->one, ax, ax)));
+  float32x4_t y = vaddq_f32 (ax, vdivq_f32 (vmulq_f32 (ax, ax), d));
 
   if (__glibc_unlikely (v_any_u32 (special)))
-    return special_case (special_arg, vbslq_f32 (SignMask, x, y), special);
-  return vbslq_f32 (SignMask, x, y);
+    return special_case (special_arg, sign, y, special, dat);
+  return vreinterpretq_f32_u32 (veorq_u32 (
+      sign, vreinterpretq_u32_f32 (log1pf_inline (y, &dat->log1pf_consts))));
 }
 libmvec_hidden_def (V_NAME_F1 (asinh))
 HALF_WIDTH_ALIAS_F1 (asinh)
diff --git a/sysdeps/aarch64/fpu/atanhf_advsimd.c b/sysdeps/aarch64/fpu/atanhf_advsimd.c
index ae488f7b54..818b6c92ad 100644
--- a/sysdeps/aarch64/fpu/atanhf_advsimd.c
+++ b/sysdeps/aarch64/fpu/atanhf_advsimd.c
@@ -40,15 +40,17 @@ const static struct data
 #define Half v_u32 (0x3f000000)
 
 static float32x4_t NOINLINE VPCS_ATTR
-special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+special_case (float32x4_t x, float32x4_t halfsign, float32x4_t y,
+	      uint32x4_t special)
 {
-  return v_call_f32 (atanhf, x, y, special);
+  return v_call_f32 (atanhf, vbslq_f32 (AbsMask, x, halfsign),
+		     vmulq_f32 (halfsign, y), special);
 }
 
 /* Approximation for vector single-precision atanh(x) using modified log1p.
-   The maximum error is 3.08 ULP:
-   __v_atanhf(0x1.ff215p-5) got 0x1.ffcb7cp-5
-			   want 0x1.ffcb82p-5.  */
+   The maximum error is 2.93 ULP:
+   _ZGVnN4v_atanhf(0x1.f43d7p-5) got 0x1.f4dcfep-5
+				want 0x1.f4dcf8p-5.  */
 VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (atanh) (float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
@@ -68,11 +70,19 @@ VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (atanh) (float32x4_t x)
   uint32x4_t special = vcgeq_u32 (iax, d->one);
 #endif
 
-  float32x4_t y = vdivq_f32 (vaddq_f32 (ax, ax), vsubq_f32 (v_f32 (1), ax));
-  y = log1pf_inline (y, d->log1pf_consts);
+  float32x4_t y = vdivq_f32 (vaddq_f32 (ax, ax),
+			     vsubq_f32 (vreinterpretq_f32_u32 (d->one), ax));
+  y = log1pf_inline (y, &d->log1pf_consts);
 
+  /* If exceptions are not required, pass ax to special_case for a shorter
+     dependency chain. If exceptions are required, ax will have been zerofied,
+     so x has to be passed instead.  */
   if (__glibc_unlikely (v_any_u32 (special)))
-    return special_case (x, vmulq_f32 (halfsign, y), special);
+#if WANT_SIMD_EXCEPT
+    return special_case (x, halfsign, y, special);
+#else
+    return special_case (ax, halfsign, y, special);
+#endif
   return vmulq_f32 (halfsign, y);
 }
 libmvec_hidden_def (V_NAME_F1 (atanh))
diff --git a/sysdeps/aarch64/fpu/bits/math-vector.h b/sysdeps/aarch64/fpu/bits/math-vector.h
index 7484150131..f295fe185d 100644
--- a/sysdeps/aarch64/fpu/bits/math-vector.h
+++ b/sysdeps/aarch64/fpu/bits/math-vector.h
@@ -113,6 +113,10 @@
 # define __DECL_SIMD_log2 __DECL_SIMD_aarch64
 # undef __DECL_SIMD_log2f
 # define __DECL_SIMD_log2f __DECL_SIMD_aarch64
+# undef __DECL_SIMD_logp1
+# define __DECL_SIMD_logp1 __DECL_SIMD_aarch64
+# undef __DECL_SIMD_logp1f
+# define __DECL_SIMD_logp1f __DECL_SIMD_aarch64
 # undef __DECL_SIMD_pow
 # define __DECL_SIMD_pow __DECL_SIMD_aarch64
 # undef __DECL_SIMD_powf
@@ -180,6 +184,7 @@ __vpcs __f32x4_t _ZGVnN4v_logf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_log10f (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_log1pf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_log2f (__f32x4_t);
+__vpcs __f32x4_t _ZGVnN4v_logp1f (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4vv_powf (__f32x4_t, __f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_sinhf (__f32x4_t);
@@ -207,6 +212,7 @@ __vpcs __f64x2_t _ZGVnN2v_log (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_log10 (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_log1p (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_log2 (__f64x2_t);
+__vpcs __f64x2_t _ZGVnN2v_logp1 (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2vv_pow (__f64x2_t, __f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_sinh (__f64x2_t);
@@ -239,6 +245,7 @@ __sv_f32_t _ZGVsMxv_logf (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_log10f (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_log1pf (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_log2f (__sv_f32_t, __sv_bool_t);
+__sv_f32_t _ZGVsMxv_logp1f (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxvv_powf (__sv_f32_t, __sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_sinf (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_sinhf (__sv_f32_t, __sv_bool_t);
@@ -266,6 +273,7 @@ __sv_f64_t _ZGVsMxv_log (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_log10 (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_log1p (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_log2 (__sv_f64_t, __sv_bool_t);
+__sv_f64_t _ZGVsMxv_logp1 (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxvv_pow (__sv_f64_t, __sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_sin (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_sinh (__sv_f64_t, __sv_bool_t);
diff --git a/sysdeps/aarch64/fpu/cos_advsimd.c b/sysdeps/aarch64/fpu/cos_advsimd.c
index 3924c9ce44..11a89b1530 100644
--- a/sysdeps/aarch64/fpu/cos_advsimd.c
+++ b/sysdeps/aarch64/fpu/cos_advsimd.c
@@ -22,7 +22,7 @@
 static const struct data
 {
   float64x2_t poly[7];
-  float64x2_t range_val, shift, inv_pi, half_pi, pi_1, pi_2, pi_3;
+  float64x2_t range_val, inv_pi, pi_1, pi_2, pi_3;
 } data = {
   /* Worst-case error is 3.3 ulp in [-pi/2, pi/2].  */
   .poly = { V2 (-0x1.555555555547bp-3), V2 (0x1.1111111108a4dp-7),
@@ -30,11 +30,9 @@ static const struct data
 	    V2 (-0x1.ae633919987c6p-26), V2 (0x1.60e277ae07cecp-33),
 	    V2 (-0x1.9e9540300a1p-41) },
   .inv_pi = V2 (0x1.45f306dc9c883p-2),
-  .half_pi = V2 (0x1.921fb54442d18p+0),
   .pi_1 = V2 (0x1.921fb54442d18p+1),
   .pi_2 = V2 (0x1.1a62633145c06p-53),
   .pi_3 = V2 (0x1.c1cd129024e09p-106),
-  .shift = V2 (0x1.8p52),
   .range_val = V2 (0x1p23)
 };
 
@@ -68,10 +66,9 @@ float64x2_t VPCS_ATTR V_NAME_D1 (cos) (float64x2_t x)
 #endif
 
   /* n = rint((|x|+pi/2)/pi) - 0.5.  */
-  n = vfmaq_f64 (d->shift, d->inv_pi, vaddq_f64 (r, d->half_pi));
-  odd = vshlq_n_u64 (vreinterpretq_u64_f64 (n), 63);
-  n = vsubq_f64 (n, d->shift);
-  n = vsubq_f64 (n, v_f64 (0.5));
+  n = vrndaq_f64 (vfmaq_f64 (v_f64 (0.5), r, d->inv_pi));
+  odd = vshlq_n_u64 (vreinterpretq_u64_s64 (vcvtq_s64_f64 (n)), 63);
+  n = vsubq_f64 (n, v_f64 (0.5f));
 
   /* r = |x| - n*pi  (range reduction into -pi/2 .. pi/2).  */
   r = vfmsq_f64 (r, d->pi_1, n);
diff --git a/sysdeps/aarch64/fpu/cosf_advsimd.c b/sysdeps/aarch64/fpu/cosf_advsimd.c
index d0c285b03a..85a1b37373 100644
--- a/sysdeps/aarch64/fpu/cosf_advsimd.c
+++ b/sysdeps/aarch64/fpu/cosf_advsimd.c
@@ -22,7 +22,7 @@
 static const struct data
 {
   float32x4_t poly[4];
-  float32x4_t range_val, inv_pi, half_pi, shift, pi_1, pi_2, pi_3;
+  float32x4_t range_val, inv_pi, pi_1, pi_2, pi_3;
 } data = {
   /* 1.886 ulp error.  */
   .poly = { V4 (-0x1.555548p-3f), V4 (0x1.110df4p-7f), V4 (-0x1.9f42eap-13f),
@@ -33,8 +33,6 @@ static const struct data
   .pi_3 = V4 (-0x1.ee59dap-49f),
 
   .inv_pi = V4 (0x1.45f306p-2f),
-  .shift = V4 (0x1.8p+23f),
-  .half_pi = V4 (0x1.921fb6p0f),
   .range_val = V4 (0x1p20f)
 };
 
@@ -69,9 +67,8 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (cos) (float32x4_t x)
 #endif
 
   /* n = rint((|x|+pi/2)/pi) - 0.5.  */
-  n = vfmaq_f32 (d->shift, d->inv_pi, vaddq_f32 (r, d->half_pi));
-  odd = vshlq_n_u32 (vreinterpretq_u32_f32 (n), 31);
-  n = vsubq_f32 (n, d->shift);
+  n = vrndaq_f32 (vfmaq_f32 (v_f32 (0.5), r, d->inv_pi));
+  odd = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 31);
   n = vsubq_f32 (n, v_f32 (0.5f));
 
   /* r = |x| - n*pi  (range reduction into -pi/2 .. pi/2).  */
diff --git a/sysdeps/aarch64/fpu/coshf_sve.c b/sysdeps/aarch64/fpu/coshf_sve.c
index e5d8a299c6..7ad6efa0fc 100644
--- a/sysdeps/aarch64/fpu/coshf_sve.c
+++ b/sysdeps/aarch64/fpu/coshf_sve.c
@@ -23,37 +23,42 @@
 static const struct data
 {
   struct sv_expf_data expf_consts;
-  uint32_t special_bound;
+  float special_bound;
 } data = {
   .expf_consts = SV_EXPF_DATA,
   /* 0x1.5a92d8p+6: expf overflows above this, so have to use special case.  */
-  .special_bound = 0x42ad496c,
+  .special_bound = 0x1.5a92d8p+6,
 };
 
 static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t pg)
+special_case (svfloat32_t x, svfloat32_t half_e, svfloat32_t half_over_e,
+	      svbool_t pg)
 {
-  return sv_call_f32 (coshf, x, y, pg);
+  return sv_call_f32 (coshf, x, svadd_x (svptrue_b32 (), half_e, half_over_e),
+		      pg);
 }
 
 /* Single-precision vector cosh, using vector expf.
-   Maximum error is 1.89 ULP:
-   _ZGVsMxv_coshf (-0x1.65898cp+6) got 0x1.f00aep+127
-				  want 0x1.f00adcp+127.  */
+   Maximum error is 2.77 ULP:
+   _ZGVsMxv_coshf(-0x1.5b38f4p+1) got 0x1.e45946p+2
+				 want 0x1.e4594cp+2.  */
 svfloat32_t SV_NAME_F1 (cosh) (svfloat32_t x, svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
-  svfloat32_t ax = svabs_x (pg, x);
-  svbool_t special = svcmpge (pg, svreinterpret_u32 (ax), d->special_bound);
+  svbool_t special = svacge (pg, x, d->special_bound);
 
-  /* Calculate cosh by exp(x) / 2 + exp(-x) / 2.  */
-  svfloat32_t t = expf_inline (ax, pg, &d->expf_consts);
-  svfloat32_t half_t = svmul_x (pg, t, 0.5);
-  svfloat32_t half_over_t = svdivr_x (pg, t, 0.5);
+  /* Calculate cosh by exp(x) / 2 + exp(-x) / 2.
+     Note that x is passed to exp here, rather than |x|. This is to avoid using
+     destructive unary ABS for better register usage. However it means the
+     routine is not exactly symmetrical, as the exp helper is slightly less
+     accurate in the negative range.  */
+  svfloat32_t e = expf_inline (x, pg, &d->expf_consts);
+  svfloat32_t half_e = svmul_x (svptrue_b32 (), e, 0.5);
+  svfloat32_t half_over_e = svdivr_x (pg, e, 0.5);
 
   if (__glibc_unlikely (svptest_any (pg, special)))
-    return special_case (x, svadd_x (pg, half_t, half_over_t), special);
+    return special_case (x, half_e, half_over_e, special);
 
-  return svadd_x (pg, half_t, half_over_t);
+  return svadd_x (svptrue_b32 (), half_e, half_over_e);
 }
diff --git a/sysdeps/aarch64/fpu/exp10f_sve.c b/sysdeps/aarch64/fpu/exp10f_sve.c
index e09b2f3b27..8aa3fa9c43 100644
--- a/sysdeps/aarch64/fpu/exp10f_sve.c
+++ b/sysdeps/aarch64/fpu/exp10f_sve.c
@@ -18,74 +18,83 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "sv_math.h"
-#include "poly_sve_f32.h"
 
-/* For x < -SpecialBound, the result is subnormal and not handled correctly by
+/* For x < -Thres, the result is subnormal and not handled correctly by
    FEXPA.  */
-#define SpecialBound 37.9
+#define Thres 37.9
 
 static const struct data
 {
-  float poly[5];
-  float shift, log10_2, log2_10_hi, log2_10_lo, special_bound;
+  float log2_10_lo, c0, c2, c4;
+  float c1, c3, log10_2;
+  float shift, log2_10_hi, thres;
 } data = {
   /* Coefficients generated using Remez algorithm with minimisation of relative
      error.
      rel error: 0x1.89dafa3p-24
      abs error: 0x1.167d55p-23 in [-log10(2)/2, log10(2)/2]
      maxerr: 0.52 +0.5 ulp.  */
-  .poly = { 0x1.26bb16p+1f, 0x1.5350d2p+1f, 0x1.04744ap+1f, 0x1.2d8176p+0f,
-	    0x1.12b41ap-1f },
+  .c0 = 0x1.26bb16p+1f,
+  .c1 = 0x1.5350d2p+1f,
+  .c2 = 0x1.04744ap+1f,
+  .c3 = 0x1.2d8176p+0f,
+  .c4 = 0x1.12b41ap-1f,
   /* 1.5*2^17 + 127, a shift value suitable for FEXPA.  */
-  .shift = 0x1.903f8p17f,
+  .shift = 0x1.803f8p17f,
   .log10_2 = 0x1.a934fp+1,
   .log2_10_hi = 0x1.344136p-2,
   .log2_10_lo = -0x1.ec10cp-27,
-  .special_bound = SpecialBound,
+  .thres = Thres,
 };
 
-static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+static inline svfloat32_t
+sv_exp10f_inline (svfloat32_t x, const svbool_t pg, const struct data *d)
 {
-  return sv_call_f32 (exp10f, x, y, special);
-}
-
-/* Single-precision SVE exp10f routine. Implements the same algorithm
-   as AdvSIMD exp10f.
-   Worst case error is 1.02 ULPs.
-   _ZGVsMxv_exp10f(-0x1.040488p-4) got 0x1.ba5f9ep-1
-				  want 0x1.ba5f9cp-1.  */
-svfloat32_t SV_NAME_F1 (exp10) (svfloat32_t x, const svbool_t pg)
-{
-  const struct data *d = ptr_barrier (&data);
   /* exp10(x) = 2^(n/N) * 10^r = 2^n * (1 + poly (r)),
      with poly(r) in [1/sqrt(2), sqrt(2)] and
      x = r + n * log10(2) / N, with r in [-log10(2)/2N, log10(2)/2N].  */
 
-  /* Load some constants in quad-word chunks to minimise memory access (last
-     lane is wasted).  */
-  svfloat32_t log10_2_and_inv = svld1rq (svptrue_b32 (), &d->log10_2);
+  svfloat32_t lane_consts = svld1rq (svptrue_b32 (), &d->log2_10_lo);
 
   /* n = round(x/(log10(2)/N)).  */
   svfloat32_t shift = sv_f32 (d->shift);
-  svfloat32_t z = svmla_lane (shift, x, log10_2_and_inv, 0);
-  svfloat32_t n = svsub_x (pg, z, shift);
+  svfloat32_t z = svmad_x (pg, sv_f32 (d->log10_2), x, shift);
+  svfloat32_t n = svsub_x (svptrue_b32 (), z, shift);
 
   /* r = x - n*log10(2)/N.  */
-  svfloat32_t r = svmls_lane (x, n, log10_2_and_inv, 1);
-  r = svmls_lane (r, n, log10_2_and_inv, 2);
+  svfloat32_t r = svmsb_x (pg, sv_f32 (d->log2_10_hi), n, x);
+  r = svmls_lane (r, n, lane_consts, 0);
 
-  svbool_t special = svacgt (pg, x, d->special_bound);
   svfloat32_t scale = svexpa (svreinterpret_u32 (z));
 
   /* Polynomial evaluation: poly(r) ~ exp10(r)-1.  */
-  svfloat32_t r2 = svmul_x (pg, r, r);
-  svfloat32_t poly
-      = svmla_x (pg, svmul_x (pg, r, d->poly[0]),
-		 sv_pairwise_poly_3_f32_x (pg, r, r2, d->poly + 1), r2);
-
-  if (__glibc_unlikely (svptest_any (pg, special)))
-    return special_case (x, svmla_x (pg, scale, scale, poly), special);
+  svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, lane_consts, 2);
+  svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, lane_consts, 3);
+  svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r);
+  svfloat32_t p14 = svmla_x (pg, p12, p34, r2);
+  svfloat32_t p0 = svmul_lane (r, lane_consts, 1);
+  svfloat32_t poly = svmla_x (pg, p0, r2, p14);
 
   return svmla_x (pg, scale, scale, poly);
 }
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svbool_t special, const struct data *d)
+{
+  return sv_call_f32 (exp10f, x, sv_exp10f_inline (x, svptrue_b32 (), d),
+		      special);
+}
+
+/* Single-precision SVE exp10f routine. Implements the same algorithm
+   as AdvSIMD exp10f.
+   Worst case error is 1.02 ULPs.
+   _ZGVsMxv_exp10f(-0x1.040488p-4) got 0x1.ba5f9ep-1
+				  want 0x1.ba5f9cp-1.  */
+svfloat32_t SV_NAME_F1 (exp10) (svfloat32_t x, const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+  svbool_t special = svacgt (pg, x, d->thres);
+  if (__glibc_unlikely (svptest_any (special, special)))
+    return special_case (x, special, d);
+  return sv_exp10f_inline (x, pg, d);
+}
diff --git a/sysdeps/aarch64/fpu/exp2f_sve.c b/sysdeps/aarch64/fpu/exp2f_sve.c
index 8a686e3e05..c6216bed9e 100644
--- a/sysdeps/aarch64/fpu/exp2f_sve.c
+++ b/sysdeps/aarch64/fpu/exp2f_sve.c
@@ -24,54 +24,64 @@
 
 static const struct data
 {
-  float poly[5];
+  float c0, c2, c4, c1, c3;
   float shift, thres;
 } data = {
-  /* Coefficients copied from the polynomial in AdvSIMD variant, reversed for
-     compatibility with polynomial helpers.  */
-  .poly = { 0x1.62e422p-1f, 0x1.ebf9bcp-3f, 0x1.c6bd32p-5f, 0x1.3ce9e4p-7f,
-	    0x1.59977ap-10f },
+  /* Coefficients copied from the polynomial in AdvSIMD variant.  */
+  .c0 = 0x1.62e422p-1f,
+  .c1 = 0x1.ebf9bcp-3f,
+  .c2 = 0x1.c6bd32p-5f,
+  .c3 = 0x1.3ce9e4p-7f,
+  .c4 = 0x1.59977ap-10f,
   /* 1.5*2^17 + 127.  */
-  .shift = 0x1.903f8p17f,
+  .shift = 0x1.803f8p17f,
   /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled
      correctly by FEXPA.  */
   .thres = Thres,
 };
 
-static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
-{
-  return sv_call_f32 (exp2f, x, y, special);
-}
-
-/* Single-precision SVE exp2f routine. Implements the same algorithm
-   as AdvSIMD exp2f.
-   Worst case error is 1.04 ULPs.
-   SV_NAME_F1 (exp2)(0x1.943b9p-1) got 0x1.ba7eb2p+0
-				  want 0x1.ba7ebp+0.  */
-svfloat32_t SV_NAME_F1 (exp2) (svfloat32_t x, const svbool_t pg)
+static inline svfloat32_t
+sv_exp2f_inline (svfloat32_t x, const svbool_t pg, const struct data *d)
 {
-  const struct data *d = ptr_barrier (&data);
   /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
     x = n + r, with r in [-1/2, 1/2].  */
-  svfloat32_t shift = sv_f32 (d->shift);
-  svfloat32_t z = svadd_x (pg, x, shift);
-  svfloat32_t n = svsub_x (pg, z, shift);
-  svfloat32_t r = svsub_x (pg, x, n);
+  svfloat32_t z = svadd_x (svptrue_b32 (), x, d->shift);
+  svfloat32_t n = svsub_x (svptrue_b32 (), z, d->shift);
+  svfloat32_t r = svsub_x (svptrue_b32 (), x, n);
 
-  svbool_t special = svacgt (pg, x, d->thres);
   svfloat32_t scale = svexpa (svreinterpret_u32 (z));
 
   /* Polynomial evaluation: poly(r) ~ exp2(r)-1.
      Evaluate polynomial use hybrid scheme - offset ESTRIN by 1 for
      coefficients 1 to 4, and apply most significant coefficient directly.  */
-  svfloat32_t r2 = svmul_x (pg, r, r);
-  svfloat32_t p14 = sv_pairwise_poly_3_f32_x (pg, r, r2, d->poly + 1);
-  svfloat32_t p0 = svmul_x (pg, r, d->poly[0]);
+  svfloat32_t even_coeffs = svld1rq (svptrue_b32 (), &d->c0);
+  svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r);
+  svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, even_coeffs, 1);
+  svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, even_coeffs, 2);
+  svfloat32_t p14 = svmla_x (pg, p12, r2, p34);
+  svfloat32_t p0 = svmul_lane (r, even_coeffs, 0);
   svfloat32_t poly = svmla_x (pg, p0, r2, p14);
 
-  if (__glibc_unlikely (svptest_any (pg, special)))
-    return special_case (x, svmla_x (pg, scale, scale, poly), special);
-
   return svmla_x (pg, scale, scale, poly);
 }
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svbool_t special, const struct data *d)
+{
+  return sv_call_f32 (exp2f, x, sv_exp2f_inline (x, svptrue_b32 (), d),
+		      special);
+}
+
+/* Single-precision SVE exp2f routine. Implements the same algorithm
+   as AdvSIMD exp2f.
+   Worst case error is 1.04 ULPs.
+   _ZGVsMxv_exp2f(-0x1.af994ap-3) got 0x1.ba6a66p-1
+				 want 0x1.ba6a64p-1.  */
+svfloat32_t SV_NAME_F1 (exp2) (svfloat32_t x, const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+  svbool_t special = svacgt (pg, x, d->thres);
+  if (__glibc_unlikely (svptest_any (special, special)))
+    return special_case (x, special, d);
+  return sv_exp2f_inline (x, pg, d);
+}
diff --git a/sysdeps/aarch64/fpu/expf_advsimd.c b/sysdeps/aarch64/fpu/expf_advsimd.c
index 99d2e647aa..5c9cb72620 100644
--- a/sysdeps/aarch64/fpu/expf_advsimd.c
+++ b/sysdeps/aarch64/fpu/expf_advsimd.c
@@ -22,7 +22,7 @@
 static const struct data
 {
   float32x4_t poly[5];
-  float32x4_t shift, inv_ln2, ln2_hi, ln2_lo;
+  float32x4_t inv_ln2, ln2_hi, ln2_lo;
   uint32x4_t exponent_bias;
 #if !WANT_SIMD_EXCEPT
   float32x4_t special_bound, scale_thresh;
@@ -31,7 +31,6 @@ static const struct data
   /* maxerr: 1.45358 +0.5 ulp.  */
   .poly = { V4 (0x1.0e4020p-7f), V4 (0x1.573e2ep-5f), V4 (0x1.555e66p-3f),
 	    V4 (0x1.fffdb6p-2f), V4 (0x1.ffffecp-1f) },
-  .shift = V4 (0x1.8p23f),
   .inv_ln2 = V4 (0x1.715476p+0f),
   .ln2_hi = V4 (0x1.62e4p-1f),
   .ln2_lo = V4 (0x1.7f7d1cp-20f),
@@ -85,7 +84,7 @@ special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1,
 float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp) (float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
-  float32x4_t n, r, r2, scale, p, q, poly, z;
+  float32x4_t n, r, r2, scale, p, q, poly;
   uint32x4_t cmp, e;
 
 #if WANT_SIMD_EXCEPT
@@ -104,11 +103,10 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp) (float32x4_t x)
 
   /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
      x = ln2*n + r, with r in [-ln2/2, ln2/2].  */
-  z = vfmaq_f32 (d->shift, x, d->inv_ln2);
-  n = vsubq_f32 (z, d->shift);
+  n = vrndaq_f32 (vmulq_f32 (x, d->inv_ln2));
   r = vfmsq_f32 (x, n, d->ln2_hi);
   r = vfmsq_f32 (r, n, d->ln2_lo);
-  e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23);
+  e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 23);
   scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias));
 
 #if !WANT_SIMD_EXCEPT
diff --git a/sysdeps/aarch64/fpu/expf_sve.c b/sysdeps/aarch64/fpu/expf_sve.c
index 3ba79bc4f1..da93e01b87 100644
--- a/sysdeps/aarch64/fpu/expf_sve.c
+++ b/sysdeps/aarch64/fpu/expf_sve.c
@@ -18,33 +18,25 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "sv_math.h"
+#include "sv_expf_inline.h"
+
+/* Roughly 87.3. For x < -Thres, the result is subnormal and not handled
+   correctly by FEXPA.  */
+#define Thres 0x1.5d5e2ap+6f
 
 static const struct data
 {
-  float poly[5];
-  float inv_ln2, ln2_hi, ln2_lo, shift, thres;
+  struct sv_expf_data d;
+  float thres;
 } data = {
-  /* Coefficients copied from the polynomial in AdvSIMD variant, reversed for
-     compatibility with polynomial helpers.  */
-  .poly = { 0x1.ffffecp-1f, 0x1.fffdb6p-2f, 0x1.555e66p-3f, 0x1.573e2ep-5f,
-	    0x1.0e4020p-7f },
-  .inv_ln2 = 0x1.715476p+0f,
-  .ln2_hi = 0x1.62e4p-1f,
-  .ln2_lo = 0x1.7f7d1cp-20f,
-  /* 1.5*2^17 + 127.  */
-  .shift = 0x1.903f8p17f,
-  /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled
-     correctly by FEXPA.  */
-  .thres = 0x1.5d5e2ap+6f,
+  .d = SV_EXPF_DATA,
+  .thres = Thres,
 };
 
-#define C(i) sv_f32 (d->poly[i])
-#define ExponentBias 0x3f800000
-
 static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+special_case (svfloat32_t x, svbool_t special, const struct sv_expf_data *d)
 {
-  return sv_call_f32 (expf, x, y, special);
+  return sv_call_f32 (expf, x, expf_inline (x, svptrue_b32 (), d), special);
 }
 
 /* Optimised single-precision SVE exp function.
@@ -54,36 +46,8 @@ special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
 svfloat32_t SV_NAME_F1 (exp) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
-
-  /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
-     x = ln2*n + r, with r in [-ln2/2, ln2/2].  */
-
-  /* Load some constants in quad-word chunks to minimise memory access (last
-     lane is wasted).  */
-  svfloat32_t invln2_and_ln2 = svld1rq (svptrue_b32 (), &d->inv_ln2);
-
-  /* n = round(x/(ln2/N)).  */
-  svfloat32_t z = svmla_lane (sv_f32 (d->shift), x, invln2_and_ln2, 0);
-  svfloat32_t n = svsub_x (pg, z, d->shift);
-
-  /* r = x - n*ln2/N.  */
-  svfloat32_t r = svmls_lane (x, n, invln2_and_ln2, 1);
-  r = svmls_lane (r, n, invln2_and_ln2, 2);
-
-  /* scale = 2^(n/N).  */
   svbool_t is_special_case = svacgt (pg, x, d->thres);
-  svfloat32_t scale = svexpa (svreinterpret_u32 (z));
-
-  /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6.  */
-  svfloat32_t p12 = svmla_x (pg, C (1), C (2), r);
-  svfloat32_t p34 = svmla_x (pg, C (3), C (4), r);
-  svfloat32_t r2 = svmul_x (pg, r, r);
-  svfloat32_t p14 = svmla_x (pg, p12, p34, r2);
-  svfloat32_t p0 = svmul_x (pg, r, C (0));
-  svfloat32_t poly = svmla_x (pg, p0, r2, p14);
-
   if (__glibc_unlikely (svptest_any (pg, is_special_case)))
-    return special_case (x, svmla_x (pg, scale, scale, poly), is_special_case);
-
-  return svmla_x (pg, scale, scale, poly);
+    return special_case (x, is_special_case, &d->d);
+  return expf_inline (x, pg, &d->d);
 }
diff --git a/sysdeps/aarch64/fpu/expm1f_advsimd.c b/sysdeps/aarch64/fpu/expm1f_advsimd.c
index a0616ec754..8303ca296e 100644
--- a/sysdeps/aarch64/fpu/expm1f_advsimd.c
+++ b/sysdeps/aarch64/fpu/expm1f_advsimd.c
@@ -18,27 +18,18 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "v_math.h"
-#include "poly_advsimd_f32.h"
+#include "v_expm1f_inline.h"
 
 static const struct data
 {
-  float32x4_t poly[5];
-  float invln2_and_ln2[4];
-  float32x4_t shift;
-  int32x4_t exponent_bias;
+  struct v_expm1f_data d;
 #if WANT_SIMD_EXCEPT
   uint32x4_t thresh;
 #else
   float32x4_t oflow_bound;
 #endif
 } data = {
-  /* Generated using fpminimax with degree=5 in [-log(2)/2, log(2)/2].  */
-  .poly = { V4 (0x1.fffffep-2), V4 (0x1.5554aep-3), V4 (0x1.555736p-5),
-	    V4 (0x1.12287cp-7), V4 (0x1.6b55a2p-10) },
-  /* Stores constants: invln2, ln2_hi, ln2_lo, 0.  */
-  .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 },
-  .shift = V4 (0x1.8p23f),
-  .exponent_bias = V4 (0x3f800000),
+  .d = V_EXPM1F_DATA,
 #if !WANT_SIMD_EXCEPT
   /* Value above which expm1f(x) should overflow. Absolute value of the
      underflow bound is greater than this, so it catches both cases - there is
@@ -55,67 +46,38 @@ static const struct data
 #define TinyBound v_u32 (0x34000000 << 1)
 
 static float32x4_t VPCS_ATTR NOINLINE
-special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+special_case (float32x4_t x, uint32x4_t special, const struct data *d)
 {
-  return v_call_f32 (expm1f, x, y, special);
+  return v_call_f32 (
+      expm1f, x, expm1f_inline (v_zerofy_f32 (x, special), &d->d), special);
 }
 
 /* Single-precision vector exp(x) - 1 function.
-   The maximum error is 1.51 ULP:
-   _ZGVnN4v_expm1f (0x1.8baa96p-2) got 0x1.e2fb9p-2
-				  want 0x1.e2fb94p-2.  */
+   The maximum error is 1.62 ULP:
+   _ZGVnN4v_expm1f(0x1.85f83p-2) got 0x1.da9f4p-2
+				want 0x1.da9f44p-2.  */
 float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (expm1) (float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
-  uint32x4_t ix = vreinterpretq_u32_f32 (x);
 
 #if WANT_SIMD_EXCEPT
+  uint32x4_t ix = vreinterpretq_u32_f32 (x);
   /* If fp exceptions are to be triggered correctly, fall back to scalar for
      |x| < 2^-23, |x| > oflow_bound, Inf & NaN. Add ix to itself for
      shift-left by 1, and compare with thresh which was left-shifted offline -
      this is effectively an absolute compare.  */
   uint32x4_t special
       = vcgeq_u32 (vsubq_u32 (vaddq_u32 (ix, ix), TinyBound), d->thresh);
-  if (__glibc_unlikely (v_any_u32 (special)))
-    x = v_zerofy_f32 (x, special);
 #else
   /* Handles very large values (+ve and -ve), +/-NaN, +/-Inf.  */
   uint32x4_t special = vcagtq_f32 (x, d->oflow_bound);
 #endif
 
-  /* Reduce argument to smaller range:
-     Let i = round(x / ln2)
-     and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
-     exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
-     where 2^i is exact because i is an integer.  */
-  float32x4_t invln2_and_ln2 = vld1q_f32 (d->invln2_and_ln2);
-  float32x4_t j
-      = vsubq_f32 (vfmaq_laneq_f32 (d->shift, x, invln2_and_ln2, 0), d->shift);
-  int32x4_t i = vcvtq_s32_f32 (j);
-  float32x4_t f = vfmsq_laneq_f32 (x, j, invln2_and_ln2, 1);
-  f = vfmsq_laneq_f32 (f, j, invln2_and_ln2, 2);
-
-  /* Approximate expm1(f) using polynomial.
-     Taylor expansion for expm1(x) has the form:
-	 x + ax^2 + bx^3 + cx^4 ....
-     So we calculate the polynomial P(f) = a + bf + cf^2 + ...
-     and assemble the approximation expm1(f) ~= f + f^2 * P(f).  */
-  float32x4_t p = v_horner_4_f32 (f, d->poly);
-  p = vfmaq_f32 (f, vmulq_f32 (f, f), p);
-
-  /* Assemble the result.
-     expm1(x) ~= 2^i * (p + 1) - 1
-     Let t = 2^i.  */
-  int32x4_t u = vaddq_s32 (vshlq_n_s32 (i, 23), d->exponent_bias);
-  float32x4_t t = vreinterpretq_f32_s32 (u);
-
   if (__glibc_unlikely (v_any_u32 (special)))
-    return special_case (vreinterpretq_f32_u32 (ix),
-			 vfmaq_f32 (vsubq_f32 (t, v_f32 (1.0f)), p, t),
-			 special);
+    return special_case (x, special, d);
 
   /* expm1(x) ~= p * t + (t - 1).  */
-  return vfmaq_f32 (vsubq_f32 (t, v_f32 (1.0f)), p, t);
+  return expm1f_inline (x, &d->d);
 }
 libmvec_hidden_def (V_NAME_F1 (expm1))
 HALF_WIDTH_ALIAS_F1 (expm1)
diff --git a/sysdeps/aarch64/fpu/log10f_advsimd.c b/sysdeps/aarch64/fpu/log10f_advsimd.c
index 9347422a77..82228b599a 100644
--- a/sysdeps/aarch64/fpu/log10f_advsimd.c
+++ b/sysdeps/aarch64/fpu/log10f_advsimd.c
@@ -22,11 +22,11 @@
 
 static const struct data
 {
-  uint32x4_t min_norm;
+  uint32x4_t off, offset_lower_bound;
   uint16x8_t special_bound;
+  uint32x4_t mantissa_mask;
   float32x4_t poly[8];
   float32x4_t inv_ln10, ln2;
-  uint32x4_t off, mantissa_mask;
 } data = {
   /* Use order 9 for log10(1+x), i.e. order 8 for log10(1+x)/x, with x in
       [-1/3, 1/3] (offset=2/3). Max. relative error: 0x1.068ee468p-25.  */
@@ -35,18 +35,22 @@ static const struct data
 	    V4 (-0x1.0fc92cp-4f), V4 (0x1.f5f76ap-5f) },
   .ln2 = V4 (0x1.62e43p-1f),
   .inv_ln10 = V4 (0x1.bcb7b2p-2f),
-  .min_norm = V4 (0x00800000),
-  .special_bound = V8 (0x7f00), /* asuint32(inf) - min_norm.  */
+  /* Lower bound is the smallest positive normal float 0x00800000. For
+     optimised register use subnormals are detected after offset has been
+     subtracted, so lower bound is 0x00800000 - offset (which wraps around).  */
+  .offset_lower_bound = V4 (0x00800000 - 0x3f2aaaab),
+  .special_bound = V8 (0x7f00), /* top16(asuint32(inf) - 0x00800000).  */
   .off = V4 (0x3f2aaaab),	/* 0.666667.  */
   .mantissa_mask = V4 (0x007fffff),
 };
 
 static float32x4_t VPCS_ATTR NOINLINE
-special_case (float32x4_t x, float32x4_t y, float32x4_t p, float32x4_t r2,
-	      uint16x4_t cmp)
+special_case (float32x4_t y, uint32x4_t u_off, float32x4_t p, float32x4_t r2,
+	      uint16x4_t cmp, const struct data *d)
 {
   /* Fall back to scalar code.  */
-  return v_call_f32 (log10f, x, vfmaq_f32 (y, p, r2), vmovl_u16 (cmp));
+  return v_call_f32 (log10f, vreinterpretq_f32_u32 (vaddq_u32 (u_off, d->off)),
+		     vfmaq_f32 (y, p, r2), vmovl_u16 (cmp));
 }
 
 /* Fast implementation of AdvSIMD log10f,
@@ -58,15 +62,21 @@ special_case (float32x4_t x, float32x4_t y, float32x4_t p, float32x4_t r2,
 float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log10) (float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
-  uint32x4_t u = vreinterpretq_u32_f32 (x);
-  uint16x4_t special = vcge_u16 (vsubhn_u32 (u, d->min_norm),
-				 vget_low_u16 (d->special_bound));
+
+  /* To avoid having to mov x out of the way, keep u after offset has been
+     applied, and recover x by adding the offset back in the special-case
+     handler.  */
+  uint32x4_t u_off = vreinterpretq_u32_f32 (x);
 
   /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3.  */
-  u = vsubq_u32 (u, d->off);
+  u_off = vsubq_u32 (u_off, d->off);
   float32x4_t n = vcvtq_f32_s32 (
-      vshrq_n_s32 (vreinterpretq_s32_u32 (u), 23)); /* signextend.  */
-  u = vaddq_u32 (vandq_u32 (u, d->mantissa_mask), d->off);
+      vshrq_n_s32 (vreinterpretq_s32_u32 (u_off), 23)); /* signextend.  */
+
+  uint16x4_t special = vcge_u16 (vsubhn_u32 (u_off, d->offset_lower_bound),
+				 vget_low_u16 (d->special_bound));
+
+  uint32x4_t u = vaddq_u32 (vandq_u32 (u_off, d->mantissa_mask), d->off);
   float32x4_t r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f));
 
   /* y = log10(1+r) + n * log10(2).  */
@@ -77,7 +87,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log10) (float32x4_t x)
   y = vmulq_f32 (y, d->inv_ln10);
 
   if (__glibc_unlikely (v_any_u16h (special)))
-    return special_case (x, y, poly, r2, special);
+    return special_case (y, u_off, poly, r2, special, d);
   return vfmaq_f32 (y, poly, r2);
 }
 libmvec_hidden_def (V_NAME_F1 (log10))
diff --git a/sysdeps/aarch64/fpu/log10f_sve.c b/sysdeps/aarch64/fpu/log10f_sve.c
index bdbb49cd32..7913679f67 100644
--- a/sysdeps/aarch64/fpu/log10f_sve.c
+++ b/sysdeps/aarch64/fpu/log10f_sve.c
@@ -24,6 +24,7 @@ static const struct data
   float poly_0246[4];
   float poly_1357[4];
   float ln2, inv_ln10;
+  uint32_t off, lower;
 } data = {
   .poly_1357 = {
     /* Coefficients copied from the AdvSIMD routine, then rearranged so that coeffs
@@ -35,18 +36,23 @@ static const struct data
 		 -0x1.0fc92cp-4f },
   .ln2 = 0x1.62e43p-1f,
   .inv_ln10 = 0x1.bcb7b2p-2f,
+  .off = 0x3f2aaaab,
+  /* Lower bound is the smallest positive normal float 0x00800000. For
+     optimised register use subnormals are detected after offset has been
+     subtracted, so lower bound is 0x00800000 - offset (which wraps around).  */
+  .lower = 0x00800000 - 0x3f2aaaab
 };
 
-#define Min 0x00800000
-#define Max 0x7f800000
-#define Thres 0x7f000000  /* Max - Min.  */
-#define Offset 0x3f2aaaab /* 0.666667.  */
+#define Thres 0x7f000000 /* asuint32(inf) - 0x00800000.  */
 #define MantissaMask 0x007fffff
 
 static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+special_case (svuint32_t u_off, svfloat32_t p, svfloat32_t r2, svfloat32_t y,
+	      svbool_t cmp)
 {
-  return sv_call_f32 (log10f, x, y, special);
+  return sv_call_f32 (
+      log10f, svreinterpret_f32 (svadd_x (svptrue_b32 (), u_off, data.off)),
+      svmla_x (svptrue_b32 (), p, r2, y), cmp);
 }
 
 /* Optimised implementation of SVE log10f using the same algorithm and
@@ -57,23 +63,25 @@ special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
 svfloat32_t SV_NAME_F1 (log10) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
-  svuint32_t ix = svreinterpret_u32 (x);
-  svbool_t special = svcmpge (pg, svsub_x (pg, ix, Min), Thres);
+
+  svuint32_t u_off = svreinterpret_u32 (x);
+
+  u_off = svsub_x (pg, u_off, d->off);
+  svbool_t special = svcmpge (pg, svsub_x (pg, u_off, d->lower), Thres);
 
   /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3.  */
-  ix = svsub_x (pg, ix, Offset);
   svfloat32_t n = svcvt_f32_x (
-      pg, svasr_x (pg, svreinterpret_s32 (ix), 23)); /* signextend.  */
-  ix = svand_x (pg, ix, MantissaMask);
-  ix = svadd_x (pg, ix, Offset);
+      pg, svasr_x (pg, svreinterpret_s32 (u_off), 23)); /* signextend.  */
+  svuint32_t ix = svand_x (pg, u_off, MantissaMask);
+  ix = svadd_x (pg, ix, d->off);
   svfloat32_t r = svsub_x (pg, svreinterpret_f32 (ix), 1.0f);
 
   /* y = log10(1+r) + n*log10(2)
      log10(1+r) ~ r * InvLn(10) + P(r)
      where P(r) is a polynomial. Use order 9 for log10(1+x), i.e. order 8 for
      log10(1+x)/x, with x in [-1/3, 1/3] (offset=2/3).  */
-  svfloat32_t r2 = svmul_x (pg, r, r);
-  svfloat32_t r4 = svmul_x (pg, r2, r2);
+  svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r);
+  svfloat32_t r4 = svmul_x (svptrue_b32 (), r2, r2);
   svfloat32_t p_1357 = svld1rq (svptrue_b32 (), &d->poly_1357[0]);
   svfloat32_t q_01 = svmla_lane (sv_f32 (d->poly_0246[0]), r, p_1357, 0);
   svfloat32_t q_23 = svmla_lane (sv_f32 (d->poly_0246[1]), r, p_1357, 1);
@@ -88,7 +96,6 @@ svfloat32_t SV_NAME_F1 (log10) (svfloat32_t x, const svbool_t pg)
   hi = svmul_x (pg, hi, d->inv_ln10);
 
   if (__glibc_unlikely (svptest_any (pg, special)))
-    return special_case (x, svmla_x (svnot_z (pg, special), hi, r2, y),
-			 special);
-  return svmla_x (pg, hi, r2, y);
+    return special_case (u_off, hi, r2, y, special);
+  return svmla_x (svptrue_b32 (), hi, r2, y);
 }
diff --git a/sysdeps/aarch64/fpu/log1p_advsimd.c b/sysdeps/aarch64/fpu/log1p_advsimd.c
index ffc418fc9c..114064c696 100644
--- a/sysdeps/aarch64/fpu/log1p_advsimd.c
+++ b/sysdeps/aarch64/fpu/log1p_advsimd.c
@@ -127,3 +127,5 @@ VPCS_ATTR float64x2_t V_NAME_D1 (log1p) (float64x2_t x)
 
   return vfmaq_f64 (y, f2, p);
 }
+
+strong_alias (V_NAME_D1 (log1p), V_NAME_D1 (logp1))
diff --git a/sysdeps/aarch64/fpu/log1p_sve.c b/sysdeps/aarch64/fpu/log1p_sve.c
index 04f7e5720e..b21cfb2c90 100644
--- a/sysdeps/aarch64/fpu/log1p_sve.c
+++ b/sysdeps/aarch64/fpu/log1p_sve.c
@@ -116,3 +116,5 @@ svfloat64_t SV_NAME_D1 (log1p) (svfloat64_t x, svbool_t pg)
 
   return y;
 }
+
+strong_alias (SV_NAME_D1 (log1p), SV_NAME_D1 (logp1))
diff --git a/sysdeps/aarch64/fpu/log1pf_advsimd.c b/sysdeps/aarch64/fpu/log1pf_advsimd.c
index dc15334a85..00006fc703 100644
--- a/sysdeps/aarch64/fpu/log1pf_advsimd.c
+++ b/sysdeps/aarch64/fpu/log1pf_advsimd.c
@@ -18,113 +18,81 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "v_math.h"
-#include "poly_advsimd_f32.h"
+#include "v_log1pf_inline.h"
+
+#if WANT_SIMD_EXCEPT
 
 const static struct data
 {
-  float32x4_t poly[8], ln2;
-  uint32x4_t tiny_bound, minus_one, four, thresh;
-  int32x4_t three_quarters;
+  uint32x4_t minus_one, thresh;
+  struct v_log1pf_data d;
 } data = {
-  .poly = { /* Generated using FPMinimax in [-0.25, 0.5]. First two coefficients
-	       (1, -0.5) are not stored as they can be generated more
-	       efficiently.  */
-	    V4 (0x1.5555aap-2f), V4 (-0x1.000038p-2f), V4 (0x1.99675cp-3f),
-	    V4 (-0x1.54ef78p-3f), V4 (0x1.28a1f4p-3f), V4 (-0x1.0da91p-3f),
-	    V4 (0x1.abcb6p-4f), V4 (-0x1.6f0d5ep-5f) },
-  .ln2 = V4 (0x1.62e43p-1f),
-  .tiny_bound = V4 (0x34000000), /* asuint32(0x1p-23). ulp=0.5 at 0x1p-23.  */
-  .thresh = V4 (0x4b800000), /* asuint32(INFINITY) - tiny_bound.  */
+  .d = V_LOG1PF_CONSTANTS_TABLE,
+  .thresh = V4 (0x4b800000), /* asuint32(INFINITY) - TinyBound.  */
   .minus_one = V4 (0xbf800000),
-  .four = V4 (0x40800000),
-  .three_quarters = V4 (0x3f400000)
 };
 
-static inline float32x4_t
-eval_poly (float32x4_t m, const float32x4_t *p)
-{
-  /* Approximate log(1+m) on [-0.25, 0.5] using split Estrin scheme.  */
-  float32x4_t p_12 = vfmaq_f32 (v_f32 (-0.5), m, p[0]);
-  float32x4_t p_34 = vfmaq_f32 (p[1], m, p[2]);
-  float32x4_t p_56 = vfmaq_f32 (p[3], m, p[4]);
-  float32x4_t p_78 = vfmaq_f32 (p[5], m, p[6]);
-
-  float32x4_t m2 = vmulq_f32 (m, m);
-  float32x4_t p_02 = vfmaq_f32 (m, m2, p_12);
-  float32x4_t p_36 = vfmaq_f32 (p_34, m2, p_56);
-  float32x4_t p_79 = vfmaq_f32 (p_78, m2, p[7]);
-
-  float32x4_t m4 = vmulq_f32 (m2, m2);
-  float32x4_t p_06 = vfmaq_f32 (p_02, m4, p_36);
-  return vfmaq_f32 (p_06, m4, vmulq_f32 (m4, p_79));
-}
+/* asuint32(0x1p-23). ulp=0.5 at 0x1p-23.  */
+#  define TinyBound v_u32 (0x34000000)
 
 static float32x4_t NOINLINE VPCS_ATTR
-special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+special_case (float32x4_t x, uint32x4_t cmp, const struct data *d)
 {
-  return v_call_f32 (log1pf, x, y, special);
+  /* Side-step special lanes so fenv exceptions are not triggered
+     inadvertently.  */
+  float32x4_t x_nospecial = v_zerofy_f32 (x, cmp);
+  return v_call_f32 (log1pf, x, log1pf_inline (x_nospecial, &d->d), cmp);
 }
 
-/* Vector log1pf approximation using polynomial on reduced interval. Accuracy
-   is roughly 2.02 ULP:
-   log1pf(0x1.21e13ap-2) got 0x1.fe8028p-3 want 0x1.fe802cp-3.  */
+/* Vector log1pf approximation using polynomial on reduced interval. Worst-case
+   error is 1.69 ULP:
+   _ZGVnN4v_log1pf(0x1.04418ap-2) got 0x1.cfcbd8p-3
+				 want 0x1.cfcbdcp-3.  */
 VPCS_ATTR float32x4_t V_NAME_F1 (log1p) (float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
-
   uint32x4_t ix = vreinterpretq_u32_f32 (x);
   uint32x4_t ia = vreinterpretq_u32_f32 (vabsq_f32 (x));
+
   uint32x4_t special_cases
-      = vorrq_u32 (vcgeq_u32 (vsubq_u32 (ia, d->tiny_bound), d->thresh),
+      = vorrq_u32 (vcgeq_u32 (vsubq_u32 (ia, TinyBound), d->thresh),
 		   vcgeq_u32 (ix, d->minus_one));
-  float32x4_t special_arg = x;
 
-#if WANT_SIMD_EXCEPT
   if (__glibc_unlikely (v_any_u32 (special_cases)))
-    /* Side-step special lanes so fenv exceptions are not triggered
-       inadvertently.  */
-    x = v_zerofy_f32 (x, special_cases);
-#endif
+    return special_case (x, special_cases, d);
 
-  /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m
-			   is in [-0.25, 0.5]):
-     log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2).
-
-     We approximate log1p(m) with a polynomial, then scale by
-     k*log(2). Instead of doing this directly, we use an intermediate
-     scale factor s = 4*k*log(2) to ensure the scale is representable
-     as a normalised fp32 number.  */
+  return log1pf_inline (x, &d->d);
+}
 
-  float32x4_t m = vaddq_f32 (x, v_f32 (1.0f));
+#else
 
-  /* Choose k to scale x to the range [-1/4, 1/2].  */
-  int32x4_t k
-      = vandq_s32 (vsubq_s32 (vreinterpretq_s32_f32 (m), d->three_quarters),
-		   v_s32 (0xff800000));
-  uint32x4_t ku = vreinterpretq_u32_s32 (k);
+const static struct v_log1pf_data data = V_LOG1PF_CONSTANTS_TABLE;
 
-  /* Scale x by exponent manipulation.  */
-  float32x4_t m_scale
-      = vreinterpretq_f32_u32 (vsubq_u32 (vreinterpretq_u32_f32 (x), ku));
+static float32x4_t NOINLINE VPCS_ATTR
+special_case (float32x4_t x, uint32x4_t cmp)
+{
+  return v_call_f32 (log1pf, x, log1pf_inline (x, ptr_barrier (&data)), cmp);
+}
 
-  /* Scale up to ensure that the scale factor is representable as normalised
-     fp32 number, and scale m down accordingly.  */
-  float32x4_t s = vreinterpretq_f32_u32 (vsubq_u32 (d->four, ku));
-  m_scale = vaddq_f32 (m_scale, vfmaq_f32 (v_f32 (-1.0f), v_f32 (0.25f), s));
+/* Vector log1pf approximation using polynomial on reduced interval. Worst-case
+   error is 1.63 ULP:
+   _ZGVnN4v_log1pf(0x1.216d12p-2) got 0x1.fdcb12p-3
+				 want 0x1.fdcb16p-3.  */
+VPCS_ATTR float32x4_t V_NAME_F1 (log1p) (float32x4_t x)
+{
+  uint32x4_t special_cases = vornq_u32 (vcleq_f32 (x, v_f32 (-1)),
+					vcaleq_f32 (x, v_f32 (0x1p127f)));
 
-  /* Evaluate polynomial on the reduced interval.  */
-  float32x4_t p = eval_poly (m_scale, d->poly);
+  if (__glibc_unlikely (v_any_u32 (special_cases)))
+    return special_case (x, special_cases);
 
-  /* The scale factor to be applied back at the end - by multiplying float(k)
-     by 2^-23 we get the unbiased exponent of k.  */
-  float32x4_t scale_back = vcvtq_f32_s32 (vshrq_n_s32 (k, 23));
+  return log1pf_inline (x, ptr_barrier (&data));
+}
 
-  /* Apply the scaling back.  */
-  float32x4_t y = vfmaq_f32 (p, scale_back, d->ln2);
+#endif
 
-  if (__glibc_unlikely (v_any_u32 (special_cases)))
-    return special_case (special_arg, y, special_cases);
-  return y;
-}
 libmvec_hidden_def (V_NAME_F1 (log1p))
 HALF_WIDTH_ALIAS_F1 (log1p)
+strong_alias (V_NAME_F1 (log1p), V_NAME_F1 (logp1))
+libmvec_hidden_def (V_NAME_F1 (logp1))
+HALF_WIDTH_ALIAS_F1 (logp1)
diff --git a/sysdeps/aarch64/fpu/log1pf_sve.c b/sysdeps/aarch64/fpu/log1pf_sve.c
index f645cc997e..5256d5e94c 100644
--- a/sysdeps/aarch64/fpu/log1pf_sve.c
+++ b/sysdeps/aarch64/fpu/log1pf_sve.c
@@ -98,3 +98,5 @@ svfloat32_t SV_NAME_F1 (log1p) (svfloat32_t x, svbool_t pg)
 
   return y;
 }
+
+strong_alias (SV_NAME_F1 (log1p), SV_NAME_F1 (logp1))
diff --git a/sysdeps/aarch64/fpu/log2f_advsimd.c b/sysdeps/aarch64/fpu/log2f_advsimd.c
index db21836749..84effe4fe9 100644
--- a/sysdeps/aarch64/fpu/log2f_advsimd.c
+++ b/sysdeps/aarch64/fpu/log2f_advsimd.c
@@ -22,9 +22,9 @@
 
 static const struct data
 {
-  uint32x4_t min_norm;
+  uint32x4_t off, offset_lower_bound;
   uint16x8_t special_bound;
-  uint32x4_t off, mantissa_mask;
+  uint32x4_t mantissa_mask;
   float32x4_t poly[9];
 } data = {
   /* Coefficients generated using Remez algorithm approximate
@@ -34,18 +34,22 @@ static const struct data
 	    V4 (-0x1.715458p-1f), V4 (0x1.ec701cp-2f), V4 (-0x1.7171a4p-2f),
 	    V4 (0x1.27a0b8p-2f), V4 (-0x1.e5143ep-3f), V4 (0x1.9d8ecap-3f),
 	    V4 (-0x1.c675bp-3f), V4 (0x1.9e495p-3f) },
-  .min_norm = V4 (0x00800000),
-  .special_bound = V8 (0x7f00), /* asuint32(inf) - min_norm.  */
+  /* Lower bound is the smallest positive normal float 0x00800000. For
+     optimised register use subnormals are detected after offset has been
+     subtracted, so lower bound is 0x00800000 - offset (which wraps around).  */
+  .offset_lower_bound = V4 (0x00800000 - 0x3f2aaaab),
+  .special_bound = V8 (0x7f00), /* top16(asuint32(inf) - 0x00800000).  */
   .off = V4 (0x3f2aaaab),	/* 0.666667.  */
   .mantissa_mask = V4 (0x007fffff),
 };
 
 static float32x4_t VPCS_ATTR NOINLINE
-special_case (float32x4_t x, float32x4_t n, float32x4_t p, float32x4_t r,
-	      uint16x4_t cmp)
+special_case (float32x4_t n, uint32x4_t u_off, float32x4_t p, float32x4_t r,
+	      uint16x4_t cmp, const struct data *d)
 {
   /* Fall back to scalar code.  */
-  return v_call_f32 (log2f, x, vfmaq_f32 (n, p, r), vmovl_u16 (cmp));
+  return v_call_f32 (log2f, vreinterpretq_f32_u32 (vaddq_u32 (u_off, d->off)),
+		     vfmaq_f32 (n, p, r), vmovl_u16 (cmp));
 }
 
 /* Fast implementation for single precision AdvSIMD log2,
@@ -56,15 +60,21 @@ special_case (float32x4_t x, float32x4_t n, float32x4_t p, float32x4_t r,
 float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log2) (float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
-  uint32x4_t u = vreinterpretq_u32_f32 (x);
-  uint16x4_t special = vcge_u16 (vsubhn_u32 (u, d->min_norm),
-				 vget_low_u16 (d->special_bound));
+
+  /* To avoid having to mov x out of the way, keep u after offset has been
+     applied, and recover x by adding the offset back in the special-case
+     handler.  */
+  uint32x4_t u_off = vreinterpretq_u32_f32 (x);
 
   /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3.  */
-  u = vsubq_u32 (u, d->off);
+  u_off = vsubq_u32 (u_off, d->off);
   float32x4_t n = vcvtq_f32_s32 (
-      vshrq_n_s32 (vreinterpretq_s32_u32 (u), 23)); /* signextend.  */
-  u = vaddq_u32 (vandq_u32 (u, d->mantissa_mask), d->off);
+      vshrq_n_s32 (vreinterpretq_s32_u32 (u_off), 23)); /* signextend.  */
+
+  uint16x4_t special = vcge_u16 (vsubhn_u32 (u_off, d->offset_lower_bound),
+				 vget_low_u16 (d->special_bound));
+
+  uint32x4_t u = vaddq_u32 (vandq_u32 (u_off, d->mantissa_mask), d->off);
   float32x4_t r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f));
 
   /* y = log2(1+r) + n.  */
@@ -72,7 +82,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log2) (float32x4_t x)
   float32x4_t p = v_pw_horner_8_f32 (r, r2, d->poly);
 
   if (__glibc_unlikely (v_any_u16h (special)))
-    return special_case (x, n, p, r, special);
+    return special_case (n, u_off, p, r, special, d);
   return vfmaq_f32 (n, p, r);
 }
 libmvec_hidden_def (V_NAME_F1 (log2))
diff --git a/sysdeps/aarch64/fpu/log2f_sve.c b/sysdeps/aarch64/fpu/log2f_sve.c
index 5031c42483..939d89bfb9 100644
--- a/sysdeps/aarch64/fpu/log2f_sve.c
+++ b/sysdeps/aarch64/fpu/log2f_sve.c
@@ -23,6 +23,7 @@ static const struct data
 {
   float poly_02468[5];
   float poly_1357[4];
+  uint32_t off, lower;
 } data = {
   .poly_1357 = {
     /* Coefficients copied from the AdvSIMD routine, then rearranged so that coeffs
@@ -32,18 +33,23 @@ static const struct data
   },
   .poly_02468 = { 0x1.715476p0f, 0x1.ec701cp-2f, 0x1.27a0b8p-2f,
 		  0x1.9d8ecap-3f, 0x1.9e495p-3f },
+  .off = 0x3f2aaaab,
+  /* Lower bound is the smallest positive normal float 0x00800000. For
+     optimised register use subnormals are detected after offset has been
+     subtracted, so lower bound is 0x00800000 - offset (which wraps around).  */
+  .lower = 0x00800000 - 0x3f2aaaab
 };
 
-#define Min (0x00800000)
-#define Max (0x7f800000)
-#define Thres (0x7f000000) /* Max - Min.  */
+#define Thresh (0x7f000000) /* asuint32(inf) - 0x00800000.  */
 #define MantissaMask (0x007fffff)
-#define Off (0x3f2aaaab) /* 0.666667.  */
 
 static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp)
+special_case (svuint32_t u_off, svfloat32_t p, svfloat32_t r2, svfloat32_t y,
+	      svbool_t cmp)
 {
-  return sv_call_f32 (log2f, x, y, cmp);
+  return sv_call_f32 (
+      log2f, svreinterpret_f32 (svadd_x (svptrue_b32 (), u_off, data.off)),
+      svmla_x (svptrue_b32 (), p, r2, y), cmp);
 }
 
 /* Optimised implementation of SVE log2f, using the same algorithm
@@ -55,19 +61,20 @@ svfloat32_t SV_NAME_F1 (log2) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
-  svuint32_t u = svreinterpret_u32 (x);
-  svbool_t special = svcmpge (pg, svsub_x (pg, u, Min), Thres);
+  svuint32_t u_off = svreinterpret_u32 (x);
+
+  u_off = svsub_x (pg, u_off, d->off);
+  svbool_t special = svcmpge (pg, svsub_x (pg, u_off, d->lower), Thresh);
 
   /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3.  */
-  u = svsub_x (pg, u, Off);
   svfloat32_t n = svcvt_f32_x (
-      pg, svasr_x (pg, svreinterpret_s32 (u), 23)); /* Sign-extend.  */
-  u = svand_x (pg, u, MantissaMask);
-  u = svadd_x (pg, u, Off);
+      pg, svasr_x (pg, svreinterpret_s32 (u_off), 23)); /* Sign-extend.  */
+  svuint32_t u = svand_x (pg, u_off, MantissaMask);
+  u = svadd_x (pg, u, d->off);
   svfloat32_t r = svsub_x (pg, svreinterpret_f32 (u), 1.0f);
 
   /* y = log2(1+r) + n.  */
-  svfloat32_t r2 = svmul_x (pg, r, r);
+  svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r);
 
   /* Evaluate polynomial using pairwise Horner scheme.  */
   svfloat32_t p_1357 = svld1rq (svptrue_b32 (), &d->poly_1357[0]);
@@ -81,6 +88,6 @@ svfloat32_t SV_NAME_F1 (log2) (svfloat32_t x, const svbool_t pg)
   y = svmla_x (pg, q_01, r2, y);
 
   if (__glibc_unlikely (svptest_any (pg, special)))
-    return special_case (x, svmla_x (svnot_z (pg, special), n, r, y), special);
-  return svmla_x (pg, n, r, y);
+    return special_case (u_off, n, r, y, special);
+  return svmla_x (svptrue_b32 (), n, r, y);
 }
diff --git a/sysdeps/aarch64/fpu/logf_advsimd.c b/sysdeps/aarch64/fpu/logf_advsimd.c
index 3c0d0fcdc7..c20dbfd6c0 100644
--- a/sysdeps/aarch64/fpu/logf_advsimd.c
+++ b/sysdeps/aarch64/fpu/logf_advsimd.c
@@ -21,20 +21,22 @@
 
 static const struct data
 {
-  uint32x4_t min_norm;
+  uint32x4_t off, offset_lower_bound;
   uint16x8_t special_bound;
+  uint32x4_t mantissa_mask;
   float32x4_t poly[7];
-  float32x4_t ln2, tiny_bound;
-  uint32x4_t off, mantissa_mask;
+  float32x4_t ln2;
 } data = {
   /* 3.34 ulp error.  */
   .poly = { V4 (-0x1.3e737cp-3f), V4 (0x1.5a9aa2p-3f), V4 (-0x1.4f9934p-3f),
 	    V4 (0x1.961348p-3f), V4 (-0x1.00187cp-2f), V4 (0x1.555d7cp-2f),
 	    V4 (-0x1.ffffc8p-2f) },
   .ln2 = V4 (0x1.62e43p-1f),
-  .tiny_bound = V4 (0x1p-126),
-  .min_norm = V4 (0x00800000),
-  .special_bound = V8 (0x7f00), /* asuint32(inf) - min_norm.  */
+  /* Lower bound is the smallest positive normal float 0x00800000. For
+     optimised register use subnormals are detected after offset has been
+     subtracted, so lower bound is 0x00800000 - offset (which wraps around).  */
+  .offset_lower_bound = V4 (0x00800000 - 0x3f2aaaab),
+  .special_bound = V8 (0x7f00), /* top16(asuint32(inf) - 0x00800000).  */
   .off = V4 (0x3f2aaaab),	/* 0.666667.  */
   .mantissa_mask = V4 (0x007fffff)
 };
@@ -42,32 +44,37 @@ static const struct data
 #define P(i) d->poly[7 - i]
 
 static float32x4_t VPCS_ATTR NOINLINE
-special_case (float32x4_t x, float32x4_t y, float32x4_t r2, float32x4_t p,
-	      uint16x4_t cmp)
+special_case (float32x4_t p, uint32x4_t u_off, float32x4_t y, float32x4_t r2,
+	      uint16x4_t cmp, const struct data *d)
 {
   /* Fall back to scalar code.  */
-  return v_call_f32 (logf, x, vfmaq_f32 (p, y, r2), vmovl_u16 (cmp));
+  return v_call_f32 (logf, vreinterpretq_f32_u32 (vaddq_u32 (u_off, d->off)),
+		     vfmaq_f32 (p, y, r2), vmovl_u16 (cmp));
 }
 
 float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log) (float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
   float32x4_t n, p, q, r, r2, y;
-  uint32x4_t u;
+  uint32x4_t u, u_off;
   uint16x4_t cmp;
 
-  u = vreinterpretq_u32_f32 (x);
-  cmp = vcge_u16 (vsubhn_u32 (u, d->min_norm),
-		  vget_low_u16 (d->special_bound));
+  /* To avoid having to mov x out of the way, keep u after offset has been
+     applied, and recover x by adding the offset back in the special-case
+     handler.  */
+  u_off = vreinterpretq_u32_f32 (x);
 
   /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3.  */
-  u = vsubq_u32 (u, d->off);
+  u_off = vsubq_u32 (u_off, d->off);
   n = vcvtq_f32_s32 (
-      vshrq_n_s32 (vreinterpretq_s32_u32 (u), 23)); /* signextend.  */
-  u = vandq_u32 (u, d->mantissa_mask);
+      vshrq_n_s32 (vreinterpretq_s32_u32 (u_off), 23)); /* signextend.  */
+  u = vandq_u32 (u_off, d->mantissa_mask);
   u = vaddq_u32 (u, d->off);
   r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f));
 
+  cmp = vcge_u16 (vsubhn_u32 (u_off, d->offset_lower_bound),
+		  vget_low_u16 (d->special_bound));
+
   /* y = log(1+r) + n*ln2.  */
   r2 = vmulq_f32 (r, r);
   /* n*ln2 + r + r2*(P1 + r*P2 + r2*(P3 + r*P4 + r2*(P5 + r*P6 + r2*P7))).  */
@@ -80,7 +87,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log) (float32x4_t x)
   p = vfmaq_f32 (r, d->ln2, n);
 
   if (__glibc_unlikely (v_any_u16h (cmp)))
-    return special_case (x, y, r2, p, cmp);
+    return special_case (p, u_off, y, r2, cmp, d);
   return vfmaq_f32 (p, y, r2);
 }
 libmvec_hidden_def (V_NAME_F1 (log))
diff --git a/sysdeps/aarch64/fpu/logf_sve.c b/sysdeps/aarch64/fpu/logf_sve.c
index d64e810cfe..5b9324678d 100644
--- a/sysdeps/aarch64/fpu/logf_sve.c
+++ b/sysdeps/aarch64/fpu/logf_sve.c
@@ -24,6 +24,7 @@ static const struct data
   float poly_0135[4];
   float poly_246[3];
   float ln2;
+  uint32_t off, lower;
 } data = {
   .poly_0135 = {
     /* Coefficients copied from the AdvSIMD routine in math/, then rearranged so
@@ -32,19 +33,24 @@ static const struct data
     -0x1.3e737cp-3f, 0x1.5a9aa2p-3f, 0x1.961348p-3f, 0x1.555d7cp-2f
   },
   .poly_246 = { -0x1.4f9934p-3f, -0x1.00187cp-2f, -0x1.ffffc8p-2f },
-  .ln2 = 0x1.62e43p-1f
+  .ln2 = 0x1.62e43p-1f,
+  .off = 0x3f2aaaab,
+  /* Lower bound is the smallest positive normal float 0x00800000. For
+     optimised register use subnormals are detected after offset has been
+     subtracted, so lower bound is 0x00800000 - offset (which wraps around).  */
+  .lower = 0x00800000 - 0x3f2aaaab
 };
 
-#define Min (0x00800000)
-#define Max (0x7f800000)
-#define Thresh (0x7f000000) /* Max - Min.  */
+#define Thresh (0x7f000000) /* asuint32(inf) - 0x00800000.  */
 #define Mask (0x007fffff)
-#define Off (0x3f2aaaab) /* 0.666667.  */
 
 static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp)
+special_case (svuint32_t u_off, svfloat32_t p, svfloat32_t r2, svfloat32_t y,
+	      svbool_t cmp)
 {
-  return sv_call_f32 (logf, x, y, cmp);
+  return sv_call_f32 (
+      logf, svreinterpret_f32 (svadd_x (svptrue_b32 (), u_off, data.off)),
+      svmla_x (svptrue_b32 (), p, r2, y), cmp);
 }
 
 /* Optimised implementation of SVE logf, using the same algorithm and
@@ -55,19 +61,21 @@ svfloat32_t SV_NAME_F1 (log) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
-  svuint32_t u = svreinterpret_u32 (x);
-  svbool_t cmp = svcmpge (pg, svsub_x (pg, u, Min), Thresh);
+  svuint32_t u_off = svreinterpret_u32 (x);
+
+  u_off = svsub_x (pg, u_off, d->off);
+  svbool_t cmp = svcmpge (pg, svsub_x (pg, u_off, d->lower), Thresh);
 
   /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3.  */
-  u = svsub_x (pg, u, Off);
   svfloat32_t n = svcvt_f32_x (
-      pg, svasr_x (pg, svreinterpret_s32 (u), 23)); /* Sign-extend.  */
-  u = svand_x (pg, u, Mask);
-  u = svadd_x (pg, u, Off);
+      pg, svasr_x (pg, svreinterpret_s32 (u_off), 23)); /* Sign-extend.  */
+
+  svuint32_t u = svand_x (pg, u_off, Mask);
+  u = svadd_x (pg, u, d->off);
   svfloat32_t r = svsub_x (pg, svreinterpret_f32 (u), 1.0f);
 
   /* y = log(1+r) + n*ln2.  */
-  svfloat32_t r2 = svmul_x (pg, r, r);
+  svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r);
   /* n*ln2 + r + r2*(P6 + r*P5 + r2*(P4 + r*P3 + r2*(P2 + r*P1 + r2*P0))).  */
   svfloat32_t p_0135 = svld1rq (svptrue_b32 (), &d->poly_0135[0]);
   svfloat32_t p = svmla_lane (sv_f32 (d->poly_246[0]), r, p_0135, 1);
@@ -80,6 +88,6 @@ svfloat32_t SV_NAME_F1 (log) (svfloat32_t x, const svbool_t pg)
   p = svmla_x (pg, r, n, d->ln2);
 
   if (__glibc_unlikely (svptest_any (pg, cmp)))
-    return special_case (x, svmla_x (svnot_z (pg, cmp), p, r2, y), cmp);
+    return special_case (u_off, p, r2, y, cmp);
   return svmla_x (pg, p, r2, y);
 }
diff --git a/sysdeps/aarch64/fpu/sin_advsimd.c b/sysdeps/aarch64/fpu/sin_advsimd.c
index a0d9d3b819..718125cbad 100644
--- a/sysdeps/aarch64/fpu/sin_advsimd.c
+++ b/sysdeps/aarch64/fpu/sin_advsimd.c
@@ -22,7 +22,7 @@
 static const struct data
 {
   float64x2_t poly[7];
-  float64x2_t range_val, inv_pi, shift, pi_1, pi_2, pi_3;
+  float64x2_t range_val, inv_pi, pi_1, pi_2, pi_3;
 } data = {
   .poly = { V2 (-0x1.555555555547bp-3), V2 (0x1.1111111108a4dp-7),
 	    V2 (-0x1.a01a019936f27p-13), V2 (0x1.71de37a97d93ep-19),
@@ -34,12 +34,13 @@ static const struct data
   .pi_1 = V2 (0x1.921fb54442d18p+1),
   .pi_2 = V2 (0x1.1a62633145c06p-53),
   .pi_3 = V2 (0x1.c1cd129024e09p-106),
-  .shift = V2 (0x1.8p52),
 };
 
 #if WANT_SIMD_EXCEPT
-# define TinyBound v_u64 (0x3000000000000000) /* asuint64 (0x1p-255).  */
-# define Thresh v_u64 (0x1160000000000000)    /* RangeVal - TinyBound.  */
+/* asuint64(0x1p-253), below which multiply by inv_pi underflows.  */
+# define TinyBound v_u64 (0x3020000000000000)
+/* RangeVal - TinyBound.  */
+# define Thresh v_u64 (0x1160000000000000)
 #endif
 
 #define C(i) d->poly[i]
@@ -72,16 +73,15 @@ float64x2_t VPCS_ATTR V_NAME_D1 (sin) (float64x2_t x)
      fenv). These lanes will be fixed by special-case handler later.  */
   uint64x2_t ir = vreinterpretq_u64_f64 (vabsq_f64 (x));
   cmp = vcgeq_u64 (vsubq_u64 (ir, TinyBound), Thresh);
-  r = vbslq_f64 (cmp, vreinterpretq_f64_u64 (cmp), x);
+  r = vreinterpretq_f64_u64 (vbicq_u64 (vreinterpretq_u64_f64 (x), cmp));
 #else
   r = x;
   cmp = vcageq_f64 (x, d->range_val);
 #endif
 
   /* n = rint(|x|/pi).  */
-  n = vfmaq_f64 (d->shift, d->inv_pi, r);
-  odd = vshlq_n_u64 (vreinterpretq_u64_f64 (n), 63);
-  n = vsubq_f64 (n, d->shift);
+  n = vrndaq_f64 (vmulq_f64 (r, d->inv_pi));
+  odd = vshlq_n_u64 (vreinterpretq_u64_s64 (vcvtq_s64_f64 (n)), 63);
 
   /* r = |x| - n*pi  (range reduction into -pi/2 .. pi/2).  */
   r = vfmsq_f64 (r, d->pi_1, n);
diff --git a/sysdeps/aarch64/fpu/sinf_advsimd.c b/sysdeps/aarch64/fpu/sinf_advsimd.c
index 375dfc3331..6ee9a23d5b 100644
--- a/sysdeps/aarch64/fpu/sinf_advsimd.c
+++ b/sysdeps/aarch64/fpu/sinf_advsimd.c
@@ -22,7 +22,7 @@
 static const struct data
 {
   float32x4_t poly[4];
-  float32x4_t range_val, inv_pi, shift, pi_1, pi_2, pi_3;
+  float32x4_t range_val, inv_pi, pi_1, pi_2, pi_3;
 } data = {
   /* 1.886 ulp error.  */
   .poly = { V4 (-0x1.555548p-3f), V4 (0x1.110df4p-7f), V4 (-0x1.9f42eap-13f),
@@ -33,13 +33,14 @@ static const struct data
   .pi_3 = V4 (-0x1.ee59dap-49f),
 
   .inv_pi = V4 (0x1.45f306p-2f),
-  .shift = V4 (0x1.8p+23f),
   .range_val = V4 (0x1p20f)
 };
 
 #if WANT_SIMD_EXCEPT
-# define TinyBound v_u32 (0x21000000) /* asuint32(0x1p-61f).  */
-# define Thresh v_u32 (0x28800000)    /* RangeVal - TinyBound.  */
+/* asuint32(0x1p-59f), below which multiply by inv_pi underflows.  */
+# define TinyBound v_u32 (0x22000000)
+/* RangeVal - TinyBound.  */
+# define Thresh v_u32 (0x27800000)
 #endif
 
 #define C(i) d->poly[i]
@@ -64,23 +65,22 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sin) (float32x4_t x)
   /* If fenv exceptions are to be triggered correctly, set any special lanes
      to 1 (which is neutral w.r.t. fenv). These lanes will be fixed by
      special-case handler later.  */
-  r = vbslq_f32 (cmp, vreinterpretq_f32_u32 (cmp), x);
+  r = vreinterpretq_f32_u32 (vbicq_u32 (vreinterpretq_u32_f32 (x), cmp));
 #else
   r = x;
   cmp = vcageq_f32 (x, d->range_val);
 #endif
 
-  /* n = rint(|x|/pi) */
-  n = vfmaq_f32 (d->shift, d->inv_pi, r);
-  odd = vshlq_n_u32 (vreinterpretq_u32_f32 (n), 31);
-  n = vsubq_f32 (n, d->shift);
+  /* n = rint(|x|/pi).  */
+  n = vrndaq_f32 (vmulq_f32 (r, d->inv_pi));
+  odd = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 31);
 
-  /* r = |x| - n*pi  (range reduction into -pi/2 .. pi/2) */
+  /* r = |x| - n*pi  (range reduction into -pi/2 .. pi/2).  */
   r = vfmsq_f32 (r, d->pi_1, n);
   r = vfmsq_f32 (r, d->pi_2, n);
   r = vfmsq_f32 (r, d->pi_3, n);
 
-  /* y = sin(r) */
+  /* y = sin(r).  */
   r2 = vmulq_f32 (r, r);
   y = vfmaq_f32 (C (2), C (3), r2);
   y = vfmaq_f32 (C (1), y, r2);
diff --git a/sysdeps/aarch64/fpu/sinhf_advsimd.c b/sysdeps/aarch64/fpu/sinhf_advsimd.c
index 6bb7482dc2..c6ed7598e7 100644
--- a/sysdeps/aarch64/fpu/sinhf_advsimd.c
+++ b/sysdeps/aarch64/fpu/sinhf_advsimd.c
@@ -23,15 +23,13 @@
 static const struct data
 {
   struct v_expm1f_data expm1f_consts;
-  uint32x4_t halff;
 #if WANT_SIMD_EXCEPT
   uint32x4_t tiny_bound, thresh;
 #else
-  uint32x4_t oflow_bound;
+  float32x4_t oflow_bound;
 #endif
 } data = {
   .expm1f_consts = V_EXPM1F_DATA,
-  .halff = V4 (0x3f000000),
 #if WANT_SIMD_EXCEPT
   /* 0x1.6a09e8p-32, below which expm1f underflows.  */
   .tiny_bound = V4 (0x2fb504f4),
@@ -39,14 +37,15 @@ static const struct data
   .thresh = V4 (0x12fbbbb3),
 #else
   /* 0x1.61814ep+6, above which expm1f helper overflows.  */
-  .oflow_bound = V4 (0x42b0c0a7),
+  .oflow_bound = V4 (0x1.61814ep+6),
 #endif
 };
 
 static float32x4_t NOINLINE VPCS_ATTR
-special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+special_case (float32x4_t x, float32x4_t t, float32x4_t halfsign,
+	      uint32x4_t special)
 {
-  return v_call_f32 (sinhf, x, y, special);
+  return v_call_f32 (sinhf, x, vmulq_f32 (t, halfsign), special);
 }
 
 /* Approximation for vector single-precision sinh(x) using expm1.
@@ -60,15 +59,15 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sinh) (float32x4_t x)
 
   uint32x4_t ix = vreinterpretq_u32_f32 (x);
   float32x4_t ax = vabsq_f32 (x);
-  uint32x4_t iax = vreinterpretq_u32_f32 (ax);
-  uint32x4_t sign = veorq_u32 (ix, iax);
-  float32x4_t halfsign = vreinterpretq_f32_u32 (vorrq_u32 (sign, d->halff));
+  float32x4_t halfsign = vreinterpretq_f32_u32 (
+      vbslq_u32 (v_u32 (0x80000000), ix, vreinterpretq_u32_f32 (v_f32 (0.5))));
 
 #if WANT_SIMD_EXCEPT
-  uint32x4_t special = vcgeq_u32 (vsubq_u32 (iax, d->tiny_bound), d->thresh);
+  uint32x4_t special = vcgeq_u32 (
+      vsubq_u32 (vreinterpretq_u32_f32 (ax), d->tiny_bound), d->thresh);
   ax = v_zerofy_f32 (ax, special);
 #else
-  uint32x4_t special = vcgeq_u32 (iax, d->oflow_bound);
+  uint32x4_t special = vcageq_f32 (x, d->oflow_bound);
 #endif
 
   /* Up to the point that expm1f overflows, we can use it to calculate sinhf
@@ -80,7 +79,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sinh) (float32x4_t x)
   /* Fall back to the scalar variant for any lanes that should trigger an
      exception.  */
   if (__glibc_unlikely (v_any_u32 (special)))
-    return special_case (x, vmulq_f32 (t, halfsign), special);
+    return special_case (x, t, halfsign, special);
 
   return vmulq_f32 (t, halfsign);
 }
diff --git a/sysdeps/aarch64/fpu/sv_expf_inline.h b/sysdeps/aarch64/fpu/sv_expf_inline.h
index 23963b5f8e..6166df6553 100644
--- a/sysdeps/aarch64/fpu/sv_expf_inline.h
+++ b/sysdeps/aarch64/fpu/sv_expf_inline.h
@@ -24,19 +24,20 @@
 
 struct sv_expf_data
 {
-  float poly[5];
-  float inv_ln2, ln2_hi, ln2_lo, shift;
+  float c1, c3, inv_ln2;
+  float ln2_lo, c0, c2, c4;
+  float ln2_hi, shift;
 };
 
 /* Coefficients copied from the polynomial in AdvSIMD variant, reversed for
    compatibility with polynomial helpers. Shift is 1.5*2^17 + 127.  */
 #define SV_EXPF_DATA                                                          \
   {                                                                           \
-    .poly = { 0x1.ffffecp-1f, 0x1.fffdb6p-2f, 0x1.555e66p-3f, 0x1.573e2ep-5f, \
-	      0x1.0e4020p-7f },                                               \
-                                                                              \
-    .inv_ln2 = 0x1.715476p+0f, .ln2_hi = 0x1.62e4p-1f,                        \
-    .ln2_lo = 0x1.7f7d1cp-20f, .shift = 0x1.803f8p17f,                        \
+    /* Coefficients copied from the polynomial in AdvSIMD variant.  */        \
+    .c0 = 0x1.ffffecp-1f, .c1 = 0x1.fffdb6p-2f, .c2 = 0x1.555e66p-3f,         \
+    .c3 = 0x1.573e2ep-5f, .c4 = 0x1.0e4020p-7f, .inv_ln2 = 0x1.715476p+0f,    \
+    .ln2_hi = 0x1.62e4p-1f, .ln2_lo = 0x1.7f7d1cp-20f,                        \
+    .shift = 0x1.803f8p17f,                                                   \
   }
 
 #define C(i) sv_f32 (d->poly[i])
@@ -47,26 +48,25 @@ expf_inline (svfloat32_t x, const svbool_t pg, const struct sv_expf_data *d)
   /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
      x = ln2*n + r, with r in [-ln2/2, ln2/2].  */
 
-  /* Load some constants in quad-word chunks to minimise memory access.  */
-  svfloat32_t c4_invln2_and_ln2 = svld1rq (svptrue_b32 (), &d->poly[4]);
+  svfloat32_t lane_consts = svld1rq (svptrue_b32 (), &d->ln2_lo);
 
   /* n = round(x/(ln2/N)).  */
-  svfloat32_t z = svmla_lane (sv_f32 (d->shift), x, c4_invln2_and_ln2, 1);
+  svfloat32_t z = svmad_x (pg, sv_f32 (d->inv_ln2), x, d->shift);
   svfloat32_t n = svsub_x (pg, z, d->shift);
 
   /* r = x - n*ln2/N.  */
-  svfloat32_t r = svmls_lane (x, n, c4_invln2_and_ln2, 2);
-  r = svmls_lane (r, n, c4_invln2_and_ln2, 3);
+  svfloat32_t r = svmsb_x (pg, sv_f32 (d->ln2_hi), n, x);
+  r = svmls_lane (r, n, lane_consts, 0);
 
   /* scale = 2^(n/N).  */
-  svfloat32_t scale = svexpa (svreinterpret_u32_f32 (z));
+  svfloat32_t scale = svexpa (svreinterpret_u32 (z));
 
   /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6.  */
-  svfloat32_t p12 = svmla_x (pg, C (1), C (2), r);
-  svfloat32_t p34 = svmla_lane (C (3), r, c4_invln2_and_ln2, 0);
-  svfloat32_t r2 = svmul_f32_x (pg, r, r);
+  svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, lane_consts, 2);
+  svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, lane_consts, 3);
+  svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r);
   svfloat32_t p14 = svmla_x (pg, p12, p34, r2);
-  svfloat32_t p0 = svmul_f32_x (pg, r, C (0));
+  svfloat32_t p0 = svmul_lane (r, lane_consts, 1);
   svfloat32_t poly = svmla_x (pg, p0, r2, p14);
 
   return svmla_x (pg, scale, scale, poly);
diff --git a/sysdeps/aarch64/fpu/tanhf_advsimd.c b/sysdeps/aarch64/fpu/tanhf_advsimd.c
index 50defd6ef0..3ced9b7a41 100644
--- a/sysdeps/aarch64/fpu/tanhf_advsimd.c
+++ b/sysdeps/aarch64/fpu/tanhf_advsimd.c
@@ -28,13 +28,16 @@ static const struct data
   /* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for  negative).  */
   .boring_bound = V4 (0x41102cb3),
   .large_bound = V4 (0x7f800000),
-  .onef = V4 (0x3f800000),
 };
 
 static float32x4_t NOINLINE VPCS_ATTR
-special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+special_case (float32x4_t x, uint32x4_t is_boring, float32x4_t boring,
+	      float32x4_t q, uint32x4_t special)
 {
-  return v_call_f32 (tanhf, x, y, special);
+  return v_call_f32 (
+      tanhf, x,
+      vbslq_f32 (is_boring, boring, vdivq_f32 (q, vaddq_f32 (q, v_f32 (2.0)))),
+      special);
 }
 
 /* Approximation for single-precision vector tanh(x), using a simplified
@@ -50,7 +53,9 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (tanh) (float32x4_t x)
   uint32x4_t iax = vreinterpretq_u32_f32 (ax);
   uint32x4_t sign = veorq_u32 (ix, iax);
   uint32x4_t is_boring = vcgtq_u32 (iax, d->boring_bound);
-  float32x4_t boring = vreinterpretq_f32_u32 (vorrq_u32 (sign, d->onef));
+  /* expm1 exponent bias is 1.0f reinterpreted to int.  */
+  float32x4_t boring = vreinterpretq_f32_u32 (vorrq_u32 (
+      sign, vreinterpretq_u32_s32 (d->expm1f_consts.exponent_bias)));
 
 #if WANT_SIMD_EXCEPT
   /* If fp exceptions are to be triggered properly, set all special and boring
@@ -66,10 +71,12 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (tanh) (float32x4_t x)
 
   /* tanh(x) = (e^2x - 1) / (e^2x + 1).  */
   float32x4_t q = expm1f_inline (vmulq_n_f32 (x, 2), &d->expm1f_consts);
-  float32x4_t y = vdivq_f32 (q, vaddq_f32 (q, v_f32 (2.0)));
+
   if (__glibc_unlikely (v_any_u32 (special)))
-    return special_case (vreinterpretq_f32_u32 (ix),
-			 vbslq_f32 (is_boring, boring, y), special);
+    return special_case (vreinterpretq_f32_u32 (ix), is_boring, boring, q,
+			 special);
+
+  float32x4_t y = vdivq_f32 (q, vaddq_f32 (q, v_f32 (2.0)));
   return vbslq_f32 (is_boring, boring, y);
 }
 libmvec_hidden_def (V_NAME_F1 (tanh))
diff --git a/sysdeps/aarch64/fpu/v_expm1f_inline.h b/sysdeps/aarch64/fpu/v_expm1f_inline.h
index 59b552da6b..1daedfdd51 100644
--- a/sysdeps/aarch64/fpu/v_expm1f_inline.h
+++ b/sysdeps/aarch64/fpu/v_expm1f_inline.h
@@ -21,48 +21,47 @@
 #define AARCH64_FPU_V_EXPM1F_INLINE_H
 
 #include "v_math.h"
-#include "poly_advsimd_f32.h"
+#include "math_config.h"
 
 struct v_expm1f_data
 {
-  float32x4_t poly[5];
-  float invln2_and_ln2[4];
-  float32x4_t shift;
+  float32x4_t c0, c2;
   int32x4_t exponent_bias;
+  float c1, c3, inv_ln2, c4;
+  float ln2_hi, ln2_lo;
 };
 
 /* Coefficients generated using fpminimax with degree=5 in [-log(2)/2,
-   log(2)/2]. Exponent bias is asuint(1.0f).
-   invln2_and_ln2 Stores constants: invln2, ln2_lo, ln2_hi, 0.  */
+   log(2)/2]. Exponent bias is asuint(1.0f).  */
 #define V_EXPM1F_DATA                                                         \
   {                                                                           \
-    .poly = { V4 (0x1.fffffep-2), V4 (0x1.5554aep-3), V4 (0x1.555736p-5),     \
-	      V4 (0x1.12287cp-7), V4 (0x1.6b55a2p-10) },                      \
-    .shift = V4 (0x1.8p23f), .exponent_bias = V4 (0x3f800000),                \
-    .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 },   \
+    .c0 = V4 (0x1.fffffep-2), .c1 = 0x1.5554aep-3, .c2 = V4 (0x1.555736p-5),  \
+    .c3 = 0x1.12287cp-7, .c4 = 0x1.6b55a2p-10,                                \
+    .exponent_bias = V4 (0x3f800000), .inv_ln2 = 0x1.715476p+0f,              \
+    .ln2_hi = 0x1.62e4p-1f, .ln2_lo = 0x1.7f7d1cp-20f,                        \
   }
 
 static inline float32x4_t
 expm1f_inline (float32x4_t x, const struct v_expm1f_data *d)
 {
-  /* Helper routine for calculating exp(x) - 1.
-     Copied from v_expm1f_1u6.c, with all special-case handling removed - the
-     calling routine should handle special values if required.  */
+  /* Helper routine for calculating exp(x) - 1.  */
+
+  float32x2_t ln2 = vld1_f32 (&d->ln2_hi);
+  float32x4_t lane_consts = vld1q_f32 (&d->c1);
 
   /* Reduce argument: f in [-ln2/2, ln2/2], i is exact.  */
-  float32x4_t invln2_and_ln2 = vld1q_f32 (d->invln2_and_ln2);
-  float32x4_t j
-      = vsubq_f32 (vfmaq_laneq_f32 (d->shift, x, invln2_and_ln2, 0), d->shift);
+  float32x4_t j = vrndaq_f32 (vmulq_laneq_f32 (x, lane_consts, 2));
   int32x4_t i = vcvtq_s32_f32 (j);
-  float32x4_t f = vfmsq_laneq_f32 (x, j, invln2_and_ln2, 1);
-  f = vfmsq_laneq_f32 (f, j, invln2_and_ln2, 2);
+  float32x4_t f = vfmsq_lane_f32 (x, j, ln2, 0);
+  f = vfmsq_lane_f32 (f, j, ln2, 1);
 
-  /* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f).
-     Uses Estrin scheme, where the main _ZGVnN4v_expm1f routine uses
-     Horner.  */
+  /* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f).  */
   float32x4_t f2 = vmulq_f32 (f, f);
   float32x4_t f4 = vmulq_f32 (f2, f2);
-  float32x4_t p = v_estrin_4_f32 (f, f2, f4, d->poly);
+  float32x4_t p01 = vfmaq_laneq_f32 (d->c0, f, lane_consts, 0);
+  float32x4_t p23 = vfmaq_laneq_f32 (d->c2, f, lane_consts, 1);
+  float32x4_t p = vfmaq_f32 (p01, f2, p23);
+  p = vfmaq_laneq_f32 (p, f4, lane_consts, 3);
   p = vfmaq_f32 (f, f2, p);
 
   /* t = 2^i.  */
diff --git a/sysdeps/aarch64/fpu/v_log1pf_inline.h b/sysdeps/aarch64/fpu/v_log1pf_inline.h
index 643a6cdcfc..73e45a942e 100644
--- a/sysdeps/aarch64/fpu/v_log1pf_inline.h
+++ b/sysdeps/aarch64/fpu/v_log1pf_inline.h
@@ -25,54 +25,81 @@
 
 struct v_log1pf_data
 {
-  float32x4_t poly[8], ln2;
   uint32x4_t four;
   int32x4_t three_quarters;
+  float c0, c3, c5, c7;
+  float32x4_t c4, c6, c1, c2, ln2;
 };
 
 /* Polynomial generated using FPMinimax in [-0.25, 0.5]. First two coefficients
    (1, -0.5) are not stored as they can be generated more efficiently.  */
 #define V_LOG1PF_CONSTANTS_TABLE                                              \
   {                                                                           \
-    .poly                                                                     \
-	= { V4 (0x1.5555aap-2f),  V4 (-0x1.000038p-2f), V4 (0x1.99675cp-3f),  \
-	    V4 (-0x1.54ef78p-3f), V4 (0x1.28a1f4p-3f),	V4 (-0x1.0da91p-3f),  \
-	    V4 (0x1.abcb6p-4f),	  V4 (-0x1.6f0d5ep-5f) },                     \
-	.ln2 = V4 (0x1.62e43p-1f), .four = V4 (0x40800000),                   \
-	.three_quarters = V4 (0x3f400000)                                     \
+    .c0 = 0x1.5555aap-2f, .c1 = V4 (-0x1.000038p-2f),                         \
+    .c2 = V4 (0x1.99675cp-3f), .c3 = -0x1.54ef78p-3f,                         \
+    .c4 = V4 (0x1.28a1f4p-3f), .c5 = -0x1.0da91p-3f,                          \
+    .c6 = V4 (0x1.abcb6p-4f), .c7 = -0x1.6f0d5ep-5f,                          \
+    .ln2 = V4 (0x1.62e43p-1f), .four = V4 (0x40800000),                       \
+    .three_quarters = V4 (0x3f400000)                                         \
   }
 
 static inline float32x4_t
-eval_poly (float32x4_t m, const float32x4_t *c)
+eval_poly (float32x4_t m, const struct v_log1pf_data *d)
 {
-  /* Approximate log(1+m) on [-0.25, 0.5] using pairwise Horner (main routine
-     uses split Estrin, but this way reduces register pressure in the calling
-     routine).  */
-  float32x4_t q = vfmaq_f32 (v_f32 (-0.5), m, c[0]);
+  /* Approximate log(1+m) on [-0.25, 0.5] using pairwise Horner.  */
+  float32x4_t c0357 = vld1q_f32 (&d->c0);
+  float32x4_t q = vfmaq_laneq_f32 (v_f32 (-0.5), m, c0357, 0);
   float32x4_t m2 = vmulq_f32 (m, m);
-  q = vfmaq_f32 (m, m2, q);
-  float32x4_t p = v_pw_horner_6_f32 (m, m2, c + 1);
+  float32x4_t p67 = vfmaq_laneq_f32 (d->c6, m, c0357, 3);
+  float32x4_t p45 = vfmaq_laneq_f32 (d->c4, m, c0357, 2);
+  float32x4_t p23 = vfmaq_laneq_f32 (d->c2, m, c0357, 1);
+  float32x4_t p = vfmaq_f32 (p45, m2, p67);
+  p = vfmaq_f32 (p23, m2, p);
+  p = vfmaq_f32 (d->c1, m, p);
   p = vmulq_f32 (m2, p);
-  return vfmaq_f32 (q, m2, p);
+  p = vfmaq_f32 (m, m2, p);
+  return vfmaq_f32 (p, m2, q);
 }
 
 static inline float32x4_t
-log1pf_inline (float32x4_t x, const struct v_log1pf_data d)
+log1pf_inline (float32x4_t x, const struct v_log1pf_data *d)
 {
-  /* Helper for calculating log(x + 1). Copied from log1pf_2u1.c, with no
-     special-case handling. See that file for details of the algorithm.  */
+  /* Helper for calculating log(x + 1).  */
+
+  /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m
+			   is in [-0.25, 0.5]):
+     log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2).
+
+     We approximate log1p(m) with a polynomial, then add k*log(2).
+     Instead of forming the scale 2^-k directly, we use an intermediate
+     scale factor s = 4 * 2^-k to ensure the scale is representable
+     as a normalised fp32 number.  */
   float32x4_t m = vaddq_f32 (x, v_f32 (1.0f));
+
+  /* Choose k to scale x to the range [-1/4, 1/2].  */
   int32x4_t k
-      = vandq_s32 (vsubq_s32 (vreinterpretq_s32_f32 (m), d.three_quarters),
+      = vandq_s32 (vsubq_s32 (vreinterpretq_s32_f32 (m), d->three_quarters),
 		   v_s32 (0xff800000));
   uint32x4_t ku = vreinterpretq_u32_s32 (k);
-  float32x4_t s = vreinterpretq_f32_u32 (vsubq_u32 (d.four, ku));
+
+  /* Scale up to ensure that the scale factor is representable as a
+     normalised fp32 number, and scale m down accordingly.  */
+  float32x4_t s = vreinterpretq_f32_u32 (vsubq_u32 (d->four, ku));
+
+  /* Scale x by exponent manipulation.  */
   float32x4_t m_scale
       = vreinterpretq_f32_u32 (vsubq_u32 (vreinterpretq_u32_f32 (x), ku));
   m_scale = vaddq_f32 (m_scale, vfmaq_f32 (v_f32 (-1.0f), v_f32 (0.25f), s));
-  float32x4_t p = eval_poly (m_scale, d.poly);
+
+  /* Evaluate polynomial on the reduced interval.  */
+  float32x4_t p = eval_poly (m_scale, d);
+
+  /* The scale factor to be applied back at the end - by multiplying float(k)
+     by 2^-23 we get the unbiased exponent of k.  */
   float32x4_t scale_back = vmulq_f32 (vcvtq_f32_s32 (k), v_f32 (0x1.0p-23f));
-  return vfmaq_f32 (p, scale_back, d.ln2);
+
+  /* Apply the scaling back.  */
+  return vfmaq_f32 (p, scale_back, d->ln2);
 }
 
 #endif
diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps
index a7a4a94265..8c333fcd29 100644
--- a/sysdeps/aarch64/libm-test-ulps
+++ b/sysdeps/aarch64/libm-test-ulps
@@ -1082,24 +1082,24 @@ float: 1
 ldouble: 3
 
 Function: "exp10m1":
-double: 2
-float: 1
-ldouble: 1
+double: 4
+float: 2
+ldouble: 3
 
 Function: "exp10m1_downward":
-double: 1
-float: 1
-ldouble: 3
+double: 3
+float: 3
+ldouble: 6
 
 Function: "exp10m1_towardzero":
-double: 1
-float: 1
-ldouble: 3
+double: 2
+float: 3
+ldouble: 6
 
 Function: "exp10m1_upward":
-double: 3
-float: 1
-ldouble: 3
+double: 5
+float: 3
+ldouble: 6
 
 Function: "exp2":
 double: 1
@@ -1130,24 +1130,24 @@ float: 1
 ldouble: 2
 
 Function: "exp2m1":
-double: 1
-float: 1
-ldouble: 1
+double: 2
+float: 2
+ldouble: 2
 
 Function: "exp2m1_downward":
-double: 1
-float: 1
-ldouble: 2
+double: 3
+float: 3
+ldouble: 3
 
 Function: "exp2m1_towardzero":
-double: 2
-float: 1
-ldouble: 2
+double: 3
+float: 2
+ldouble: 4
 
 Function: "exp2m1_upward":
-double: 1
-float: 1
-ldouble: 2
+double: 3
+float: 3
+ldouble: 5
 
 Function: "exp_advsimd":
 double: 1
@@ -1356,24 +1356,24 @@ float: 2
 ldouble: 1
 
 Function: "log10p1":
-double: 1
-float: 1
+double: 2
+float: 2
 ldouble: 3
 
 Function: "log10p1_downward":
 double: 2
-float: 1
-ldouble: 2
+float: 3
+ldouble: 4
 
 Function: "log10p1_towardzero":
-double: 2
+double: 3
 float: 2
-ldouble: 2
+ldouble: 3
 
 Function: "log10p1_upward":
 double: 2
-float: 1
-ldouble: 3
+float: 3
+ldouble: 4
 
 Function: "log1p":
 double: 1
@@ -1653,22 +1653,18 @@ ldouble: 3
 
 Function: "tgamma":
 double: 9
-float: 8
 ldouble: 4
 
 Function: "tgamma_downward":
 double: 9
-float: 7
 ldouble: 5
 
 Function: "tgamma_towardzero":
 double: 9
-float: 7
 ldouble: 5
 
 Function: "tgamma_upward":
 double: 9
-float: 8
 ldouble: 4
 
 Function: "y0":
diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S
index 7ef77ee8c9..b76dde1557 100644
--- a/sysdeps/aarch64/memset.S
+++ b/sysdeps/aarch64/memset.S
@@ -1,4 +1,5 @@
-/* Copyright (C) 2012-2024 Free Software Foundation, Inc.
+/* Generic optimized memset using SIMD.
+   Copyright (C) 2012-2024 Free Software Foundation, Inc.
 
    This file is part of the GNU C Library.
 
@@ -17,7 +18,6 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include "memset-reg.h"
 
 #ifndef MEMSET
 # define MEMSET memset
@@ -25,130 +25,131 @@
 
 /* Assumptions:
  *
- * ARMv8-a, AArch64, unaligned accesses
+ * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
  *
  */
 
-ENTRY (MEMSET)
+#define dstin	x0
+#define valw	w1
+#define count	x2
+#define dst	x3
+#define dstend	x4
+#define zva_val	x5
+#define off	x3
+#define dstend2	x5
 
+ENTRY (MEMSET)
 	PTR_ARG (0)
 	SIZE_ARG (2)
 
 	dup	v0.16B, valw
+	cmp	count, 16
+	b.lo	L(set_small)
+
 	add	dstend, dstin, count
+	cmp	count, 64
+	b.hs	L(set_128)
 
-	cmp	count, 96
-	b.hi	L(set_long)
-	cmp	count, 16
-	b.hs	L(set_medium)
-	mov	val, v0.D[0]
+	/* Set 16..63 bytes.  */
+	mov	off, 16
+	and	off, off, count, lsr 1
+	sub	dstend2, dstend, off
+	str	q0, [dstin]
+	str	q0, [dstin, off]
+	str	q0, [dstend2, -16]
+	str	q0, [dstend, -16]
+	ret
 
+	.p2align 4
 	/* Set 0..15 bytes.  */
-	tbz	count, 3, 1f
-	str	val, [dstin]
-	str	val, [dstend, -8]
-	ret
-	nop
-1:	tbz	count, 2, 2f
-	str	valw, [dstin]
-	str	valw, [dstend, -4]
+L(set_small):
+	add	dstend, dstin, count
+	cmp	count, 4
+	b.lo	2f
+	lsr	off, count, 3
+	sub	dstend2, dstend, off, lsl 2
+	str	s0, [dstin]
+	str	s0, [dstin, off, lsl 2]
+	str	s0, [dstend2, -4]
+	str	s0, [dstend, -4]
 	ret
+
+	/* Set 0..3 bytes.  */
 2:	cbz	count, 3f
+	lsr	off, count, 1
 	strb	valw, [dstin]
-	tbz	count, 1, 3f
-	strh	valw, [dstend, -2]
+	strb	valw, [dstin, off]
+	strb	valw, [dstend, -1]
 3:	ret
 
-	/* Set 17..96 bytes.  */
-L(set_medium):
-	str	q0, [dstin]
-	tbnz	count, 6, L(set96)
-	str	q0, [dstend, -16]
-	tbz	count, 5, 1f
-	str	q0, [dstin, 16]
-	str	q0, [dstend, -32]
-1:	ret
-
 	.p2align 4
-	/* Set 64..96 bytes.  Write 64 bytes from the start and
-	   32 bytes from the end.  */
-L(set96):
-	str	q0, [dstin, 16]
+L(set_128):
+	bic	dst, dstin, 15
+	cmp	count, 128
+	b.hi	L(set_long)
+	stp	q0, q0, [dstin]
 	stp	q0, q0, [dstin, 32]
+	stp	q0, q0, [dstend, -64]
 	stp	q0, q0, [dstend, -32]
 	ret
 
-	.p2align 3
-	nop
+	.p2align 4
 L(set_long):
-	and	valw, valw, 255
-	bic	dst, dstin, 15
 	str	q0, [dstin]
-	cmp	count, 256
-	ccmp	valw, 0, 0, cs
-	b.eq	L(try_zva)
-L(no_zva):
-	sub	count, dstend, dst	/* Count is 16 too large.  */
-	sub	dst, dst, 16		/* Dst is biased by -32.  */
-	sub	count, count, 64 + 16	/* Adjust count and bias for loop.  */
-1:	stp	q0, q0, [dst, 32]
-	stp	q0, q0, [dst, 64]!
-L(tail64):
-	subs	count, count, 64
-	b.hi	1b
-2:	stp	q0, q0, [dstend, -64]
+	str	q0, [dst, 16]
+	tst	valw, 255
+	b.ne	L(no_zva)
+#ifndef ZVA64_ONLY
+	mrs	zva_val, dczid_el0
+	and	zva_val, zva_val, 31
+	cmp	zva_val, 4		/* ZVA size is 64 bytes.  */
+	b.ne	L(zva_128)
+#endif
+	stp	q0, q0, [dst, 32]
+	bic	dst, dstin, 63
+	sub	count, dstend, dst	/* Count is now 64 too large.  */
+	sub	count, count, 64 + 64	/* Adjust count and bias for loop.  */
+
+	/* Write last bytes before ZVA loop.  */
+	stp	q0, q0, [dstend, -64]
 	stp	q0, q0, [dstend, -32]
+
+	.p2align 4
+L(zva64_loop):
+	add	dst, dst, 64
+	dc	zva, dst
+	subs	count, count, 64
+	b.hi	L(zva64_loop)
 	ret
 
-L(try_zva):
-#ifndef ZVA64_ONLY
 	.p2align 3
-	mrs	tmp1, dczid_el0
-	tbnz	tmp1w, 4, L(no_zva)
-	and	tmp1w, tmp1w, 15
-	cmp	tmp1w, 4	/* ZVA size is 64 bytes.  */
-	b.ne	 L(zva_128)
-	nop
-#endif
-	/* Write the first and last 64 byte aligned block using stp rather
-	   than using DC ZVA.  This is faster on some cores.
-	 */
-	.p2align 4
-L(zva_64):
-	str	q0, [dst, 16]
+L(no_zva):
+	sub	count, dstend, dst	/* Count is 32 too large.  */
+	sub	count, count, 64 + 32	/* Adjust count and bias for loop.  */
+L(no_zva_loop):
 	stp	q0, q0, [dst, 32]
-	bic	dst, dst, 63
 	stp	q0, q0, [dst, 64]
-	stp	q0, q0, [dst, 96]
-	sub	count, dstend, dst	/* Count is now 128 too large.	*/
-	sub	count, count, 128+64+64	/* Adjust count and bias for loop.  */
-	add	dst, dst, 128
-1:	dc	zva, dst
 	add	dst, dst, 64
 	subs	count, count, 64
-	b.hi	1b
-	stp	q0, q0, [dst, 0]
-	stp	q0, q0, [dst, 32]
+	b.hi	L(no_zva_loop)
 	stp	q0, q0, [dstend, -64]
 	stp	q0, q0, [dstend, -32]
 	ret
 
 #ifndef ZVA64_ONLY
-	.p2align 3
+	.p2align 4
 L(zva_128):
-	cmp	tmp1w, 5	/* ZVA size is 128 bytes.  */
-	b.ne	L(zva_other)
+	cmp	zva_val, 5		/* ZVA size is 128 bytes.  */
+	b.ne	L(no_zva)
 
-	str	q0, [dst, 16]
 	stp	q0, q0, [dst, 32]
 	stp	q0, q0, [dst, 64]
 	stp	q0, q0, [dst, 96]
 	bic	dst, dst, 127
 	sub	count, dstend, dst	/* Count is now 128 too large.	*/
-	sub	count, count, 128+128	/* Adjust count and bias for loop.  */
-	add	dst, dst, 128
-1:	dc	zva, dst
-	add	dst, dst, 128
+	sub	count, count, 128 + 128	/* Adjust count and bias for loop.  */
+1:	add	dst, dst, 128
+	dc	zva, dst
 	subs	count, count, 128
 	b.hi	1b
 	stp	q0, q0, [dstend, -128]
@@ -156,35 +157,6 @@ L(zva_128):
 	stp	q0, q0, [dstend, -64]
 	stp	q0, q0, [dstend, -32]
 	ret
-
-L(zva_other):
-	mov	tmp2w, 4
-	lsl	zva_lenw, tmp2w, tmp1w
-	add	tmp1, zva_len, 64	/* Max alignment bytes written.	 */
-	cmp	count, tmp1
-	blo	L(no_zva)
-
-	sub	tmp2, zva_len, 1
-	add	tmp1, dst, zva_len
-	add	dst, dst, 16
-	subs	count, tmp1, dst	/* Actual alignment bytes to write.  */
-	bic	tmp1, tmp1, tmp2	/* Aligned dc zva start address.  */
-	beq	2f
-1:	stp	q0, q0, [dst], 64
-	stp	q0, q0, [dst, -32]
-	subs	count, count, 64
-	b.hi	1b
-2:	mov	dst, tmp1
-	sub	count, dstend, tmp1	/* Remaining bytes to write.  */
-	subs	count, count, zva_len
-	b.lo	4f
-3:	dc	zva, dst
-	add	dst, dst, zva_len
-	subs	count, count, zva_len
-	b.hs	3b
-4:	add	count, count, zva_len
-	sub	dst, dst, 32		/* Bias dst for tail loop.  */
-	b	L(tail64)
 #endif
 
 END (MEMSET)
diff --git a/sysdeps/aarch64/multiarch/memset_a64fx.S b/sysdeps/aarch64/multiarch/memset_a64fx.S
index 2e6d882fc9..f665b5a891 100644
--- a/sysdeps/aarch64/multiarch/memset_a64fx.S
+++ b/sysdeps/aarch64/multiarch/memset_a64fx.S
@@ -18,7 +18,6 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <sysdeps/aarch64/memset-reg.h>
 
 /* Assumptions:
  *
@@ -36,6 +35,14 @@
 
 	.arch armv8.2-a+sve
 
+#define dstin   x0
+#define valw    w1
+#define count   x2
+#define dst     x3
+#define dstend  x4
+#define tmp1    x5
+#define tmp2    x6
+
 	.macro st1b_unroll first=0, last=7
 	st1b	z0.b, p0, [dst, \first, mul vl]
 	.if \last-\first
diff --git a/sysdeps/aarch64/multiarch/memset_emag.S b/sysdeps/aarch64/multiarch/memset_emag.S
index 6d714ed0e1..cf1b25f2ed 100644
--- a/sysdeps/aarch64/multiarch/memset_emag.S
+++ b/sysdeps/aarch64/multiarch/memset_emag.S
@@ -18,7 +18,6 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include "memset-reg.h"
 
 /* Assumptions:
  *
@@ -26,6 +25,13 @@
  *
  */
 
+#define dstin	x0
+#define val	x1
+#define valw	w1
+#define count	x2
+#define dst	x3
+#define dstend	x4
+
 ENTRY (__memset_emag)
 
 	PTR_ARG (0)
diff --git a/sysdeps/aarch64/multiarch/memset_kunpeng.S b/sysdeps/aarch64/multiarch/memset_kunpeng.S
index 7b21550137..f815c20b03 100644
--- a/sysdeps/aarch64/multiarch/memset_kunpeng.S
+++ b/sysdeps/aarch64/multiarch/memset_kunpeng.S
@@ -18,7 +18,6 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <sysdeps/aarch64/memset-reg.h>
 
 /* Assumptions:
  *
@@ -26,6 +25,12 @@
  *
  */
 
+#define dstin	x0
+#define valw	w1
+#define count	x2
+#define dst	x3
+#define dstend	x4
+
 ENTRY (__memset_kunpeng)
 
 	PTR_ARG (0)
diff --git a/sysdeps/aarch64/multiarch/memset_oryon1.S b/sysdeps/aarch64/multiarch/memset_oryon1.S
index b43a43b54e..6fa28a9bd0 100644
--- a/sysdeps/aarch64/multiarch/memset_oryon1.S
+++ b/sysdeps/aarch64/multiarch/memset_oryon1.S
@@ -19,12 +19,18 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include "memset-reg.h"
 
 /* Assumptions:
    ARMv8-a, AArch64, unaligned accesses
  */
 
+#define dstin	x0
+#define val	x1
+#define valw	w1
+#define count	x2
+#define dst	x3
+#define dstend	x4
+
 ENTRY (__memset_oryon1)
 
 	PTR_ARG (0)
diff --git a/sysdeps/aarch64/strlen.S b/sysdeps/aarch64/strlen.S
index ab2a576cdb..352fb40d3a 100644
--- a/sysdeps/aarch64/strlen.S
+++ b/sysdeps/aarch64/strlen.S
@@ -1,4 +1,5 @@
-/* Copyright (C) 2012-2024 Free Software Foundation, Inc.
+/* Generic optimized strlen using SIMD.
+   Copyright (C) 2012-2024 Free Software Foundation, Inc.
 
    This file is part of the GNU C Library.
 
@@ -56,36 +57,50 @@ ENTRY (STRLEN)
 	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64 */
 	fmov	synd, dend
 	lsr	synd, synd, shift
-	cbz	synd, L(loop)
+	cbz	synd, L(next16)
 
 	rbit	synd, synd
 	clz	result, synd
 	lsr	result, result, 2
 	ret
 
+L(next16):
+	ldr	data, [src, 16]
+	cmeq	vhas_nul.16b, vdata.16b, 0
+	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64 */
+	fmov	synd, dend
+	cbz	synd, L(loop)
+	add	src, src, 16
+#ifndef __AARCH64EB__
+	rbit	synd, synd
+#endif
+	sub	result, src, srcin
+	clz	tmp, synd
+	add	result, result, tmp, lsr 2
+	ret
+
 	.p2align 5
 L(loop):
-	ldr	data, [src, 16]
+	ldr	data, [src, 32]!
 	cmeq	vhas_nul.16b, vdata.16b, 0
-	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
+	addhn	vend.8b, vhas_nul.8h, vhas_nul.8h
 	fmov	synd, dend
 	cbnz	synd, L(loop_end)
-	ldr	data, [src, 32]!
+	ldr	data, [src, 16]
 	cmeq	vhas_nul.16b, vdata.16b, 0
-	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
+	addhn	vend.8b, vhas_nul.8h, vhas_nul.8h
 	fmov	synd, dend
 	cbz	synd, L(loop)
-	sub	src, src, 16
+	add	src, src, 16
 L(loop_end):
-	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64 */
-	sub	result, src, srcin
-	fmov	synd, dend
+	sub	result, shift, src, lsl 2	/* (srcin - src) << 2.  */
 #ifndef __AARCH64EB__
 	rbit	synd, synd
+	sub	result, result, 3
 #endif
-	add	result, result, 16
 	clz	tmp, synd
-	add	result, result, tmp, lsr 2
+	sub	result, tmp, result
+	lsr	result, result, 2
 	ret
 
 END (STRLEN)
diff --git a/sysdeps/alpha/fpu/libm-test-ulps b/sysdeps/alpha/fpu/libm-test-ulps
index e28c2af683..5c6175406b 100644
--- a/sysdeps/alpha/fpu/libm-test-ulps
+++ b/sysdeps/alpha/fpu/libm-test-ulps
@@ -1410,22 +1410,18 @@ ldouble: 3
 
 Function: "tgamma":
 double: 9
-float: 8
 ldouble: 4
 
 Function: "tgamma_downward":
 double: 9
-float: 7
 ldouble: 5
 
 Function: "tgamma_towardzero":
 double: 9
-float: 7
 ldouble: 5
 
 Function: "tgamma_upward":
 double: 9
-float: 8
 ldouble: 4
 
 Function: "y0":
diff --git a/sysdeps/arc/bits/endianness.h b/sysdeps/arc/bits/endianness.h
index 8f17ca84b4..4a6e7d4c15 100644
--- a/sysdeps/arc/bits/endianness.h
+++ b/sysdeps/arc/bits/endianness.h
@@ -5,11 +5,7 @@
 # error "Never use <bits/endian.h> directly; include <endian.h> instead."
 #endif
 
-/* ARC has selectable endianness.  */
-#ifdef __BIG_ENDIAN__
-# define __BYTE_ORDER __BIG_ENDIAN
-#else
-# define __BYTE_ORDER __LITTLE_ENDIAN
-#endif
+/* Only little-endian ARC is supported.  */
+#define __BYTE_ORDER __LITTLE_ENDIAN
 
 #endif /* bits/endianness.h */
diff --git a/sysdeps/arc/configure b/sysdeps/arc/configure
index 08e4f23f89..b4192983bf 100644
--- a/sysdeps/arc/configure
+++ b/sysdeps/arc/configure
@@ -10,7 +10,7 @@ libc_cv_asm_line_sep='`'
 printf "%s\n" "#define ASM_LINE_SEP $libc_cv_asm_line_sep" >>confdefs.h
 
 
-# For big endian ABI, generate a symbol for selecting right dynamic linker
+# ARC big endian ABI is no longer supported.
 
 
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for egrep -e" >&5
@@ -175,13 +175,8 @@ fi
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_arc_be" >&5
 printf "%s\n" "$libc_cv_arc_be" >&6; }
 if test $libc_cv_arc_be = yes; then
-  # For shlib-versions.
-  printf "%s\n" "#define HAVE_ARC_BE 1" >>confdefs.h
-
-  config_vars="$config_vars
-default-abi = arcbe"
-else
-  config_vars="$config_vars
-default-abi = arcle"
+  as_fn_error $? "big-endian not supported" "$LINENO" 5
 fi
+config_vars="$config_vars
+default-abi = arcle"
 
diff --git a/sysdeps/arc/configure.ac b/sysdeps/arc/configure.ac
index 619da4e088..48dc6f5b9b 100644
--- a/sysdeps/arc/configure.ac
+++ b/sysdeps/arc/configure.ac
@@ -9,7 +9,7 @@ libc_cv_have_sdata_section=no
 libc_cv_asm_line_sep='`'
 AC_DEFINE_UNQUOTED(ASM_LINE_SEP, $libc_cv_asm_line_sep)
 
-# For big endian ABI, generate a symbol for selecting right dynamic linker
+# ARC big endian ABI is no longer supported.
 AC_CACHE_CHECK([for big endian],
   [libc_cv_arc_be],
   [AC_EGREP_CPP(yes,[#ifdef __BIG_ENDIAN__
@@ -17,9 +17,6 @@ AC_CACHE_CHECK([for big endian],
                      #endif
   ], libc_cv_arc_be=yes, libc_cv_arc_be=no)])
 if test $libc_cv_arc_be = yes; then
-  # For shlib-versions.
-  AC_DEFINE(HAVE_ARC_BE)
-  LIBC_CONFIG_VAR([default-abi], [arcbe])
-else
-  LIBC_CONFIG_VAR([default-abi], [arcle])
+  AC_MSG_ERROR([big-endian not supported])
 fi
+LIBC_CONFIG_VAR([default-abi], [arcle])
diff --git a/sysdeps/arc/fpu/libm-test-ulps b/sysdeps/arc/fpu/libm-test-ulps
index 41c8ef16d7..d1665b9b72 100644
--- a/sysdeps/arc/fpu/libm-test-ulps
+++ b/sysdeps/arc/fpu/libm-test-ulps
@@ -787,6 +787,22 @@ Function: "exp10_upward":
 double: 4
 float: 1
 
+Function: "exp10m1":
+double: 4
+float: 2
+
+Function: "exp10m1_downward":
+double: 3
+float: 3
+
+Function: "exp10m1_towardzero":
+double: 2
+float: 3
+
+Function: "exp10m1_upward":
+double: 5
+float: 3
+
 Function: "exp2":
 double: 1
 float: 1
@@ -801,6 +817,22 @@ Function: "exp2_upward":
 double: 2
 float: 1
 
+Function: "exp2m1":
+double: 2
+float: 2
+
+Function: "exp2m1_downward":
+double: 3
+float: 3
+
+Function: "exp2m1_towardzero":
+double: 3
+float: 2
+
+Function: "exp2m1_upward":
+double: 3
+float: 3
+
 Function: "exp_downward":
 double: 1
 float: 1
@@ -943,6 +975,22 @@ Function: "log10_upward":
 double: 3
 float: 4
 
+Function: "log10p1":
+double: 2
+float: 2
+
+Function: "log10p1_downward":
+double: 2
+float: 3
+
+Function: "log10p1_towardzero":
+double: 3
+float: 2
+
+Function: "log10p1_upward":
+double: 2
+float: 3
+
 Function: "log1p":
 double: 1
 float: 1
@@ -971,6 +1019,22 @@ Function: "log2_upward":
 double: 1
 float: 1
 
+Function: "log2p1":
+double: 2
+float: 2
+
+Function: "log2p1_downward":
+double: 2
+float: 2
+
+Function: "log2p1_towardzero":
+double: 2
+float: 2
+
+Function: "log2p1_upward":
+double: 2
+float: 2
+
 Function: "log_towardzero":
 double: 1
 float: 1
@@ -1093,19 +1157,15 @@ float: 3
 
 Function: "tgamma":
 double: 9
-float: 9
 
 Function: "tgamma_downward":
 double: 9
-float: 9
 
 Function: "tgamma_towardzero":
 double: 9
-float: 8
 
 Function: "tgamma_upward":
 double: 9
-float: 9
 
 Function: "y0":
 double: 3
diff --git a/sysdeps/arc/nofpu/libm-test-ulps b/sysdeps/arc/nofpu/libm-test-ulps
index d3f45957d4..e768024eb5 100644
--- a/sysdeps/arc/nofpu/libm-test-ulps
+++ b/sysdeps/arc/nofpu/libm-test-ulps
@@ -192,9 +192,17 @@ Function: "exp10":
 double: 2
 float: 1
 
+Function: "exp10m1":
+double: 3
+float: 2
+
 Function: "exp2":
 double: 1
 
+Function: "exp2m1":
+double: 2
+float: 2
+
 Function: "expm1":
 double: 1
 float: 1
@@ -205,6 +213,7 @@ float: 7
 
 Function: "hypot":
 double: 1
+float: 1
 
 Function: "j0":
 double: 2
@@ -226,6 +235,10 @@ Function: "log10":
 double: 2
 float: 2
 
+Function: "log10p1":
+double: 1
+float: 2
+
 Function: "log1p":
 double: 1
 float: 1
@@ -234,6 +247,10 @@ Function: "log2":
 double: 2
 float: 1
 
+Function: "log2p1":
+double: 1
+float: 2
+
 Function: "logp1":
 double: 1
 float: 1
@@ -262,7 +279,6 @@ float: 2
 
 Function: "tgamma":
 double: 9
-float: 8
 
 Function: "y0":
 double: 3
diff --git a/sysdeps/arm/libm-test-ulps b/sysdeps/arm/libm-test-ulps
index 6480353d39..21fe98a7bd 100644
--- a/sysdeps/arm/libm-test-ulps
+++ b/sysdeps/arm/libm-test-ulps
@@ -778,20 +778,20 @@ double: 2
 float: 1
 
 Function: "exp10m1":
-double: 2
-float: 1
+double: 3
+float: 2
 
 Function: "exp10m1_downward":
-double: 1
-float: 1
+double: 3
+float: 3
 
 Function: "exp10m1_towardzero":
-double: 1
-float: 1
+double: 2
+float: 3
 
 Function: "exp10m1_upward":
-double: 3
-float: 1
+double: 4
+float: 4
 
 Function: "exp2":
 double: 1
@@ -810,20 +810,20 @@ double: 1
 float: 1
 
 Function: "exp2m1":
-double: 1
-float: 1
+double: 2
+float: 2
 
 Function: "exp2m1_downward":
-double: 2
-float: 1
+double: 3
+float: 3
 
 Function: "exp2m1_towardzero":
-double: 2
-float: 1
+double: 3
+float: 2
 
 Function: "exp2m1_upward":
-double: 1
-float: 1
+double: 3
+float: 3
 
 Function: "exp_downward":
 double: 1
@@ -967,19 +967,19 @@ float: 2
 
 Function: "log10p1":
 double: 1
-float: 1
+float: 2
 
 Function: "log10p1_downward":
 double: 2
-float: 1
+float: 3
 
 Function: "log10p1_towardzero":
-double: 2
+double: 3
 float: 2
 
 Function: "log10p1_upward":
 double: 2
-float: 1
+float: 3
 
 Function: "log1p":
 double: 1
@@ -1152,19 +1152,15 @@ float: 3
 
 Function: "tgamma":
 double: 9
-float: 8
 
 Function: "tgamma_downward":
 double: 9
-float: 7
 
 Function: "tgamma_towardzero":
 double: 9
-float: 7
 
 Function: "tgamma_upward":
 double: 9
-float: 8
 
 Function: "y0":
 double: 3
diff --git a/sysdeps/csky/fpu/libm-test-ulps b/sysdeps/csky/fpu/libm-test-ulps
index fc634f89ca..151df00638 100644
--- a/sysdeps/csky/fpu/libm-test-ulps
+++ b/sysdeps/csky/fpu/libm-test-ulps
@@ -1061,19 +1061,15 @@ float: 3
 
 Function: "tgamma":
 double: 9
-float: 8
 
 Function: "tgamma_downward":
 double: 8
-float: 7
 
 Function: "tgamma_towardzero":
 double: 9
-float: 7
 
 Function: "tgamma_upward":
 double: 9
-float: 8
 
 Function: "y0":
 double: 3
diff --git a/sysdeps/csky/nofpu/libm-test-ulps b/sysdeps/csky/nofpu/libm-test-ulps
index a1e28c8ee0..48697a7b09 100644
--- a/sysdeps/csky/nofpu/libm-test-ulps
+++ b/sysdeps/csky/nofpu/libm-test-ulps
@@ -1092,19 +1092,15 @@ float: 3
 
 Function: "tgamma":
 double: 9
-float: 8
 
 Function: "tgamma_downward":
 double: 5
-float: 5
 
 Function: "tgamma_towardzero":
 double: 5
-float: 4
 
 Function: "tgamma_upward":
 double: 4
-float: 4
 
 Function: "y0":
 double: 3
diff --git a/sysdeps/generic/internal-signals.h b/sysdeps/generic/internal-signals.h
index 3db100be10..e031a96bac 100644
--- a/sysdeps/generic/internal-signals.h
+++ b/sysdeps/generic/internal-signals.h
@@ -20,6 +20,7 @@
 # define __INTERNAL_SIGNALS_H
 
 #include <signal.h>
+#include <internal-sigset.h>
 #include <sigsetops.h>
 #include <stdbool.h>
 #include <stddef.h>
@@ -39,10 +40,32 @@ clear_internal_signals (sigset_t *set)
 {
 }
 
-typedef sigset_t internal_sigset_t;
-
 #define internal_sigemptyset(__s)            __sigemptyset (__s)
+#define internal_sigfillset(__s)             __sigfillset (__s)
 #define internal_sigaddset(__s, __i)         __sigaddset (__s, __i)
 #define internal_sigprocmask(__h, __s, __o)  __sigprocmask (__h, __s, __o)
 
+static inline void
+internal_signal_block_all (internal_sigset_t *oset)
+{
+  internal_sigset_t set;
+  internal_sigfillset (&set);	/* SET now contains every signal.  */
+  internal_sigprocmask (SIG_BLOCK, &set, oset); /* Prior mask goes to OSET.  */
+}
+
+static inline void
+internal_signal_restore_set (const internal_sigset_t *set)
+{
+  internal_sigprocmask (SIG_SETMASK, set, NULL); /* Mask becomes *SET.  */
+}
+
+static inline void
+internal_signal_unblock_signal (int sig)
+{
+  internal_sigset_t set;
+  internal_sigemptyset (&set);
+  internal_sigaddset (&set, sig);	/* SET holds SIG only.  */
+  internal_sigprocmask (SIG_UNBLOCK, &set, NULL); /* Old mask not kept.  */
+}
+
 #endif /* __INTERNAL_SIGNALS_H  */
diff --git a/sysdeps/aarch64/memset-reg.h b/sysdeps/generic/internal-sigset.h
index 6c7f60b37e..80279ffc47 100644
--- a/sysdeps/aarch64/memset-reg.h
+++ b/sysdeps/generic/internal-sigset.h
@@ -1,5 +1,5 @@
-/* Register aliases for memset to be used across implementations.
-   Copyright (C) 2017-2024 Free Software Foundation, Inc.
+/* Internal sigset_t definition.
+   Copyright (C) 2022-2023 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,15 +16,11 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#define dstin	x0
-#define val	x1
-#define valw	w1
-#define count	x2
-#define dst	x3
-#define dstend	x4
-#define tmp1	x5
-#define tmp1w	w5
-#define tmp2	x6
-#define tmp2w	w6
-#define zva_len x7
-#define zva_lenw w7
+#ifndef _INTERNAL_SIGSET_H
+#define _INTERNAL_SIGSET_H
+
+#include <signal.h>
+
+typedef sigset_t internal_sigset_t;	/* Alias of the public sigset_t.  */
+
+#endif /* _INTERNAL_SIGSET_H  */
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index 656e8a3fa0..259ce2e7d6 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -1200,10 +1200,8 @@ extern void _dl_get_tls_static_info (size_t *sizep, size_t *alignp);
 
 extern void _dl_allocate_static_tls (struct link_map *map) attribute_hidden;
 
-/* These are internal entry points to the two halves of _dl_allocate_tls,
-   only used within rtld.c itself at startup time.  */
 extern void *_dl_allocate_tls_storage (void) attribute_hidden;
-extern void *_dl_allocate_tls_init (void *, bool);
+extern void *_dl_allocate_tls_init (void *result, bool main_thread);
 rtld_hidden_proto (_dl_allocate_tls_init)
 
 /* True if the TCB has been set up.  */
@@ -1241,13 +1239,7 @@ extern void *_dl_open (const char *name, int mode, const void *caller,
 extern int _dl_scope_free (void *) attribute_hidden;
 
 
-/* Add module to slot information data.  If DO_ADD is false, only the
-   required memory is allocated.  Must be called with GL
-   (dl_load_tls_lock) acquired.  If the function has already been called
-   for the link map L with !do_add, then this function will not raise
-   an exception, otherwise it is possible that it encounters a memory
-   allocation failure.  */
-extern void _dl_add_to_slotinfo (struct link_map *l, bool do_add)
+extern bool _dl_add_to_slotinfo (struct link_map *l, bool do_add)
   attribute_hidden;
 
 /* Update slot information data for at least the generation of the
diff --git a/sysdeps/generic/mremap-failure.h b/sysdeps/generic/mremap-failure.h
new file mode 100644
index 0000000000..bc0d476368
--- /dev/null
+++ b/sysdeps/generic/mremap-failure.h
@@ -0,0 +1,25 @@
+/* mremap failure handling.  Generic version.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* Return exit value on mremap failure with errno ERR, which this generic
+   version ignores.  EXIT_FAILURE must come from the includer's <stdlib.h>.  */
+static int
+mremap_failure_exit (int err)
+{
+  return EXIT_FAILURE;
+}
diff --git a/sysdeps/generic/syscall_types.h b/sysdeps/generic/syscall_types.h
new file mode 100644
index 0000000000..2ddeaa2b5f
--- /dev/null
+++ b/sysdeps/generic/syscall_types.h
@@ -0,0 +1,25 @@
+/* Types and macros used for syscall issuing.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _SYSCALL_TYPES_H
+#define _SYSCALL_TYPES_H
+
+typedef long int __syscall_arg_t;
+#define __SSC(__x) ((__syscall_arg_t) (__x))	/* Cast __X to __syscall_arg_t.  */
+
+#endif /* _SYSCALL_TYPES_H  */
diff --git a/sysdeps/gnu/Makefile b/sysdeps/gnu/Makefile
index c89eb8ed1c..d23ded40a0 100644
--- a/sysdeps/gnu/Makefile
+++ b/sysdeps/gnu/Makefile
@@ -16,37 +16,67 @@
 # <https://www.gnu.org/licenses/>.
 
 ifeq ($(subdir),login)
-sysdep_routines += setutxent getutxent endutxent getutxid getutxline \
-		   pututxline utmpxname updwtmpx getutmpx getutmp
+sysdep_routines += \
+  endutxent \
+  getutmp \
+  getutmpx \
+  getutxent \
+  getutxid \
+  getutxline \
+  pututxline \
+  setutxent \
+  updwtmpx \
+  utmpxname \
+  # sysdep_routines
 
-sysdep_headers += utmpx.h bits/utmpx.h
+sysdep_headers += \
+  bits/utmpx.h \
+  utmpx.h \
+  # sysdep_headers
 endif
 
 
 ifeq ($(subdir),inet)
-sysdep_headers += netinet/udp.h netinet/ip_icmp.h
+sysdep_headers += \
+  netinet/ip_icmp.h \
+  netinet/udp.h \
+  # sysdep_headers
 endif
 
 
 ifeq ($(subdir),misc)
-sysdep_headers += sys/mtio.h
+sysdep_headers += \
+  sys/mtio.h \
+  # sysdep_headers
 endif
 
 
 ifeq ($(subdir),csu)
-routines += unwind-resume
-shared-only-routines += unwind-resume
+routines += \
+  unwind-resume \
+  # routines
+shared-only-routines += \
+  unwind-resume \
+  # shared-only-routines
 CFLAGS-unwind-resume.c += -fexceptions -fasynchronous-unwind-tables
 endif
 
+
 ifeq ($(subdir),rt)
-librt-sysdep_routines += rt-unwind-resume
-librt-shared-only-routines += rt-unwind-resume
+librt-sysdep_routines += \
+  rt-unwind-resume \
+  # librt-sysdep_routines
+librt-shared-only-routines += \
+  rt-unwind-resume \
+  # librt-shared-only-routines
 CFLAGS-rt-unwind-resume.c += -fexceptions -fasynchronous-unwind-tables
 endif
 
+
 ifeq ($(subdir),sysvipc)
-sysdep_headers += bits/ipc-perm.h \
-		  bits/types/struct_msqid_ds.h \
-		  bits/types/struct_shmid_ds.h
+sysdep_headers += \
+  bits/ipc-perm.h \
+  bits/types/struct_msqid_ds.h \
+  bits/types/struct_shmid_ds.h \
+  # sysdep_headers
 endif
diff --git a/sysdeps/hppa/dl-fptr.h b/sysdeps/hppa/dl-fptr.h
index 7c5eb0bd30..b7fd9cf71a 100644
--- a/sysdeps/hppa/dl-fptr.h
+++ b/sysdeps/hppa/dl-fptr.h
@@ -19,7 +19,28 @@
 #ifndef dl_hppa_fptr_h
 #define dl_hppa_fptr_h 1
 
-#include <sysdeps/generic/dl-fptr.h>
+/* An FDESC is a function descriptor.  */
+
+struct fdesc
+  {
+    ElfW(Addr) ip;	/* code entry point */
+    ElfW(Addr) gp;	/* global pointer */
+  };
+
+struct fdesc_table
+  {
+    struct fdesc_table *next;		/* link to the next fdesc table */
+    unsigned int len;			/* # of entries in fdesc table */
+    volatile unsigned int first_unused;	/* index of first available entry */
+    struct fdesc fdesc[0];		/* entries; zero-length trailing array */
+  };
+
+struct link_map;
+
+extern ElfW(Addr) _dl_boot_fptr_table [];
+
+extern ElfW(Addr) _dl_make_fptr (struct link_map *, const ElfW(Sym) *,
+				 ElfW(Addr));
 
 /* Initialize function pointer code. Call before relocation processing.  */
 extern void _dl_fptr_init (void);
diff --git a/sysdeps/hppa/fpu/libm-test-ulps b/sysdeps/hppa/fpu/libm-test-ulps
index ea5101f6b6..57c1155e8b 100644
--- a/sysdeps/hppa/fpu/libm-test-ulps
+++ b/sysdeps/hppa/fpu/libm-test-ulps
@@ -797,20 +797,20 @@ double: 2
 float: 1
 
 Function: "exp10m1":
-double: 2
-float: 1
+double: 4
+float: 2
 
 Function: "exp10m1_downward":
-double: 1
-float: 1
+double: 3
+float: 3
 
 Function: "exp10m1_towardzero":
-double: 1
-float: 1
+double: 2
+float: 3
 
 Function: "exp10m1_upward":
-double: 3
-float: 1
+double: 5
+float: 4
 
 Function: "exp2":
 double: 1
@@ -829,20 +829,20 @@ double: 1
 float: 1
 
 Function: "exp2m1":
-double: 1
-float: 1
+double: 2
+float: 2
 
 Function: "exp2m1_downward":
-double: 2
-float: 1
+double: 3
+float: 3
 
 Function: "exp2m1_towardzero":
-double: 2
-float: 1
+double: 3
+float: 2
 
 Function: "exp2m1_upward":
-double: 1
-float: 1
+double: 3
+float: 3
 
 Function: "exp_downward":
 double: 1
@@ -992,20 +992,20 @@ double: 2
 float: 2
 
 Function: "log10p1":
-double: 1
-float: 1
+double: 2
+float: 2
 
 Function: "log10p1_downward":
 double: 2
-float: 1
+float: 3
 
 Function: "log10p1_towardzero":
-double: 2
+double: 3
 float: 2
 
 Function: "log10p1_upward":
 double: 2
-float: 1
+float: 3
 
 Function: "log1p":
 double: 1
@@ -1181,20 +1181,16 @@ float: 3
 
 Function: "tgamma":
 double: 9
-float: 8
 ldouble: 1
 
 Function: "tgamma_downward":
 double: 9
-float: 7
 
 Function: "tgamma_towardzero":
 double: 9
-float: 7
 
 Function: "tgamma_upward":
 double: 9
-float: 8
 
 Function: "y0":
 double: 3
diff --git a/sysdeps/htl/pt-attr-getdetachstate.c b/sysdeps/htl/pt-attr-getdetachstate.c
index 8b077e38f3..da27398260 100644
--- a/sysdeps/htl/pt-attr-getdetachstate.c
+++ b/sysdeps/htl/pt-attr-getdetachstate.c
@@ -17,6 +17,7 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <pthread.h>
+#include <shlib-compat.h>
 #include <pt-internal.h>
 
 int
@@ -26,4 +27,8 @@ __pthread_attr_getdetachstate (const pthread_attr_t *attr, int *detachstate)
   return 0;
 }
 
-weak_alias (__pthread_attr_getdetachstate, pthread_attr_getdetachstate);
+versioned_symbol (libc, __pthread_attr_getdetachstate, pthread_attr_getdetachstate, GLIBC_2_21);
+
+#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_21)
+compat_symbol (libc, __pthread_attr_getdetachstate, pthread_attr_getdetachstate, GLIBC_2_12);
+#endif
diff --git a/sysdeps/htl/pt-attr-getinheritsched.c b/sysdeps/htl/pt-attr-getinheritsched.c
index 078face3eb..ba02e202c3 100644
--- a/sysdeps/htl/pt-attr-getinheritsched.c
+++ b/sysdeps/htl/pt-attr-getinheritsched.c
@@ -17,6 +17,7 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <pthread.h>
+#include <shlib-compat.h>
 #include <pt-internal.h>
 
 int
@@ -26,4 +27,8 @@ __pthread_attr_getinheritsched (const pthread_attr_t *attr, int *inheritsched)
   return 0;
 }
 
-weak_alias (__pthread_attr_getinheritsched, pthread_attr_getinheritsched);
+versioned_symbol (libc, __pthread_attr_getinheritsched, pthread_attr_getinheritsched, GLIBC_2_21);
+
+#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_21)
+compat_symbol (libc, __pthread_attr_getinheritsched, pthread_attr_getinheritsched, GLIBC_2_12);
+#endif
diff --git a/sysdeps/htl/pt-attr-getschedparam.c b/sysdeps/htl/pt-attr-getschedparam.c
index 3fa001871e..35eeedc442 100644
--- a/sysdeps/htl/pt-attr-getschedparam.c
+++ b/sysdeps/htl/pt-attr-getschedparam.c
@@ -19,6 +19,7 @@
 #include <pthread.h>
 #include <sched.h>
 #include <string.h>
+#include <shlib-compat.h>
 
 #include <pt-internal.h>
 
@@ -30,4 +31,8 @@ __pthread_attr_getschedparam (const pthread_attr_t *attr,
   return 0;
 }
 
-weak_alias (__pthread_attr_getschedparam, pthread_attr_getschedparam);
+versioned_symbol (libc, __pthread_attr_getschedparam, pthread_attr_getschedparam, GLIBC_2_21);
+
+#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_21)
+compat_symbol (libc, __pthread_attr_getschedparam, pthread_attr_getschedparam, GLIBC_2_12);
+#endif
diff --git a/sysdeps/htl/pt-attr-getschedpolicy.c b/sysdeps/htl/pt-attr-getschedpolicy.c
index 8b4cc7ce48..5deaf01c5e 100644
--- a/sysdeps/htl/pt-attr-getschedpolicy.c
+++ b/sysdeps/htl/pt-attr-getschedpolicy.c
@@ -17,6 +17,7 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <pthread.h>
+#include <shlib-compat.h>
 #include <pt-internal.h>
 
 int
@@ -26,4 +27,8 @@ __pthread_attr_getschedpolicy (const pthread_attr_t *attr, int *policy)
   return 0;
 }
 
-weak_alias (__pthread_attr_getschedpolicy, pthread_attr_getschedpolicy);
+versioned_symbol (libc, __pthread_attr_getschedpolicy, pthread_attr_getschedpolicy, GLIBC_2_21);
+
+#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_21)
+compat_symbol (libc, __pthread_attr_getschedpolicy, pthread_attr_getschedpolicy, GLIBC_2_12);
+#endif
diff --git a/sysdeps/htl/pt-attr-setdetachstate.c b/sysdeps/htl/pt-attr-setdetachstate.c
index b40557f816..26b6f91dc6 100644
--- a/sysdeps/htl/pt-attr-setdetachstate.c
+++ b/sysdeps/htl/pt-attr-setdetachstate.c
@@ -17,6 +17,7 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <pthread.h>
+#include <shlib-compat.h>
 #include <pt-internal.h>
 
 int
@@ -35,4 +36,8 @@ __pthread_attr_setdetachstate (pthread_attr_t *attr, int detachstate)
   return 0;
 }
 
-weak_alias (__pthread_attr_setdetachstate, pthread_attr_setdetachstate);
+versioned_symbol (libc, __pthread_attr_setdetachstate, pthread_attr_setdetachstate, GLIBC_2_21);
+
+#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_21)
+compat_symbol (libc, __pthread_attr_setdetachstate, pthread_attr_setdetachstate, GLIBC_2_12);
+#endif
diff --git a/sysdeps/htl/pt-attr-setinheritsched.c b/sysdeps/htl/pt-attr-setinheritsched.c
index 533b805b12..4ad0bb1dec 100644
--- a/sysdeps/htl/pt-attr-setinheritsched.c
+++ b/sysdeps/htl/pt-attr-setinheritsched.c
@@ -17,6 +17,7 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <pthread.h>
+#include <shlib-compat.h>
 #include <pt-internal.h>
 
 int
@@ -35,4 +36,8 @@ __pthread_attr_setinheritsched (pthread_attr_t *attr, int inheritsched)
   return 0;
 }
 
-weak_alias (__pthread_attr_setinheritsched, pthread_attr_setinheritsched);
+versioned_symbol (libc, __pthread_attr_setinheritsched, pthread_attr_setinheritsched, GLIBC_2_21);
+
+#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_21)
+compat_symbol (libc, __pthread_attr_setinheritsched, pthread_attr_setinheritsched, GLIBC_2_12);
+#endif
diff --git a/sysdeps/htl/pt-attr-setschedpolicy.c b/sysdeps/htl/pt-attr-setschedpolicy.c
index d8086e8316..9648c774d0 100644
--- a/sysdeps/htl/pt-attr-setschedpolicy.c
+++ b/sysdeps/htl/pt-attr-setschedpolicy.c
@@ -17,6 +17,7 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <pthread.h>
+#include <shlib-compat.h>
 #include <pt-internal.h>
 
 int
@@ -39,4 +40,8 @@ __pthread_attr_setschedpolicy (pthread_attr_t *attr, int policy)
   return 0;
 }
 
-weak_alias (__pthread_attr_setschedpolicy, pthread_attr_setschedpolicy);
+versioned_symbol (libc, __pthread_attr_setschedpolicy, pthread_attr_setschedpolicy, GLIBC_2_21);
+
+#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_21)
+compat_symbol (libc, __pthread_attr_setschedpolicy, pthread_attr_setschedpolicy, GLIBC_2_12);
+#endif
diff --git a/sysdeps/htl/pt-equal.c b/sysdeps/htl/pt-equal.c
index 6411110608..7727cca370 100644
--- a/sysdeps/htl/pt-equal.c
+++ b/sysdeps/htl/pt-equal.c
@@ -17,6 +17,7 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <pthread.h>
+#include <shlib-compat.h>
 #include <pt-internal.h>
 
 /* Return true if __T1 and __T2 both name the same thread.  Otherwise,
@@ -27,4 +28,8 @@ __pthread_equal (pthread_t __t1, pthread_t __t2)
   return __t1 == __t2;
 }
 
-weak_alias (__pthread_equal, pthread_equal);
+versioned_symbol (libc, __pthread_equal, pthread_equal, GLIBC_2_21);
+
+#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_21)
+compat_symbol (libc, __pthread_equal, pthread_equal, GLIBC_2_12);
+#endif
diff --git a/sysdeps/htl/pt-getschedparam.c b/sysdeps/htl/pt-getschedparam.c
index 8ec3d54bd4..0c6a2d0ab3 100644
--- a/sysdeps/htl/pt-getschedparam.c
+++ b/sysdeps/htl/pt-getschedparam.c
@@ -17,6 +17,7 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <pthread.h>
+#include <shlib-compat.h>
 #include <pt-internal.h>
 
 int
@@ -28,4 +29,8 @@ __pthread_getschedparam (pthread_t thread, int *policy,
   return 0;
 }
 
-weak_alias (__pthread_getschedparam, pthread_getschedparam);
+versioned_symbol (libc, __pthread_getschedparam, pthread_getschedparam, GLIBC_2_21);
+
+#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_21)
+compat_symbol (libc, __pthread_getschedparam, pthread_getschedparam, GLIBC_2_12);
+#endif
diff --git a/sysdeps/htl/pt-setschedparam.c b/sysdeps/htl/pt-setschedparam.c
index aea4e7b0e4..5594c9f390 100644
--- a/sysdeps/htl/pt-setschedparam.c
+++ b/sysdeps/htl/pt-setschedparam.c
@@ -17,6 +17,7 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <pthread.h>
+#include <shlib-compat.h>
 #include <pt-internal.h>
 
 int
@@ -26,5 +27,9 @@ __pthread_setschedparam (pthread_t thread, int policy,
   return ENOSYS;
 }
 
-weak_alias (__pthread_setschedparam, pthread_setschedparam);
+versioned_symbol (libc, __pthread_setschedparam, pthread_setschedparam, GLIBC_2_21);
 stub_warning (pthread_setschedparam)
+
+#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_21)
+compat_symbol (libc, __pthread_setschedparam, pthread_setschedparam, GLIBC_2_12);
+#endif
diff --git a/sysdeps/htl/pthreadP.h b/sysdeps/htl/pthreadP.h
index cf8a2efe86..b0c9ceb23a 100644
--- a/sysdeps/htl/pthreadP.h
+++ b/sysdeps/htl/pthreadP.h
@@ -92,6 +92,8 @@ int __pthread_attr_setstack (pthread_attr_t *__attr, void *__stackaddr,
 int __pthread_attr_getstack (const pthread_attr_t *, void **, size_t *);
 void __pthread_testcancel (void);
 
+#define __pthread_raise_internal(__sig) raise (__sig)
+
 libc_hidden_proto (__pthread_self)
 
 #if IS_IN (libpthread)
diff --git a/sysdeps/i386/fpu/libm-test-ulps b/sysdeps/i386/fpu/libm-test-ulps
index 03297e6527..d13b05a220 100644
--- a/sysdeps/i386/fpu/libm-test-ulps
+++ b/sysdeps/i386/fpu/libm-test-ulps
@@ -1154,28 +1154,28 @@ float128: 3
 ldouble: 2
 
 Function: "exp10m1":
-double: 1
+double: 3
 float: 1
-float128: 1
-ldouble: 1
+float128: 3
+ldouble: 4
 
 Function: "exp10m1_downward":
-double: 1
-float: 1
-float128: 3
-ldouble: 2
+double: 3
+float: 3
+float128: 6
+ldouble: 6
 
 Function: "exp10m1_towardzero":
-double: 1
-float: 1
-float128: 3
-ldouble: 2
+double: 2
+float: 2
+float128: 6
+ldouble: 5
 
 Function: "exp10m1_upward":
-double: 3
-float: 1
-float128: 3
-ldouble: 2
+double: 5
+float: 3
+float128: 6
+ldouble: 6
 
 Function: "exp2":
 double: 1
@@ -1197,28 +1197,28 @@ float128: 2
 ldouble: 1
 
 Function: "exp2m1":
-double: 1
-float: 1
-float128: 1
-ldouble: 1
-
-Function: "exp2m1_downward":
 double: 2
 float: 1
 float128: 2
-ldouble: 2
+ldouble: 3
+
+Function: "exp2m1_downward":
+double: 3
+float: 3
+float128: 3
+ldouble: 6
 
 Function: "exp2m1_towardzero":
-double: 2
-float: 1
-float128: 2
-ldouble: 2
+double: 3
+float: 2
+float128: 4
+ldouble: 5
 
 Function: "exp2m1_upward":
-double: 1
-float: 1
-float128: 2
-ldouble: 3
+double: 3
+float: 3
+float128: 5
+ldouble: 6
 
 Function: "exp_downward":
 double: 1
@@ -1427,25 +1427,25 @@ Function: "log10p1":
 double: 1
 float: 2
 float128: 3
-ldouble: 2
+ldouble: 4
 
 Function: "log10p1_downward":
 double: 2
-float: 1
-float128: 2
-ldouble: 4
+float: 3
+float128: 4
+ldouble: 8
 
 Function: "log10p1_towardzero":
 double: 2
 float: 1
-float128: 2
-ldouble: 4
+float128: 3
+ldouble: 8
 
 Function: "log10p1_upward":
 double: 2
-float: 2
-float128: 3
-ldouble: 3
+float: 3
+float128: 4
+ldouble: 6
 
 Function: "log1p":
 double: 1
@@ -1512,7 +1512,7 @@ float128: 2
 ldouble: 3
 
 Function: "log2p1_upward":
-double: 1
+double: 2
 float: 2
 float128: 3
 ldouble: 5
@@ -1699,25 +1699,21 @@ ldouble: 4
 
 Function: "tgamma":
 double: 9
-float: 8
 float128: 4
 ldouble: 5
 
 Function: "tgamma_downward":
 double: 9
-float: 7
 float128: 5
 ldouble: 6
 
 Function: "tgamma_towardzero":
 double: 9
-float: 7
 float128: 5
 ldouble: 6
 
 Function: "tgamma_upward":
 double: 9
-float: 8
 float128: 4
 ldouble: 5
 
diff --git a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
index 85a2456971..0a813aa8dd 100644
--- a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
+++ b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
@@ -1157,28 +1157,28 @@ float128: 3
 ldouble: 2
 
 Function: "exp10m1":
-double: 1
+double: 3
 float: 1
-float128: 1
-ldouble: 1
+float128: 3
+ldouble: 4
 
 Function: "exp10m1_downward":
-double: 1
-float: 1
-float128: 3
-ldouble: 2
+double: 3
+float: 3
+float128: 6
+ldouble: 6
 
 Function: "exp10m1_towardzero":
-double: 1
-float: 1
-float128: 3
-ldouble: 2
+double: 2
+float: 2
+float128: 6
+ldouble: 5
 
 Function: "exp10m1_upward":
-double: 3
-float: 1
-float128: 3
-ldouble: 2
+double: 5
+float: 3
+float128: 6
+ldouble: 6
 
 Function: "exp2":
 double: 1
@@ -1200,28 +1200,28 @@ float128: 2
 ldouble: 1
 
 Function: "exp2m1":
-double: 1
-float: 1
-float128: 1
-ldouble: 1
-
-Function: "exp2m1_downward":
 double: 2
 float: 1
 float128: 2
 ldouble: 3
 
+Function: "exp2m1_downward":
+double: 3
+float: 3
+float128: 3
+ldouble: 6
+
 Function: "exp2m1_towardzero":
-double: 2
-float: 1
-float128: 2
-ldouble: 3
+double: 3
+float: 2
+float128: 4
+ldouble: 6
 
 Function: "exp2m1_upward":
-double: 1
-float: 1
-float128: 2
-ldouble: 3
+double: 3
+float: 3
+float128: 5
+ldouble: 6
 
 Function: "exp_downward":
 double: 1
@@ -1430,25 +1430,25 @@ Function: "log10p1":
 double: 1
 float: 2
 float128: 3
-ldouble: 2
+ldouble: 4
 
 Function: "log10p1_downward":
 double: 2
 float: 1
-float128: 2
-ldouble: 4
+float128: 4
+ldouble: 8
 
 Function: "log10p1_towardzero":
 double: 2
 float: 1
-float128: 2
-ldouble: 4
+float128: 3
+ldouble: 8
 
 Function: "log10p1_upward":
 double: 2
 float: 2
-float128: 3
-ldouble: 3
+float128: 4
+ldouble: 6
 
 Function: "log1p":
 double: 1
@@ -1701,25 +1701,21 @@ ldouble: 4
 
 Function: "tgamma":
 double: 9
-float: 8
 float128: 4
 ldouble: 5
 
 Function: "tgamma_downward":
 double: 9
-float: 7
 float128: 5
 ldouble: 6
 
 Function: "tgamma_towardzero":
 double: 9
-float: 7
 float128: 5
 ldouble: 6
 
 Function: "tgamma_upward":
 double: 8
-float: 8
 float128: 4
 ldouble: 5
 
diff --git a/sysdeps/i386/i686/multiarch/s_fma-fma.c b/sysdeps/i386/i686/multiarch/s_fma-fma.c
index 9d17f67a10..6c3fbc0290 100644
--- a/sysdeps/i386/i686/multiarch/s_fma-fma.c
+++ b/sysdeps/i386/i686/multiarch/s_fma-fma.c
@@ -9,7 +9,7 @@
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.
 
    You should have received a copy of the GNU Lesser General Public
diff --git a/sysdeps/i386/i686/multiarch/s_fma.c b/sysdeps/i386/i686/multiarch/s_fma.c
index 978345bda5..3ae111aac5 100644
--- a/sysdeps/i386/i686/multiarch/s_fma.c
+++ b/sysdeps/i386/i686/multiarch/s_fma.c
@@ -9,7 +9,7 @@
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.
 
    You should have received a copy of the GNU Lesser General Public
diff --git a/sysdeps/i386/i686/multiarch/s_fmaf-fma.c b/sysdeps/i386/i686/multiarch/s_fmaf-fma.c
index f049a60f57..aff92fa5b6 100644
--- a/sysdeps/i386/i686/multiarch/s_fmaf-fma.c
+++ b/sysdeps/i386/i686/multiarch/s_fmaf-fma.c
@@ -9,7 +9,7 @@
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.
 
    You should have received a copy of the GNU Lesser General Public
diff --git a/sysdeps/i386/i686/multiarch/s_fmaf.c b/sysdeps/i386/i686/multiarch/s_fmaf.c
index e92ebb19a7..c9be68257f 100644
--- a/sysdeps/i386/i686/multiarch/s_fmaf.c
+++ b/sysdeps/i386/i686/multiarch/s_fmaf.c
@@ -9,7 +9,7 @@
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.
 
    You should have received a copy of the GNU Lesser General Public
diff --git a/sysdeps/ieee754/dbl-64/gamma_productf.c b/sysdeps/ieee754/dbl-64/gamma_productf.c
index f3596eeae4..1cc8931700 100644
--- a/sysdeps/ieee754/dbl-64/gamma_productf.c
+++ b/sysdeps/ieee754/dbl-64/gamma_productf.c
@@ -1,44 +1 @@
-/* Compute a product of X, X+1, ..., with an error estimate.
-   Copyright (C) 2013-2024 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#include <math.h>
-#include <math-narrow-eval.h>
-#include <math_private.h>
-#include <float.h>
-
-/* Compute the product of X + X_EPS, X + X_EPS + 1, ..., X + X_EPS + N
-   - 1, in the form R * (1 + *EPS) where the return value R is an
-   approximation to the product and *EPS is set to indicate the
-   approximate error in the return value.  X is such that all the
-   values X + 1, ..., X + N - 1 are exactly representable, and X_EPS /
-   X is small enough that factors quadratic in it can be
-   neglected.  */
-
-float
-__gamma_productf (float x, float x_eps, int n, float *eps)
-{
-  double x_full = (double) x + (double) x_eps;
-  double ret = x_full;
-  for (int i = 1; i < n; i++)
-    ret *= x_full + i;
-
-  float fret = math_narrow_eval ((float) ret);
-  *eps = (ret - fret) / fret;
-
-  return fret;
-}
+/* Not needed.  */
diff --git a/sysdeps/ieee754/flt-32/e_gammaf_r.c b/sysdeps/ieee754/flt-32/e_gammaf_r.c
index a9730d61c1..90ed3b4890 100644
--- a/sysdeps/ieee754/flt-32/e_gammaf_r.c
+++ b/sysdeps/ieee754/flt-32/e_gammaf_r.c
@@ -1,215 +1,150 @@
-/* Implementation of gamma function according to ISO C.
-   Copyright (C) 1997-2024 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
+/* Implementation of the gamma function for binary32.
 
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
+Copyright (c) 2023-2024 Alexei Sibidanov.
 
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
+The original version of this file was copied from the CORE-MATH
+project (file src/binary32/tgamma/tgammaf.c, revision a48e352).
 
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
 
-#include <math.h>
-#include <math-narrow-eval.h>
-#include <math_private.h>
-#include <fenv_private.h>
-#include <math-underflow.h>
-#include <float.h>
-#include <libm-alias-finite.h>
-
-/* Coefficients B_2k / 2k(2k-1) of x^-(2k-1) inside exp in Stirling's
-   approximation to gamma function.  */
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
 
-static const float gamma_coeff[] =
-  {
-    0x1.555556p-4f,
-    -0xb.60b61p-12f,
-    0x3.403404p-12f,
-  };
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+ */
 
-#define NCOEFF (sizeof (gamma_coeff) / sizeof (gamma_coeff[0]))
+/* Changes with respect to the original CORE-MATH code:
+   - removed the handling of errno
+     (this is done in the wrapper math/w_tgammaf_compat.c)
+   - usage of math_narrow_eval to deal with underflow/overflow
+   - deal with signgamp
+ */
 
-/* Return gamma (X), for positive X less than 42, in the form R *
-   2^(*EXP2_ADJ), where R is the return value and *EXP2_ADJ is set to
-   avoid overflow or underflow in intermediate calculations.  */
+#include <math.h>
+#include <float.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <libm-alias-finite.h>
+#include <math-narrow-eval.h>
 
-static float
-gammaf_positive (float x, int *exp2_adj)
-{
-  int local_signgam;
-  if (x < 0.5f)
-    {
-      *exp2_adj = 0;
-      return __ieee754_expf (__ieee754_lgammaf_r (x + 1, &local_signgam)) / x;
-    }
-  else if (x <= 1.5f)
-    {
-      *exp2_adj = 0;
-      return __ieee754_expf (__ieee754_lgammaf_r (x, &local_signgam));
-    }
-  else if (x < 2.5f)
-    {
-      *exp2_adj = 0;
-      float x_adj = x - 1;
-      return (__ieee754_expf (__ieee754_lgammaf_r (x_adj, &local_signgam))
-	      * x_adj);
-    }
-  else
-    {
-      float eps = 0;
-      float x_eps = 0;
-      float x_adj = x;
-      float prod = 1;
-      if (x < 4.0f)
-	{
-	  /* Adjust into the range for applying Stirling's
-	     approximation.  */
-	  float n = ceilf (4.0f - x);
-	  x_adj = math_narrow_eval (x + n);
-	  x_eps = (x - (x_adj - n));
-	  prod = __gamma_productf (x_adj - n, x_eps, n, &eps);
-	}
-      /* The result is now gamma (X_ADJ + X_EPS) / (PROD * (1 + EPS)).
-	 Compute gamma (X_ADJ + X_EPS) using Stirling's approximation,
-	 starting by computing pow (X_ADJ, X_ADJ) with a power of 2
-	 factored out.  */
-      float exp_adj = -eps;
-      float x_adj_int = roundf (x_adj);
-      float x_adj_frac = x_adj - x_adj_int;
-      int x_adj_log2;
-      float x_adj_mant = __frexpf (x_adj, &x_adj_log2);
-      if (x_adj_mant < M_SQRT1_2f)
-	{
-	  x_adj_log2--;
-	  x_adj_mant *= 2.0f;
-	}
-      *exp2_adj = x_adj_log2 * (int) x_adj_int;
-      float ret = (__ieee754_powf (x_adj_mant, x_adj)
-		   * __ieee754_exp2f (x_adj_log2 * x_adj_frac)
-		   * __ieee754_expf (-x_adj)
-		   * sqrtf (2 * M_PIf / x_adj)
-		   / prod);
-      exp_adj += x_eps * __ieee754_logf (x_adj);
-      float bsum = gamma_coeff[NCOEFF - 1];
-      float x_adj2 = x_adj * x_adj;
-      for (size_t i = 1; i <= NCOEFF - 1; i++)
-	bsum = bsum / x_adj2 + gamma_coeff[NCOEFF - 1 - i];
-      exp_adj += bsum / x_adj;
-      return ret + ret * __expm1f (exp_adj);
-    }
-}
+typedef union {float f; uint32_t u;} b32u32_u;
+typedef union {double f; uint64_t u;} b64u64_u;
 
 float
 __ieee754_gammaf_r (float x, int *signgamp)
 {
-  int32_t hx;
-  float ret;
+  /* The wrapper in math/w_tgamma_template.c expects *signgamp to be set to a
+     non-negative value if the returned value is gamma(x), and to a negative
+     value if it is -gamma(x).
+     Since the code here directly computes gamma(x), we set it to 1.
+  */
+  if (signgamp != NULL)
+    *signgamp = 1;
 
-  GET_FLOAT_WORD (hx, x);
+  /* List of exceptional cases. Each entry contains the 32-bit encoding u of x,
+     a binary32 approximation f of gamma(x), and a correction term df.  */
+  static const struct {uint32_t u; float f, df;} tb[] = {
+    {0x27de86a9u, 0x1.268266p+47f, 0x1p22f},      // x = 0x1.bd0d52p-48
+    {0x27e05475u, 0x1.242422p+47f, 0x1p22f},      // x = 0x1.c0a8eap-48
+    {0xb63befb3u, -0x1.5cb6e4p+18f, 0x1p-7f},     // x = -0x1.77df66p-19
+    {0x3c7bb570u, 0x1.021d9p+6f, 0x1p-19f},       // x = 0x1.f76aep-7
+    {0x41e886d1u, 0x1.33136ap+98f, 0x1p73f},      // x = 0x1.d10da2p+4
+    {0xc067d177u, 0x1.f6850cp-3f, 0x1p-28f},      // x = -0x1.cfa2eep+1
+    {0xbd99da31u, -0x1.befe66p+3, -0x1p-22f},     // x = -0x1.33b462p-4
+    {0xbf54c45au, -0x1.a6b4ecp+2, +0x1p-23f},     // x = -0x1.a988b4p-1
+    {0x41ee77feu, 0x1.d3631cp+101, -0x1p-76f},    // x = 0x1.dceffcp+4
+    {0x3f843a64u, 0x1.f6c638p-1, 0x1p-26f},       // x = 0x1.0874c8p+0
+  };
 
-  if (__glibc_unlikely ((hx & 0x7fffffff) == 0))
-    {
-      /* Return value for x == 0 is Inf with divide by zero exception.  */
-      *signgamp = 0;
-      return 1.0 / x;
+  b32u32_u t = {.f = x};
+  uint32_t ax = t.u<<1;
+  if(__builtin_expect(ax>=(0xffu<<24), 0)){ /* x=NaN or +/-Inf */
+    if(ax==(0xffu<<24)){ /* x=+/-Inf */
+      if(t.u>>31){ /* x=-Inf */
+        return x / x; /* will raise the "Invalid operation" exception */
+      }
+      return x; /* x=+Inf */
     }
-  if (__builtin_expect (hx < 0, 0)
-      && (uint32_t) hx < 0xff800000 && rintf (x) == x)
-    {
-      /* Return value for integer x < 0 is NaN with invalid exception.  */
-      *signgamp = 0;
-      return (x - x) / (x - x);
+    return x + x; /* x=NaN, where x+x ensures the "Invalid operation"
+                     exception is set if x is sNaN */
+  }
+  double z = x;
+  if(__builtin_expect(ax<0x6d000000u, 0)){ /* |x| < 0x1p-18 */
+    volatile double d = (0x1.fa658c23b1578p-1 - 0x1.d0a118f324b63p-1*z)*z - 0x1.2788cfc6fb619p-1;
+    double f = 1.0/z + d;
+    float r = f;
+    b64u64_u rt = {.f = f};
+    if(((rt.u+2)&0xfffffff) < 4){
+      for(unsigned i=0;i<sizeof(tb)/sizeof(tb[0]);i++)
+	if(t.u==tb[i].u) return tb[i].f + tb[i].df;
     }
-  if (__glibc_unlikely (hx == 0xff800000))
-    {
-      /* x == -Inf.  According to ISO this is NaN.  */
-      *signgamp = 0;
-      return x - x;
+    return r;
+  }
+  float fx = __builtin_floorf(x);
+  if(__builtin_expect(x >= 0x1.18522p+5f, 0)){
+    /* Overflow case. The original CORE-MATH code returns 0x1p127f * 0x1p127f,
+       but apparently some compilers replace this by +Inf.  */
+    return math_narrow_eval (x * 0x1p127f);
+  }
+  /* compute k only after the overflow check, otherwise the cast to integer
+     might overflow */
+  int k = fx;
+  if(__builtin_expect(fx==x, 0)){ /* x is integer */
+    if(x == 0.0f){
+      return 1.0f/x;
     }
-  if (__glibc_unlikely ((hx & 0x7f800000) == 0x7f800000))
-    {
-      /* Positive infinity (return positive infinity) or NaN (return
-	 NaN).  */
-      *signgamp = 0;
-      return x + x;
+    if(x < 0.0f){
+      return 0.0f / 0.0f; /* should raise the "Invalid operation" exception */
     }
+    double t0 = 1, x0 = 1;
+    for(int i=1; i<k; i++, x0 += 1.0) t0 *= x0;
+    return t0;
+  }
+  if(__builtin_expect(x<-42.0f, 0)){ /* negative non-integer */
+    /* For x < -42, x non-integer, |gamma(x)| < 2^-151.  */
+    static const float sgn[2] = {0x1p-127f, -0x1p-127f};
+    /* Underflow always happens.  */
+    return math_narrow_eval (0x1p-127f * sgn[k&1]);
+  }
+  /* The array c[] stores a degree-15 polynomial approximation for gamma(x).  */
+  static const double c[] =
+    {0x1.c9a76be577123p+0, 0x1.8f2754ddcf90dp+0, 0x1.0d1191949419bp+0, 0x1.e1f42cf0ae4a1p-2,
+     0x1.82b358a3ab638p-3, 0x1.e1f2b30cd907bp-5, 0x1.240f6d4071bd8p-6, 0x1.1522c9f3cd012p-8,
+     0x1.1fd0051a0525bp-10, 0x1.9808a8b96c37ep-13, 0x1.b3f78e01152b5p-15, 0x1.49c85a7e1fd04p-18,
+     0x1.471ca49184475p-19, -0x1.368f0b7ed9e36p-23, 0x1.882222f9049efp-23, -0x1.a69ed2042842cp-25};
 
-  if (x >= 36.0f)
-    {
-      /* Overflow.  */
-      *signgamp = 0;
-      ret = math_narrow_eval (FLT_MAX * FLT_MAX);
-      return ret;
-    }
-  else
-    {
-      SET_RESTORE_ROUNDF (FE_TONEAREST);
-      if (x > 0.0f)
-	{
-	  *signgamp = 0;
-	  int exp2_adj;
-	  float tret = gammaf_positive (x, &exp2_adj);
-	  ret = __scalbnf (tret, exp2_adj);
-	}
-      else if (x >= -FLT_EPSILON / 4.0f)
-	{
-	  *signgamp = 0;
-	  ret = 1.0f / x;
-	}
-      else
-	{
-	  float tx = truncf (x);
-	  *signgamp = (tx == 2.0f * truncf (tx / 2.0f)) ? -1 : 1;
-	  if (x <= -42.0f)
-	    /* Underflow.  */
-	    ret = FLT_MIN * FLT_MIN;
-	  else
-	    {
-	      float frac = tx - x;
-	      if (frac > 0.5f)
-		frac = 1.0f - frac;
-	      float sinpix = (frac <= 0.25f
-			      ? __sinf (M_PIf * frac)
-			      : __cosf (M_PIf * (0.5f - frac)));
-	      int exp2_adj;
-	      float tret = M_PIf / (-x * sinpix
-				    * gammaf_positive (-x, &exp2_adj));
-	      ret = __scalbnf (tret, -exp2_adj);
-	      math_check_force_underflow_nonneg (ret);
-	    }
-	}
-      ret = math_narrow_eval (ret);
-    }
-  if (isinf (ret) && x != 0)
-    {
-      if (*signgamp < 0)
-	{
-	  ret = math_narrow_eval (-copysignf (FLT_MAX, ret) * FLT_MAX);
-	  ret = -ret;
-	}
-      else
-	ret = math_narrow_eval (copysignf (FLT_MAX, ret) * FLT_MAX);
-      return ret;
-    }
-  else if (ret == 0)
-    {
-      if (*signgamp < 0)
-	{
-	  ret = math_narrow_eval (-copysignf (FLT_MIN, ret) * FLT_MIN);
-	  ret = -ret;
-	}
-      else
-	ret = math_narrow_eval (copysignf (FLT_MIN, ret) * FLT_MIN);
-      return ret;
+  double m = z - 0x1.7p+1, i = __builtin_roundeven(m), step = __builtin_copysign(1.0,i);
+  double d = m - i, d2 = d*d, d4 = d2*d2, d8 = d4*d4;
+  double f = (c[0] + d*c[1]) + d2*(c[2] + d*c[3]) + d4*((c[4] + d*c[5]) + d2*(c[6] + d*c[7]))
+    + d8*((c[8] + d*c[9]) + d2*(c[10] + d*c[11]) + d4*((c[12] + d*c[13]) + d2*(c[14] + d*c[15])));
+  int jm = __builtin_fabs(i);
+  double w = 1;
+  if(jm){
+    z -= 0.5 + step*0.5;
+    w = z;
+    for(int j=jm-1; j; j--) {z -= step; w *= z;}
+  }
+  if(i<=-0.5) w = 1/w;
+  f *= w;
+  b64u64_u rt = {.f = f};
+  float r = f;
+  /* Deal with exceptional cases.  */
+  if(__builtin_expect(((rt.u+2)&0xfffffff) < 8, 0)){
+    for(unsigned j=0;j<sizeof(tb)/sizeof(tb[0]);j++) {
+      if(t.u==tb[j].u) return tb[j].f + tb[j].df;
     }
-  else
-    return ret;
+  }
+  return r;
 }
 libm_alias_finite (__ieee754_gammaf_r, __gammaf_r)
diff --git a/sysdeps/ieee754/flt-32/math_config.h b/sysdeps/ieee754/flt-32/math_config.h
index 729f22cd4f..dc07ebd459 100644
--- a/sysdeps/ieee754/flt-32/math_config.h
+++ b/sysdeps/ieee754/flt-32/math_config.h
@@ -166,9 +166,9 @@ extern const struct exp2f_data
   uint64_t tab[1 << EXP2F_TABLE_BITS];
   double shift_scaled;
   double poly[EXP2F_POLY_ORDER];
-  double shift;
   double invln2_scaled;
   double poly_scaled[EXP2F_POLY_ORDER];
+  double shift;
 } __exp2f_data attribute_hidden;
 
 #define LOGF_TABLE_BITS 4
diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
index ab6f1da7c0..a15d8e0ab6 100644
--- a/sysdeps/loongarch/dl-machine.h
+++ b/sysdeps/loongarch/dl-machine.h
@@ -223,6 +223,13 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
 	      {
 		td->arg = _dl_make_tlsdesc_dynamic (sym_map,
 			      sym->st_value + reloc->r_addend);
+# ifndef __loongarch_soft_float
+		if (RTLD_SUPPORT_LASX)
+		  td->entry = _dl_tlsdesc_dynamic_lasx;
+		else if (RTLD_SUPPORT_LSX)
+		  td->entry = _dl_tlsdesc_dynamic_lsx;
+		else
+# endif
 		td->entry = _dl_tlsdesc_dynamic;
 	      }
 	    else
diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
new file mode 100644
index 0000000000..3daf991001
--- /dev/null
+++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
@@ -0,0 +1,222 @@
+/* Thread-local storage handling in the ELF dynamic linker.
+   LoongArch version.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define FRAME_SIZE	  (-((-14 * SZREG) & ALMASK))
+
+	/* Handler for dynamic TLS symbols.
+	   Prototype:
+	   _dl_tlsdesc_dynamic (tlsdesc *) ;
+
+	   The second word of the descriptor points to a
+	   tlsdesc_dynamic_arg structure.
+
+	   Returns the offset between the thread pointer and the
+	   object referenced by the argument.
+
+	   ptrdiff_t
+	   _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
+	   {
+	     struct tlsdesc_dynamic_arg *td = tdp->arg;
+	     dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - SIZE_OF_TCB);
+	     if (__glibc_likely (td->gen_count <= dtv[0].counter
+		&& (dtv[td->tlsinfo.ti_module].pointer.val
+		    != TLS_DTV_UNALLOCATED),
+		1))
+	       return dtv[td->tlsinfo.ti_module].pointer.val
+		+ td->tlsinfo.ti_offset
+		- __thread_pointer;
+
+	     return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
+	   }  */
+	.hidden _dl_tlsdesc_dynamic
+	.global	_dl_tlsdesc_dynamic
+	.type	_dl_tlsdesc_dynamic,%function
+	cfi_startproc
+	.align 2
+_dl_tlsdesc_dynamic:
+	/* Save just enough registers to support fast path, if we fall
+	   into slow path we will save additional registers.  */
+	ADDI	sp, sp, -32
+	cfi_adjust_cfa_offset (32)
+	REG_S	t0, sp, 0
+	REG_S	t1, sp, 8
+	REG_S	t2, sp, 16
+	cfi_rel_offset (12, 0)
+	cfi_rel_offset (13, 8)
+	cfi_rel_offset (14, 16)
+
+/* Runtime Storage Layout of Thread-Local Storage
+   TP points to the start of the TLS block.
+
+				      dtv
+Low address	TCB ----------------> dtv0(counter)
+	 TP -->	static_block0  <----- dtv1
+		static_block1  <----- dtv2
+		static_block2  <----- dtv3
+		dynamic_block0 <----- dtv4
+High address	dynamic_block1 <----- dtv5  */
+
+	REG_L	t0, tp, -SIZE_OF_TCB	  /* t0 = dtv */
+	REG_L	a0, a0, TLSDESC_ARG	  /* a0(td) = tdp->arg */
+	REG_L	t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */
+	REG_L	t2, t0, DTV_COUNTER	  /* t2 = dtv[0].counter */
+	/* If dtv[0].counter < td->gen_count, goto slow path.  */
+	bltu	t2, t1, .Lslow
+
+	REG_L	t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */
+	/* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */
+	slli.d	t1, t1, 4
+	add.d	t1, t1, t0  /* t1 = dtv[td->tlsinfo.ti_module] */
+	REG_L	t1, t1, 0   /* t1 = dtv[td->tlsinfo.ti_module].pointer.val */
+	li.d	t2, TLS_DTV_UNALLOCATED
+	/* If dtv[td->tlsinfo.ti_module].pointer.val is TLS_DTV_UNALLOCATED,
+	   goto slow path.  */
+	beq	t1, t2, .Lslow
+
+	cfi_remember_state
+	REG_L	t2, a0, TLSDESC_MODOFF	/* t2 = td->tlsinfo.ti_offset */
+	/* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */
+	add.d	a0, t1, t2
+.Lret:
+	sub.d	a0, a0, tp
+	REG_L	t0, sp, 0
+	REG_L	t1, sp, 8
+	REG_L	t2, sp, 16
+	ADDI	sp, sp, 32
+	cfi_adjust_cfa_offset (-32)
+	RET
+
+.Lslow:
+	/* This is the slow path.  We need to call __tls_get_addr() which
+	   means we need to save and restore all the registers that the
+	   callee will trash.  */
+
+	/* Save the remaining registers that we must treat as caller save.  */
+	cfi_restore_state
+	ADDI	sp, sp, -FRAME_SIZE
+	cfi_adjust_cfa_offset (FRAME_SIZE)
+	REG_S	ra, sp, 0 * SZREG
+	REG_S	a1, sp, 1 * SZREG
+	REG_S	a2, sp, 2 * SZREG
+	REG_S	a3, sp, 3 * SZREG
+	REG_S	a4, sp, 4 * SZREG
+	REG_S	a5, sp, 5 * SZREG
+	REG_S	a6, sp, 6 * SZREG
+	REG_S	a7, sp, 7 * SZREG
+	REG_S	t3, sp, 8 * SZREG
+	REG_S	t4, sp, 9 * SZREG
+	REG_S	t5, sp, 10 * SZREG
+	REG_S	t6, sp, 11 * SZREG
+	REG_S	t7, sp, 12 * SZREG
+	REG_S	t8, sp, 13 * SZREG
+	cfi_rel_offset (1, 0 * SZREG)
+	cfi_rel_offset (5, 1 * SZREG)
+	cfi_rel_offset (6, 2 * SZREG)
+	cfi_rel_offset (7, 3 * SZREG)
+	cfi_rel_offset (8, 4 * SZREG)
+	cfi_rel_offset (9, 5 * SZREG)
+	cfi_rel_offset (10, 6 * SZREG)
+	cfi_rel_offset (11, 7 * SZREG)
+	cfi_rel_offset (15, 8 * SZREG)
+	cfi_rel_offset (16, 9 * SZREG)
+	cfi_rel_offset (17, 10 * SZREG)
+	cfi_rel_offset (18, 11 * SZREG)
+	cfi_rel_offset (19, 12 * SZREG)
+	cfi_rel_offset (20, 13 * SZREG)
+
+#ifndef __loongarch_soft_float
+
+	/* Save fcsr0 register.
+	   There is only one physical fcsr0 register; fcsr1-fcsr3 are aliases
+	   of some fields in fcsr0.  */
+	movfcsr2gr  t0, fcsr0
+	st.w	t0, sp, FRAME_SIZE + 24 /* Use the spare slot above t2.  */
+
+#ifdef USE_LASX
+  #define V_REG_S xvst
+  #define V_REG_L xvld
+  #define V_SPACE (-((-32 * SZXREG) & ALMASK)) /* Space for LASX registers.  */
+  #define V_REG(n) $xr##n
+  #define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,  \
+		 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+  #define V_REGSZ SZXREG
+#elif defined USE_LSX
+  #define V_REG_S vst
+  #define V_REG_L vld
+  #define V_SPACE (-((-32 * SZVREG) & ALMASK)) /* Space for LSX registers.  */
+  #define V_REG(n) $vr##n
+  #define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,  \
+		 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+  #define V_REGSZ SZVREG
+#else
+  #define V_REG_S fst.d
+  #define V_REG_L fld.d
+  #define V_SPACE (-((-24 * SZFREG) & ALMASK)) /* Space for FLOAT registers.  */
+  #define V_REG(n) $f##n
+  #define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23
+  #define V_REGSZ SZFREG
+#endif
+
+	ADDI	sp, sp, -V_SPACE
+	cfi_adjust_cfa_offset (V_SPACE)
+	.irp	i,V_REGS
+        V_REG_S	V_REG(\i), sp, \i * V_REGSZ
+	.endr
+
+#endif /* #ifndef __loongarch_soft_float */
+
+	bl	HIDDEN_JUMPTARGET(__tls_get_addr)
+	ADDI	a0, a0, -TLS_DTV_OFFSET
+
+#ifndef __loongarch_soft_float
+
+	.irp	i,V_REGS
+	V_REG_L	V_REG(\i), sp, \i * V_REGSZ
+	.endr
+	ADDI	sp, sp, V_SPACE
+	cfi_adjust_cfa_offset (-V_SPACE)
+
+	/* Restore fcsr0 register.  */
+	ld.w	t0, sp, FRAME_SIZE + 24
+	movgr2fcsr  fcsr0, t0
+
+#endif /* #ifndef __loongarch_soft_float */
+
+	REG_L	ra, sp, 0 * SZREG
+	REG_L	a1, sp, 1 * SZREG
+	REG_L	a2, sp, 2 * SZREG
+	REG_L	a3, sp, 3 * SZREG
+	REG_L	a4, sp, 4 * SZREG
+	REG_L	a5, sp, 5 * SZREG
+	REG_L	a6, sp, 6 * SZREG
+	REG_L	a7, sp, 7 * SZREG
+	REG_L	t3, sp, 8 * SZREG
+	REG_L	t4, sp, 9 * SZREG
+	REG_L	t5, sp, 10 * SZREG
+	REG_L	t6, sp, 11 * SZREG
+	REG_L	t7, sp, 12 * SZREG
+	REG_L	t8, sp, 13 * SZREG
+	ADDI	sp, sp, FRAME_SIZE
+	cfi_adjust_cfa_offset (-FRAME_SIZE)
+
+	b	.Lret
+	cfi_endproc
+	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
+	.hidden HIDDEN_JUMPTARGET(__tls_get_addr)
diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S
index a6627cc754..be76c07cce 100644
--- a/sysdeps/loongarch/dl-tlsdesc.S
+++ b/sysdeps/loongarch/dl-tlsdesc.S
@@ -59,376 +59,46 @@ _dl_tlsdesc_undefweak:
 	cfi_endproc
 	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
 
-
 #ifdef SHARED
 
-#define FRAME_SIZE	  (-((-14 * SZREG) & ALMASK))
-#define FRAME_SIZE_LSX	  (-((-32 * SZVREG) & ALMASK))
-#define FRAME_SIZE_LASX	  (-((-32 * SZXREG) & ALMASK))
-#define FRAME_SIZE_FLOAT  (-((-24 * SZFREG) & ALMASK))
-
-	/* Handler for dynamic TLS symbols.
-	   Prototype:
-	   _dl_tlsdesc_dynamic (tlsdesc *) ;
-
-	   The second word of the descriptor points to a
-	   tlsdesc_dynamic_arg structure.
-
-	   Returns the offset between the thread pointer and the
-	   object referenced by the argument.
-
-	   ptrdiff_t
-	   _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
-	   {
-	     struct tlsdesc_dynamic_arg *td = tdp->arg;
-	     dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - SIZE_OF_TCB);
-	     if (__glibc_likely (td->gen_count <= dtv[0].counter
-		&& (dtv[td->tlsinfo.ti_module].pointer.val
-		    != TLS_DTV_UNALLOCATED),
-		1))
-	       return dtv[td->tlsinfo.ti_module].pointer.val
-		+ td->tlsinfo.ti_offset
-		- __thread_pointer;
-
-	     return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
-	   }  */
-	.hidden _dl_tlsdesc_dynamic
-	.global	_dl_tlsdesc_dynamic
-	.type	_dl_tlsdesc_dynamic,%function
-	cfi_startproc
-	.align 2
-_dl_tlsdesc_dynamic:
-	/* Save just enough registers to support fast path, if we fall
-	   into slow path we will save additional registers.  */
-	ADDI	sp, sp, -32
-	REG_S	t0, sp, 0
-	REG_S	t1, sp, 8
-	REG_S	t2, sp, 16
-
-/* Runtime Storage Layout of Thread-Local Storage
-   TP point to the start of TLS block.
-
-				      dtv
-Low address	TCB ----------------> dtv0(counter)
-	 TP -->	static_block0  <----- dtv1
-		static_block1  <----- dtv2
-		static_block2  <----- dtv3
-		dynamic_block0 <----- dtv4
-Hign address	dynamic_block1 <----- dtv5  */
-
-	REG_L	t0, tp, -SIZE_OF_TCB	  /* t0 = dtv */
-	REG_L	a0, a0, TLSDESC_ARG	  /* a0(td) = tdp->arg */
-	REG_L	t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */
-	REG_L	t2, t0, DTV_COUNTER	  /* t2 = dtv[0].counter */
-	/* If dtv[0].counter < td->gen_count, goto slow path.  */
-	bltu	t2, t1, .Lslow
-
-	REG_L	t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */
-	/* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */
-	slli.d	t1, t1, 4
-	add.d	t1, t1, t0  /* t1 = dtv[td->tlsinfo.ti_module] */
-	REG_L	t1, t1, 0   /* t1 = dtv[td->tlsinfo.ti_module].pointer.val */
-	li.d	t2, TLS_DTV_UNALLOCATED
-	/* If dtv[td->tlsinfo.ti_module].pointer.val is TLS_DTV_UNALLOCATED,
-	   goto slow path.  */
-	beq	t1, t2, .Lslow
-
-	REG_L	t2, a0, TLSDESC_MODOFF	/* t2 = td->tlsinfo.ti_offset */
-	/* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */
-	add.d	a0, t1, t2
-.Lret:
-	sub.d	a0, a0, tp
-	REG_L	t0, sp, 0
-	REG_L	t1, sp, 8
-	REG_L	t2, sp, 16
-	ADDI	sp, sp, 32
-	RET
-
-.Lslow:
-	/* This is the slow path. We need to call __tls_get_addr() which
-	   means we need to save and restore all the register that the
-	   callee will trash.  */
-
-	/* Save the remaining registers that we must treat as caller save.  */
-	ADDI	sp, sp, -FRAME_SIZE
-	REG_S	ra, sp, 0 * SZREG
-	REG_S	a1, sp, 1 * SZREG
-	REG_S	a2, sp, 2 * SZREG
-	REG_S	a3, sp, 3 * SZREG
-	REG_S	a4, sp, 4 * SZREG
-	REG_S	a5, sp, 5 * SZREG
-	REG_S	a6, sp, 6 * SZREG
-	REG_S	a7, sp, 7 * SZREG
-	REG_S	t3, sp, 8 * SZREG
-	REG_S	t4, sp, 9 * SZREG
-	REG_S	t5, sp, 10 * SZREG
-	REG_S	t6, sp, 11 * SZREG
-	REG_S	t7, sp, 12 * SZREG
-	REG_S	t8, sp, 13 * SZREG
-
 #ifndef __loongarch_soft_float
 
-	/* Save fcsr0 register.
-	   Only one physical fcsr0 register, fcsr1-fcsr3 are aliases
-	   of some fields in fcsr0.  */
-	movfcsr2gr  t0, fcsr0
-	st.w	t0, sp, FRAME_SIZE + 24 /* Use the spare slot above t2 */
-
-	/* Whether support LASX.  */
-	la.global   t0, _rtld_global_ro
-	REG_L	t0, t0, GLRO_DL_HWCAP_OFFSET
-	andi	t1, t0, HWCAP_LOONGARCH_LASX
-	beqz	t1, .Llsx
-
-	/* Save 256-bit vector registers.
-	   FIXME: Without vector ABI, save all vector registers.  */
-	ADDI	sp, sp, -FRAME_SIZE_LASX
-	xvst	xr0, sp, 0*SZXREG
-	xvst	xr1, sp, 1*SZXREG
-	xvst	xr2, sp, 2*SZXREG
-	xvst	xr3, sp, 3*SZXREG
-	xvst	xr4, sp, 4*SZXREG
-	xvst	xr5, sp, 5*SZXREG
-	xvst	xr6, sp, 6*SZXREG
-	xvst	xr7, sp, 7*SZXREG
-	xvst	xr8, sp, 8*SZXREG
-	xvst	xr9, sp, 9*SZXREG
-	xvst	xr10, sp, 10*SZXREG
-	xvst	xr11, sp, 11*SZXREG
-	xvst	xr12, sp, 12*SZXREG
-	xvst	xr13, sp, 13*SZXREG
-	xvst	xr14, sp, 14*SZXREG
-	xvst	xr15, sp, 15*SZXREG
-	xvst	xr16, sp, 16*SZXREG
-	xvst	xr17, sp, 17*SZXREG
-	xvst	xr18, sp, 18*SZXREG
-	xvst	xr19, sp, 19*SZXREG
-	xvst	xr20, sp, 20*SZXREG
-	xvst	xr21, sp, 21*SZXREG
-	xvst	xr22, sp, 22*SZXREG
-	xvst	xr23, sp, 23*SZXREG
-	xvst	xr24, sp, 24*SZXREG
-	xvst	xr25, sp, 25*SZXREG
-	xvst	xr26, sp, 26*SZXREG
-	xvst	xr27, sp, 27*SZXREG
-	xvst	xr28, sp, 28*SZXREG
-	xvst	xr29, sp, 29*SZXREG
-	xvst	xr30, sp, 30*SZXREG
-	xvst	xr31, sp, 31*SZXREG
-	b	    .Ltga
-
-.Llsx:
-	/* Whether support LSX.  */
-	andi	t1, t0, HWCAP_LOONGARCH_LSX
-	beqz	t1, .Lfloat
-
-	/* Save 128-bit vector registers.  */
-	ADDI	sp, sp, -FRAME_SIZE_LSX
-	vst	vr0, sp, 0*SZVREG
-	vst	vr1, sp, 1*SZVREG
-	vst	vr2, sp, 2*SZVREG
-	vst	vr3, sp, 3*SZVREG
-	vst	vr4, sp, 4*SZVREG
-	vst	vr5, sp, 5*SZVREG
-	vst	vr6, sp, 6*SZVREG
-	vst	vr7, sp, 7*SZVREG
-	vst	vr8, sp, 8*SZVREG
-	vst	vr9, sp, 9*SZVREG
-	vst	vr10, sp, 10*SZVREG
-	vst	vr11, sp, 11*SZVREG
-	vst	vr12, sp, 12*SZVREG
-	vst	vr13, sp, 13*SZVREG
-	vst	vr14, sp, 14*SZVREG
-	vst	vr15, sp, 15*SZVREG
-	vst	vr16, sp, 16*SZVREG
-	vst	vr17, sp, 17*SZVREG
-	vst	vr18, sp, 18*SZVREG
-	vst	vr19, sp, 19*SZVREG
-	vst	vr20, sp, 20*SZVREG
-	vst	vr21, sp, 21*SZVREG
-	vst	vr22, sp, 22*SZVREG
-	vst	vr23, sp, 23*SZVREG
-	vst	vr24, sp, 24*SZVREG
-	vst	vr25, sp, 25*SZVREG
-	vst	vr26, sp, 26*SZVREG
-	vst	vr27, sp, 27*SZVREG
-	vst	vr28, sp, 28*SZVREG
-	vst	vr29, sp, 29*SZVREG
-	vst	vr30, sp, 30*SZVREG
-	vst	vr31, sp, 31*SZVREG
-	b	    .Ltga
-
-.Lfloat:
-	/* Save float registers.  */
-	ADDI	sp, sp, -FRAME_SIZE_FLOAT
-	FREG_S	fa0, sp, 0*SZFREG
-	FREG_S	fa1, sp, 1*SZFREG
-	FREG_S	fa2, sp, 2*SZFREG
-	FREG_S	fa3, sp, 3*SZFREG
-	FREG_S	fa4, sp, 4*SZFREG
-	FREG_S	fa5, sp, 5*SZFREG
-	FREG_S	fa6, sp, 6*SZFREG
-	FREG_S	fa7, sp, 7*SZFREG
-	FREG_S	ft0, sp, 8*SZFREG
-	FREG_S	ft1, sp, 9*SZFREG
-	FREG_S	ft2, sp, 10*SZFREG
-	FREG_S	ft3, sp, 11*SZFREG
-	FREG_S	ft4, sp, 12*SZFREG
-	FREG_S	ft5, sp, 13*SZFREG
-	FREG_S	ft6, sp, 14*SZFREG
-	FREG_S	ft7, sp, 15*SZFREG
-	FREG_S	ft8, sp, 16*SZFREG
-	FREG_S	ft9, sp, 17*SZFREG
-	FREG_S	ft10, sp, 18*SZFREG
-	FREG_S	ft11, sp, 19*SZFREG
-	FREG_S	ft12, sp, 20*SZFREG
-	FREG_S	ft13, sp, 21*SZFREG
-	FREG_S	ft14, sp, 22*SZFREG
-	FREG_S	ft15, sp, 23*SZFREG
-
-#endif /* #ifndef __loongarch_soft_float */
-
-.Ltga:
-	bl	HIDDEN_JUMPTARGET(__tls_get_addr)
-	ADDI	a0, a0, -TLS_DTV_OFFSET
-
-#ifndef __loongarch_soft_float
-
-	la.global   t0, _rtld_global_ro
-	REG_L	t0, t0, GLRO_DL_HWCAP_OFFSET
-	andi	t1, t0, HWCAP_LOONGARCH_LASX
-	beqz	t1, .Llsx1
-
-	/* Restore 256-bit vector registers.  */
-	xvld	xr0, sp, 0*SZXREG
-	xvld	xr1, sp, 1*SZXREG
-	xvld	xr2, sp, 2*SZXREG
-	xvld	xr3, sp, 3*SZXREG
-	xvld	xr4, sp, 4*SZXREG
-	xvld	xr5, sp, 5*SZXREG
-	xvld	xr6, sp, 6*SZXREG
-	xvld	xr7, sp, 7*SZXREG
-	xvld	xr8, sp, 8*SZXREG
-	xvld	xr9, sp, 9*SZXREG
-	xvld	xr10, sp, 10*SZXREG
-	xvld	xr11, sp, 11*SZXREG
-	xvld	xr12, sp, 12*SZXREG
-	xvld	xr13, sp, 13*SZXREG
-	xvld	xr14, sp, 14*SZXREG
-	xvld	xr15, sp, 15*SZXREG
-	xvld	xr16, sp, 16*SZXREG
-	xvld	xr17, sp, 17*SZXREG
-	xvld	xr18, sp, 18*SZXREG
-	xvld	xr19, sp, 19*SZXREG
-	xvld	xr20, sp, 20*SZXREG
-	xvld	xr21, sp, 21*SZXREG
-	xvld	xr22, sp, 22*SZXREG
-	xvld	xr23, sp, 23*SZXREG
-	xvld	xr24, sp, 24*SZXREG
-	xvld	xr25, sp, 25*SZXREG
-	xvld	xr26, sp, 26*SZXREG
-	xvld	xr27, sp, 27*SZXREG
-	xvld	xr28, sp, 28*SZXREG
-	xvld	xr29, sp, 29*SZXREG
-	xvld	xr30, sp, 30*SZXREG
-	xvld	xr31, sp, 31*SZXREG
-	ADDI	sp, sp, FRAME_SIZE_LASX
-	b .Lfcsr
-
-.Llsx1:
-	andi	t1, t0, HWCAP_LOONGARCH_LSX
-	beqz	t1, .Lfloat1
-
-	/* Restore 128-bit vector registers.  */
-	vld	vr0, sp, 0*SZVREG
-	vld	vr1, sp, 1*SZVREG
-	vld	vr2, sp, 2*SZVREG
-	vld	vr3, sp, 3*SZVREG
-	vld	vr4, sp, 4*SZVREG
-	vld	vr5, sp, 5*SZVREG
-	vld	vr6, sp, 6*SZVREG
-	vld	vr7, sp, 7*SZVREG
-	vld	vr8, sp, 8*SZVREG
-	vld	vr9, sp, 9*SZVREG
-	vld	vr10, sp, 10*SZVREG
-	vld	vr11, sp, 11*SZVREG
-	vld	vr12, sp, 12*SZVREG
-	vld	vr13, sp, 13*SZVREG
-	vld	vr14, sp, 14*SZVREG
-	vld	vr15, sp, 15*SZVREG
-	vld	vr16, sp, 16*SZVREG
-	vld	vr17, sp, 17*SZVREG
-	vld	vr18, sp, 18*SZVREG
-	vld	vr19, sp, 19*SZVREG
-	vld	vr20, sp, 20*SZVREG
-	vld	vr21, sp, 21*SZVREG
-	vld	vr22, sp, 22*SZVREG
-	vld	vr23, sp, 23*SZVREG
-	vld	vr24, sp, 24*SZVREG
-	vld	vr25, sp, 25*SZVREG
-	vld	vr26, sp, 26*SZVREG
-	vld	vr27, sp, 27*SZVREG
-	vld	vr28, sp, 28*SZVREG
-	vld	vr29, sp, 29*SZVREG
-	vld	vr30, sp, 30*SZVREG
-	vld	vr31, sp, 31*SZVREG
-	ADDI	sp, sp, FRAME_SIZE_LSX
-	b	    .Lfcsr
-
-.Lfloat1:
-	/* Restore float registers.  */
-	FREG_L	fa0, sp, 0*SZFREG
-	FREG_L	fa1, sp, 1*SZFREG
-	FREG_L	fa2, sp, 2*SZFREG
-	FREG_L	fa3, sp, 3*SZFREG
-	FREG_L	fa4, sp, 4*SZFREG
-	FREG_L	fa5, sp, 5*SZFREG
-	FREG_L	fa6, sp, 6*SZFREG
-	FREG_L	fa7, sp, 7*SZFREG
-	FREG_L	ft0, sp, 8*SZFREG
-	FREG_L	ft1, sp, 9*SZFREG
-	FREG_L	ft2, sp, 10*SZFREG
-	FREG_L	ft3, sp, 11*SZFREG
-	FREG_L	ft4, sp, 12*SZFREG
-	FREG_L	ft5, sp, 13*SZFREG
-	FREG_L	ft6, sp, 14*SZFREG
-	FREG_L	ft7, sp, 15*SZFREG
-	FREG_L	ft8, sp, 16*SZFREG
-	FREG_L	ft9, sp, 17*SZFREG
-	FREG_L	ft10, sp, 18*SZFREG
-	FREG_L	ft11, sp, 19*SZFREG
-	FREG_L	ft12, sp, 20*SZFREG
-	FREG_L	ft13, sp, 21*SZFREG
-	FREG_L	ft14, sp, 22*SZFREG
-	FREG_L	ft15, sp, 23*SZFREG
-	ADDI	sp, sp, FRAME_SIZE_FLOAT
-
-.Lfcsr:
-	/* Restore fcsr0 register.  */
-	ld.w	t0, sp, FRAME_SIZE + 24
-	movgr2fcsr  fcsr0, t0
+#define USE_LASX
+#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx
+#define Lret Lret_lasx
+#define Lslow Lslow_lasx
+#include "dl-tlsdesc-dynamic.h"
+#undef FRAME_SIZE
+#undef V_REG_S
+#undef V_REG_L
+#undef V_SPACE
+#undef V_REG
+#undef V_REGS
+#undef V_REGSZ
+#undef USE_LASX
+#undef _dl_tlsdesc_dynamic
+#undef Lret
+#undef Lslow
+
+#define USE_LSX
+#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx
+#define Lret Lret_lsx
+#define Lslow Lslow_lsx
+#include "dl-tlsdesc-dynamic.h"
+#undef FRAME_SIZE
+#undef V_REG_S
+#undef V_REG_L
+#undef V_SPACE
+#undef V_REG
+#undef V_REGS
+#undef V_REGSZ
+#undef USE_LSX
+#undef _dl_tlsdesc_dynamic
+#undef Lret
+#undef Lslow
 
 #endif /* #ifndef __loongarch_soft_float */
 
-	REG_L	ra, sp, 0 * SZREG
-	REG_L	a1, sp, 1 * SZREG
-	REG_L	a2, sp, 2 * SZREG
-	REG_L	a3, sp, 3 * SZREG
-	REG_L	a4, sp, 4 * SZREG
-	REG_L	a5, sp, 5 * SZREG
-	REG_L	a6, sp, 6 * SZREG
-	REG_L	a7, sp, 7 * SZREG
-	REG_L	t3, sp, 8 * SZREG
-	REG_L	t4, sp, 9 * SZREG
-	REG_L	t5, sp, 10 * SZREG
-	REG_L	t6, sp, 11 * SZREG
-	REG_L	t7, sp, 12 * SZREG
-	REG_L	t8, sp, 13 * SZREG
-	ADDI	sp, sp, FRAME_SIZE
-
-	b	.Lret
-	cfi_endproc
-	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
-	.hidden HIDDEN_JUMPTARGET(__tls_get_addr)
+#include "dl-tlsdesc-dynamic.h"
 
 #endif /* #ifdef SHARED */
diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h
index ff8c69cb93..45c43a5b52 100644
--- a/sysdeps/loongarch/dl-tlsdesc.h
+++ b/sysdeps/loongarch/dl-tlsdesc.h
@@ -43,6 +43,10 @@ extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *);
 
 #ifdef SHARED
 extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
+#ifndef __loongarch_soft_float
+extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *);
+extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *);
+#endif
 extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *);
 #endif
 
diff --git a/sysdeps/loongarch/lp64/libm-test-ulps b/sysdeps/loongarch/lp64/libm-test-ulps
index bdfd683454..75e21ee2a8 100644
--- a/sysdeps/loongarch/lp64/libm-test-ulps
+++ b/sysdeps/loongarch/lp64/libm-test-ulps
@@ -976,24 +976,24 @@ float: 1
 ldouble: 3
 
 Function: "exp10m1":
-double: 2
-float: 1
-ldouble: 1
+double: 4
+float: 2
+ldouble: 3
 
 Function: "exp10m1_downward":
-double: 1
-float: 1
-ldouble: 3
+double: 3
+float: 3
+ldouble: 6
 
 Function: "exp10m1_towardzero":
-double: 1
-float: 1
-ldouble: 3
+double: 2
+float: 3
+ldouble: 6
 
 Function: "exp10m1_upward":
-double: 3
-float: 1
-ldouble: 3
+double: 5
+float: 3
+ldouble: 6
 
 Function: "exp2":
 double: 1
@@ -1013,24 +1013,24 @@ float: 1
 ldouble: 2
 
 Function: "exp2m1":
-double: 1
-float: 1
-ldouble: 1
+double: 2
+float: 2
+ldouble: 2
 
 Function: "exp2m1_downward":
-double: 1
-float: 1
-ldouble: 2
+double: 3
+float: 3
+ldouble: 3
 
 Function: "exp2m1_towardzero":
-double: 2
-float: 1
-ldouble: 2
+double: 3
+float: 2
+ldouble: 4
 
 Function: "exp2m1_upward":
-double: 1
-float: 1
-ldouble: 2
+double: 3
+float: 3
+ldouble: 5
 
 Function: "exp_downward":
 double: 1
@@ -1206,24 +1206,24 @@ float: 2
 ldouble: 1
 
 Function: "log10p1":
-double: 1
-float: 1
+double: 2
+float: 2
 ldouble: 3
 
 Function: "log10p1_downward":
 double: 2
-float: 1
-ldouble: 2
+float: 3
+ldouble: 4
 
 Function: "log10p1_towardzero":
-double: 2
+double: 3
 float: 2
-ldouble: 2
+ldouble: 3
 
 Function: "log10p1_upward":
 double: 2
-float: 1
-ldouble: 3
+float: 3
+ldouble: 4
 
 Function: "log1p":
 double: 1
@@ -1263,8 +1263,8 @@ double: 3
 ldouble: 1
 
 Function: "log2p1":
-double: 1
-float: 1
+double: 2
+float: 2
 ldouble: 3
 
 Function: "log2p1_downward":
@@ -1278,9 +1278,9 @@ float: 2
 ldouble: 2
 
 Function: "log2p1_upward":
-double: 1
+double: 2
 float: 2
-ldouble: 2
+ldouble: 3
 
 Function: "log_downward":
 ldouble: 1
@@ -1432,22 +1432,18 @@ ldouble: 3
 
 Function: "tgamma":
 double: 9
-float: 8
 ldouble: 4
 
 Function: "tgamma_downward":
 double: 9
-float: 7
 ldouble: 5
 
 Function: "tgamma_towardzero":
 double: 9
-float: 7
 ldouble: 5
 
 Function: "tgamma_upward":
 double: 9
-float: 8
 ldouble: 4
 
 Function: "y0":
diff --git a/sysdeps/loongarch/tlsdesc.sym b/sysdeps/loongarch/tlsdesc.sym
index 213d0b3074..9f80fceca6 100644
--- a/sysdeps/loongarch/tlsdesc.sym
+++ b/sysdeps/loongarch/tlsdesc.sym
@@ -4,12 +4,6 @@
 #include <link.h>
 #include <dl-tlsdesc.h>
 
-#define SHARED 1
-
-#include <ldsodefs.h>
-
-#define GLRO_offsetof(name) offsetof (struct rtld_global_ro, _##name)
-
 --
 
 -- Abuse tls.h macros to derive offsets relative to the thread register.
@@ -23,6 +17,3 @@ DTV_COUNTER		offsetof(dtv_t, counter)
 TLS_DTV_UNALLOCATED	TLS_DTV_UNALLOCATED
 TLS_DTV_OFFSET		TLS_DTV_OFFSET
 SIZE_OF_TCB		sizeof(tcbhead_t)
-GLRO_DL_HWCAP_OFFSET    GLRO_offsetof (dl_hwcap)
-HWCAP_LOONGARCH_LSX	HWCAP_LOONGARCH_LSX
-HWCAP_LOONGARCH_LASX	HWCAP_LOONGARCH_LASX
diff --git a/sysdeps/m68k/coldfire/fpu/libm-test-ulps b/sysdeps/m68k/coldfire/fpu/libm-test-ulps
index 1b25a70e3f..ae05498719 100644
--- a/sysdeps/m68k/coldfire/fpu/libm-test-ulps
+++ b/sysdeps/m68k/coldfire/fpu/libm-test-ulps
@@ -146,7 +146,6 @@ double: 1
 
 Function: "tgamma":
 double: 1
-float: 1
 
 Function: "y0":
 double: 2
diff --git a/sysdeps/m68k/m680x0/fpu/libm-test-ulps b/sysdeps/m68k/m680x0/fpu/libm-test-ulps
index 6eacfb6b6f..73e98eb479 100644
--- a/sysdeps/m68k/m680x0/fpu/libm-test-ulps
+++ b/sysdeps/m68k/m680x0/fpu/libm-test-ulps
@@ -1208,22 +1208,18 @@ float: 1
 
 Function: "tgamma":
 double: 3
-float: 9
 ldouble: 9
 
 Function: "tgamma_downward":
 double: 3
-float: 9
 ldouble: 9
 
 Function: "tgamma_towardzero":
 double: 3
-float: 9
 ldouble: 9
 
 Function: "tgamma_upward":
 double: 2
-float: 9
 ldouble: 9
 
 Function: "y0":
diff --git a/sysdeps/mach/hurd/faccessat.c b/sysdeps/mach/hurd/faccessat.c
index 998e31962f..6d3d123abb 100644
--- a/sysdeps/mach/hurd/faccessat.c
+++ b/sysdeps/mach/hurd/faccessat.c
@@ -185,6 +185,15 @@ __faccessat_common (int fd, const char *file, int type, int at_flags,
 	return errfunc (err);
     }
 
+  /* If all we wanted was to check for a file existing at the path,
+     then we already got our answer, and we don't need to call
+     file_check_access ().  */
+  if (type == F_OK)
+    {
+      __mach_port_deallocate (__mach_task_self (), io);
+      return 0;
+    }
+
   /* Find out what types of access we are allowed to this file.  */
   err = __file_check_access (io, &allowed);
   __mach_port_deallocate (__mach_task_self (), io);
diff --git a/sysdeps/mach/hurd/fcntl.c b/sysdeps/mach/hurd/fcntl.c
index 9451ed5649..2583f54ef2 100644
--- a/sysdeps/mach/hurd/fcntl.c
+++ b/sysdeps/mach/hurd/fcntl.c
@@ -148,6 +148,7 @@ __libc_fcntl (int fd, int cmd, ...)
 	    cmd = F_SETLKW64;
 	    break;
 	  default:
+	    va_end (ap);
 	    return __hurd_fail (EINVAL);
 	  }
 
@@ -204,7 +205,10 @@ __libc_fcntl (int fd, int cmd, ...)
 		 && fl->l_start != fl64.l_start)
 	     || (sizeof fl->l_len != sizeof fl64.l_len
 		 && fl->l_len != fl64.l_len))
-	      return __hurd_fail (EOVERFLOW);
+	      {
+	        va_end (ap);
+	        return __hurd_fail (EOVERFLOW);
+	      }
 	  }
 
 	result = err ? __hurd_dfail (fd, err) : 0;
diff --git a/sysdeps/mach/hurd/i386/libc.abilist b/sysdeps/mach/hurd/i386/libc.abilist
index 2adf98e71d..dc2c6b505b 100644
--- a/sysdeps/mach/hurd/i386/libc.abilist
+++ b/sysdeps/mach/hurd/i386/libc.abilist
@@ -29,7 +29,17 @@ GLIBC_2.11 mkostemps64 F
 GLIBC_2.11 mkstemps F
 GLIBC_2.11 mkstemps64 F
 GLIBC_2.12 __pthread_self F
+GLIBC_2.12 pthread_attr_getdetachstate F
+GLIBC_2.12 pthread_attr_getinheritsched F
+GLIBC_2.12 pthread_attr_getschedparam F
+GLIBC_2.12 pthread_attr_getschedpolicy F
+GLIBC_2.12 pthread_attr_setdetachstate F
+GLIBC_2.12 pthread_attr_setinheritsched F
+GLIBC_2.12 pthread_attr_setschedpolicy F
+GLIBC_2.12 pthread_equal F
+GLIBC_2.12 pthread_getschedparam F
 GLIBC_2.12 pthread_self F
+GLIBC_2.12 pthread_setschedparam F
 GLIBC_2.13 __fentry__ F
 GLIBC_2.14 syncfs F
 GLIBC_2.15 __fdelt_chk F
diff --git a/sysdeps/mach/hurd/x86_64/vm_param.h b/sysdeps/mach/hurd/x86_64/vm_param.h
index 460445f49e..a6e63a3a1a 100644
--- a/sysdeps/mach/hurd/x86_64/vm_param.h
+++ b/sysdeps/mach/hurd/x86_64/vm_param.h
@@ -15,7 +15,7 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#ifndef _X86_86_VM_PARAM_H
+#ifndef _X86_64_VM_PARAM_H
 #define _X86_64_VM_PARAM_H
 
 /* Arbitrary start of the brk. This is after usual binary and library mappings.  */
diff --git a/sysdeps/mach/sleep.c b/sysdeps/mach/sleep.c
index 572be150bc..aa750c13c5 100644
--- a/sysdeps/mach/sleep.c
+++ b/sysdeps/mach/sleep.c
@@ -43,6 +43,9 @@ __sleep (unsigned int seconds)
   after = time_now ();
   __mach_port_destroy (__mach_task_self (), recv);
 
+  if (after - before > seconds)
+    return 0;
+
   return seconds - (after - before);
 }
 weak_alias (__sleep, sleep)
diff --git a/sysdeps/microblaze/libm-test-ulps b/sysdeps/microblaze/libm-test-ulps
index d3666eb7d4..98ea940ccc 100644
--- a/sysdeps/microblaze/libm-test-ulps
+++ b/sysdeps/microblaze/libm-test-ulps
@@ -257,7 +257,6 @@ float: 2
 
 Function: "tgamma":
 double: 5
-float: 4
 
 Function: "y0":
 double: 2
diff --git a/sysdeps/mips/mips32/libm-test-ulps b/sysdeps/mips/mips32/libm-test-ulps
index 1c27b6615b..cc1600c2ab 100644
--- a/sysdeps/mips/mips32/libm-test-ulps
+++ b/sysdeps/mips/mips32/libm-test-ulps
@@ -778,20 +778,20 @@ double: 2
 float: 1
 
 Function: "exp10m1":
-double: 2
-float: 1
+double: 3
+float: 2
 
 Function: "exp10m1_downward":
-double: 1
-float: 1
+double: 3
+float: 3
 
 Function: "exp10m1_towardzero":
-double: 1
-float: 1
+double: 2
+float: 3
 
 Function: "exp10m1_upward":
-double: 3
-float: 1
+double: 4
+float: 4
 
 Function: "exp2":
 double: 1
@@ -810,20 +810,20 @@ double: 1
 float: 1
 
 Function: "exp2m1":
-double: 1
-float: 1
+double: 2
+float: 2
 
 Function: "exp2m1_downward":
-double: 2
-float: 1
+double: 3
+float: 3
 
 Function: "exp2m1_towardzero":
-double: 2
-float: 1
+double: 3
+float: 2
 
 Function: "exp2m1_upward":
-double: 1
-float: 1
+double: 3
+float: 3
 
 Function: "exp_downward":
 double: 1
@@ -967,19 +967,19 @@ float: 2
 
 Function: "log10p1":
 double: 1
-float: 1
+float: 2
 
 Function: "log10p1_downward":
 double: 2
-float: 1
+float: 3
 
 Function: "log10p1_towardzero":
-double: 2
+double: 3
 float: 2
 
 Function: "log10p1_upward":
 double: 2
-float: 1
+float: 3
 
 Function: "log1p":
 double: 1
@@ -1015,7 +1015,7 @@ float: 3
 
 Function: "log2p1":
 double: 1
-float: 1
+float: 2
 ldouble: 1
 
 Function: "log2p1_downward":
@@ -1029,7 +1029,7 @@ float: 2
 ldouble: 2
 
 Function: "log2p1_upward":
-double: 1
+double: 2
 float: 2
 ldouble: 1
 
@@ -1156,19 +1156,15 @@ float: 3
 
 Function: "tgamma":
 double: 9
-float: 8
 
 Function: "tgamma_downward":
 double: 9
-float: 7
 
 Function: "tgamma_towardzero":
 double: 9
-float: 7
 
 Function: "tgamma_upward":
 double: 9
-float: 8
 
 Function: "y0":
 double: 3
diff --git a/sysdeps/mips/mips64/libm-test-ulps b/sysdeps/mips/mips64/libm-test-ulps
index 902551b448..88dbb20401 100644
--- a/sysdeps/mips/mips64/libm-test-ulps
+++ b/sysdeps/mips/mips64/libm-test-ulps
@@ -977,24 +977,24 @@ float: 1
 ldouble: 3
 
 Function: "exp10m1":
-double: 2
-float: 1
-ldouble: 1
+double: 3
+float: 2
+ldouble: 3
 
 Function: "exp10m1_downward":
-double: 1
-float: 1
-ldouble: 3
+double: 3
+float: 3
+ldouble: 6
 
 Function: "exp10m1_towardzero":
-double: 1
-float: 1
-ldouble: 3
+double: 2
+float: 3
+ldouble: 6
 
 Function: "exp10m1_upward":
-double: 3
-float: 1
-ldouble: 3
+double: 4
+float: 4
+ldouble: 6
 
 Function: "exp2":
 double: 1
@@ -1017,24 +1017,24 @@ float: 1
 ldouble: 2
 
 Function: "exp2m1":
-double: 1
-float: 1
-ldouble: 1
-
-Function: "exp2m1_downward":
 double: 2
-float: 1
+float: 2
 ldouble: 2
 
+Function: "exp2m1_downward":
+double: 3
+float: 3
+ldouble: 3
+
 Function: "exp2m1_towardzero":
-double: 2
-float: 1
-ldouble: 2
+double: 3
+float: 2
+ldouble: 4
 
 Function: "exp2m1_upward":
-double: 1
-float: 1
-ldouble: 2
+double: 3
+float: 3
+ldouble: 5
 
 Function: "exp_downward":
 double: 1
@@ -1212,23 +1212,23 @@ ldouble: 1
 
 Function: "log10p1":
 double: 1
-float: 1
+float: 2
 ldouble: 3
 
 Function: "log10p1_downward":
 double: 2
-float: 1
-ldouble: 2
+float: 3
+ldouble: 4
 
 Function: "log10p1_towardzero":
-double: 2
+double: 3
 float: 2
-ldouble: 2
+ldouble: 3
 
 Function: "log10p1_upward":
 double: 2
-float: 1
-ldouble: 3
+float: 3
+ldouble: 4
 
 Function: "log1p":
 double: 1
@@ -1272,7 +1272,7 @@ ldouble: 1
 
 Function: "log2p1":
 double: 1
-float: 1
+float: 2
 ldouble: 3
 
 Function: "log2p1_downward":
@@ -1286,9 +1286,9 @@ float: 2
 ldouble: 2
 
 Function: "log2p1_upward":
-double: 1
+double: 2
 float: 2
-ldouble: 2
+ldouble: 3
 
 Function: "log_downward":
 float: 2
@@ -1444,22 +1444,18 @@ ldouble: 3
 
 Function: "tgamma":
 double: 9
-float: 8
 ldouble: 4
 
 Function: "tgamma_downward":
 double: 9
-float: 7
 ldouble: 5
 
 Function: "tgamma_towardzero":
 double: 9
-float: 7
 ldouble: 5
 
 Function: "tgamma_upward":
 double: 9
-float: 8
 ldouble: 4
 
 Function: "y0":
diff --git a/sysdeps/nios2/libm-test-ulps b/sysdeps/nios2/libm-test-ulps
index c8d1a722f7..df6e932cb6 100644
--- a/sysdeps/nios2/libm-test-ulps
+++ b/sysdeps/nios2/libm-test-ulps
@@ -266,7 +266,6 @@ float: 2
 
 Function: "tgamma":
 double: 9
-float: 8
 
 Function: "y0":
 double: 3
diff --git a/sysdeps/nptl/_Fork.c b/sysdeps/nptl/_Fork.c
index ef199ddbc3..52c90e61e3 100644
--- a/sysdeps/nptl/_Fork.c
+++ b/sysdeps/nptl/_Fork.c
@@ -17,11 +17,18 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <arch-fork.h>
+#include <libc-lock.h>
 #include <pthreadP.h>
 
 pid_t
 _Fork (void)
 {
+  /* Block all signals to avoid revealing the inconsistent TCB state
+     to a signal handler after fork.  The abort lock should be AS-safe
+     to avoid deadlock if _Fork is called from a signal handler.  */
+  internal_sigset_t original_sigmask;
+  __abort_lock_rdlock (&original_sigmask);
+
   pid_t pid = arch_fork (&THREAD_SELF->tid);
   if (pid == 0)
     {
@@ -44,6 +51,8 @@ _Fork (void)
       INTERNAL_SYSCALL_CALL (set_robust_list, &self->robust_head,
 			     sizeof (struct robust_list_head));
     }
+
+  __abort_lock_unlock (&original_sigmask);
   return pid;
 }
 libc_hidden_def (_Fork)
diff --git a/sysdeps/nptl/cancellation-pc-check.h b/sysdeps/nptl/cancellation-pc-check.h
new file mode 100644
index 0000000000..cb38ad6819
--- /dev/null
+++ b/sysdeps/nptl/cancellation-pc-check.h
@@ -0,0 +1,54 @@
+/* Architecture specific code for pthread cancellation handling.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _NPTL_CANCELLATION_PC_CHECK
+#define _NPTL_CANCELLATION_PC_CHECK
+
+#include <sigcontextinfo.h>
+
+/* For syscalls with side effects (e.g. read, which might return a partial
+   read), the kernel cannot restart the syscall when interrupted by a signal;
+   it must return from the call with whatever partial result.  In this case,
+   the saved program counter is set just after the syscall instruction, so
+   the SIGCANCEL handler should not act on cancellation.
+
+   The __syscall_cancel_arch function, used for all cancellable syscalls,
+   contains two extra markers, __syscall_cancel_arch_start and
+   __syscall_cancel_arch_end.  The former points to just before the initial
+   conditional branch that checks if the thread has received a cancellation
+   request, while the latter points to the instruction after the one
+   responsible for issuing the syscall.
+
+   The function checks whether the program counter (PC) from ucontext_t CTX
+   is within the start and end boundaries of the __syscall_cancel_arch
+   bridge.  Return TRUE if the PC is within the boundary, meaning the
+   syscall does not have any side effects; or FALSE otherwise.  */
+
+static __always_inline bool
+cancellation_pc_check (void *ctx)
+{
+  /* Both markers are defined in syscall_cancel.S.  */
+  extern const char __syscall_cancel_arch_start[1];
+  extern const char __syscall_cancel_arch_end[1];
+
+  uintptr_t pc = sigcontext_get_pc (ctx);
+  return pc >= (uintptr_t) __syscall_cancel_arch_start
+	 && pc < (uintptr_t) __syscall_cancel_arch_end;
+}
+
+#endif
diff --git a/sysdeps/nptl/lowlevellock-futex.h b/sysdeps/nptl/lowlevellock-futex.h
index 278213a37b..c205806300 100644
--- a/sysdeps/nptl/lowlevellock-futex.h
+++ b/sysdeps/nptl/lowlevellock-futex.h
@@ -21,7 +21,6 @@
 
 #ifndef __ASSEMBLER__
 # include <sysdep.h>
-# include <sysdep-cancel.h>
 # include <kernel-features.h>
 #endif
 
@@ -120,21 +119,10 @@
 		     nr_wake, nr_move, mutex, val)
 
 /* Like lll_futex_wait, but acting as a cancellable entrypoint.  */
-# define lll_futex_wait_cancel(futexp, val, private) \
-  ({                                                                   \
-    int __oldtype = LIBC_CANCEL_ASYNC ();			       \
-    long int __err = lll_futex_wait (futexp, val, LLL_SHARED);	       \
-    LIBC_CANCEL_RESET (__oldtype);				       \
-    __err;							       \
-  })
-
-/* Like lll_futex_timed_wait, but acting as a cancellable entrypoint.  */
-# define lll_futex_timed_wait_cancel(futexp, val, timeout, private) \
-  ({									   \
-    int __oldtype = LIBC_CANCEL_ASYNC ();			       	   \
-    long int __err = lll_futex_timed_wait (futexp, val, timeout, private); \
-    LIBC_CANCEL_RESET (__oldtype);					   \
-    __err;								   \
+# define lll_futex_wait_cancel(futexp, val, private)			\
+  ({									\
+     int __op = __lll_private_flag (FUTEX_WAIT, private);		\
+     INTERNAL_SYSCALL_CANCEL (futex, futexp, __op, val, NULL);		\
   })
 
 #endif  /* !__ASSEMBLER__  */
diff --git a/sysdeps/nptl/pthreadP.h b/sysdeps/nptl/pthreadP.h
index 30e8a2d177..c2db165052 100644
--- a/sysdeps/nptl/pthreadP.h
+++ b/sysdeps/nptl/pthreadP.h
@@ -261,10 +261,12 @@ libc_hidden_proto (__pthread_unregister_cancel)
 /* Called when a thread reacts on a cancellation request.  */
 static inline void
 __attribute ((noreturn, always_inline))
-__do_cancel (void)
+__do_cancel (void *result)
 {
   struct pthread *self = THREAD_SELF;
 
+  self->result = result;
+
   /* Make sure we get no more cancellations.  */
   atomic_fetch_or_relaxed (&self->cancelhandling, EXITING_BITMASK);
 
@@ -272,6 +274,13 @@ __do_cancel (void)
 		    THREAD_GETMEM (self, cleanup_jmp_buf));
 }
 
+extern long int __syscall_cancel_arch (volatile int *, __syscall_arg_t nr,
+     __syscall_arg_t arg1, __syscall_arg_t arg2, __syscall_arg_t arg3,
+     __syscall_arg_t arg4, __syscall_arg_t arg5, __syscall_arg_t arg6
+     __SYSCALL_CANCEL7_ARCH_ARG_DEF) attribute_hidden;
+
+extern _Noreturn void __syscall_do_cancel (void) attribute_hidden;
+
 
 /* Internal prototypes.  */
 
@@ -508,6 +517,7 @@ libc_hidden_proto (__pthread_kill)
 extern int __pthread_cancel (pthread_t th);
 extern int __pthread_kill_internal (pthread_t threadid, int signo)
   attribute_hidden;
+extern int __pthread_raise_internal (int signo) attribute_hidden;
 extern void __pthread_exit (void *value) __attribute__ ((__noreturn__));
 libc_hidden_proto (__pthread_exit)
 extern int __pthread_join (pthread_t threadid, void **thread_return);
diff --git a/sysdeps/or1k/fpu/libm-test-ulps b/sysdeps/or1k/fpu/libm-test-ulps
index 59b9f072f5..4ccb136298 100644
--- a/sysdeps/or1k/fpu/libm-test-ulps
+++ b/sysdeps/or1k/fpu/libm-test-ulps
@@ -1066,19 +1066,15 @@ float: 3
 
 Function: "tgamma":
 double: 9
-float: 8
 
 Function: "tgamma_downward":
 double: 9
-float: 9
 
 Function: "tgamma_towardzero":
 double: 9
-float: 8
 
 Function: "tgamma_upward":
 double: 9
-float: 8
 
 Function: "y0":
 double: 3
diff --git a/sysdeps/or1k/nofpu/libm-test-ulps b/sysdeps/or1k/nofpu/libm-test-ulps
index 726855faaa..7087cf9add 100644
--- a/sysdeps/or1k/nofpu/libm-test-ulps
+++ b/sysdeps/or1k/nofpu/libm-test-ulps
@@ -1064,19 +1064,15 @@ float: 3
 
 Function: "tgamma":
 double: 9
-float: 8
 
 Function: "tgamma_downward":
 double: 9
-float: 9
 
 Function: "tgamma_towardzero":
 double: 9
-float: 8
 
 Function: "tgamma_upward":
 double: 9
-float: 8
 
 Function: "y0":
 double: 3
diff --git a/sysdeps/posix/profil.c b/sysdeps/posix/profil.c
index 3b3c87e25a..6f4aa00eeb 100644
--- a/sysdeps/posix/profil.c
+++ b/sysdeps/posix/profil.c
@@ -82,7 +82,7 @@ __profil (u_short *sample_buffer, size_t size, size_t offset, u_int scale)
       if (__setitimer (ITIMER_PROF, &otimer, NULL) < 0)
 	return -1;
       samples = NULL;
-      return __sigaction (SIGPROF, &oact, NULL);
+      return __libc_sigaction (SIGPROF, &oact, NULL);
     }
 
  if (samples)
@@ -90,7 +90,7 @@ __profil (u_short *sample_buffer, size_t size, size_t offset, u_int scale)
       /* Was already turned on.  Restore old timer and signal handler
 	 first.  */
       if (__setitimer (ITIMER_PROF, &otimer, NULL) < 0
-	  || __sigaction (SIGPROF, &oact, NULL) < 0)
+	  || __libc_sigaction (SIGPROF, &oact, NULL) < 0)
 	return -1;
     }
 #else
@@ -114,7 +114,7 @@ __profil (u_short *sample_buffer, size_t size, size_t offset, u_int scale)
 #endif
   act.sa_flags |= SA_RESTART;
   __sigfillset (&act.sa_mask);
-  if (__sigaction (SIGPROF, &act, oact_ptr) < 0)
+  if (__libc_sigaction (SIGPROF, &act, oact_ptr) < 0)
     return -1;
 
   timer.it_value.tv_sec = 0;
diff --git a/sysdeps/posix/tempname.c b/sysdeps/posix/tempname.c
index c00fe0c181..fc30958a0c 100644
--- a/sysdeps/posix/tempname.c
+++ b/sysdeps/posix/tempname.c
@@ -117,6 +117,8 @@ random_bits (random_value *r, random_value s)
      succeed.  */
 #if !_LIBC
   *r = mix_random_values (v, clock ());
+#else
+  *r = v;
 #endif
   return false;
 }
diff --git a/sysdeps/powerpc/fpu/libm-test-ulps b/sysdeps/powerpc/fpu/libm-test-ulps
index 2e038492cd..dcefbcefcf 100644
--- a/sysdeps/powerpc/fpu/libm-test-ulps
+++ b/sysdeps/powerpc/fpu/libm-test-ulps
@@ -1185,28 +1185,28 @@ float128: 3
 ldouble: 4
 
 Function: "exp10m1":
-double: 2
-float: 1
-float128: 1
-ldouble: 2
+double: 4
+float: 2
+float128: 3
+ldouble: 4
 
 Function: "exp10m1_downward":
-double: 1
-float: 1
-float128: 3
+double: 3
+float: 3
+float128: 6
 ldouble: 9
 
 Function: "exp10m1_towardzero":
-double: 1
-float: 1
-float128: 3
+double: 2
+float: 3
+float128: 6
 ldouble: 9
 
 Function: "exp10m1_upward":
-double: 3
-float: 1
-float128: 3
-ldouble: 4
+double: 5
+float: 3
+float128: 6
+ldouble: 8
 
 Function: "exp2":
 double: 1
@@ -1230,28 +1230,28 @@ float128: 2
 ldouble: 2
 
 Function: "exp2m1":
-double: 1
-float: 1
-float128: 1
-ldouble: 2
-
-Function: "exp2m1_downward":
-double: 1
-float: 1
+double: 2
+float: 2
 float128: 2
 ldouble: 3
 
+Function: "exp2m1_downward":
+double: 3
+float: 3
+float128: 3
+ldouble: 7
+
 Function: "exp2m1_towardzero":
-double: 2
-float: 1
-float128: 2
-ldouble: 3
+double: 3
+float: 2
+float128: 4
+ldouble: 8
 
 Function: "exp2m1_upward":
-double: 1
-float: 1
-float128: 2
-ldouble: 3
+double: 3
+float: 3
+float128: 5
+ldouble: 9
 
 Function: "exp_downward":
 double: 1
@@ -1507,28 +1507,28 @@ float128: 1
 ldouble: 2
 
 Function: "log10p1":
-double: 1
-float: 1
+double: 2
+float: 2
 float128: 3
 ldouble: 3
 
 Function: "log10p1_downward":
 double: 2
-float: 1
-float128: 2
-ldouble: 4
+float: 3
+float128: 4
+ldouble: 6
 
 Function: "log10p1_towardzero":
-double: 2
+double: 3
 float: 2
-float128: 2
-ldouble: 4
+float128: 3
+ldouble: 6
 
 Function: "log10p1_upward":
 double: 2
-float: 1
-float128: 3
-ldouble: 4
+float: 3
+float128: 4
+ldouble: 6
 
 Function: "log1p":
 double: 1
@@ -1828,25 +1828,21 @@ ldouble: 6
 
 Function: "tgamma":
 double: 9
-float: 8
 float128: 4
 ldouble: 5
 
 Function: "tgamma_downward":
 double: 9
-float: 7
 float128: 5
 ldouble: 6
 
 Function: "tgamma_towardzero":
 double: 9
-float: 7
 float128: 5
 ldouble: 5
 
 Function: "tgamma_upward":
 double: 9
-float: 8
 float128: 4
 ldouble: 5
 
diff --git a/sysdeps/powerpc/nofpu/libm-test-ulps b/sysdeps/powerpc/nofpu/libm-test-ulps
index dc9b499cc4..14be978dc3 100644
--- a/sysdeps/powerpc/nofpu/libm-test-ulps
+++ b/sysdeps/powerpc/nofpu/libm-test-ulps
@@ -986,24 +986,24 @@ float: 1
 ldouble: 4
 
 Function: "exp10m1":
-double: 2
-float: 1
+double: 3
+float: 2
 ldouble: 3
 
 Function: "exp10m1_downward":
-double: 1
-float: 1
+double: 3
+float: 3
 ldouble: 9
 
 Function: "exp10m1_towardzero":
-double: 1
-float: 1
+double: 2
+float: 3
 ldouble: 9
 
 Function: "exp10m1_upward":
-double: 3
-float: 1
-ldouble: 4
+double: 4
+float: 4
+ldouble: 10
 
 Function: "exp2":
 double: 1
@@ -1026,24 +1026,24 @@ float: 1
 ldouble: 2
 
 Function: "exp2m1":
-double: 1
-float: 1
+double: 2
+float: 2
 ldouble: 2
 
 Function: "exp2m1_downward":
-double: 2
-float: 1
-ldouble: 3
+double: 3
+float: 3
+ldouble: 7
 
 Function: "exp2m1_towardzero":
-double: 2
-float: 1
-ldouble: 3
+double: 3
+float: 2
+ldouble: 8
 
 Function: "exp2m1_upward":
-double: 1
-float: 1
-ldouble: 3
+double: 3
+float: 3
+ldouble: 9
 
 Function: "exp_downward":
 double: 1
@@ -1269,23 +1269,23 @@ ldouble: 2
 
 Function: "log10p1":
 double: 1
-float: 1
+float: 2
 ldouble: 3
 
 Function: "log10p1_downward":
 double: 2
-float: 1
-ldouble: 4
+float: 3
+ldouble: 6
 
 Function: "log10p1_towardzero":
-double: 2
+double: 3
 float: 2
-ldouble: 4
+ldouble: 6
 
 Function: "log10p1_upward":
 double: 2
-float: 1
-ldouble: 3
+float: 3
+ldouble: 6
 
 Function: "log1p":
 double: 1
@@ -1560,22 +1560,18 @@ ldouble: 6
 
 Function: "tgamma":
 double: 9
-float: 8
 ldouble: 5
 
 Function: "tgamma_downward":
 double: 9
-float: 7
 ldouble: 5
 
 Function: "tgamma_towardzero":
 double: 9
-float: 7
 ldouble: 5
 
 Function: "tgamma_upward":
 double: 9
-float: 8
 ldouble: 4
 
 Function: "y0":
diff --git a/sysdeps/powerpc/powerpc32/sysdep.h b/sysdeps/powerpc/powerpc32/sysdep.h
index 62de4ca2e5..852a755c7c 100644
--- a/sysdeps/powerpc/powerpc32/sysdep.h
+++ b/sysdeps/powerpc/powerpc32/sysdep.h
@@ -104,6 +104,9 @@ GOT_LABEL:			;					      \
 # define JUMPTARGET(name) name
 #endif
 
+#define TAIL_CALL_NO_RETURN(__func) \
+    b __func@local
+
 #if defined SHARED && defined PIC && !defined NO_HIDDEN
 # undef HIDDEN_JUMPTARGET
 # define HIDDEN_JUMPTARGET(name) __GI_##name##@local
diff --git a/sysdeps/powerpc/powerpc64/le/Makefile b/sysdeps/powerpc/powerpc64/le/Makefile
index 9d568d4f44..b77775cf95 100644
--- a/sysdeps/powerpc/powerpc64/le/Makefile
+++ b/sysdeps/powerpc/powerpc64/le/Makefile
@@ -129,6 +129,10 @@ CFLAGS-tst-strtod-round.c += $(type-float128-CFLAGS)
 CFLAGS-tst-wcstod-round.c += $(type-float128-CFLAGS)
 CFLAGS-tst-strtod-nan-locale.c += $(type-float128-CFLAGS)
 CFLAGS-tst-wcstod-nan-locale.c += $(type-float128-CFLAGS)
+CFLAGS-tst-strtod1i.c += $(type-float128-CFLAGS)
+CFLAGS-tst-strtod3.c += $(type-float128-CFLAGS)
+CFLAGS-tst-strtod4.c += $(type-float128-CFLAGS)
+CFLAGS-tst-strtod5i.c += $(type-float128-CFLAGS)
 CFLAGS-tst-strtod6.c += $(type-float128-CFLAGS)
 CFLAGS-tst-strfrom.c += $(type-float128-CFLAGS)
 CFLAGS-tst-strfrom-locale.c += $(type-float128-CFLAGS)
diff --git a/sysdeps/powerpc/powerpc64/le/power9/strcpy.S b/sysdeps/powerpc/powerpc64/le/power9/strcpy.S
index 603bde1e39..2f50625a19 100644
--- a/sysdeps/powerpc/powerpc64/le/power9/strcpy.S
+++ b/sysdeps/powerpc/powerpc64/le/power9/strcpy.S
@@ -42,22 +42,48 @@
 
    if USE_AS_STPCPY is defined.
 
-   The implementation can load bytes past a null terminator, but only
-   up to the next 16B boundary, so it never crosses a page.  */
+   This implementation never reads across a page boundary, but may
+   read beyond the NUL terminator.  */
 
-/* Load quadword at addr+offset to vreg, check for null bytes,
+/* Load 4 quadwords, merge into one VR for speed and check for NUL
+   and branch to label if NUL is found.  */
+#define CHECK_64B(offset,addr,label)		\
+	lxv	32+v4,(offset+0)(addr);		\
+	lxv	32+v5,(offset+16)(addr);	\
+	lxv	32+v6,(offset+32)(addr);	\
+	lxv	32+v7,(offset+48)(addr);	\
+	vminub	v14,v4,v5;			\
+	vminub	v15,v6,v7;			\
+	vminub	v16,v14,v15;			\
+	vcmpequb.	v0,v16,v18;		\
+	beq	cr6,$+12;			\
+	li	r7,offset;			\
+	b	L(label);			\
+	stxv	32+v4,(offset+0)(r11);		\
+	stxv	32+v5,(offset+16)(r11);		\
+	stxv	32+v6,(offset+32)(r11);		\
+	stxv	32+v7,(offset+48)(r11)
+
+/* Load quadword at addr+offset to vreg, check for NUL bytes,
    and branch to label if any are found.  */
-#define CHECK16(vreg,offset,addr,label) \
-	lxv	vreg+32,offset(addr);	\
-	vcmpequb. v6,vreg,v18;	\
+#define CHECK_16B(vreg,offset,addr,label)	\
+	lxv	vreg+32,offset(addr);		\
+	vcmpequb.	v15,vreg,v18;		\
 	bne	cr6,L(label);
 
-.machine power9
+/* Store vreg2 with length if NUL is found.  */
+#define STORE_WITH_LEN(vreg1,vreg2,reg)	\
+	vctzlsbb	r8,vreg1;		\
+	addi	r9,r8,1;			\
+	sldi	r9,r9,56;			\
+	stxvl	32+vreg2,reg,r9;
+
+.machine	power9
 ENTRY_TOCLESS (FUNC_NAME, 4)
 	CALL_MCOUNT 2
 
-	vspltisb v18,0		/* Zeroes in v18  */
-	vspltisb v19,-1 	/* 0xFF bytes in v19  */
+	vspltisb	v18,0		/* Zeroes in v18.  */
+	vspltisb	v19,-1		/* 0xFF bytes in v19.  */
 
 	/* Next 16B-aligned address. Prepare address for L(loop).  */
 	addi	r5,r4,16
@@ -70,14 +96,11 @@ ENTRY_TOCLESS (FUNC_NAME, 4)
 	lvsr	v1,0,r4
 	vperm	v0,v19,v0,v1
 
-	vcmpequb. v6,v0,v18	/* 0xff if byte is NULL, 0x00 otherwise  */
+	vcmpequb.	v6,v0,v18	/* 0xff if byte is NUL, 0x00 otherwise.  */
 	beq	cr6,L(no_null)
 
-	/* There's a null byte.  */
-	vctzlsbb r8,v6		/* Number of trailing zeroes  */
-	addi	r9,r8,1 	/* Add null byte.  */
-	sldi	r10,r9,56	/* stxvl wants size in top 8 bits.  */
-	stxvl	32+v0,r3,r10	/* Partial store  */
+	/* There's a NUL byte.  */
+	STORE_WITH_LEN(v6,v0,r3)
 
 #ifdef USE_AS_STPCPY
 	/* stpcpy returns the dest address plus the size not counting the
@@ -87,17 +110,22 @@ ENTRY_TOCLESS (FUNC_NAME, 4)
 	blr
 
 L(no_null):
-	sldi	r10,r8,56	/* stxvl wants size in top 8 bits  */
-	stxvl	32+v0,r3,r10	/* Partial store  */
+	sldi	r10,r8,56	/* stxvl wants size in top 8 bits.  */
+	stxvl	32+v0,r3,r10	/* Partial store.  */
 
+/* The main loop is optimized for longer strings (> 512 bytes),
+   so checking the first bytes in 16B chunks benefits shorter
+   strings a lot.  */
 	.p2align 4
-L(loop):
-	CHECK16(v0,0,r5,tail1)
-	CHECK16(v1,16,r5,tail2)
-	CHECK16(v2,32,r5,tail3)
-	CHECK16(v3,48,r5,tail4)
-	CHECK16(v4,64,r5,tail5)
-	CHECK16(v5,80,r5,tail6)
+L(aligned):
+	CHECK_16B(v0,0,r5,tail1)
+	CHECK_16B(v1,16,r5,tail2)
+	CHECK_16B(v2,32,r5,tail3)
+	CHECK_16B(v3,48,r5,tail4)
+	CHECK_16B(v4,64,r5,tail5)
+	CHECK_16B(v5,80,r5,tail6)
+	CHECK_16B(v6,96,r5,tail7)
+	CHECK_16B(v7,112,r5,tail8)
 
 	stxv	32+v0,0(r11)
 	stxv	32+v1,16(r11)
@@ -105,21 +133,146 @@ L(loop):
 	stxv	32+v3,48(r11)
 	stxv	32+v4,64(r11)
 	stxv	32+v5,80(r11)
+	stxv	32+v6,96(r11)
+	stxv	32+v7,112(r11)
 
-	addi	r5,r5,96
-	addi	r11,r11,96
+	addi	r11,r11,128
+
+	CHECK_16B(v0,128,r5,tail1)
+	CHECK_16B(v1,128+16,r5,tail2)
+	CHECK_16B(v2,128+32,r5,tail3)
+	CHECK_16B(v3,128+48,r5,tail4)
+	CHECK_16B(v4,128+64,r5,tail5)
+	CHECK_16B(v5,128+80,r5,tail6)
+	CHECK_16B(v6,128+96,r5,tail7)
+	CHECK_16B(v7,128+112,r5,tail8)
+
+	stxv	32+v0,0(r11)
+	stxv	32+v1,16(r11)
+	stxv	32+v2,32(r11)
+	stxv	32+v3,48(r11)
+	stxv	32+v4,64(r11)
+	stxv	32+v5,80(r11)
+	stxv	32+v6,96(r11)
+	stxv	32+v7,112(r11)
+
+	addi	r11,r11,128
+
+	CHECK_16B(v0,256,r5,tail1)
+	CHECK_16B(v1,256+16,r5,tail2)
+	CHECK_16B(v2,256+32,r5,tail3)
+	CHECK_16B(v3,256+48,r5,tail4)
+	CHECK_16B(v4,256+64,r5,tail5)
+	CHECK_16B(v5,256+80,r5,tail6)
+	CHECK_16B(v6,256+96,r5,tail7)
+	CHECK_16B(v7,256+112,r5,tail8)
+
+	stxv	32+v0,0(r11)
+	stxv	32+v1,16(r11)
+	stxv	32+v2,32(r11)
+	stxv	32+v3,48(r11)
+	stxv	32+v4,64(r11)
+	stxv	32+v5,80(r11)
+	stxv	32+v6,96(r11)
+	stxv	32+v7,112(r11)
+
+	addi	r11,r11,128
+
+	CHECK_16B(v0,384,r5,tail1)
+	CHECK_16B(v1,384+16,r5,tail2)
+	CHECK_16B(v2,384+32,r5,tail3)
+	CHECK_16B(v3,384+48,r5,tail4)
+	CHECK_16B(v4,384+64,r5,tail5)
+	CHECK_16B(v5,384+80,r5,tail6)
+	CHECK_16B(v6,384+96,r5,tail7)
+	CHECK_16B(v7,384+112,r5,tail8)
+
+	stxv	32+v0,0(r11)
+	stxv	32+v1,16(r11)
+	stxv	32+v2,32(r11)
+	stxv	32+v3,48(r11)
+	stxv	32+v4,64(r11)
+	stxv	32+v5,80(r11)
+	stxv	32+v6,96(r11)
+	stxv	32+v7,112(r11)
+
+	/* Align src pointer down to a 64B boundary.  */
+	addi	r5,r4,512
+	clrrdi	r5,r5,6
+	subf	r7,r4,r5
+	add	r11,r3,r7
+
+/* Switch to a more aggressive approach checking 64B each time.  */
+	.p2align 5
+L(strcpy_loop):
+	CHECK_64B(0,r5,tail_64b)
+	CHECK_64B(64,r5,tail_64b)
+	CHECK_64B(128,r5,tail_64b)
+	CHECK_64B(192,r5,tail_64b)
+
+	CHECK_64B(256,r5,tail_64b)
+	CHECK_64B(256+64,r5,tail_64b)
+	CHECK_64B(256+128,r5,tail_64b)
+	CHECK_64B(256+192,r5,tail_64b)
+	addi	r5,r5,512
+	addi	r11,r11,512
+
+	b	L(strcpy_loop)
+
+	.p2align 5
+L(tail_64b):
+	/* OK, we found a NUL byte.  Let's look for it in the current 64-byte
+	   block and mark it in its corresponding VR.  */
+	add	r11,r11,r7
+	vcmpequb.	v8,v4,v18
+	beq	cr6,L(no_null_16B)
+	/* There's a NUL byte.  */
+	STORE_WITH_LEN(v8,v4,r11)
+#ifdef USE_AS_STPCPY
+	add	r3,r11,r8
+#endif
+	blr
+
+L(no_null_16B):
+	stxv	32+v4,0(r11)
+	vcmpequb.	v8,v5,v18
+	beq	cr6,L(no_null_32B)
+	/* There's a NUL byte.  */
+	addi	r11,r11,16
+	STORE_WITH_LEN(v8,v5,r11)
+#ifdef USE_AS_STPCPY
+	add	r3,r11,r8
+#endif
+	blr
 
-	b	L(loop)
+L(no_null_32B):
+	stxv	32+v5,16(r11)
+	vcmpequb.	v8,v6,v18
+	beq	cr6,L(no_null_48B)
+	/* There's a NUL byte.  */
+	addi	r11,r11,32
+	STORE_WITH_LEN(v8,v6,r11)
+#ifdef USE_AS_STPCPY
+	add	r3,r11,r8
+#endif
+	blr
+
+L(no_null_48B):
+	stxv	32+v6,32(r11)
+	vcmpequb.	v8,v7,v18;
+	/* There's a NUL byte.  */
+	addi	r11,r11,48
+	STORE_WITH_LEN(v8,v7,r11)
+#ifdef USE_AS_STPCPY
+	add	r3,r11,r8
+#endif
+	blr
 
 	.p2align 4
 L(tail1):
-	vctzlsbb r8,v6		/* Number of trailing zeroes  */
-	addi	r9,r8,1		/* Add null terminator  */
-	sldi	r9,r9,56	/* stxvl wants size in top 8 bits  */
-	stxvl	32+v0,r11,r9	/* Partial store  */
+	/* There's a NUL byte.  */
+	STORE_WITH_LEN(v15,v0,r11)
 #ifdef USE_AS_STPCPY
-	/* stpcpy returns the dest address plus the size not counting the
-	   final '\0'.  */
 	add	r3,r11,r8
 #endif
 	blr
@@ -127,11 +280,9 @@ L(tail1):
 	.p2align 4
 L(tail2):
 	stxv	32+v0,0(r11)
-	vctzlsbb r8,v6
-	addi	r9,r8,1
-	sldi	r9,r9,56
+	/* There's a NUL byte.  */
 	addi	r11,r11,16
-	stxvl	32+v1,r11,r9
+	STORE_WITH_LEN(v15,v1,r11)
 #ifdef USE_AS_STPCPY
 	add	r3,r11,r8
 #endif
@@ -141,11 +292,8 @@ L(tail2):
 L(tail3):
 	stxv	32+v0,0(r11)
 	stxv	32+v1,16(r11)
-	vctzlsbb r8,v6
-	addi	r9,r8,1
-	sldi	r9,r9,56
 	addi	r11,r11,32
-	stxvl	32+v2,r11,r9
+	STORE_WITH_LEN(v15,v2,r11)
 #ifdef USE_AS_STPCPY
 	add	r3,r11,r8
 #endif
@@ -156,11 +304,8 @@ L(tail4):
 	stxv	32+v0,0(r11)
 	stxv	32+v1,16(r11)
 	stxv	32+v2,32(r11)
-	vctzlsbb r8,v6
-	addi	r9,r8,1
-	sldi	r9,r9,56
 	addi	r11,r11,48
-	stxvl	32+v3,r11,r9
+	STORE_WITH_LEN(v15,v3,r11)
 #ifdef USE_AS_STPCPY
 	add	r3,r11,r8
 #endif
@@ -172,11 +317,8 @@ L(tail5):
 	stxv	32+v1,16(r11)
 	stxv	32+v2,32(r11)
 	stxv	32+v3,48(r11)
-	vctzlsbb r8,v6
-	addi	r9,r8,1
-	sldi	r9,r9,56
 	addi	r11,r11,64
-	stxvl	32+v4,r11,r9
+	STORE_WITH_LEN(v15,v4,r11)
 #ifdef USE_AS_STPCPY
 	add	r3,r11,r8
 #endif
@@ -189,11 +331,39 @@ L(tail6):
 	stxv	32+v2,32(r11)
 	stxv	32+v3,48(r11)
 	stxv	32+v4,64(r11)
-	vctzlsbb r8,v6
-	addi	r9,r8,1
-	sldi	r9,r9,56
 	addi	r11,r11,80
-	stxvl	32+v5,r11,r9
+	STORE_WITH_LEN(v15,v5,r11)
+#ifdef USE_AS_STPCPY
+	add	r3,r11,r8
+#endif
+	blr
+
+	.p2align 4
+L(tail7):
+	stxv	32+v0,0(r11)
+	stxv	32+v1,16(r11)
+	stxv	32+v2,32(r11)
+	stxv	32+v3,48(r11)
+	stxv	32+v4,64(r11)
+	stxv	32+v5,80(r11)
+	addi	r11,r11,96
+	STORE_WITH_LEN(v15,v6,r11)
+#ifdef USE_AS_STPCPY
+	add	r3,r11,r8
+#endif
+	blr
+
+	.p2align 4
+L(tail8):
+	stxv	32+v0,0(r11)
+	stxv	32+v1,16(r11)
+	stxv	32+v2,32(r11)
+	stxv	32+v3,48(r11)
+	stxv	32+v4,64(r11)
+	stxv	32+v5,80(r11)
+	stxv	32+v6,96(r11)
+	addi	r11,r11,112
+	STORE_WITH_LEN(v15,v7,r11)
 #ifdef USE_AS_STPCPY
 	add	r3,r11,r8
 #endif
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcat-power7.c b/sysdeps/powerpc/powerpc64/multiarch/strcat-power7.c
index d453b6535d..6eda080fe2 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strcat-power7.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcat-power7.c
@@ -8,12 +8,12 @@
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.
 
    You should have received a copy of the GNU Lesser General Public
    License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/ >.  */
+   <https://www.gnu.org/licenses/>.  */
 
 #include <string.h>
 
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcat-power8.c b/sysdeps/powerpc/powerpc64/multiarch/strcat-power8.c
index 0a77812b38..b9cbce2d92 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strcat-power8.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcat-power8.c
@@ -8,12 +8,12 @@
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.
 
    You should have received a copy of the GNU Lesser General Public
    License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/ >.  */
+   <https://www.gnu.org/licenses/>.  */
 
 #include <string.h>
 
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcat-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strcat-ppc64.c
index fd3b08d7e7..67f7d4052c 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strcat-ppc64.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcat-ppc64.c
@@ -8,12 +8,12 @@
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.
 
    You should have received a copy of the GNU Lesser General Public
    License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/ >.  */
+   <https://www.gnu.org/licenses/>.  */
 
 #include <string.h>
 
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncat-power7.c b/sysdeps/powerpc/powerpc64/multiarch/strncat-power7.c
index a848fcd8b4..f3bccf297c 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strncat-power7.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/strncat-power7.c
@@ -8,12 +8,12 @@
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.
 
    You should have received a copy of the GNU Lesser General Public
    License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/ >.  */
+   <https://www.gnu.org/licenses/>.  */
 
 #include <string.h>
 
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncat-power8.c b/sysdeps/powerpc/powerpc64/multiarch/strncat-power8.c
index a31b918879..3b0ea87ec2 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strncat-power8.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/strncat-power8.c
@@ -13,7 +13,7 @@
 
    You should have received a copy of the GNU Lesser General Public
    License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/ >.  */
+   <https://www.gnu.org/licenses/>.  */
 
 #include <string.h>
 
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncat-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strncat-ppc64.c
index 7f4c9f1d4b..b467fdecc1 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strncat-ppc64.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/strncat-ppc64.c
@@ -8,12 +8,12 @@
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.
 
    You should have received a copy of the GNU Lesser General Public
    License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/ >.  */
+   <https://www.gnu.org/licenses/>.  */
 
 #include <string.h>
 
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncpy.c b/sysdeps/powerpc/powerpc64/multiarch/strncpy.c
index af855e6a5c..6e4e2ea14b 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strncpy.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/strncpy.c
@@ -9,12 +9,12 @@
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.
 
    You should have received a copy of the GNU Lesser General Public
    License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/ >.  */
+   <https://www.gnu.org/licenses/>.  */
 
 /* Define multiple versions only for definition in libc. */
 #if IS_IN (libc)
diff --git a/sysdeps/powerpc/powerpc64/sysdep.h b/sysdeps/powerpc/powerpc64/sysdep.h
index c363939e1a..c439b06121 100644
--- a/sysdeps/powerpc/powerpc64/sysdep.h
+++ b/sysdeps/powerpc/powerpc64/sysdep.h
@@ -353,6 +353,25 @@ LT_LABELSUFFIX(name,_name_end): ; \
   DO_CALL (SYS_ify (syscall_name))
 
 #ifdef SHARED
+# define TAIL_CALL_NO_RETURN(__func) \
+    b JUMPTARGET (NOTOC (__func))
+#else
+# define TAIL_CALL_NO_RETURN(__func) \
+    .ifdef .Local ## __func; \
+    b .Local ## __func; \
+    .else; \
+.Local ## __func: \
+    mflr 0; \
+    std 0,FRAME_LR_SAVE(1); \
+    stdu 1,-FRAME_MIN_SIZE(1); \
+    cfi_adjust_cfa_offset(FRAME_MIN_SIZE); \
+    cfi_offset(lr,FRAME_LR_SAVE); \
+    bl JUMPTARGET(__func); \
+    nop; \
+    .endif
+#endif
+
+#ifdef SHARED
 #define TAIL_CALL_SYSCALL_ERROR \
     b JUMPTARGET (NOTOC (__syscall_error))
 #else
diff --git a/sysdeps/pthread/tst-cancel2.c b/sysdeps/pthread/tst-cancel2.c
index ac38b50115..b4f7098235 100644
--- a/sysdeps/pthread/tst-cancel2.c
+++ b/sysdeps/pthread/tst-cancel2.c
@@ -32,6 +32,10 @@ tf (void *arg)
   char buf[100000];
 
   while (write (fd[1], buf, sizeof (buf)) > 0);
+  /* The write can return -1/EPIPE if the pipe was closed before the
+     thread calls write; that error is a side effect that must be reported
+     to the thread, so check for a pending cancellation explicitly.  */
+  pthread_testcancel ();
 
   return (void *) 42l;
 }
diff --git a/sysdeps/riscv/nofpu/libm-test-ulps b/sysdeps/riscv/nofpu/libm-test-ulps
index 9ad64d1d85..4adad138b0 100644
--- a/sysdeps/riscv/nofpu/libm-test-ulps
+++ b/sysdeps/riscv/nofpu/libm-test-ulps
@@ -1361,22 +1361,18 @@ ldouble: 3
 
 Function: "tgamma":
 double: 9
-float: 8
 ldouble: 4
 
 Function: "tgamma_downward":
 double: 5
-float: 5
 ldouble: 5
 
 Function: "tgamma_towardzero":
 double: 5
-float: 4
 ldouble: 5
 
 Function: "tgamma_upward":
 double: 4
-float: 4
 ldouble: 4
 
 Function: "y0":
diff --git a/sysdeps/riscv/rvd/libm-test-ulps b/sysdeps/riscv/rvd/libm-test-ulps
index 1e6c092361..07a8794e48 100644
--- a/sysdeps/riscv/rvd/libm-test-ulps
+++ b/sysdeps/riscv/rvd/libm-test-ulps
@@ -976,24 +976,24 @@ float: 1
 ldouble: 3
 
 Function: "exp10m1":
-double: 2
-float: 1
-ldouble: 1
+double: 4
+float: 2
+ldouble: 3
 
 Function: "exp10m1_downward":
-double: 1
-float: 1
-ldouble: 3
+double: 3
+float: 3
+ldouble: 6
 
 Function: "exp10m1_towardzero":
-double: 1
-float: 1
-ldouble: 3
+double: 2
+float: 3
+ldouble: 6
 
 Function: "exp10m1_upward":
-double: 3
-float: 1
-ldouble: 3
+double: 5
+float: 3
+ldouble: 6
 
 Function: "exp2":
 double: 1
@@ -1013,24 +1013,24 @@ float: 1
 ldouble: 2
 
 Function: "exp2m1":
-double: 1
-float: 1
-ldouble: 1
+double: 2
+float: 2
+ldouble: 2
 
 Function: "exp2m1_downward":
-double: 1
-float: 1
-ldouble: 2
+double: 3
+float: 3
+ldouble: 3
 
 Function: "exp2m1_towardzero":
-double: 2
-float: 1
-ldouble: 2
+double: 3
+float: 2
+ldouble: 4
 
 Function: "exp2m1_upward":
-double: 1
-float: 1
-ldouble: 2
+double: 3
+float: 3
+ldouble: 5
 
 Function: "exp_downward":
 double: 1
@@ -1206,24 +1206,24 @@ float: 2
 ldouble: 1
 
 Function: "log10p1":
-double: 1
-float: 1
+double: 2
+float: 2
 ldouble: 3
 
 Function: "log10p1_downward":
 double: 2
-float: 1
-ldouble: 2
+float: 3
+ldouble: 4
 
 Function: "log10p1_towardzero":
-double: 2
+double: 3
 float: 2
-ldouble: 2
+ldouble: 3
 
 Function: "log10p1_upward":
 double: 2
-float: 1
-ldouble: 3
+float: 3
+ldouble: 4
 
 Function: "log1p":
 double: 1
@@ -1431,22 +1431,18 @@ ldouble: 3
 
 Function: "tgamma":
 double: 9
-float: 8
 ldouble: 4
 
 Function: "tgamma_downward":
 double: 9
-float: 7
 ldouble: 5
 
 Function: "tgamma_towardzero":
 double: 9
-float: 7
 ldouble: 5
 
 Function: "tgamma_upward":
 double: 8
-float: 8
 ldouble: 4
 
 Function: "y0":
diff --git a/sysdeps/riscv/start.S b/sysdeps/riscv/start.S
index ede186ef23..fd6f1ca038 100644
--- a/sysdeps/riscv/start.S
+++ b/sysdeps/riscv/start.S
@@ -87,6 +87,7 @@ load_gp:
 	ret
 
 	.section .preinit_array,"aw"
+	.align PTRLOG
 	.dc.a load_gp
 
 /* Define a symbol for the first piece of initialized data.  */
diff --git a/sysdeps/s390/fpu/libm-test-ulps b/sysdeps/s390/fpu/libm-test-ulps
index 9ac3db4fa5..6e4084ae37 100644
--- a/sysdeps/s390/fpu/libm-test-ulps
+++ b/sysdeps/s390/fpu/libm-test-ulps
@@ -977,24 +977,24 @@ float: 1
 ldouble: 3
 
 Function: "exp10m1":
-double: 2
-float: 1
-ldouble: 1
+double: 4
+float: 2
+ldouble: 3
 
 Function: "exp10m1_downward":
-double: 1
-float: 1
-ldouble: 3
+double: 3
+float: 3
+ldouble: 6
 
 Function: "exp10m1_towardzero":
-double: 1
-float: 1
-ldouble: 3
+double: 2
+float: 3
+ldouble: 6
 
 Function: "exp10m1_upward":
-double: 3
-float: 1
-ldouble: 3
+double: 5
+float: 3
+ldouble: 6
 
 Function: "exp2":
 double: 1
@@ -1014,24 +1014,24 @@ float: 1
 ldouble: 2
 
 Function: "exp2m1":
-double: 1
-float: 1
-ldouble: 1
+double: 2
+float: 2
+ldouble: 2
 
 Function: "exp2m1_downward":
-double: 1
-float: 1
-ldouble: 2
+double: 3
+float: 3
+ldouble: 3
 
 Function: "exp2m1_towardzero":
-double: 2
-float: 1
-ldouble: 2
+double: 3
+float: 2
+ldouble: 4
 
 Function: "exp2m1_upward":
-double: 1
-float: 1
-ldouble: 2
+double: 3
+float: 3
+ldouble: 5
 
 Function: "exp_downward":
 double: 1
@@ -1207,24 +1207,24 @@ float: 2
 ldouble: 1
 
 Function: "log10p1":
-double: 1
-float: 1
+double: 2
+float: 2
 ldouble: 3
 
 Function: "log10p1_downward":
 double: 2
-float: 1
-ldouble: 2
+float: 3
+ldouble: 4
 
 Function: "log10p1_towardzero":
-double: 2
+double: 3
 float: 2
-ldouble: 2
+ldouble: 3
 
 Function: "log10p1_upward":
 double: 2
-float: 1
-ldouble: 3
+float: 3
+ldouble: 4
 
 Function: "log1p":
 double: 1
@@ -1429,22 +1429,18 @@ ldouble: 3
 
 Function: "tgamma":
 double: 9
-float: 8
 ldouble: 4
 
 Function: "tgamma_downward":
 double: 9
-float: 7
 ldouble: 5
 
 Function: "tgamma_towardzero":
 double: 9
-float: 7
 ldouble: 5
 
 Function: "tgamma_upward":
 double: 9
-float: 8
 ldouble: 4
 
 Function: "y0":
diff --git a/sysdeps/s390/utf16-utf32-z9.c b/sysdeps/s390/utf16-utf32-z9.c
index 51dd979607..9ac8a82df8 100644
--- a/sysdeps/s390/utf16-utf32-z9.c
+++ b/sysdeps/s390/utf16-utf32-z9.c
@@ -397,8 +397,8 @@ gconv_end (struct __gconv_step *data)
   {									\
     register const unsigned char* pInput asm ("8") = inptr;		\
     register size_t inlen asm ("9") = inend - inptr;			\
-    register unsigned char* pOutput asm ("10") = outptr;		\
-    register size_t outlen asm ("11") = outend - outptr;		\
+    register unsigned char* pOutput asm ("6") = outptr;			\
+    register size_t outlen asm ("7") = outend - outptr;			\
     unsigned long tmp, tmp2, tmp3;					\
     asm volatile (".machine push\n\t"					\
 		  ".machine \"z13\"\n\t"				\
@@ -707,8 +707,8 @@ gconv_end (struct __gconv_step *data)
   {									\
     register const unsigned char* pInput asm ("8") = inptr;		\
     register size_t inlen asm ("9") = inend - inptr;			\
-    register unsigned char* pOutput asm ("10") = outptr;		\
-    register size_t outlen asm ("11") = outend - outptr;		\
+    register unsigned char* pOutput asm ("6") = outptr;			\
+    register size_t outlen asm ("7") = outend - outptr;			\
     unsigned long tmp, tmp2, tmp3;					\
     asm volatile (".machine push\n\t"					\
 		  ".machine \"z13\"\n\t"				\
diff --git a/sysdeps/s390/utf8-utf16-z9.c b/sysdeps/s390/utf8-utf16-z9.c
index 421747f058..d19fc3c97b 100644
--- a/sysdeps/s390/utf8-utf16-z9.c
+++ b/sysdeps/s390/utf8-utf16-z9.c
@@ -170,8 +170,8 @@ gconv_end (struct __gconv_step *data)
   {									\
     register const unsigned char* pInput __asm__ ("8") = inptr;		\
     register size_t inlen __asm__ ("9") = inend - inptr;		\
-    register unsigned char* pOutput __asm__ ("10") = outptr;		\
-    register size_t outlen __asm__("11") = outend - outptr;		\
+    register unsigned char* pOutput __asm__ ("6") = outptr;		\
+    register size_t outlen __asm__("7") = outend - outptr;		\
     unsigned long cc = 0;						\
 									\
     __asm__ __volatile__ (".machine push       \n\t"			\
@@ -242,8 +242,8 @@ gconv_end (struct __gconv_step *data)
   {									\
     register const unsigned char* pInput asm ("8") = inptr;		\
     register size_t inlen asm ("9") = inend - inptr;			\
-    register unsigned char* pOutput asm ("10") = outptr;		\
-    register size_t outlen asm("11") = outend - outptr;			\
+    register unsigned char* pOutput asm ("6") = outptr;			\
+    register size_t outlen asm("7") = outend - outptr;			\
     unsigned long tmp, tmp2, tmp3;					\
     asm volatile (".machine push\n\t"					\
 		  ".machine \"z13\"\n\t"				\
@@ -818,8 +818,8 @@ gconv_end (struct __gconv_step *data)
   {									\
     register const unsigned char* pInput asm ("8") = inptr;		\
     register size_t inlen asm ("9") = inend - inptr;			\
-    register unsigned char* pOutput asm ("10") = outptr;		\
-    register size_t outlen asm ("11") = outend - outptr;		\
+    register unsigned char* pOutput asm ("6") = outptr;			\
+    register size_t outlen asm ("7") = outend - outptr;			\
     unsigned long tmp, tmp2, tmp3;					\
     asm volatile (".machine push\n\t"					\
 		  ".machine \"z13\"\n\t"				\
diff --git a/sysdeps/s390/utf8-utf32-z9.c b/sysdeps/s390/utf8-utf32-z9.c
index 9c4189a5f0..e4323d634c 100644
--- a/sysdeps/s390/utf8-utf32-z9.c
+++ b/sysdeps/s390/utf8-utf32-z9.c
@@ -170,8 +170,8 @@ gconv_end (struct __gconv_step *data)
   {									\
     register const unsigned char* pInput __asm__ ("8") = inptr;		\
     register size_t inlen __asm__ ("9") = inend - inptr;		\
-    register unsigned char* pOutput __asm__ ("10") = outptr;		\
-    register size_t outlen __asm__("11") = outend - outptr;		\
+    register unsigned char* pOutput __asm__ ("6") = outptr;		\
+    register size_t outlen __asm__("7") = outend - outptr;		\
     unsigned long cc = 0;						\
 									\
     __asm__ __volatile__ (".machine push       \n\t"			\
@@ -475,8 +475,8 @@ gconv_end (struct __gconv_step *data)
   {									\
     register const unsigned char* pInput asm ("8") = inptr;		\
     register size_t inlen asm ("9") = inend - inptr;			\
-    register unsigned char* pOutput asm ("10") = outptr;		\
-    register size_t outlen asm("11") = outend - outptr;			\
+    register unsigned char* pOutput asm ("6") = outptr;		\
+    register size_t outlen asm("7") = outend - outptr;			\
     unsigned long tmp, tmp2, tmp3;					\
     asm volatile (".machine push\n\t"					\
 		  ".machine \"z13\"\n\t"				\
@@ -864,8 +864,8 @@ gconv_end (struct __gconv_step *data)
   {									\
     register const unsigned char* pInput asm ("8") = inptr;		\
     register size_t inlen asm ("9") = inend - inptr;			\
-    register unsigned char* pOutput asm ("10") = outptr;		\
-    register size_t outlen asm ("11") = outend - outptr;		\
+    register unsigned char* pOutput asm ("6") = outptr;			\
+    register size_t outlen asm ("7") = outend - outptr;			\
     unsigned long tmp, tmp2;						\
     asm volatile (".machine push\n\t"					\
 		  ".machine \"z13\"\n\t"				\
diff --git a/sysdeps/sh/libm-test-ulps b/sysdeps/sh/libm-test-ulps
index 3c84259941..36f21ed395 100644
--- a/sysdeps/sh/libm-test-ulps
+++ b/sysdeps/sh/libm-test-ulps
@@ -532,11 +532,9 @@ float: 2
 
 Function: "tgamma":
 double: 9
-float: 8
 
 Function: "tgamma_towardzero":
 double: 9
-float: 7
 
 Function: "y0":
 double: 3
diff --git a/sysdeps/sh/sysdep.h b/sysdeps/sh/sysdep.h
index 0c9e5626e9..377d29b950 100644
--- a/sysdeps/sh/sysdep.h
+++ b/sysdeps/sh/sysdep.h
@@ -24,6 +24,7 @@
 
 #define ALIGNARG(log2) log2
 #define ASM_SIZE_DIRECTIVE(name) .size name,.-name
+#define L(label) .L##label
 
 #ifdef SHARED
 #define PLTJMP(_x)	_x##@PLT
diff --git a/sysdeps/sparc/fpu/libm-test-ulps b/sysdeps/sparc/fpu/libm-test-ulps
index 0142357b3f..ccb6c794e7 100644
--- a/sysdeps/sparc/fpu/libm-test-ulps
+++ b/sysdeps/sparc/fpu/libm-test-ulps
@@ -977,24 +977,24 @@ float: 1
 ldouble: 3
 
 Function: "exp10m1":
-double: 2
-float: 1
-ldouble: 1
+double: 3
+float: 2
+ldouble: 3
 
 Function: "exp10m1_downward":
-double: 1
-float: 1
-ldouble: 3
+double: 3
+float: 3
+ldouble: 6
 
 Function: "exp10m1_towardzero":
-double: 1
-float: 1
-ldouble: 3
+double: 2
+float: 3
+ldouble: 6
 
 Function: "exp10m1_upward":
-double: 3
-float: 1
-ldouble: 3
+double: 4
+float: 4
+ldouble: 6
 
 Function: "exp2":
 double: 1
@@ -1017,24 +1017,24 @@ float: 1
 ldouble: 2
 
 Function: "exp2m1":
-double: 1
-float: 1
-ldouble: 1
-
-Function: "exp2m1_downward":
 double: 2
-float: 1
+float: 2
 ldouble: 2
 
+Function: "exp2m1_downward":
+double: 3
+float: 3
+ldouble: 3
+
 Function: "exp2m1_towardzero":
-double: 2
-float: 1
-ldouble: 2
+double: 3
+float: 2
+ldouble: 4
 
 Function: "exp2m1_upward":
-double: 1
-float: 1
-ldouble: 2
+double: 3
+float: 3
+ldouble: 5
 
 Function: "exp_downward":
 double: 1
@@ -1212,23 +1212,23 @@ ldouble: 1
 
 Function: "log10p1":
 double: 1
-float: 1
+float: 2
 ldouble: 3
 
 Function: "log10p1_downward":
 double: 2
-float: 1
-ldouble: 2
+float: 3
+ldouble: 4
 
 Function: "log10p1_towardzero":
-double: 2
+double: 3
 float: 2
-ldouble: 2
+ldouble: 3
 
 Function: "log10p1_upward":
 double: 2
-float: 1
-ldouble: 3
+float: 3
+ldouble: 4
 
 Function: "log1p":
 double: 1
@@ -1444,22 +1444,18 @@ ldouble: 3
 
 Function: "tgamma":
 double: 9
-float: 8
 ldouble: 4
 
 Function: "tgamma_downward":
 double: 9
-float: 7
 ldouble: 5
 
 Function: "tgamma_towardzero":
 double: 9
-float: 7
 ldouble: 5
 
 Function: "tgamma_upward":
 double: 9
-float: 8
 ldouble: 4
 
 Function: "y0":
diff --git a/sysdeps/unix/sysdep.h b/sysdeps/unix/sysdep.h
index a19e84165b..adc8d71f49 100644
--- a/sysdeps/unix/sysdep.h
+++ b/sysdeps/unix/sysdep.h
@@ -24,6 +24,9 @@
 #define	SYSCALL__(name, args)	PSEUDO (__##name, name, args)
 #define	SYSCALL(name, args)	PSEUDO (name, name, args)
 
+#ifndef __ASSEMBLER__
+# include <errno.h>
+
 #define __SYSCALL_CONCAT_X(a,b)     a##b
 #define __SYSCALL_CONCAT(a,b)       __SYSCALL_CONCAT_X (a, b)
 
@@ -108,42 +111,148 @@
 #define INLINE_SYSCALL_CALL(...) \
   __INLINE_SYSCALL_DISP (__INLINE_SYSCALL, __VA_ARGS__)
 
-#if IS_IN (rtld)
-/* All cancellation points are compiled out in the dynamic loader.  */
-# define NO_SYSCALL_CANCEL_CHECKING 1
+#define __INTERNAL_SYSCALL_NCS0(name) \
+  INTERNAL_SYSCALL_NCS (name, 0)
+#define __INTERNAL_SYSCALL_NCS1(name, a1) \
+  INTERNAL_SYSCALL_NCS (name, 1, a1)
+#define __INTERNAL_SYSCALL_NCS2(name, a1, a2) \
+  INTERNAL_SYSCALL_NCS (name, 2, a1, a2)
+#define __INTERNAL_SYSCALL_NCS3(name, a1, a2, a3) \
+  INTERNAL_SYSCALL_NCS (name, 3, a1, a2, a3)
+#define __INTERNAL_SYSCALL_NCS4(name, a1, a2, a3, a4) \
+  INTERNAL_SYSCALL_NCS (name, 4, a1, a2, a3, a4)
+#define __INTERNAL_SYSCALL_NCS5(name, a1, a2, a3, a4, a5) \
+  INTERNAL_SYSCALL_NCS (name, 5, a1, a2, a3, a4, a5)
+#define __INTERNAL_SYSCALL_NCS6(name, a1, a2, a3, a4, a5, a6) \
+  INTERNAL_SYSCALL_NCS (name, 6, a1, a2, a3, a4, a5, a6)
+#define __INTERNAL_SYSCALL_NCS7(name, a1, a2, a3, a4, a5, a6, a7) \
+  INTERNAL_SYSCALL_NCS (name, 7, a1, a2, a3, a4, a5, a6, a7)
+
+/* Issue a syscall defined by syscall number plus any other argument required.
+   It is similar to the INTERNAL_SYSCALL_NCS macro, but without the need to
+   pass the expected argument count as second parameter.  */
+#define INTERNAL_SYSCALL_NCS_CALL(...) \
+  __INTERNAL_SYSCALL_DISP (__INTERNAL_SYSCALL_NCS, __VA_ARGS__)
+
+/* Cancellation macros.  */
+#include <syscall_types.h>
+
+/* Adjust both the __syscall_cancel and the SYSCALL_CANCEL macro to support
+   7 arguments instead of the default 6 (currently only mips32).  It avoids
+   adding the requirement for each architecture to support 7 argument macros
+   {INTERNAL,INLINE}_SYSCALL.  */
+#ifdef HAVE_CANCELABLE_SYSCALL_WITH_7_ARGS
+# define __SYSCALL_CANCEL7_ARG_DEF	__syscall_arg_t a7,
+# define __SYSCALL_CANCEL7_ARCH_ARG_DEF ,__syscall_arg_t a7
+# define __SYSCALL_CANCEL7_ARG		0,
+# define __SYSCALL_CANCEL7_ARG7		a7,
+# define __SYSCALL_CANCEL7_ARCH_ARG7	, a7
 #else
-# define NO_SYSCALL_CANCEL_CHECKING SINGLE_THREAD_P
+# define __SYSCALL_CANCEL7_ARG_DEF
+# define __SYSCALL_CANCEL7_ARCH_ARG_DEF
+# define __SYSCALL_CANCEL7_ARG
+# define __SYSCALL_CANCEL7_ARG7
+# define __SYSCALL_CANCEL7_ARCH_ARG7
 #endif
+long int __internal_syscall_cancel (__syscall_arg_t a1, __syscall_arg_t a2,
+				    __syscall_arg_t a3, __syscall_arg_t a4,
+				    __syscall_arg_t a5, __syscall_arg_t a6,
+				    __SYSCALL_CANCEL7_ARG_DEF
+				    __syscall_arg_t nr) attribute_hidden;
 
-#define SYSCALL_CANCEL(...) \
-  ({									     \
-    long int sc_ret;							     \
-    if (NO_SYSCALL_CANCEL_CHECKING)					     \
-      sc_ret = INLINE_SYSCALL_CALL (__VA_ARGS__); 			     \
-    else								     \
-      {									     \
-	int sc_cancel_oldtype = LIBC_CANCEL_ASYNC ();			     \
-	sc_ret = INLINE_SYSCALL_CALL (__VA_ARGS__);			     \
-        LIBC_CANCEL_RESET (sc_cancel_oldtype);				     \
-      }									     \
-    sc_ret;								     \
-  })
+long int __syscall_cancel (__syscall_arg_t arg1, __syscall_arg_t arg2,
+			   __syscall_arg_t arg3, __syscall_arg_t arg4,
+			   __syscall_arg_t arg5, __syscall_arg_t arg6,
+			   __SYSCALL_CANCEL7_ARG_DEF
+			   __syscall_arg_t nr) attribute_hidden;
 
-/* Issue a syscall defined by syscall number plus any other argument
-   required.  Any error will be returned unmodified (including errno).  */
-#define INTERNAL_SYSCALL_CANCEL(...) \
-  ({									     \
-    long int sc_ret;							     \
-    if (NO_SYSCALL_CANCEL_CHECKING) 					     \
-      sc_ret = INTERNAL_SYSCALL_CALL (__VA_ARGS__); 			     \
-    else								     \
-      {									     \
-	int sc_cancel_oldtype = LIBC_CANCEL_ASYNC ();			     \
-	sc_ret = INTERNAL_SYSCALL_CALL (__VA_ARGS__);			     \
-        LIBC_CANCEL_RESET (sc_cancel_oldtype);				     \
-      }									     \
-    sc_ret;								     \
-  })
+#define __SYSCALL_CANCEL0(name)						\
+  __syscall_cancel (0, 0, 0, 0, 0, 0, __SYSCALL_CANCEL7_ARG __NR_##name)
+#define __SYSCALL_CANCEL1(name, a1)					\
+  __syscall_cancel (__SSC (a1), 0, 0, 0, 0, 0,				\
+		    __SYSCALL_CANCEL7_ARG __NR_##name)
+#define __SYSCALL_CANCEL2(name, a1, a2) \
+  __syscall_cancel (__SSC (a1), __SSC (a2), 0, 0, 0, 0,			\
+		    __SYSCALL_CANCEL7_ARG __NR_##name)
+#define __SYSCALL_CANCEL3(name, a1, a2, a3) \
+  __syscall_cancel (__SSC (a1), __SSC (a2), __SSC (a3), 0, 0, 0,	\
+		    __SYSCALL_CANCEL7_ARG __NR_##name)
+#define __SYSCALL_CANCEL4(name, a1, a2, a3, a4) \
+  __syscall_cancel (__SSC (a1), __SSC (a2), __SSC (a3),			\
+		    __SSC(a4), 0, 0, __SYSCALL_CANCEL7_ARG __NR_##name)
+#define __SYSCALL_CANCEL5(name, a1, a2, a3, a4, a5) \
+  __syscall_cancel (__SSC (a1), __SSC (a2), __SSC (a3), __SSC(a4),	\
+		    __SSC (a5), 0, __SYSCALL_CANCEL7_ARG __NR_##name)
+#define __SYSCALL_CANCEL6(name, a1, a2, a3, a4, a5, a6) \
+  __syscall_cancel (__SSC (a1), __SSC (a2), __SSC (a3), __SSC (a4),	\
+		    __SSC (a5), __SSC (a6), __SYSCALL_CANCEL7_ARG	\
+		    __NR_##name)
+#define __SYSCALL_CANCEL7(name, a1, a2, a3, a4, a5, a6, a7)		\
+  __syscall_cancel (__SSC (a1), __SSC (a2), __SSC (a3), __SSC (a4),	\
+		    __SSC (a5), __SSC (a6), __SSC (a7), __NR_##name)
+
+#define __SYSCALL_CANCEL_NARGS_X(a,b,c,d,e,f,g,h,n,...) n
+#define __SYSCALL_CANCEL_NARGS(...) \
+  __SYSCALL_CANCEL_NARGS_X (__VA_ARGS__,7,6,5,4,3,2,1,0,)
+#define __SYSCALL_CANCEL_CONCAT_X(a,b)     a##b
+#define __SYSCALL_CANCEL_CONCAT(a,b)       __SYSCALL_CANCEL_CONCAT_X (a, b)
+#define __SYSCALL_CANCEL_DISP(b,...) \
+  __SYSCALL_CANCEL_CONCAT (b,__SYSCALL_CANCEL_NARGS(__VA_ARGS__))(__VA_ARGS__)
+
+/* Issue a cancellable syscall defined by the first argument plus any other
+   argument required.  If an error occurs, the macro returns -1 and sets
+   errno accordingly.  */
+#define __SYSCALL_CANCEL_CALL(...) \
+  __SYSCALL_CANCEL_DISP (__SYSCALL_CANCEL, __VA_ARGS__)
+
+#define __INTERNAL_SYSCALL_CANCEL0(name)				\
+  __internal_syscall_cancel (0, 0, 0, 0, 0, 0, __SYSCALL_CANCEL7_ARG	\
+			     __NR_##name)
+#define __INTERNAL_SYSCALL_CANCEL1(name, a1)				\
+  __internal_syscall_cancel (__SSC (a1), 0, 0, 0, 0, 0,			\
+			     __SYSCALL_CANCEL7_ARG __NR_##name)
+#define __INTERNAL_SYSCALL_CANCEL2(name, a1, a2)			\
+  __internal_syscall_cancel (__SSC (a1), __SSC (a2), 0, 0, 0, 0,	\
+			     __SYSCALL_CANCEL7_ARG __NR_##name)
+#define __INTERNAL_SYSCALL_CANCEL3(name, a1, a2, a3)			\
+  __internal_syscall_cancel (__SSC (a1), __SSC (a2), __SSC (a3), 0,	\
+			     0, 0, __SYSCALL_CANCEL7_ARG __NR_##name)
+#define __INTERNAL_SYSCALL_CANCEL4(name, a1, a2, a3, a4)		\
+  __internal_syscall_cancel (__SSC (a1), __SSC (a2), __SSC (a3),	\
+			     __SSC(a4), 0, 0,				\
+			     __SYSCALL_CANCEL7_ARG __NR_##name)
+#define __INTERNAL_SYSCALL_CANCEL5(name, a1, a2, a3, a4, a5)		\
+  __internal_syscall_cancel (__SSC (a1), __SSC (a2), __SSC (a3),	\
+			     __SSC(a4), __SSC (a5), 0,			\
+			     __SYSCALL_CANCEL7_ARG __NR_##name)
+#define __INTERNAL_SYSCALL_CANCEL6(name, a1, a2, a3, a4, a5, a6)	\
+  __internal_syscall_cancel (__SSC (a1), __SSC (a2), __SSC (a3),	\
+			     __SSC (a4), __SSC (a5), __SSC (a6),	\
+			     __SYSCALL_CANCEL7_ARG __NR_##name)
+#define __INTERNAL_SYSCALL_CANCEL7(name, a1, a2, a3, a4, a5, a6, a7) \
+  __internal_syscall_cancel (__SSC (a1), __SSC (a2), __SSC (a3),     \
+			     __SSC (a4), __SSC (a5), __SSC (a6),     \
+			     __SSC (a7), __NR_##name)
+
+/* Issue a cancellable syscall defined by syscall name NAME plus any other
+   argument required.  If an error occurs its value is returned as a negative
+   number unmodified and errno is not set.  */
+#define __INTERNAL_SYSCALL_CANCEL_CALL(...) \
+  __SYSCALL_CANCEL_DISP (__INTERNAL_SYSCALL_CANCEL, __VA_ARGS__)
+
+#if IS_IN (rtld)
+/* The loader does not need to handle thread cancellation, use direct
+   syscall instead.  */
+# define INTERNAL_SYSCALL_CANCEL(...) INTERNAL_SYSCALL_CALL(__VA_ARGS__)
+# define SYSCALL_CANCEL(...)          INLINE_SYSCALL_CALL (__VA_ARGS__)
+#else
+# define INTERNAL_SYSCALL_CANCEL(...) \
+  __INTERNAL_SYSCALL_CANCEL_CALL (__VA_ARGS__)
+# define SYSCALL_CANCEL(...) \
+  __SYSCALL_CANCEL_CALL (__VA_ARGS__)
+#endif
+
+#endif /* __ASSEMBLER__  */
 
 /* Machine-dependent sysdep.h files are expected to define the macro
    PSEUDO (function_name, syscall_name) to emit assembly code to define the
diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile
index 097b5a26fc..527c7a5ae8 100644
--- a/sysdeps/unix/sysv/linux/Makefile
+++ b/sysdeps/unix/sysv/linux/Makefile
@@ -95,6 +95,8 @@ sysdep_routines += \
   process_vm_writev \
   pselect32 \
   readahead \
+  sched_getattr \
+  sched_setattr \
   setfsgid \
   setfsuid \
   setvmaname \
@@ -206,6 +208,7 @@ tests += \
   tst-getauxval \
   tst-gettid \
   tst-gettid-kill \
+  tst-linux-mremap1 \
   tst-memfd_create \
   tst-misalign-clone \
   tst-mlock2 \
@@ -223,9 +226,11 @@ tests += \
   tst-process_mrelease \
   tst-quota \
   tst-rlimit-infinity \
+  tst-sched_setattr \
   tst-scm_rights \
   tst-sigtimedwait \
   tst-sync_file_range \
+  tst-syscall-restart \
   tst-sysconf-iov_max \
   tst-sysvmsg-linux \
   tst-sysvsem-linux \
@@ -394,6 +399,16 @@ $(objpfx)tst-mount-compile.out: ../sysdeps/unix/sysv/linux/tst-mount-compile.py
 	  < /dev/null > $@ 2>&1; $(evaluate-test)
 $(objpfx)tst-mount-compile.out: $(sysdeps-linux-python-deps)
 
+tests-special += \
+  $(objpfx)tst-sched-consts.out \
+  # tests-special
+$(objpfx)tst-sched-consts.out: ../sysdeps/unix/sysv/linux/tst-sched-consts.py
+	$(sysdeps-linux-python) \
+	  ../sysdeps/unix/sysv/linux/tst-sched-consts.py \
+	    $(sysdeps-linux-python-cc) \
+	  < /dev/null > $@ 2>&1; $(evaluate-test)
+$(objpfx)tst-sched-consts.out: $(sysdeps-linux-python-deps)
+
 tst-rseq-disable-ENV = GLIBC_TUNABLES=glibc.pthread.rseq=0
 
 endif # $(subdir) == misc
diff --git a/sysdeps/unix/sysv/linux/Versions b/sysdeps/unix/sysv/linux/Versions
index 268ba1b6ac..213ff5f1fe 100644
--- a/sysdeps/unix/sysv/linux/Versions
+++ b/sysdeps/unix/sysv/linux/Versions
@@ -328,6 +328,10 @@ libc {
     posix_spawnattr_getcgroup_np;
     posix_spawnattr_setcgroup_np;
   }
+  GLIBC_2.41 {
+    sched_getattr;
+    sched_setattr;
+  }
   GLIBC_PRIVATE {
     # functions used in other libraries
     __syscall_rt_sigqueueinfo;
diff --git a/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h b/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h
index 7ee8a2167a..19b6316cb6 100644
--- a/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h
@@ -158,6 +158,7 @@
 #define __NR_mq_timedsend 182
 #define __NR_mq_unlink 181
 #define __NR_mremap 216
+#define __NR_mseal 462
 #define __NR_msgctl 187
 #define __NR_msgget 186
 #define __NR_msgrcv 188
diff --git a/sysdeps/unix/sysv/linux/aarch64/libc.abilist b/sysdeps/unix/sysv/linux/aarch64/libc.abilist
index 68eeca1c08..38db77e4f7 100644
--- a/sysdeps/unix/sysv/linux/aarch64/libc.abilist
+++ b/sysdeps/unix/sysv/linux/aarch64/libc.abilist
@@ -2748,3 +2748,5 @@ GLIBC_2.39 stdc_trailing_zeros_ui F
 GLIBC_2.39 stdc_trailing_zeros_ul F
 GLIBC_2.39 stdc_trailing_zeros_ull F
 GLIBC_2.39 stdc_trailing_zeros_us F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
diff --git a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
index b685106954..98687cae0d 100644
--- a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
+++ b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
@@ -128,3 +128,8 @@ GLIBC_2.40 _ZGVsMxvv_hypot F
 GLIBC_2.40 _ZGVsMxvv_hypotf F
 GLIBC_2.40 _ZGVsMxvv_pow F
 GLIBC_2.40 _ZGVsMxvv_powf F
+GLIBC_2.41 _ZGVnN2v_logp1 F
+GLIBC_2.41 _ZGVnN2v_logp1f F
+GLIBC_2.41 _ZGVnN4v_logp1f F
+GLIBC_2.41 _ZGVsMxv_logp1 F
+GLIBC_2.41 _ZGVsMxv_logp1f F
diff --git a/sysdeps/unix/sysv/linux/aarch64/syscall_cancel.S b/sysdeps/unix/sysv/linux/aarch64/syscall_cancel.S
new file mode 100644
index 0000000000..e91a431b36
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/aarch64/syscall_cancel.S
@@ -0,0 +1,59 @@
+/* Cancellable syscall wrapper.  Linux/AArch64 version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <descr-const.h>
+
+/* long int [x0] __syscall_cancel_arch (int *cancelhandling [x0],
+					long int nr   [x1],
+					long int arg1 [x2],
+					long int arg2 [x3],
+					long int arg3 [x4],
+					long int arg4 [x5],
+					long int arg5 [x6],
+					long int arg6 [x7])  */
+
+ENTRY (__syscall_cancel_arch)
+
+	.globl __syscall_cancel_arch_start	/* Global marker: cancel check begins.  */
+__syscall_cancel_arch_start:
+
+	/* if (*cancelhandling & CANCELED_BITMASK)
+	     __syscall_do_cancel()  */
+	ldr	w0, [x0]			/* w0 = *cancelhandling.  */
+	tbnz    w0, TCB_CANCELED_BIT, 1f	/* Bit set: skip the syscall.  */
+
+	/* Issue a 6 argument syscall, the nr [x1] being the syscall
+	   number.  */
+	mov	x8, x1				/* x8 = nr.  */
+	mov	x0, x2				/* x0 = arg1.  */
+	mov	x1, x3				/* x1 = arg2.  */
+	mov	x2, x4				/* x2 = arg3.  */
+	mov	x3, x5				/* x3 = arg4.  */
+	mov	x4, x6				/* x4 = arg5.  */
+	mov	x5, x7				/* x5 = arg6.  */
+	svc	0x0
+
+	.globl __syscall_cancel_arch_end	/* Global marker: first insn after svc.  */
+__syscall_cancel_arch_end:
+	ret					/* Result is left in x0.  */
+
+1:
+	b	__syscall_do_cancel		/* Branch without link (tail call).  */
+
+END (__syscall_cancel_arch)
diff --git a/sysdeps/unix/sysv/linux/alpha/arch-syscall.h b/sysdeps/unix/sysv/linux/alpha/arch-syscall.h
index 0f4ea7670b..216a5575c6 100644
--- a/sysdeps/unix/sysv/linux/alpha/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/alpha/arch-syscall.h
@@ -24,6 +24,7 @@
 #define __NR_clock_nanosleep 422
 #define __NR_clock_settime 419
 #define __NR_clone 312
+#define __NR_clone3 545
 #define __NR_close 6
 #define __NR_close_range 546
 #define __NR_connect 98
@@ -189,6 +190,7 @@
 #define __NR_mq_timedsend 434
 #define __NR_mq_unlink 433
 #define __NR_mremap 341
+#define __NR_mseal 572
 #define __NR_msgctl 200
 #define __NR_msgget 201
 #define __NR_msgrcv 202
diff --git a/sysdeps/unix/sysv/linux/alpha/libc.abilist b/sysdeps/unix/sysv/linux/alpha/libc.abilist
index 34c187b721..637bfce9fb 100644
--- a/sysdeps/unix/sysv/linux/alpha/libc.abilist
+++ b/sysdeps/unix/sysv/linux/alpha/libc.abilist
@@ -3095,6 +3095,8 @@ GLIBC_2.4 wcstold F
 GLIBC_2.4 wcstold_l F
 GLIBC_2.4 wprintf F
 GLIBC_2.4 wscanf F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
 GLIBC_2.5 __readlinkat_chk F
 GLIBC_2.5 inet6_opt_append F
 GLIBC_2.5 inet6_opt_find F
diff --git a/sysdeps/unix/sysv/linux/alpha/syscall_cancel.S b/sysdeps/unix/sysv/linux/alpha/syscall_cancel.S
new file mode 100644
index 0000000000..377eef48be
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/alpha/syscall_cancel.S
@@ -0,0 +1,80 @@
+/* Cancellable syscall wrapper.  Linux/alpha version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <descr-const.h>
+
+/* long int __syscall_cancel_arch (int *ch,
+				   __syscall_arg_t nr,
+				   __syscall_arg_t arg1,
+				   __syscall_arg_t arg2,
+				   __syscall_arg_t arg3,
+				   __syscall_arg_t arg4,
+				   __syscall_arg_t arg5,
+				   __syscall_arg_t arg6)  */
+
+	.set noreorder
+	.set noat
+	.set nomacro
+ENTRY (__syscall_cancel_arch)
+	.frame	sp, 16, ra, 0
+	.mask	0x4000000,-16
+	cfi_startproc
+	ldah	gp, 0(t12)
+	lda	gp, 0(gp)
+	lda	sp, -16(sp)		/* Allocate a 16-byte frame.  */
+	cfi_def_cfa_offset (16)
+	mov	a1, v0			/* v0 = nr.  */
+	stq	ra, 0(sp)		/* Save ra; reloaded before returning.  */
+	cfi_offset (26, -16)
+	.prologue 1
+
+	.global	__syscall_cancel_arch_start	/* Global marker: cancel check begins.  */
+__syscall_cancel_arch_start:
+	ldl	t0, 0(a0)		/* t0 = *ch.  */
+	addl	zero, t0, t0
+	/* if (*ch & CANCELED_BITMASK)  */
+	and	t0, TCB_CANCELED_BITMASK, t0
+	bne	t0, 1f			/* Bit set: skip the syscall.  */
+	mov	a2, a0			/* a0 = arg1.  */
+	mov	a3, a1			/* a1 = arg2.  */
+	mov	a4, a2			/* a2 = arg3.  */
+	ldq	a4, 16(sp)		/* a4 = arg5, first word above our frame.  */
+	mov	a5, a3			/* a3 = arg4.  */
+	ldq	a5, 24(sp)		/* a5 = arg6, second word above our frame.  */
+	.set	macro
+	callsys
+	.set	nomacro
+
+	.global __syscall_cancel_arch_end	/* Global marker: first insn after callsys.  */
+__syscall_cancel_arch_end:
+	subq	zero, v0, t0		/* t0 = -v0.  */
+	ldq	ra, 0(sp)
+	cmovne	a3, t0, v0		/* If a3 != 0 return -v0; a3 is presumably the kernel error flag - confirm.  */
+	lda	sp, 16(sp)		/* Release the frame.  */
+	cfi_remember_state
+	cfi_restore (26)
+	cfi_def_cfa_offset (0)
+	ret	zero, (ra), 1
+	.align 4
+1:
+	cfi_restore_state
+	ldq 	t12, __syscall_do_cancel(gp)		!literal!2
+	jsr 	ra, (t12), __syscall_do_cancel		!lituse_jsr!2
+	cfi_endproc
+END (__syscall_cancel_arch)
diff --git a/sysdeps/unix/sysv/linux/arc/Makefile b/sysdeps/unix/sysv/linux/arc/Makefile
index eca9a9fd3a..dd3da52c39 100644
--- a/sysdeps/unix/sysv/linux/arc/Makefile
+++ b/sysdeps/unix/sysv/linux/arc/Makefile
@@ -11,12 +11,3 @@ ifeq ($(subdir),misc)
 sysdep_headers += sys/cachectl.h
 sysdep_routines += cacheflush
 endif
-
-abi-variants := arcle arcbe
-
-ifeq (,$(filter $(default-abi),$(abi-variants)))
-$(error Unknown ABI $(default-abi), must be one of $(abi-variants))
-endif
-
-abi-arcle-condition := !defined __BIG_ENDIAN__
-abi-arcbe-condition := defined __BIG_ENDIAN__
diff --git a/sysdeps/unix/sysv/linux/arc/arch-syscall.h b/sysdeps/unix/sysv/linux/arc/arch-syscall.h
index 90359482a8..ea581b0a6d 100644
--- a/sysdeps/unix/sysv/linux/arc/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/arc/arch-syscall.h
@@ -161,6 +161,7 @@
 #define __NR_mq_timedsend_time64 418
 #define __NR_mq_unlink 181
 #define __NR_mremap 216
+#define __NR_mseal 462
 #define __NR_msgctl 187
 #define __NR_msgget 186
 #define __NR_msgrcv 188
diff --git a/sysdeps/unix/sysv/linux/arc/libc.abilist b/sysdeps/unix/sysv/linux/arc/libc.abilist
index 916c18ea94..4a305cf730 100644
--- a/sysdeps/unix/sysv/linux/arc/libc.abilist
+++ b/sysdeps/unix/sysv/linux/arc/libc.abilist
@@ -2509,3 +2509,5 @@ GLIBC_2.39 stdc_trailing_zeros_ui F
 GLIBC_2.39 stdc_trailing_zeros_ul F
 GLIBC_2.39 stdc_trailing_zeros_ull F
 GLIBC_2.39 stdc_trailing_zeros_us F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
diff --git a/sysdeps/unix/sysv/linux/arc/shlib-versions b/sysdeps/unix/sysv/linux/arc/shlib-versions
index 343c0a0450..62c1bd57b3 100644
--- a/sysdeps/unix/sysv/linux/arc/shlib-versions
+++ b/sysdeps/unix/sysv/linux/arc/shlib-versions
@@ -1,7 +1,3 @@
 DEFAULT                 GLIBC_2.32
 
-%ifdef HAVE_ARC_BE
-ld=ld-linux-arceb.so.2
-%else
 ld=ld-linux-arc.so.2
-%endif
diff --git a/sysdeps/unix/sysv/linux/arc/syscall_cancel.S b/sysdeps/unix/sysv/linux/arc/syscall_cancel.S
new file mode 100644
index 0000000000..fa02af4163
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/arc/syscall_cancel.S
@@ -0,0 +1,56 @@
+/* Cancellable syscall wrapper.  Linux/ARC version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <descr-const.h>
+
+/* long int __syscall_cancel_arch (int *cancelhandling,
+				   __syscall_arg_t nr,
+				   __syscall_arg_t arg1,
+				   __syscall_arg_t arg2,
+				   __syscall_arg_t arg3,
+				   __syscall_arg_t arg4,
+				   __syscall_arg_t arg5,
+				   __syscall_arg_t arg6)  */
+
+ENTRY (__syscall_cancel_arch)
+
+	.globl __syscall_cancel_arch_start	/* Global marker: cancel check begins.  */
+__syscall_cancel_arch_start:
+	ld_s	r12,[r0]			/* r12 = *cancelhandling.  */
+	bbit1	r12, TCB_CANCELED_BIT, 1f	/* bbit1 takes a bit index, not a mask.  */
+	mov_s	r8, r1				/* r8 = nr.  */
+	mov_s	r0, r2				/* r0 = arg1.  */
+	mov_s	r1, r3				/* r1 = arg2.  */
+	mov_s	r2, r4				/* r2 = arg3.  */
+	mov_s	r3, r5				/* r3 = arg4.  */
+	mov_s	r4, r6				/* r4 = arg5.  */
+	mov_s	r5, r7				/* r5 = arg6.  */
+	trap_s	0
+
+	.globl __syscall_cancel_arch_end	/* Global marker: first insn after trap_s.  */
+__syscall_cancel_arch_end:
+	j_s	[blink]				/* Return; result is left in r0.  */
+
+	.align 4
+1:	push_s	blink				/* Cancelled: save blink, then call out.  */
+	cfi_def_cfa_offset (4)
+	cfi_offset (31, -4)
+	bl	@__syscall_do_cancel
+
+END (__syscall_cancel_arch)
diff --git a/sysdeps/unix/sysv/linux/arm/arch-syscall.h b/sysdeps/unix/sysv/linux/arm/arch-syscall.h
index 4930167a03..2809f52f94 100644
--- a/sysdeps/unix/sysv/linux/arm/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/arm/arch-syscall.h
@@ -205,6 +205,7 @@
 #define __NR_mq_timedsend_time64 418
 #define __NR_mq_unlink 275
 #define __NR_mremap 163
+#define __NR_mseal 462
 #define __NR_msgctl 304
 #define __NR_msgget 303
 #define __NR_msgrcv 302
diff --git a/sysdeps/unix/sysv/linux/arm/be/libc.abilist b/sysdeps/unix/sysv/linux/arm/be/libc.abilist
index ea95de282a..1d54f71b14 100644
--- a/sysdeps/unix/sysv/linux/arm/be/libc.abilist
+++ b/sysdeps/unix/sysv/linux/arm/be/libc.abilist
@@ -2801,6 +2801,8 @@ GLIBC_2.4 xdrstdio_create F
 GLIBC_2.4 xencrypt F
 GLIBC_2.4 xprt_register F
 GLIBC_2.4 xprt_unregister F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
 GLIBC_2.5 __readlinkat_chk F
 GLIBC_2.5 inet6_opt_append F
 GLIBC_2.5 inet6_opt_find F
diff --git a/sysdeps/unix/sysv/linux/arm/le/libc.abilist b/sysdeps/unix/sysv/linux/arm/le/libc.abilist
index 1cdbc983e1..ff7e8bc40b 100644
--- a/sysdeps/unix/sysv/linux/arm/le/libc.abilist
+++ b/sysdeps/unix/sysv/linux/arm/le/libc.abilist
@@ -2798,6 +2798,8 @@ GLIBC_2.4 xdrstdio_create F
 GLIBC_2.4 xencrypt F
 GLIBC_2.4 xprt_register F
 GLIBC_2.4 xprt_unregister F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
 GLIBC_2.5 __readlinkat_chk F
 GLIBC_2.5 inet6_opt_append F
 GLIBC_2.5 inet6_opt_find F
diff --git a/sysdeps/unix/sysv/linux/arm/syscall_cancel.S b/sysdeps/unix/sysv/linux/arm/syscall_cancel.S
new file mode 100644
index 0000000000..6b899306e3
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/arm/syscall_cancel.S
@@ -0,0 +1,78 @@
+/* Cancellable syscall wrapper.  Linux/arm version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <descr-const.h>
+
+/* long int [r0] __syscall_cancel_arch (int *cancelhandling [r0],
+					long int nr   [r1],
+					long int arg1 [r2],
+					long int arg2 [r3],
+					long int arg3 [SP],
+					long int arg4 [SP+4],
+					long int arg5 [SP+8],
+					long int arg6 [SP+12])  */
+
+	.syntax unified
+
+ENTRY (__syscall_cancel_arch)
+	.fnstart
+	mov	ip, sp				/* ip = incoming sp, where arg3..arg6 live.  */
+	stmfd	sp!, {r4, r5, r6, r7, lr}	/* Save r4-r7 and lr; reloaded on both exits.  */
+	.save	{r4, r5, r6, r7, lr}
+
+	cfi_adjust_cfa_offset (20)
+	cfi_rel_offset (r4, 0)
+	cfi_rel_offset (r5, 4)
+	cfi_rel_offset (r6, 8)
+	cfi_rel_offset (r7, 12)
+	cfi_rel_offset (lr, 16)
+
+	.globl __syscall_cancel_arch_start	/* Global marker: cancel check begins.  */
+__syscall_cancel_arch_start:
+
+	/* if (*cancelhandling & CANCELED_BITMASK)
+	     __syscall_do_cancel()  */
+	ldr	r0, [r0]			/* r0 = *cancelhandling.  */
+	tst	r0, #TCB_CANCELED_BITMASK
+	bne	1f				/* Bit set: skip the syscall.  */
+
+	/* Issue a 6 argument syscall, the nr [r1] being the syscall
+	   number.  */
+	mov	r7, r1				/* r7 = nr.  */
+	mov	r0, r2				/* r0 = arg1.  */
+	mov	r1, r3				/* r1 = arg2.  */
+	ldmfd	ip, {r2, r3, r4, r5, r6}	/* r2-r5 = arg3-arg6; r6 = the word after arg6.  */
+	svc	0x0
+
+	.globl __syscall_cancel_arch_end	/* Global marker: first insn after svc.  */
+__syscall_cancel_arch_end:
+	ldmfd	sp!, {r4, r5, r6, r7, lr}	/* Restore saved registers; result stays in r0.  */
+	cfi_adjust_cfa_offset (-20)
+        cfi_restore (r4)
+        cfi_restore (r5)
+        cfi_restore (r6)
+        cfi_restore (r7)
+        cfi_restore (lr)
+	BX (lr)
+
+1:
+	ldmfd	sp!, {r4, r5, r6, r7, lr}	/* Undo the prologue before leaving.  */
+	b	__syscall_do_cancel		/* Branch without link (tail call).  */
+	.fnend
+END (__syscall_cancel_arch)
diff --git a/sysdeps/unix/sysv/linux/bits/fcntl-linux.h b/sysdeps/unix/sysv/linux/bits/fcntl-linux.h
index 628612b885..61c100d984 100644
--- a/sysdeps/unix/sysv/linux/bits/fcntl-linux.h
+++ b/sysdeps/unix/sysv/linux/bits/fcntl-linux.h
@@ -200,6 +200,7 @@
 # define F_SETLEASE	1024	/* Set a lease.  */
 # define F_GETLEASE	1025	/* Enquire what lease is active.  */
 # define F_NOTIFY	1026	/* Request notifications on a directory.  */
+# define F_DUPFD_QUERY  1027    /* Compare two file descriptors for sameness.  */
 # define F_SETPIPE_SZ	1031	/* Set pipe page size array.  */
 # define F_GETPIPE_SZ	1032	/* Set pipe page size array.  */
 # define F_ADD_SEALS	1033	/* Add seals to file.  */
diff --git a/sysdeps/unix/sysv/linux/bits/mman-linux.h b/sysdeps/unix/sysv/linux/bits/mman-linux.h
index 522333c50a..161a885096 100644
--- a/sysdeps/unix/sysv/linux/bits/mman-linux.h
+++ b/sysdeps/unix/sysv/linux/bits/mman-linux.h
@@ -43,6 +43,7 @@
 #define MAP_PRIVATE	0x02		/* Changes are private.  */
 #define MAP_SHARED_VALIDATE	0x03	/* Share changes and validate
 					   extension flags.  */
+#define MAP_DROPPABLE	0x08		/* Zero memory under memory pressure.  */
 #define MAP_TYPE	0x0f		/* Mask for type of mapping.  */
 
 /* Other flags.  */
diff --git a/sysdeps/unix/sysv/linux/bits/sched.h b/sysdeps/unix/sysv/linux/bits/sched.h
index a53e1362a0..a02cb69de7 100644
--- a/sysdeps/unix/sysv/linux/bits/sched.h
+++ b/sysdeps/unix/sysv/linux/bits/sched.h
@@ -29,15 +29,58 @@
 #define SCHED_FIFO		1
 #define SCHED_RR		2
 #ifdef __USE_GNU
+# define SCHED_NORMAL		SCHED_OTHER
 # define SCHED_BATCH		3
 # define SCHED_ISO		4
 # define SCHED_IDLE		5
 # define SCHED_DEADLINE		6
 
+/* Flags that can be used in policy values.  */
 # define SCHED_RESET_ON_FORK	0x40000000
-#endif
 
-#ifdef __USE_GNU
+/* Flags for the sched_flags field in struct sched_attr.  */
+# define SCHED_FLAG_RESET_ON_FORK	0x01
+# define SCHED_FLAG_RECLAIM		0x02
+# define SCHED_FLAG_DL_OVERRUN		0x04
+# define SCHED_FLAG_KEEP_POLICY		0x08
+# define SCHED_FLAG_KEEP_PARAMS		0x10
+# define SCHED_FLAG_UTIL_CLAMP_MIN	0x20
+# define SCHED_FLAG_UTIL_CLAMP_MAX	0x40
+
+/* Combinations of sched_flags fields.  */
+# define SCHED_FLAG_KEEP_ALL		0x18 /* KEEP_POLICY | KEEP_PARAMS.  */
+# define SCHED_FLAG_UTIL_CLAMP		0x60 /* UTIL_CLAMP_MIN | UTIL_CLAMP_MAX.  */
+
+/* Use "" to work around incorrect macro expansion of the
+   __has_include argument (GCC PR 80005).  */
+# ifdef __has_include
+#  if __has_include ("linux/sched/types.h")
+/* Some older Linux versions defined sched_param in <linux/sched/types.h>.  */
+#   define sched_param __glibc_mask_sched_param
+#   include <linux/sched/types.h>
+#   undef sched_param
+#  endif
+# endif
+# ifndef SCHED_ATTR_SIZE_VER0 /* Fallback when no included header defined it.  */
+#  include <linux/types.h>
+#  define SCHED_ATTR_SIZE_VER0 48 /* Bytes from size through sched_period.  */
+#  define SCHED_ATTR_SIZE_VER1 56 /* Adds sched_util_min and sched_util_max.  */
+struct sched_attr
+{
+  __u32 size;
+  __u32 sched_policy;
+  __u64 sched_flags;		/* Takes the SCHED_FLAG_* values.  */
+  __s32 sched_nice;
+  __u32 sched_priority;
+  __u64 sched_runtime;
+  __u64 sched_deadline;
+  __u64 sched_period;		/* Last member counted by SCHED_ATTR_SIZE_VER0.  */
+  __u32 sched_util_min;
+  __u32 sched_util_max;		/* Last member counted by SCHED_ATTR_SIZE_VER1.  */
+  /* Additional fields may be added at the end.  */
+};
+# endif /* !SCHED_ATTR_SIZE_VER0 */
+
 /* Cloning flags.  */
 # define CSIGNAL       0x000000ff /* Signal mask to be sent at exit.  */
 # define CLONE_VM      0x00000100 /* Set if VM shared between processes.  */
@@ -97,6 +140,17 @@ extern int getcpu (unsigned int *, unsigned int *) __THROW;
 
 /* Switch process to namespace of type NSTYPE indicated by FD.  */
 extern int setns (int __fd, int __nstype) __THROW;
+
+/* Apply the scheduling attributes from *ATTR to the process or thread TID.  */
+extern int sched_setattr (pid_t __tid, struct sched_attr *__attr,
+			  unsigned int __flags) __THROW __nonnull ((2));
+
+/* Obtain the scheduling attributes of the process or thread TID and
+   store at most SIZE bytes of them in *ATTR.  */
+extern int sched_getattr (pid_t __tid, struct sched_attr *__attr,
+			  unsigned int __size, unsigned int __flags)
+  __THROW __nonnull ((2)) __attr_access ((__write_only__, 2, 3));
+
 #endif
 
 __END_DECLS
diff --git a/sysdeps/unix/sysv/linux/bits/uio-ext.h b/sysdeps/unix/sysv/linux/bits/uio-ext.h
index ead7a09156..85ed21bac5 100644
--- a/sysdeps/unix/sysv/linux/bits/uio-ext.h
+++ b/sysdeps/unix/sysv/linux/bits/uio-ext.h
@@ -48,6 +48,8 @@ extern ssize_t process_vm_writev (pid_t __pid, const struct iovec *__lvec,
 #define RWF_NOWAIT	0x00000008 /* per-IO nonblocking mode.  */
 #define RWF_APPEND	0x00000010 /* per-IO O_APPEND.  */
 #define RWF_NOAPPEND	0x00000020 /* per-IO negation of O_APPEND */
+#define RWF_ATOMIC	0x00000040 /* Write is to be issued with torn-write
+				      prevention.  */
 
 __END_DECLS
 
diff --git a/sysdeps/unix/sysv/linux/csky/arch-syscall.h b/sysdeps/unix/sysv/linux/csky/arch-syscall.h
index 3f16a29f57..ede3551a00 100644
--- a/sysdeps/unix/sysv/linux/csky/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/csky/arch-syscall.h
@@ -168,6 +168,7 @@
 #define __NR_mq_timedsend_time64 418
 #define __NR_mq_unlink 181
 #define __NR_mremap 216
+#define __NR_mseal 462
 #define __NR_msgctl 187
 #define __NR_msgget 186
 #define __NR_msgrcv 188
@@ -301,7 +302,7 @@
 #define __NR_swapon 224
 #define __NR_symlinkat 36
 #define __NR_sync 81
-#define __NR_sync_file_range 84
+#define __NR_sync_file_range2 84
 #define __NR_syncfs 267
 #define __NR_sysinfo 179
 #define __NR_syslog 116
diff --git a/sysdeps/unix/sysv/linux/csky/libc.abilist b/sysdeps/unix/sysv/linux/csky/libc.abilist
index 96d45961e2..c3ed65467d 100644
--- a/sysdeps/unix/sysv/linux/csky/libc.abilist
+++ b/sysdeps/unix/sysv/linux/csky/libc.abilist
@@ -2785,3 +2785,5 @@ GLIBC_2.39 stdc_trailing_zeros_ui F
 GLIBC_2.39 stdc_trailing_zeros_ul F
 GLIBC_2.39 stdc_trailing_zeros_ull F
 GLIBC_2.39 stdc_trailing_zeros_us F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
diff --git a/sysdeps/unix/sysv/linux/csky/syscall_cancel.S b/sysdeps/unix/sysv/linux/csky/syscall_cancel.S
new file mode 100644
index 0000000000..2989765f8c
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/csky/syscall_cancel.S
@@ -0,0 +1,114 @@
+/* Cancellable syscall wrapper.  Linux/csky version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <descr-const.h>
+
+/* long int __syscall_cancel_arch (int *cancelhandling,
+				   __syscall_arg_t nr,
+				   __syscall_arg_t arg1,
+				   __syscall_arg_t arg2,
+				   __syscall_arg_t arg3,
+				   __syscall_arg_t arg4,
+				   __syscall_arg_t arg5,
+				   __syscall_arg_t arg6)  */
+
+#ifdef SHARED
+# define STACK_ADJ 4	/* One extra word in the frame, used to save gb.  */
+#else
+# define STACK_ADJ 0
+#endif
+
+ENTRY (__syscall_cancel_arch)
+	subi	sp, sp, 16 + STACK_ADJ	/* Room for lr, l3, l1, l0 (and gb).  */
+	cfi_def_cfa_offset (16 + STACK_ADJ)
+#ifdef SHARED
+	st.w	gb, (sp, 16)		/* Save gb; reloaded on the normal exit.  */
+	lrw	t1, 1f@GOTPC
+	cfi_offset (gb, -4)
+	grs	gb, 1f
+1:
+#endif
+	st.w	lr, (sp, 12)
+	st.w	l3, (sp, 8)
+	st.w	l1, (sp, 4)
+	st.w	l0, (sp, 0)
+#ifdef SHARED
+	addu	gb, gb, t1
+#endif
+	subi	sp, sp, 16		/* Second 16-byte area for the stack args.  */
+	cfi_def_cfa_offset (32 + STACK_ADJ)
+	cfi_offset (lr, -( 4 + STACK_ADJ))
+	cfi_offset (l3, -( 8 + STACK_ADJ))
+	cfi_offset (l1, -(12 + STACK_ADJ))
+	cfi_offset (l0, -(16 + STACK_ADJ))
+
+	mov	l3, a1			/* l3 = nr.  */
+	mov	a1, a3			/* a1 = arg2.  */
+	ld.w	a3, (sp, 32 + STACK_ADJ)	/* Copy the four incoming stack words  */
+	st.w	a3, (sp, 0)			/* (arg3..arg6 in prototype order)  */
+	ld.w	a3, (sp, 36 + STACK_ADJ)	/* into the local area at sp+0..12.  */
+	st.w	a3, (sp, 4)
+	ld.w	a3, (sp, 40 + STACK_ADJ)
+	st.w	a3, (sp, 8)
+	ld.w	a3, (sp, 44 + STACK_ADJ)
+	st.w	a3, (sp, 12)
+
+	.globl __syscall_cancel_arch_start	/* Global marker: cancel check begins.  */
+__syscall_cancel_arch_start:
+	ld.w	t0, (a0, 0)		/* t0 = *cancelhandling.  */
+	andi	t0, t0, TCB_CANCELED_BITMASK
+	jbnez	t0, 2f			/* Bit set: skip the syscall.  */
+	mov	a0, a2			/* a0 = arg1.  */
+	ld.w	a3, (sp, 4)		/* a3 = arg4.  */
+	ld.w	a2, (sp, 0)		/* a2 = arg3.  */
+	ld.w	l0, (sp, 8)		/* l0 = arg5.  */
+	ld.w	l1, (sp, 12)		/* l1 = arg6.  */
+	trap	0
+
+	.globl __syscall_cancel_arch_end	/* Global marker: first insn after trap.  */
+__syscall_cancel_arch_end:
+	addi	sp, sp, 16		/* Drop the argument copy area.  */
+	cfi_remember_state
+	cfi_def_cfa_offset (16 + STACK_ADJ)
+#ifdef SHARED
+	ld.w	gb, (sp, 16)
+	cfi_restore (gb)
+#endif
+	ld.w	lr, (sp, 12)
+	cfi_restore (lr)
+	ld.w	l3, (sp, 8)
+	cfi_restore (l3)
+	ld.w	l1, (sp, 4)
+	cfi_restore (l1)
+	ld.w	l0, (sp, 0)
+	cfi_restore (l0)
+	addi	sp, sp, 16
+	cfi_def_cfa_offset (0)
+	rts
+
+2:
+	cfi_restore_state
+#ifdef SHARED
+	lrw	a3, __syscall_do_cancel@GOTOFF
+	addu	a3, a3, gb		/* a3 = gb + GOT-relative offset of the callee.  */
+	jsr	a3
+#else
+	jbsr	__syscall_do_cancel
+#endif
+END (__syscall_cancel_arch)
diff --git a/sysdeps/unix/sysv/linux/getdents64.c b/sysdeps/unix/sysv/linux/getdents64.c
index 227fbf21ae..795bd935f0 100644
--- a/sysdeps/unix/sysv/linux/getdents64.c
+++ b/sysdeps/unix/sysv/linux/getdents64.c
@@ -33,100 +33,3 @@ __getdents64 (int fd, void *buf, size_t nbytes)
 }
 libc_hidden_def (__getdents64)
 weak_alias (__getdents64, getdents64)
-
-#if _DIRENT_MATCHES_DIRENT64
-strong_alias (__getdents64, __getdents)
-#else
-# include <shlib-compat.h>
-
-# if SHLIB_COMPAT(libc, GLIBC_2_1, GLIBC_2_2)
-#  include <olddirent.h>
-#  include <unistd.h>
-
-static ssize_t
-handle_overflow (int fd, __off64_t offset, ssize_t count)
-{
-  /* If this is the first entry in the buffer, we can report the
-     error.  */
-  if (offset == 0)
-    {
-      __set_errno (EOVERFLOW);
-      return -1;
-    }
-
-  /* Otherwise, seek to the overflowing entry, so that the next call
-     will report the error, and return the data read so far.  */
-  if (__lseek64 (fd, offset, SEEK_SET) != 0)
-    return -1;
-  return count;
-}
-
-ssize_t
-__old_getdents64 (int fd, char *buf, size_t nbytes)
-{
-  /* We do not move the individual directory entries.  This is only
-     possible if the target type (struct __old_dirent64) is smaller
-     than the source type.  */
-  _Static_assert (offsetof (struct __old_dirent64, d_name)
-		  <= offsetof (struct dirent64, d_name),
-		  "__old_dirent64 is larger than dirent64");
-  _Static_assert (__alignof__ (struct __old_dirent64)
-		  <= __alignof__ (struct dirent64),
-		  "alignment of __old_dirent64 is larger than dirent64");
-
-  ssize_t retval = INLINE_SYSCALL_CALL (getdents64, fd, buf, nbytes);
-  if (retval > 0)
-    {
-      /* This is the marker for the first entry.  Offset 0 is reserved
-	 for the first entry (see rewinddir).  Here, we use it as a
-	 marker for the first entry in the buffer.  We never actually
-	 seek to offset 0 because handle_overflow reports the error
-	 directly, so it does not matter that the offset is incorrect
-	 if entries have been read from the descriptor before (so that
-	 the descriptor is not actually at offset 0).  */
-      __off64_t previous_offset = 0;
-
-      char *p = buf;
-      char *end = buf + retval;
-      while (p < end)
-	{
-	  struct dirent64 *source = (struct dirent64 *) p;
-
-	  /* Copy out the fixed-size data.  */
-	  __ino_t ino = source->d_ino;
-	  __off64_t offset = source->d_off;
-	  unsigned int reclen = source->d_reclen;
-	  unsigned char type = source->d_type;
-
-	  /* Check for ino_t overflow.  */
-	  if (__glibc_unlikely (ino != source->d_ino))
-	    return handle_overflow (fd, previous_offset, p - buf);
-
-	  /* Convert to the target layout.  Use a separate struct and
-	     memcpy to side-step aliasing issues.  */
-	  struct __old_dirent64 result;
-	  result.d_ino = ino;
-	  result.d_off = offset;
-	  result.d_reclen = reclen;
-	  result.d_type = type;
-
-	  /* Write the fixed-sized part of the result to the
-	     buffer.  */
-	  size_t result_name_offset = offsetof (struct __old_dirent64, d_name);
-	  memcpy (p, &result, result_name_offset);
-
-	  /* Adjust the position of the name if necessary.  Copy
-	     everything until the end of the record, including the
-	     terminating NUL byte.  */
-	  if (result_name_offset != offsetof (struct dirent64, d_name))
-	    memmove (p + result_name_offset, source->d_name,
-		     reclen - offsetof (struct dirent64, d_name));
-
-	  p += reclen;
-	  previous_offset = offset;
-	}
-     }
-  return retval;
-}
-# endif /* SHLIB_COMPAT(libc, GLIBC_2_1, GLIBC_2_2)  */
-#endif /* _DIRENT_MATCHES_DIRENT64  */
diff --git a/sysdeps/unix/sysv/linux/gettimeofday.c b/sysdeps/unix/sysv/linux/gettimeofday.c
index 7ab147c614..5ca8916611 100644
--- a/sysdeps/unix/sysv/linux/gettimeofday.c
+++ b/sysdeps/unix/sysv/linux/gettimeofday.c
@@ -37,11 +37,11 @@ __gettimeofday_syscall (struct timeval *restrict tv, void *restrict tz)
 }
 
 # undef INIT_ARCH
-# define INIT_ARCH() \
-  void *vdso_gettimeofday = dl_vdso_vsym (HAVE_GETTIMEOFDAY_VSYSCALL)
+# define INIT_ARCH()
 libc_ifunc (__gettimeofday,
-	    vdso_gettimeofday ? VDSO_IFUNC_RET (vdso_gettimeofday)
-			      : (void *) __gettimeofday_syscall)
+	    GLRO(dl_vdso_gettimeofday) != NULL
+	    ? VDSO_IFUNC_RET (GLRO(dl_vdso_gettimeofday))
+	    : (void *) __gettimeofday_syscall)
 
 # else
 int
diff --git a/sysdeps/unix/sysv/linux/hppa/arch-syscall.h b/sysdeps/unix/sysv/linux/hppa/arch-syscall.h
index a1b2c819d6..08b153f2cc 100644
--- a/sysdeps/unix/sysv/linux/hppa/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/hppa/arch-syscall.h
@@ -197,6 +197,7 @@
 #define __NR_mq_timedsend_time64 418
 #define __NR_mq_unlink 230
 #define __NR_mremap 163
+#define __NR_mseal 462
 #define __NR_msgctl 191
 #define __NR_msgget 190
 #define __NR_msgrcv 189
diff --git a/sysdeps/unix/sysv/linux/hppa/libc.abilist b/sysdeps/unix/sysv/linux/hppa/libc.abilist
index fbcd60c2b3..8de7644a59 100644
--- a/sysdeps/unix/sysv/linux/hppa/libc.abilist
+++ b/sysdeps/unix/sysv/linux/hppa/libc.abilist
@@ -2821,6 +2821,8 @@ GLIBC_2.4 sys_errlist D 0x400
 GLIBC_2.4 sys_nerr D 0x4
 GLIBC_2.4 unlinkat F
 GLIBC_2.4 unshare F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
 GLIBC_2.5 __readlinkat_chk F
 GLIBC_2.5 inet6_opt_append F
 GLIBC_2.5 inet6_opt_find F
diff --git a/sysdeps/unix/sysv/linux/hppa/syscall_cancel.S b/sysdeps/unix/sysv/linux/hppa/syscall_cancel.S
new file mode 100644
index 0000000000..b9c19747ea
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/hppa/syscall_cancel.S
@@ -0,0 +1,81 @@
+/* Cancellable syscall wrapper.  Linux/hppa version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <descr-const.h>
+
+/* long int __syscall_cancel_arch (int *cancelhandling,
+				   long int nr,
+				   long int arg1,
+				   long int arg2,
+				   long int arg3,
+				   long int arg4,
+				   long int arg5,
+				   long int arg6)  */
+
+	.text
+ENTRY(__syscall_cancel_arch)
+	stw	%r2,-20(%r30)		/* Save %r2; reloaded from -148 after the frame bump.  */
+	ldo	128(%r30),%r30		/* Advance %r30 by 128 for the local frame.  */
+	cfi_def_cfa_offset (-128)
+	cfi_offset (2, -20)
+	ldw	-180(%r30),%r28		/* Copy four incoming stack words to -108..-120.  */
+	copy	%r26,%r20		/* %r20 = 1st argument (cancelhandling), assuming the usual %r26/%r25/%r24/%r23 argument registers.  */
+	stw	%r28,-108(%r30)
+	ldw	-184(%r30),%r28
+	copy	%r24,%r26		/* %r26 = 3rd argument (arg1).  */
+	stw	%r28,-112(%r30)
+	ldw	-188(%r30),%r28
+	stw	%r28,-116(%r30)
+	ldw	-192(%r30),%r28
+	stw	%r4,-104(%r30)		/* Save %r4; it holds %r19 across the syscall.  */
+	stw	%r28,-120(%r30)
+	copy	%r25,%r28		/* %r28 = 2nd argument (nr).  */
+	copy	%r23,%r25		/* %r25 = 4th argument (arg2).  */
+#ifdef __PIC__
+	stw	%r19,-32(%r30)
+#endif
+	cfi_offset (4, 24)
+
+	.global __syscall_cancel_arch_start	/* Global marker: cancel check begins.  */
+__syscall_cancel_arch_start:
+	ldw	0(%r20),%r20		/* %r20 = *cancelhandling.  */
+	bb,<	%r20,31-TCB_CANCELED_BIT,1f	/* Bits are numbered from the MSB, hence 31 - bit.  */
+	ldw	-120(%r30),%r21		/* NOTE(review): sits in the bb delay slot - confirm.  */
+	ldw	-116(%r30),%r22		/* Reload the copied stack words into %r21..%r24.  */
+	ldw	-112(%r30),%r23
+	ldw	-108(%r30),%r24
+	copy	%r19, %r4		/* Keep %r19 in %r4 across the syscall.  */
+	ble	0x100(%sr2, %r0)
+
+	.global __syscall_cancel_arch_end	/* Global marker: insn following the ble.  */
+__syscall_cancel_arch_end:
+
+	copy	%r28,%r20		/* %r20 = nr; NOTE(review): presumably the ble delay slot - confirm.  */
+	copy	%r4,%r19		/* Restore %r19.  */
+
+	ldw	-148(%r30),%r2		/* Reload the %r2 saved at entry (-20 - 128).  */
+	ldw	-104(%r30),%r4
+	bv	%r0(%r2)		/* Return through %r2.  */
+	ldo	-128(%r30),%r30		/* Release the frame.  */
+1:
+	bl	__syscall_do_cancel,%r2
+	nop
+	nop
+
+END(__syscall_cancel_arch)
diff --git a/sysdeps/unix/sysv/linux/i386/arch-syscall.h b/sysdeps/unix/sysv/linux/i386/arch-syscall.h
index cc775432d6..500ca1ec70 100644
--- a/sysdeps/unix/sysv/linux/i386/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/i386/arch-syscall.h
@@ -222,6 +222,7 @@
 #define __NR_mq_timedsend_time64 418
 #define __NR_mq_unlink 278
 #define __NR_mremap 163
+#define __NR_mseal 462
 #define __NR_msgctl 402
 #define __NR_msgget 399
 #define __NR_msgrcv 401
diff --git a/sysdeps/unix/sysv/linux/i386/libc.abilist b/sysdeps/unix/sysv/linux/i386/libc.abilist
index c989b433c0..4fedf775d4 100644
--- a/sysdeps/unix/sysv/linux/i386/libc.abilist
+++ b/sysdeps/unix/sysv/linux/i386/libc.abilist
@@ -3005,6 +3005,8 @@ GLIBC_2.4 sys_errlist D 0x210
 GLIBC_2.4 sys_nerr D 0x4
 GLIBC_2.4 unlinkat F
 GLIBC_2.4 unshare F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
 GLIBC_2.5 __readlinkat_chk F
 GLIBC_2.5 inet6_opt_append F
 GLIBC_2.5 inet6_opt_find F
diff --git a/sysdeps/unix/sysv/linux/i386/syscall_cancel.S b/sysdeps/unix/sysv/linux/i386/syscall_cancel.S
new file mode 100644
index 0000000000..46fb746da0
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/i386/syscall_cancel.S
@@ -0,0 +1,104 @@
+/* Cancellable syscall wrapper.  Linux/i686 version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <descr-const.h>
+
+/* long int [eax] __syscall_cancel_arch (int *cancelhandling [SP+4],
+					 long int nr   [SP+8],
+					 long int arg1 [SP+12],
+					 long int arg2 [SP+16],
+					 long int arg3 [SP+20],
+					 long int arg4 [SP+24],
+					 long int arg5 [SP+28],
+					 long int arg6 [SP+32])  */
+
+ENTRY (__syscall_cancel_arch)
+	pushl %ebp
+	cfi_def_cfa_offset (8)
+	cfi_offset (ebp, -8)
+	pushl %edi
+	cfi_def_cfa_offset (12)
+	cfi_offset (edi, -12)
+	pushl %esi
+	cfi_def_cfa_offset (16)
+	cfi_offset (esi, -16)
+	pushl %ebx
+	cfi_def_cfa_offset (20)
+	cfi_offset (ebx, -20)
+
+	.global __syscall_cancel_arch_start
+__syscall_cancel_arch_start:
+
+	/* if (*cancelhandling & CANCELED_BITMASK)
+	     __syscall_do_cancel()  */
+	movl	20(%esp), %eax
+	testb	$TCB_CANCELED_BITMASK, (%eax)
+	jne     1f
+
+	/* Issue a 6 argument syscall, the nr [%eax] being the syscall
+	   number.  */
+	movl    24(%esp), %eax
+	movl    28(%esp), %ebx
+	movl    32(%esp), %ecx
+	movl    36(%esp), %edx
+	movl    40(%esp), %esi
+	movl    44(%esp), %edi
+	movl    48(%esp), %ebp
+
+	/* We cannot use the vDSO syscall helper (__kernel_vsyscall)
+	   because the PC returned by the kernel would point to the vDSO
+	   page instead of the expected __syscall_cancel_arch_{start,end}
+	   marks.  */
+	int	$0x80
+
+	.global __syscall_cancel_arch_end
+__syscall_cancel_arch_end:
+
+	popl %ebx
+	cfi_restore (ebx)
+	cfi_def_cfa_offset (16)
+	popl %esi
+	cfi_restore (esi)
+	cfi_def_cfa_offset (12)
+	popl %edi
+	cfi_restore (edi)
+	cfi_def_cfa_offset (8)
+	popl %ebp
+	cfi_restore (ebp)
+	cfi_def_cfa_offset (4)
+        ret
+
+1:
+	/* Although __syscall_do_cancel does not return, the stack must
+	   still be set up correctly for unwinding.  */
+	popl %ebx
+	cfi_restore (ebx)
+	cfi_def_cfa_offset (16)
+	popl %esi
+	cfi_restore (esi)
+	cfi_def_cfa_offset (12)
+	popl %edi
+	cfi_restore (edi)
+	cfi_def_cfa_offset (8)
+	popl %ebp
+	cfi_restore (ebp)
+	cfi_def_cfa_offset (4)
+	jmp __syscall_do_cancel
+
+END (__syscall_cancel_arch)
diff --git a/sysdeps/unix/sysv/linux/include/bits/mman-shared.h b/sysdeps/unix/sysv/linux/include/bits/mman-shared.h
new file mode 100644
index 0000000000..7c14b5df3c
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/include/bits/mman-shared.h
@@ -0,0 +1,16 @@
+#include <sysdeps/unix/sysv/linux/bits/mman-shared.h>
+
+#ifndef _ISOMAC
+
+extern __typeof (pkey_alloc) __pkey_alloc;
+libc_hidden_proto (__pkey_alloc)
+extern __typeof (pkey_free) __pkey_free;
+libc_hidden_proto (__pkey_free)
+extern __typeof (pkey_mprotect) __pkey_mprotect;
+libc_hidden_proto (__pkey_mprotect)
+extern __typeof (pkey_get) __pkey_get;
+libc_hidden_proto (__pkey_get)
+extern __typeof (pkey_set) __pkey_set;
+libc_hidden_proto (__pkey_set)
+
+#endif
diff --git a/sysdeps/unix/sysv/linux/internal-signals.h b/sysdeps/unix/sysv/linux/internal-signals.h
index a6fae59aaa..6e3a3d7692 100644
--- a/sysdeps/unix/sysv/linux/internal-signals.h
+++ b/sysdeps/unix/sysv/linux/internal-signals.h
@@ -90,6 +90,15 @@ internal_signal_restore_set (const internal_sigset_t *set)
 			 __NSIG_BYTES);
 }
 
+static inline void
+internal_signal_unblock_signal (int sig)
+{
+  internal_sigset_t set;
+  internal_sigemptyset (&set);
+  internal_sigaddset (&set, sig);
+  INTERNAL_SYSCALL_CALL (rt_sigprocmask, SIG_UNBLOCK, &set, NULL,
+			 __NSIG_BYTES);
+}
 
 /* It is used on timer_create code directly on sigwaitinfo call, so it can not
    use the internal_sigset_t definitions.  */
diff --git a/sysdeps/unix/sysv/linux/internal-sigset.h b/sysdeps/unix/sysv/linux/internal-sigset.h
index 5d7020b42d..4b19affd75 100644
--- a/sysdeps/unix/sysv/linux/internal-sigset.h
+++ b/sysdeps/unix/sysv/linux/internal-sigset.h
@@ -21,7 +21,7 @@
 
 #include <sigsetops.h>
 
-typedef struct
+typedef struct _internal_sigset_t
 {
   unsigned long int __val[__NSIG_WORDS];
 } internal_sigset_t;
diff --git a/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h b/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h
index 56bb08718a..8bb82448a7 100644
--- a/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h
@@ -155,6 +155,7 @@
 #define __NR_mq_timedsend 182
 #define __NR_mq_unlink 181
 #define __NR_mremap 216
+#define __NR_mseal 462
 #define __NR_msgctl 187
 #define __NR_msgget 186
 #define __NR_msgrcv 188
diff --git a/sysdeps/unix/sysv/linux/loongarch/fixup-asm-unistd.h b/sysdeps/unix/sysv/linux/loongarch/fixup-asm-unistd.h
new file mode 100644
index 0000000000..0062756b5c
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/loongarch/fixup-asm-unistd.h
@@ -0,0 +1,21 @@
+/* Regularize <asm/unistd.h> definitions.  LoongArch version.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* To avoid the messy usage of the fstat, newfstatat, and statx system calls,
+   we only use statx.  */
+#undef __NR_fstat
+#undef __NR_newfstatat
diff --git a/sysdeps/unix/sysv/linux/loongarch/lp64/libc.abilist b/sysdeps/unix/sysv/linux/loongarch/lp64/libc.abilist
index 0023ec1fa1..0024282289 100644
--- a/sysdeps/unix/sysv/linux/loongarch/lp64/libc.abilist
+++ b/sysdeps/unix/sysv/linux/loongarch/lp64/libc.abilist
@@ -2269,3 +2269,5 @@ GLIBC_2.39 stdc_trailing_zeros_ui F
 GLIBC_2.39 stdc_trailing_zeros_ul F
 GLIBC_2.39 stdc_trailing_zeros_ull F
 GLIBC_2.39 stdc_trailing_zeros_us F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
diff --git a/sysdeps/unix/sysv/linux/loongarch/syscall_cancel.S b/sysdeps/unix/sysv/linux/loongarch/syscall_cancel.S
new file mode 100644
index 0000000000..edea9632ff
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/loongarch/syscall_cancel.S
@@ -0,0 +1,50 @@
+/* Cancellable syscall wrapper.  Linux/loongarch version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <descr-const.h>
+
+ENTRY (__syscall_cancel_arch)
+
+	.global __syscall_cancel_arch_start
+__syscall_cancel_arch_start:
+
+	/* if (*cancelhandling & CANCELED_BITMASK)
+	     __syscall_do_cancel()  */
+	ld.w	t0, a0, 0
+	andi	t0, t0, TCB_CANCELED_BITMASK
+	bnez	t0, 1f
+
+	/* Issue a 6 argument syscall.  */
+	move	t1, a1
+	move	a0, a2
+	move	a1, a3
+	move	a2, a4
+	move	a3, a5
+	move	a4, a6
+	move	a5, a7
+	move	a7, t1
+	syscall 0
+
+	.global __syscall_cancel_arch_end
+__syscall_cancel_arch_end:
+	jr	ra
+1:
+	b	__syscall_do_cancel
+
+END (__syscall_cancel_arch)
diff --git a/sysdeps/unix/sysv/linux/m68k/arch-syscall.h b/sysdeps/unix/sysv/linux/m68k/arch-syscall.h
index 79f277dd5b..4ab34f6228 100644
--- a/sysdeps/unix/sysv/linux/m68k/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/m68k/arch-syscall.h
@@ -213,6 +213,7 @@
 #define __NR_mq_timedsend_time64 418
 #define __NR_mq_unlink 272
 #define __NR_mremap 163
+#define __NR_mseal 462
 #define __NR_msgctl 402
 #define __NR_msgget 399
 #define __NR_msgrcv 401
diff --git a/sysdeps/unix/sysv/linux/m68k/coldfire/libc.abilist b/sysdeps/unix/sysv/linux/m68k/coldfire/libc.abilist
index d9bd6a9b56..142595eb3e 100644
--- a/sysdeps/unix/sysv/linux/m68k/coldfire/libc.abilist
+++ b/sysdeps/unix/sysv/linux/m68k/coldfire/libc.abilist
@@ -2781,6 +2781,8 @@ GLIBC_2.4 xdrstdio_create F
 GLIBC_2.4 xencrypt F
 GLIBC_2.4 xprt_register F
 GLIBC_2.4 xprt_unregister F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
 GLIBC_2.5 __readlinkat_chk F
 GLIBC_2.5 inet6_opt_append F
 GLIBC_2.5 inet6_opt_find F
diff --git a/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist b/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist
index 439796d693..85e7746c10 100644
--- a/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist
+++ b/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist
@@ -2948,6 +2948,8 @@ GLIBC_2.4 sys_errlist D 0x210
 GLIBC_2.4 sys_nerr D 0x4
 GLIBC_2.4 unlinkat F
 GLIBC_2.4 unshare F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
 GLIBC_2.5 __readlinkat_chk F
 GLIBC_2.5 inet6_opt_append F
 GLIBC_2.5 inet6_opt_find F
diff --git a/sysdeps/unix/sysv/linux/m68k/syscall_cancel.S b/sysdeps/unix/sysv/linux/m68k/syscall_cancel.S
new file mode 100644
index 0000000000..8923bcc71c
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/m68k/syscall_cancel.S
@@ -0,0 +1,84 @@
+/* Cancellable syscall wrapper.  Linux/m68k version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <descr-const.h>
+
+/* long int __syscall_cancel_arch (int *cancelhandling,
+				   __syscall_arg_t nr,
+				   __syscall_arg_t arg1,
+				   __syscall_arg_t arg2,
+				   __syscall_arg_t arg3,
+				   __syscall_arg_t arg4,
+				   __syscall_arg_t arg5,
+				   __syscall_arg_t arg6)  */
+
+
+ENTRY (__syscall_cancel_arch)
+#ifdef __mcoldfire__
+	lea	(-16,%sp),%sp
+	movem.l	%d2-%d5,(%sp)
+#else
+	movem.l	%d2-%d5,-(%sp)
+#endif
+	cfi_def_cfa_offset (20)
+	cfi_offset (2, -20)
+	cfi_offset (3, -16)
+	cfi_offset (4, -12)
+	cfi_offset (5, -8)
+
+	.global __syscall_cancel_arch_start
+__syscall_cancel_arch_start:
+
+	move.l	20(%sp),%a0
+	move.l	(%a0),%d0
+#ifdef __mcoldfire__
+	move.w	%d0,%ccr
+	jeq	1f
+#else
+	btst	#TCB_CANCELED_BIT,%d0
+	jne 	1f
+#endif
+
+	move.l	48(%sp),%a0
+	move.l	44(%sp),%d5
+	move.l	40(%sp),%d4
+	move.l	36(%sp),%d3
+	move.l	32(%sp),%d2
+	move.l	28(%sp),%d1
+	move.l	24(%sp),%d0
+	trap #0
+
+	.global __syscall_cancel_arch_end
+__syscall_cancel_arch_end:
+
+#ifdef __mcoldfire__
+	movem.l	(%sp),%d2-%d5
+	lea	(16,%sp),%sp
+#else
+	movem.l	(%sp)+,%d2-%d5
+#endif
+	rts
+
+1:
+#ifdef PIC
+	bsr.l __syscall_do_cancel
+#else
+	jsr __syscall_do_cancel
+#endif
+END (__syscall_cancel_arch)
diff --git a/sysdeps/unix/sysv/linux/microblaze/arch-syscall.h b/sysdeps/unix/sysv/linux/microblaze/arch-syscall.h
index 779d5d5d70..79e225e50c 100644
--- a/sysdeps/unix/sysv/linux/microblaze/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/microblaze/arch-syscall.h
@@ -221,6 +221,7 @@
 #define __NR_mq_timedsend_time64 418
 #define __NR_mq_unlink 278
 #define __NR_mremap 163
+#define __NR_mseal 462
 #define __NR_msgctl 331
 #define __NR_msgget 332
 #define __NR_msgrcv 333
diff --git a/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist b/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist
index 1069d3252c..91dc1b8378 100644
--- a/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist
+++ b/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist
@@ -2834,3 +2834,5 @@ GLIBC_2.39 stdc_trailing_zeros_ui F
 GLIBC_2.39 stdc_trailing_zeros_ul F
 GLIBC_2.39 stdc_trailing_zeros_ull F
 GLIBC_2.39 stdc_trailing_zeros_us F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
diff --git a/sysdeps/unix/sysv/linux/microblaze/le/libc.abilist b/sysdeps/unix/sysv/linux/microblaze/le/libc.abilist
index 17abe08c8b..3440e90f6f 100644
--- a/sysdeps/unix/sysv/linux/microblaze/le/libc.abilist
+++ b/sysdeps/unix/sysv/linux/microblaze/le/libc.abilist
@@ -2831,3 +2831,5 @@ GLIBC_2.39 stdc_trailing_zeros_ui F
 GLIBC_2.39 stdc_trailing_zeros_ul F
 GLIBC_2.39 stdc_trailing_zeros_ull F
 GLIBC_2.39 stdc_trailing_zeros_us F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
diff --git a/sysdeps/unix/sysv/linux/microblaze/syscall_cancel.S b/sysdeps/unix/sysv/linux/microblaze/syscall_cancel.S
new file mode 100644
index 0000000000..1f9d202bf5
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/microblaze/syscall_cancel.S
@@ -0,0 +1,61 @@
+/* Cancellable syscall wrapper.  Linux/microblaze version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <descr-const.h>
+
+/* long int __syscall_cancel_arch (int *cancelhandling,
+				   long int nr,
+				   long int arg1,
+				   long int arg2,
+				   long int arg3,
+				   long int arg4,
+				   long int arg5,
+				   long int arg6)  */
+
+ENTRY (__syscall_cancel_arch)
+
+	.globl __syscall_cancel_arch_start
+__syscall_cancel_arch_start:
+
+	lwi	r3,r5,0
+	andi	r3,r3,TCB_CANCELED_BITMASK
+	bneid	r3,1f
+	addk	r12,r6,r0
+
+	addk	r5,r7,r0
+	addk	r6,r8,r0
+	addk	r7,r9,r0
+	addk	r8,r10,r0
+	lwi	r9,r1,56
+	lwi	r10,r1,60
+	brki	r14,8
+
+	.globl __syscall_cancel_arch_end
+__syscall_cancel_arch_end:
+
+	nop
+	lwi	r15,r1,0
+	rtsd	r15,8
+	addik	r1,r1,28
+
+1:
+	brlid	r15, __syscall_do_cancel
+	nop
+
+END (__syscall_cancel_arch)
diff --git a/sysdeps/unix/sysv/linux/mips/mips32/arch-syscall.h b/sysdeps/unix/sysv/linux/mips/mips32/arch-syscall.h
index 86ffd5ce84..dadd7f3130 100644
--- a/sysdeps/unix/sysv/linux/mips/mips32/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/mips/mips32/arch-syscall.h
@@ -211,6 +211,7 @@
 #define __NR_mq_timedsend_time64 4418
 #define __NR_mq_unlink 4272
 #define __NR_mremap 4167
+#define __NR_mseal 4462
 #define __NR_msgctl 4402
 #define __NR_msgget 4399
 #define __NR_msgrcv 4401
diff --git a/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist
index 799e508950..5ee7b8c52f 100644
--- a/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist
+++ b/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist
@@ -2909,6 +2909,8 @@ GLIBC_2.4 renameat F
 GLIBC_2.4 symlinkat F
 GLIBC_2.4 unlinkat F
 GLIBC_2.4 unshare F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
 GLIBC_2.5 __readlinkat_chk F
 GLIBC_2.5 inet6_opt_append F
 GLIBC_2.5 inet6_opt_find F
diff --git a/sysdeps/unix/sysv/linux/mips/mips32/nofpu/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips32/nofpu/libc.abilist
index 1c10996cbc..6cb6328e7c 100644
--- a/sysdeps/unix/sysv/linux/mips/mips32/nofpu/libc.abilist
+++ b/sysdeps/unix/sysv/linux/mips/mips32/nofpu/libc.abilist
@@ -2907,6 +2907,8 @@ GLIBC_2.4 renameat F
 GLIBC_2.4 symlinkat F
 GLIBC_2.4 unlinkat F
 GLIBC_2.4 unshare F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
 GLIBC_2.5 __readlinkat_chk F
 GLIBC_2.5 inet6_opt_append F
 GLIBC_2.5 inet6_opt_find F
diff --git a/sysdeps/unix/sysv/linux/mips/mips32/syscall_cancel.S b/sysdeps/unix/sysv/linux/mips/mips32/syscall_cancel.S
new file mode 100644
index 0000000000..eb3b2ed005
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/mips/mips32/syscall_cancel.S
@@ -0,0 +1,128 @@
+/* Cancellable syscall wrapper.  Linux/mips32 version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+#include <descr-const.h>
+
+/* long int __syscall_cancel_arch (int *cancelhandling,
+				   __syscall_arg_t nr,
+				   __syscall_arg_t arg1,
+				   __syscall_arg_t arg2,
+				   __syscall_arg_t arg3,
+				   __syscall_arg_t arg4,
+				   __syscall_arg_t arg5,
+				   __syscall_arg_t arg6,
+				   __syscall_arg_t arg7)  */
+
+#define FRAME_SIZE 56
+
+NESTED (__syscall_cancel_arch, FRAME_SIZE, fp)
+	.mask	0xc0070000,-SZREG
+	.fmask	0x00000000,0
+
+	PTR_ADDIU sp, -FRAME_SIZE
+	cfi_def_cfa_offset (FRAME_SIZE)
+
+	sw	fp, 48(sp)
+	sw	ra, 52(sp)
+	sw	s2, 44(sp)
+	sw	s1, 40(sp)
+	sw	s0, 36(sp)
+#ifdef __PIC__
+	.cprestore	16
+#endif
+	cfi_offset (ra, -4)
+	cfi_offset (fp, -8)
+	cfi_offset (s2, -12)
+	cfi_offset (s1, -16)
+	cfi_offset (s0, -20)
+
+	move	fp ,sp
+	cfi_def_cfa_register (fp)
+
+	.globl __syscall_cancel_arch_start
+__syscall_cancel_arch_start:
+
+	lw	v0, 0(a0)
+	andi	v0, v0, TCB_CANCELED_BITMASK
+	bne	v0, zero, 2f
+
+	addiu	sp, sp, -16
+	addiu	v0, sp, 16
+	sw	v0, 24(fp)
+
+	move	s0, a1
+	move	a0, a2
+	move	a1, a3
+	lw	a2, 72(fp)
+	lw	a3, 76(fp)
+	lw	v0, 84(fp)
+	lw	s1, 80(fp)
+	lw	s2, 88(fp)
+
+	.set	noreorder
+	subu	sp, 32
+	sw	s1, 16(sp)
+	sw	v0, 20(sp)
+	sw	s2, 24(sp)
+	move	v0, s0
+	syscall
+
+	.globl __syscall_cancel_arch_end
+__syscall_cancel_arch_end:
+	addiu	sp, sp, 32
+	.set	reorder
+
+	beq	a3, zero, 1f
+	subu	v0, zero, v0
+1:
+	move	sp, fp
+	cfi_remember_state
+	cfi_def_cfa_register (sp)
+	lw	ra, 52(fp)
+	lw	fp, 48(sp)
+	lw	s2, 44(sp)
+	lw	s1, 40(sp)
+	lw	s0, 36(sp)
+
+	.set	noreorder
+	.set	nomacro
+	jr	ra
+	addiu	sp,sp,FRAME_SIZE
+
+	.set	macro
+	.set	reorder
+
+	cfi_def_cfa_offset (0)
+	cfi_restore (s0)
+	cfi_restore (s1)
+	cfi_restore (s2)
+	cfi_restore (fp)
+	cfi_restore (ra)
+
+2:
+	cfi_restore_state
+#ifdef __PIC__
+	PTR_LA	t9, __syscall_do_cancel
+	jalr	t9
+#else
+	jal	__syscall_do_cancel
+#endif
+
+END (__syscall_cancel_arch)
diff --git a/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h b/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h
index 1827caf595..47a1b97351 100644
--- a/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h
+++ b/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h
@@ -18,6 +18,10 @@
 #ifndef _LINUX_MIPS_MIPS32_SYSDEP_H
 #define _LINUX_MIPS_MIPS32_SYSDEP_H 1
 
+/* mips32 has cancelable syscalls with 7 arguments (currently only
+   sync_file_range).  */
+#define HAVE_CANCELABLE_SYSCALL_WITH_7_ARGS	1
+
 /* There is some commonality.  */
 #include <sysdeps/unix/sysv/linux/mips/sysdep.h>
 #include <sysdeps/unix/sysv/linux/sysdep.h>
diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n32/arch-syscall.h b/sysdeps/unix/sysv/linux/mips/mips64/n32/arch-syscall.h
index 5d37a686e5..db6b2d4609 100644
--- a/sysdeps/unix/sysv/linux/mips/mips64/n32/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/mips/mips64/n32/arch-syscall.h
@@ -194,6 +194,7 @@
 #define __NR_mq_timedsend_time64 6418
 #define __NR_mq_unlink 6235
 #define __NR_mremap 6024
+#define __NR_mseal 6462
 #define __NR_msgctl 6069
 #define __NR_msgget 6066
 #define __NR_msgrcv 6068
diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist
index 03d9655f26..ae7474c0f0 100644
--- a/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist
+++ b/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist
@@ -2915,6 +2915,8 @@ GLIBC_2.4 renameat F
 GLIBC_2.4 symlinkat F
 GLIBC_2.4 unlinkat F
 GLIBC_2.4 unshare F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
 GLIBC_2.5 __readlinkat_chk F
 GLIBC_2.5 inet6_opt_append F
 GLIBC_2.5 inet6_opt_find F
diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n32/syscall_types.h b/sysdeps/unix/sysv/linux/mips/mips64/n32/syscall_types.h
new file mode 100644
index 0000000000..b3a8b0b634
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/mips/mips64/n32/syscall_types.h
@@ -0,0 +1,28 @@
+/* Types and macros used for syscall issuing.  MIPS64n32 version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _SYSCALL_TYPES_H
+#define _SYSCALL_TYPES_H
+
+typedef long long int __syscall_arg_t;
+
+/* Convert X to a long long, without losing any bits if it is one
+   already or warning if it is a 32-bit pointer.  */
+#define __SSC(__x) ((__syscall_arg_t) (__typeof__ ((__x) - (__x))) (__x))
+
+#endif
diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n64/arch-syscall.h b/sysdeps/unix/sysv/linux/mips/mips64/n64/arch-syscall.h
index 9b1e846e76..b4129a4dbd 100644
--- a/sysdeps/unix/sysv/linux/mips/mips64/n64/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/mips/mips64/n64/arch-syscall.h
@@ -183,6 +183,7 @@
 #define __NR_mq_timedsend 5232
 #define __NR_mq_unlink 5231
 #define __NR_mremap 5024
+#define __NR_mseal 5462
 #define __NR_msgctl 5069
 #define __NR_msgget 5066
 #define __NR_msgrcv 5068
diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist
index 05e402ed30..cdf040dec2 100644
--- a/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist
+++ b/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist
@@ -2817,6 +2817,8 @@ GLIBC_2.4 renameat F
 GLIBC_2.4 symlinkat F
 GLIBC_2.4 unlinkat F
 GLIBC_2.4 unshare F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
 GLIBC_2.5 __readlinkat_chk F
 GLIBC_2.5 inet6_opt_append F
 GLIBC_2.5 inet6_opt_find F
diff --git a/sysdeps/unix/sysv/linux/mips/mips64/syscall_cancel.S b/sysdeps/unix/sysv/linux/mips/mips64/syscall_cancel.S
new file mode 100644
index 0000000000..cfc0596b6a
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/mips/mips64/syscall_cancel.S
@@ -0,0 +1,112 @@
+/* Cancellable syscall wrapper.  Linux/mips64 version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+#include <descr-const.h>
+
+/* long int __syscall_cancel_arch (int *cancelhandling,
+				   __syscall_arg_t nr,
+				   __syscall_arg_t arg1,
+				   __syscall_arg_t arg2,
+				   __syscall_arg_t arg3,
+				   __syscall_arg_t arg4,
+				   __syscall_arg_t arg5,
+				   __syscall_arg_t arg6,
+				   __syscall_arg_t arg7)  */
+
+#define FRAME_SIZE 32
+
+	.text
+NESTED (__syscall_cancel_arch, FRAME_SIZE, ra)
+	.mask	0x90010000, -SZREG
+	.fmask	0x00000000, 0
+	LONG_ADDIU	sp, sp, -FRAME_SIZE
+	cfi_def_cfa_offset (FRAME_SIZE)
+	sd		gp, 16(sp)
+	cfi_offset (gp, -16)
+	lui		gp, %hi(%neg(%gp_rel(__syscall_cancel_arch)))
+	LONG_ADDU	gp, gp, t9
+	sd		ra, 24(sp)
+	sd		s0, 8(sp)
+	cfi_offset (ra, -8)
+	cfi_offset (s0, -24)
+	LONG_ADDIU	gp, gp, %lo(%neg(%gp_rel(__syscall_cancel_arch)))
+
+	.global __syscall_cancel_arch_start
+__syscall_cancel_arch_start:
+
+	lw		v0, 0(a0)
+	andi		v0, v0, TCB_CANCELED_BITMASK
+	.set noreorder
+	.set nomacro
+	bne		v0, zero, 2f
+	move		s0, a1
+	.set macro
+	.set reorder
+
+	move		a0, a2
+	move		a1, a3
+	move		a2, a4
+	move		a3, a5
+	move		a4, a6
+	move		a5, a7
+
+	.set noreorder
+	move		v0, s0
+	syscall
+	.set reorder
+
+	.global __syscall_cancel_arch_end
+__syscall_cancel_arch_end:
+
+	.set noreorder
+	.set nomacro
+#if __mips_isa_rev >= 6
+	beqzc	$7, 1f
+#else
+	bnel	a3, zero, 1f
+#endif
+	SUBU	v0, zero, v0
+	.set macro
+	.set reorder
+
+1:
+	ld		ra, 24(sp)
+	ld		gp, 16(sp)
+	ld		s0, 8(sp)
+
+	.set	noreorder
+	.set	nomacro
+	jr		ra
+	LONG_ADDIU	sp, sp, FRAME_SIZE
+	.set	macro
+	.set	reorder
+
+	cfi_remember_state
+	cfi_def_cfa_offset (0)
+	cfi_restore (s0)
+	cfi_restore (gp)
+	cfi_restore (ra)
+	.align	3
+2:
+	cfi_restore_state
+	LONG_L		t9, %got_disp(__syscall_do_cancel)(gp)
+	.reloc	3f, R_MIPS_JALR, __syscall_do_cancel
+3:	jalr		t9
+END (__syscall_cancel_arch)
diff --git a/sysdeps/unix/sysv/linux/mips/mips64/sysdep.h b/sysdeps/unix/sysv/linux/mips/mips64/sysdep.h
index 0a1711dad2..0438bed23d 100644
--- a/sysdeps/unix/sysv/linux/mips/mips64/sysdep.h
+++ b/sysdeps/unix/sysv/linux/mips/mips64/sysdep.h
@@ -44,15 +44,7 @@
 #undef HAVE_INTERNAL_BRK_ADDR_SYMBOL
 #define HAVE_INTERNAL_BRK_ADDR_SYMBOL 1
 
-#if _MIPS_SIM == _ABIN32
-/* Convert X to a long long, without losing any bits if it is one
-   already or warning if it is a 32-bit pointer.  */
-# define ARGIFY(X) ((long long int) (__typeof__ ((X) - (X))) (X))
-typedef long long int __syscall_arg_t;
-#else
-# define ARGIFY(X) ((long int) (X))
-typedef long int __syscall_arg_t;
-#endif
+#include <syscall_types.h>
 
 /* Note that the original Linux syscall restart convention required the
    instruction immediately preceding SYSCALL to initialize $v0 with the
@@ -120,7 +112,7 @@ typedef long int __syscall_arg_t;
 	long int _sys_result;						\
 									\
 	{								\
-	__syscall_arg_t _arg1 = ARGIFY (arg1);				\
+	__syscall_arg_t _arg1 = __SSC (arg1);				\
 	register __syscall_arg_t __s0 asm ("$16") __attribute__ ((unused))\
 	  = (number);							\
 	register __syscall_arg_t __v0 asm ("$2");			\
@@ -144,8 +136,8 @@ typedef long int __syscall_arg_t;
 	long int _sys_result;						\
 									\
 	{								\
-	__syscall_arg_t _arg1 = ARGIFY (arg1);				\
-	__syscall_arg_t _arg2 = ARGIFY (arg2);				\
+	__syscall_arg_t _arg1 = __SSC (arg1);				\
+	__syscall_arg_t _arg2 = __SSC (arg2);				\
 	register __syscall_arg_t __s0 asm ("$16") __attribute__ ((unused))\
 	  = (number);							\
 	register __syscall_arg_t __v0 asm ("$2");			\
@@ -170,9 +162,9 @@ typedef long int __syscall_arg_t;
 	long int _sys_result;						\
 									\
 	{								\
-	__syscall_arg_t _arg1 = ARGIFY (arg1);				\
-	__syscall_arg_t _arg2 = ARGIFY (arg2);				\
-	__syscall_arg_t _arg3 = ARGIFY (arg3);				\
+	__syscall_arg_t _arg1 = __SSC (arg1);				\
+	__syscall_arg_t _arg2 = __SSC (arg2);				\
+	__syscall_arg_t _arg3 = __SSC (arg3);				\
 	register __syscall_arg_t __s0 asm ("$16") __attribute__ ((unused))\
 	  = (number);							\
 	register __syscall_arg_t __v0 asm ("$2");			\
@@ -199,10 +191,10 @@ typedef long int __syscall_arg_t;
 	long int _sys_result;						\
 									\
 	{								\
-	__syscall_arg_t _arg1 = ARGIFY (arg1);				\
-	__syscall_arg_t _arg2 = ARGIFY (arg2);				\
-	__syscall_arg_t _arg3 = ARGIFY (arg3);				\
-	__syscall_arg_t _arg4 = ARGIFY (arg4);				\
+	__syscall_arg_t _arg1 = __SSC (arg1);				\
+	__syscall_arg_t _arg2 = __SSC (arg2);				\
+	__syscall_arg_t _arg3 = __SSC (arg3);				\
+	__syscall_arg_t _arg4 = __SSC (arg4);				\
 	register __syscall_arg_t __s0 asm ("$16") __attribute__ ((unused))\
 	  = (number);							\
 	register __syscall_arg_t __v0 asm ("$2");			\
@@ -229,11 +221,11 @@ typedef long int __syscall_arg_t;
 	long int _sys_result;						\
 									\
 	{								\
-	__syscall_arg_t _arg1 = ARGIFY (arg1);				\
-	__syscall_arg_t _arg2 = ARGIFY (arg2);				\
-	__syscall_arg_t _arg3 = ARGIFY (arg3);				\
-	__syscall_arg_t _arg4 = ARGIFY (arg4);				\
-	__syscall_arg_t _arg5 = ARGIFY (arg5);				\
+	__syscall_arg_t _arg1 = __SSC (arg1);				\
+	__syscall_arg_t _arg2 = __SSC (arg2);				\
+	__syscall_arg_t _arg3 = __SSC (arg3);				\
+	__syscall_arg_t _arg4 = __SSC (arg4);				\
+	__syscall_arg_t _arg5 = __SSC (arg5);				\
 	register __syscall_arg_t __s0 asm ("$16") __attribute__ ((unused))\
 	  = (number);							\
 	register __syscall_arg_t __v0 asm ("$2");			\
@@ -261,12 +253,12 @@ typedef long int __syscall_arg_t;
 	long int _sys_result;						\
 									\
 	{								\
-	__syscall_arg_t _arg1 = ARGIFY (arg1);				\
-	__syscall_arg_t _arg2 = ARGIFY (arg2);				\
-	__syscall_arg_t _arg3 = ARGIFY (arg3);				\
-	__syscall_arg_t _arg4 = ARGIFY (arg4);				\
-	__syscall_arg_t _arg5 = ARGIFY (arg5);				\
-	__syscall_arg_t _arg6 = ARGIFY (arg6);				\
+	__syscall_arg_t _arg1 = __SSC (arg1);				\
+	__syscall_arg_t _arg2 = __SSC (arg2);				\
+	__syscall_arg_t _arg3 = __SSC (arg3);				\
+	__syscall_arg_t _arg4 = __SSC (arg4);				\
+	__syscall_arg_t _arg5 = __SSC (arg5);				\
+	__syscall_arg_t _arg6 = __SSC (arg6);				\
 	register __syscall_arg_t __s0 asm ("$16") __attribute__ ((unused))\
 	  = (number);							\
 	register __syscall_arg_t __v0 asm ("$2");			\
diff --git a/sysdeps/unix/sysv/linux/mremap-failure.h b/sysdeps/unix/sysv/linux/mremap-failure.h
new file mode 100644
index 0000000000..c99ab30ca9
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/mremap-failure.h
@@ -0,0 +1,30 @@
+/* mremap failure handling.  Linux version.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <support/check.h>
+
+/* Return exit value on mremap failure with errno ERR.  */
+
+static int
+mremap_failure_exit (int err)
+{
+  if (err != EINVAL)
+    return EXIT_FAILURE;
+
+  return EXIT_UNSUPPORTED;
+}
diff --git a/sysdeps/unix/sysv/linux/mremap.c b/sysdeps/unix/sysv/linux/mremap.c
index 4f770799c4..1ada5c1f40 100644
--- a/sysdeps/unix/sysv/linux/mremap.c
+++ b/sysdeps/unix/sysv/linux/mremap.c
@@ -20,6 +20,12 @@
 #include <sysdep.h>
 #include <stdarg.h>
 #include <stddef.h>
+#include <errno.h>
+
+#define MREMAP_KNOWN_BITS \
+  (MREMAP_MAYMOVE \
+   | MREMAP_FIXED \
+   | MREMAP_DONTUNMAP)
 
 void *
 __mremap (void *addr, size_t old_len, size_t new_len, int flags, ...)
@@ -27,7 +33,13 @@ __mremap (void *addr, size_t old_len, size_t new_len, int flags, ...)
   va_list va;
   void *new_addr = NULL;
 
-  if (flags & MREMAP_FIXED)
+  if (flags & ~(MREMAP_KNOWN_BITS))
+    {
+      __set_errno (EINVAL);
+      return MAP_FAILED;
+    }
+
+  if (flags & (MREMAP_FIXED | MREMAP_DONTUNMAP))
     {
       va_start (va, flags);
       new_addr = va_arg (va, void *);
diff --git a/sysdeps/unix/sysv/linux/nios2/arch-syscall.h b/sysdeps/unix/sysv/linux/nios2/arch-syscall.h
index abbc9ab6b0..b1d0fb6f6e 100644
--- a/sysdeps/unix/sysv/linux/nios2/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/nios2/arch-syscall.h
@@ -24,6 +24,7 @@
 #define __NR_clock_settime 112
 #define __NR_clock_settime64 404
 #define __NR_clone 220
+#define __NR_clone3 435
 #define __NR_close 57
 #define __NR_close_range 436
 #define __NR_connect 203
@@ -167,6 +168,7 @@
 #define __NR_mq_timedsend_time64 418
 #define __NR_mq_unlink 181
 #define __NR_mremap 216
+#define __NR_mseal 462
 #define __NR_msgctl 187
 #define __NR_msgget 186
 #define __NR_msgrcv 188
diff --git a/sysdeps/unix/sysv/linux/nios2/libc.abilist b/sysdeps/unix/sysv/linux/nios2/libc.abilist
index 3aa81766aa..773d4c5873 100644
--- a/sysdeps/unix/sysv/linux/nios2/libc.abilist
+++ b/sysdeps/unix/sysv/linux/nios2/libc.abilist
@@ -2873,3 +2873,5 @@ GLIBC_2.39 stdc_trailing_zeros_ui F
 GLIBC_2.39 stdc_trailing_zeros_ul F
 GLIBC_2.39 stdc_trailing_zeros_ull F
 GLIBC_2.39 stdc_trailing_zeros_us F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
diff --git a/sysdeps/unix/sysv/linux/nios2/syscall_cancel.S b/sysdeps/unix/sysv/linux/nios2/syscall_cancel.S
new file mode 100644
index 0000000000..19d0795886
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/nios2/syscall_cancel.S
@@ -0,0 +1,95 @@
+/* Cancellable syscall wrapper.  Linux/nios2 version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <descr-const.h>
+
+/* long int __syscall_cancel_arch (int *cancelhandling,
+				   __syscall_arg_t nr,
+				   __syscall_arg_t arg1,
+				   __syscall_arg_t arg2,
+				   __syscall_arg_t arg3,
+				   __syscall_arg_t arg4,
+				   __syscall_arg_t arg5,
+				   __syscall_arg_t arg6)  */
+
+ENTRY (__syscall_cancel_arch)
+#ifdef SHARED
+	addi	sp, sp, -8
+	stw	r22, 0(sp)
+	nextpc	r22
+1:
+	movhi	r8, %hiadj(_gp_got - 1b)
+	addi	r8, r8, %lo(_gp_got - 1b)
+	stw	ra, 4(sp)
+	add	r22, r22, r8
+#else
+	addi	sp, sp, -4
+	cfi_def_cfa_offset (4)
+	stw	ra, 0(sp)
+	cfi_offset (31, -4)
+#endif
+
+	.globl __syscall_cancel_arch_start
+__syscall_cancel_arch_start:
+	ldw	r3, 0(r4)
+	andi	r3, r3, TCB_CANCELED_BITMASK
+	bne	r3, zero, 3f
+	mov	r10, r6
+	mov	r2, r5
+#ifdef SHARED
+# define STACK_ADJ 4
+#else
+# define STACK_ADJ 0
+#endif
+	ldw	r9, (16 + STACK_ADJ)(sp)
+	mov	r5, r7
+	ldw	r8, (12 + STACK_ADJ)(sp)
+	ldw	r7, (8 + STACK_ADJ)(sp)
+	ldw	r6, (4 + STACK_ADJ)(sp)
+	mov	r4, r10
+	trap
+
+	.globl __syscall_cancel_arch_end
+__syscall_cancel_arch_end:
+	beq	r7, zero, 2f
+	sub	r2, zero, r2
+2:
+#ifdef SHARED
+	ldw	ra, 4(sp)
+	ldw	r22, 0(sp)
+	addi	sp, sp, 8
+#else
+	ldw	ra, (0 + STACK_ADJ)(sp)
+	cfi_remember_state
+	cfi_restore (31)
+	addi	sp, sp, 4
+	cfi_def_cfa_offset (0)
+#endif
+	ret
+
+3:
+#ifdef SHARED
+	ldw	r2, %call(__syscall_do_cancel)(r22)
+	callr	r2
+#else
+	cfi_restore_state
+	call	__syscall_do_cancel
+#endif
+
+END (__syscall_cancel_arch)
diff --git a/sysdeps/unix/sysv/linux/olddirent.h b/sysdeps/unix/sysv/linux/olddirent.h
index 239f790648..065ca41a6e 100644
--- a/sysdeps/unix/sysv/linux/olddirent.h
+++ b/sysdeps/unix/sysv/linux/olddirent.h
@@ -34,8 +34,6 @@ extern struct __old_dirent64 *__old_readdir64 (DIR *__dirp);
 libc_hidden_proto (__old_readdir64);
 extern int __old_readdir64_r (DIR *__dirp, struct __old_dirent64 *__entry,
 			  struct __old_dirent64 **__result);
-extern __ssize_t __old_getdents64 (int __fd, char *__buf, size_t __nbytes)
-	attribute_hidden;
 int __old_scandir64 (const char * __dir,
 		     struct __old_dirent64 *** __namelist,
 		     int (*__selector) (const struct __old_dirent64 *),
diff --git a/sysdeps/unix/sysv/linux/or1k/arch-syscall.h b/sysdeps/unix/sysv/linux/or1k/arch-syscall.h
index 7223a93673..2d21fa2085 100644
--- a/sysdeps/unix/sysv/linux/or1k/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/or1k/arch-syscall.h
@@ -167,6 +167,7 @@
 #define __NR_mq_timedsend_time64 418
 #define __NR_mq_unlink 181
 #define __NR_mremap 216
+#define __NR_mseal 462
 #define __NR_msgctl 187
 #define __NR_msgget 186
 #define __NR_msgrcv 188
diff --git a/sysdeps/unix/sysv/linux/or1k/libc.abilist b/sysdeps/unix/sysv/linux/or1k/libc.abilist
index 959e59e7e7..c356a11b1c 100644
--- a/sysdeps/unix/sysv/linux/or1k/libc.abilist
+++ b/sysdeps/unix/sysv/linux/or1k/libc.abilist
@@ -2259,3 +2259,5 @@ GLIBC_2.40 getcontext F
 GLIBC_2.40 makecontext F
 GLIBC_2.40 setcontext F
 GLIBC_2.40 swapcontext F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
diff --git a/sysdeps/unix/sysv/linux/or1k/syscall_cancel.S b/sysdeps/unix/sysv/linux/or1k/syscall_cancel.S
new file mode 100644
index 0000000000..876f5e05ab
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/or1k/syscall_cancel.S
@@ -0,0 +1,63 @@
+/* Cancellable syscall wrapper.  Linux/or1k version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <descr-const.h>
+
+ENTRY (__syscall_cancel_arch)
+	l.addi	r1, r1, -4
+	cfi_def_cfa_offset (4)
+	l.sw	0(r1), r9
+	cfi_offset (9, -4)
+
+	.global __syscall_cancel_arch_start
+__syscall_cancel_arch_start:
+
+	/* if (*cancelhandling & CANCELED_BITMASK)
+	     __syscall_do_cancel()  */
+	l.movhi	r19, hi(0)
+	l.lwz	r17, 0(r3)
+	l.andi	r17, r17, TCB_CANCELED_BITMASK
+	l.sfeq	r17, r19
+	l.bnf	1f
+
+	/* Issue a 6 argument syscall: nr goes to r11, arguments to r3-r8.  */
+	l.or	r11, r4, r4
+	l.or	r3, r5, r5
+	l.or	r4, r6, r6
+	l.or	r5, r7, r7
+	l.or	r6, r8, r8
+	l.lwz	r7, 4(r1)
+	l.lwz	r8, 8(r1)
+	l.sys	1
+	 l.nop
+
+	.global __syscall_cancel_arch_end
+__syscall_cancel_arch_end:
+	/* Reload the saved r9 and return, releasing the 4-byte frame.  */
+	l.lwz	r9, 0(r1)
+	l.jr	r9
+	l.addi	r1, r1, 4
+	cfi_remember_state
+	cfi_def_cfa_offset (0)
+	cfi_restore (9)
+1:
+	cfi_restore_state
+	l.jal	__syscall_do_cancel
+	 l.nop
+END (__syscall_cancel_arch)
diff --git a/sysdeps/unix/sysv/linux/pkey_get.c b/sysdeps/unix/sysv/linux/pkey_get.c
index 743bd09268..ed9b6297d1 100644
--- a/sysdeps/unix/sysv/linux/pkey_get.c
+++ b/sysdeps/unix/sysv/linux/pkey_get.c
@@ -17,10 +17,13 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <errno.h>
+#include <sys/mman.h>
 
 int
-pkey_get (int key)
+__pkey_get (int key)
 {
   __set_errno (ENOSYS);
   return -1;
 }
+libc_hidden_def (__pkey_get)
+weak_alias (__pkey_get, pkey_get)
diff --git a/sysdeps/unix/sysv/linux/pkey_mprotect.c b/sysdeps/unix/sysv/linux/pkey_mprotect.c
index b7afa7946d..e321a7eb1b 100644
--- a/sysdeps/unix/sysv/linux/pkey_mprotect.c
+++ b/sysdeps/unix/sysv/linux/pkey_mprotect.c
@@ -22,7 +22,7 @@
 #include <sysdep.h>
 
 int
-pkey_mprotect (void *addr, size_t len, int prot, int pkey)
+__pkey_mprotect (void *addr, size_t len, int prot, int pkey)
 {
   if (pkey == -1)
     /* If the key is -1, the system call is precisely equivalent to
@@ -30,3 +30,5 @@ pkey_mprotect (void *addr, size_t len, int prot, int pkey)
     return __mprotect (addr, len, prot);
   return INLINE_SYSCALL_CALL (pkey_mprotect, addr, len, prot, pkey);
 }
+libc_hidden_def (__pkey_mprotect)
+weak_alias (__pkey_mprotect, pkey_mprotect)
diff --git a/sysdeps/unix/sysv/linux/pkey_set.c b/sysdeps/unix/sysv/linux/pkey_set.c
index d5d59e3031..30463ef89b 100644
--- a/sysdeps/unix/sysv/linux/pkey_set.c
+++ b/sysdeps/unix/sysv/linux/pkey_set.c
@@ -17,10 +17,13 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <errno.h>
+#include <sys/mman.h>
 
 int
-pkey_set (int key, unsigned int access_rights)
+__pkey_set (int key, unsigned int access_rights)
 {
   __set_errno (ENOSYS);
   return -1;
 }
+libc_hidden_def (__pkey_set)
+weak_alias (__pkey_set, pkey_set)
diff --git a/sysdeps/unix/sysv/linux/powerpc/cancellation-pc-check.h b/sysdeps/unix/sysv/linux/powerpc/cancellation-pc-check.h
new file mode 100644
index 0000000000..1175e1a070
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/cancellation-pc-check.h
@@ -0,0 +1,65 @@
+/* Architecture specific code for pthread cancellation handling.
+   Linux/PowerPC version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _NPTL_CANCELLATION_PC_CHECK
+#define _NPTL_CANCELLATION_PC_CHECK
+
+#include <sigcontextinfo.h>
+
+/* For syscalls with side-effects (e.g. a read that might return partial data),
+   the kernel cannot restart the syscall when interrupted by a signal, it must
+   return from the call with whatever partial result.  In this case, the saved
+   program counter is set just after the syscall instruction, so the SIGCANCEL
+   handler should not act on cancellation.
+
+   The __syscall_cancel_arch function, used for all cancellable syscalls,
+   contains two extra markers, __syscall_cancel_arch_start and
+   __syscall_cancel_arch_end.  The former points to just before the initial
+   conditional branch that checks if the thread has received a cancellation
+   request, while the latter points to the instruction after the one
+   responsible for issuing the syscall.
+
+   The function checks whether the program counter (PC) from ucontext_t CTX
+   is within the start and end boundaries of the __syscall_cancel_arch
+   bridge.  Return TRUE if the PC is within the boundary, meaning the
+   syscall does not have any side effects; or FALSE otherwise.  */
+
+static __always_inline bool
+cancellation_pc_check (void *ctx)
+{
+  /* Markers defined in syscall_cancel.S.  */
+  extern const char __syscall_cancel_arch_start[1];
+  extern const char __syscall_cancel_arch_end_sc[1];
+#if defined(USE_PPC_SVC) && defined(__powerpc64__)
+  extern const char __syscall_cancel_arch_end_svc[1];
+#endif
+
+  uintptr_t pc = sigcontext_get_pc (ctx);
+  /* The end marker must match the instruction used: scv or sc.  */
+  return pc >= (uintptr_t) __syscall_cancel_arch_start
+#if defined(USE_PPC_SVC) && defined(__powerpc64__)
+	 && ((THREAD_GET_HWCAP() & PPC_FEATURE2_SCV)
+	     ? pc < (uintptr_t) __syscall_cancel_arch_end_svc
+	     : pc < (uintptr_t) __syscall_cancel_arch_end_sc);
+#else
+	 && pc < (uintptr_t) __syscall_cancel_arch_end_sc;
+#endif
+}
+
+#endif
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/arch-syscall.h b/sysdeps/unix/sysv/linux/powerpc/powerpc32/arch-syscall.h
index af0d2b121e..206d9fd656 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/arch-syscall.h
@@ -211,6 +211,7 @@
 #define __NR_mq_timedsend_time64 418
 #define __NR_mq_unlink 263
 #define __NR_mremap 163
+#define __NR_mseal 462
 #define __NR_msgctl 402
 #define __NR_msgget 399
 #define __NR_msgrcv 401
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist
index 9714305608..7937f94cf0 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist
@@ -3138,6 +3138,8 @@ GLIBC_2.4 wcstold F
 GLIBC_2.4 wcstold_l F
 GLIBC_2.4 wprintf F
 GLIBC_2.4 wscanf F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
 GLIBC_2.5 __readlinkat_chk F
 GLIBC_2.5 inet6_opt_append F
 GLIBC_2.5 inet6_opt_find F
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libc.abilist
index 0beb52c542..d6e35f31d2 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libc.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libc.abilist
@@ -3183,6 +3183,8 @@ GLIBC_2.4 wcstold F
 GLIBC_2.4 wcstold_l F
 GLIBC_2.4 wprintf F
 GLIBC_2.4 wscanf F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
 GLIBC_2.5 __readlinkat_chk F
 GLIBC_2.5 inet6_opt_append F
 GLIBC_2.5 inet6_opt_find F
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/arch-syscall.h b/sysdeps/unix/sysv/linux/powerpc/powerpc64/arch-syscall.h
index a4c70aa7fe..19f72a7f69 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/arch-syscall.h
@@ -195,6 +195,7 @@
 #define __NR_mq_timedsend 264
 #define __NR_mq_unlink 263
 #define __NR_mremap 163
+#define __NR_mseal 462
 #define __NR_msgctl 402
 #define __NR_msgget 399
 #define __NR_msgrcv 401
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist
index cfc2ebd3ec..2268d6890d 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist
@@ -2892,6 +2892,8 @@ GLIBC_2.4 wcstold F
 GLIBC_2.4 wcstold_l F
 GLIBC_2.4 wprintf F
 GLIBC_2.4 wscanf F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
 GLIBC_2.5 __readlinkat_chk F
 GLIBC_2.5 inet6_opt_append F
 GLIBC_2.5 inet6_opt_find F
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist
index 8c9efc5a16..7f61b14bc8 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist
@@ -2968,3 +2968,5 @@ GLIBC_2.39 stdc_trailing_zeros_ui F
 GLIBC_2.39 stdc_trailing_zeros_ul F
 GLIBC_2.39 stdc_trailing_zeros_ull F
 GLIBC_2.39 stdc_trailing_zeros_us F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_get.c b/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_get.c
index 217ecd004f..ac9ea3a1c1 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_get.c
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_get.c
@@ -21,7 +21,7 @@
 #include <sys/mman.h>
 
 int
-pkey_get (int key)
+__pkey_get (int key)
 {
   if (key < 0 || key > PKEY_MAX)
     {
@@ -40,3 +40,5 @@ pkey_get (int key)
     return PKEY_DISABLE_WRITE;
   return 0;
 }
+libc_hidden_def (__pkey_get)
+weak_alias (__pkey_get, pkey_get)
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_set.c b/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_set.c
index cc67b0acff..ac10b5de22 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_set.c
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_set.c
@@ -21,7 +21,7 @@
 #include <sys/mman.h>
 
 int
-pkey_set (int key, unsigned int rights)
+__pkey_set (int key, unsigned int rights)
 {
   if (key < 0 || key > PKEY_MAX || rights > 3)
     {
@@ -46,3 +46,5 @@ pkey_set (int key, unsigned int rights)
   pkey_write (amr);
   return 0;
 }
+libc_hidden_def (__pkey_set)
+weak_alias (__pkey_set, pkey_set)
diff --git a/sysdeps/unix/sysv/linux/powerpc/syscall_cancel.S b/sysdeps/unix/sysv/linux/powerpc/syscall_cancel.S
new file mode 100644
index 0000000000..1f119d0889
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/syscall_cancel.S
@@ -0,0 +1,86 @@
+/* Cancellable syscall wrapper.  Linux/powerpc version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <descr-const.h>
+
+/* long int [r3] __syscall_cancel_arch (int *cancelhandling [r3],
+					long int nr   [r4],
+					long int arg1 [r5],
+					long int arg2 [r6],
+					long int arg3 [r7],
+					long int arg4 [r8],
+					long int arg5 [r9],
+					long int arg6 [r10])  */
+
+ENTRY (__syscall_cancel_arch)
+
+	.globl __syscall_cancel_arch_start
+__syscall_cancel_arch_start:
+
+	/* if (*cancelhandling & CANCELED_BITMASK)
+	     __syscall_do_cancel()  */
+	lwz     r0,0(r3)
+	andi.   r0,r0,TCB_CANCELED_BITMASK
+	bne     1f
+
+	/* Issue a 6 argument syscall, the nr [r4] being the syscall
+	   number.  */
+	mr      r0,r4
+	mr      r3,r5
+	mr      r4,r6
+	mr      r5,r7
+	mr      r6,r8
+	mr      r7,r9
+	mr      r8,r10
+
+#if defined(USE_PPC_SVC) && defined(__powerpc64__)
+	CHECK_SCV_SUPPORT r9 0f
+
+	stdu	r1, -SCV_FRAME_SIZE(r1)
+	cfi_adjust_cfa_offset (SCV_FRAME_SIZE)
+	.machine "push"
+	.machine "power9"
+	scv	0
+	.machine "pop"
+	.globl __syscall_cancel_arch_end_svc
+__syscall_cancel_arch_end_svc:
+	ld	r9, SCV_FRAME_SIZE + FRAME_LR_SAVE(r1)
+	mtlr	r9
+	addi	r1, r1, SCV_FRAME_SIZE
+	cfi_restore (lr)
+	li	r9, -4095
+	cmpld	r3, r9
+	bnslr+
+	neg	r3,r3
+	blr
+0:
+#endif
+	sc
+	.globl __syscall_cancel_arch_end_sc
+__syscall_cancel_arch_end_sc:
+	bnslr+
+	neg	r3,r3
+	blr
+
+	/* Although __syscall_do_cancel does not return, we need the stack
+	   to be set up correctly for unwinding.  */
+1:
+	TAIL_CALL_NO_RETURN (__syscall_do_cancel)
+
+END (__syscall_cancel_arch)
diff --git a/sysdeps/unix/sysv/linux/readdir64.c b/sysdeps/unix/sysv/linux/readdir64.c
index e6f5108c0a..e6b8867b7a 100644
--- a/sysdeps/unix/sysv/linux/readdir64.c
+++ b/sysdeps/unix/sysv/linux/readdir64.c
@@ -26,17 +26,13 @@
 #undef __readdir
 #undef readdir
 
-/* Read a directory entry from DIRP.  */
-struct dirent64 *
-__readdir64 (DIR *dirp)
+/* Read a directory entry from DIRP.  No locking.  */
+static struct dirent64 *
+__readdir64_unlocked (DIR *dirp)
 {
   struct dirent64 *dp;
   int saved_errno = errno;
 
-#if IS_IN (libc)
-  __libc_lock_lock (dirp->lock);
-#endif
-
   if (dirp->offset >= dirp->size)
     {
       /* We've emptied out our buffer.  Refill it.  */
@@ -53,9 +49,6 @@ __readdir64 (DIR *dirp)
 	     do not set errno in that case, to indicate success.  */
 	  if (bytes == 0 || errno == ENOENT)
 	    __set_errno (saved_errno);
-#if IS_IN (libc)
-	  __libc_lock_unlock (dirp->lock);
-#endif
 	  return NULL;
 	}
       dirp->size = (size_t) bytes;
@@ -68,10 +61,16 @@ __readdir64 (DIR *dirp)
   dirp->offset += dp->d_reclen;
   dirp->filepos = dp->d_off;
 
-#if IS_IN (libc)
-  __libc_lock_unlock (dirp->lock);
-#endif
+  return dp;
+}
 
+/* Read a directory entry from DIRP.  */
+struct dirent64 *
+__readdir64 (DIR *dirp)
+{
+  __libc_lock_lock (dirp->lock);
+  struct dirent64 *dp = __readdir64_unlocked (dirp);
+  __libc_lock_unlock (dirp->lock);
   return dp;
 }
 libc_hidden_def (__readdir64)
@@ -99,45 +98,54 @@ __old_readdir64 (DIR *dirp)
   struct __old_dirent64 *dp;
   int saved_errno = errno;
 
-#if IS_IN (libc)
   __libc_lock_lock (dirp->lock);
-#endif
 
-  if (dirp->offset >= dirp->size)
+  while (1)
     {
-      /* We've emptied out our buffer.  Refill it.  */
+      errno = 0;
+      struct dirent64 *newdp = __readdir64_unlocked (dirp);
+      if (newdp == NULL)
+	{
+	  if (errno == 0 && dirp->errcode != 0)
+	    __set_errno (dirp->errcode);
+	  else if (errno == 0)
+	    __set_errno (saved_errno);
+	  dp = NULL;
+	  break;
+	}
 
-      size_t maxread = dirp->allocation;
-      ssize_t bytes;
+      /* Convert to the target layout.  Use a separate struct and
+	 memcpy to side-step aliasing issues.  */
+      struct __old_dirent64 result;
+      result.d_ino = newdp->d_ino;
+      result.d_off = newdp->d_off;
+      result.d_reclen = newdp->d_reclen;
+      result.d_type = newdp->d_type;
 
-      bytes = __old_getdents64 (dirp->fd, dirp->data, maxread);
-      if (bytes <= 0)
+      /* Check for ino_t overflow.  */
+      if (__glibc_unlikely (result.d_ino != newdp->d_ino))
 	{
-	  /* Linux may fail with ENOENT on some file systems if the
-	     directory inode is marked as dead (deleted).  POSIX
-	     treats this as a regular end-of-directory condition, so
-	     do not set errno in that case, to indicate success.  */
-	  if (bytes == 0 || errno == ENOENT)
-	    __set_errno (saved_errno);
-#if IS_IN (libc)
-	  __libc_lock_unlock (dirp->lock);
-#endif
-	  return NULL;
+	  dirp->errcode = ENAMETOOLONG;
+	  continue;
 	}
-      dirp->size = (size_t) bytes;
 
-      /* Reset the offset into the buffer.  */
-      dirp->offset = 0;
-    }
+      /* Overwrite the fixed-sized part.  */
+      dp = (struct __old_dirent64 *) newdp;
+      memcpy (dp, &result, offsetof (struct __old_dirent64, d_name));
 
-  dp = (struct __old_dirent64 *) &dirp->data[dirp->offset];
-  dirp->offset += dp->d_reclen;
-  dirp->filepos = dp->d_off;
+      /* Move the name.  */
+      _Static_assert (offsetof (struct __old_dirent64, d_name)
+		      <= offsetof (struct dirent64, d_name),
+		      "old struct must be smaller");
+      if (offsetof (struct __old_dirent64, d_name)
+	  != offsetof (struct dirent64, d_name))
+	memmove (dp->d_name, newdp->d_name, strlen (newdp->d_name) + 1);
 
-#if IS_IN (libc)
-  __libc_lock_unlock (dirp->lock);
-#endif
+      __set_errno (saved_errno);
+      break;
+    }
 
+  __libc_lock_unlock (dirp->lock);
   return dp;
 }
 libc_hidden_def (__old_readdir64)
diff --git a/sysdeps/unix/sysv/linux/readdir64_r.c b/sysdeps/unix/sysv/linux/readdir64_r.c
index e87882ee06..c42a161ffc 100644
--- a/sysdeps/unix/sysv/linux/readdir64_r.c
+++ b/sysdeps/unix/sysv/linux/readdir64_r.c
@@ -37,7 +37,7 @@ __readdir64_r (DIR *dirp, struct dirent64 *entry, struct dirent64 **result)
 
   __libc_lock_lock (dirp->lock);
 
-  do
+  while (1)
     {
       if (dirp->offset >= dirp->size)
 	{
@@ -79,26 +79,21 @@ __readdir64_r (DIR *dirp, struct dirent64 *entry, struct dirent64 **result)
 
       dirp->filepos = dp->d_off;
 
-      if (reclen > offsetof (struct dirent64, d_name) + NAME_MAX + 1)
+      if (reclen <= offsetof (struct dirent64, d_name) + NAME_MAX + 1)
+	break;
+
+      /* The record is very long.  It could still fit into the
+	 caller-supplied buffer if we can skip padding at the end.  */
+      size_t namelen = _D_EXACT_NAMLEN (dp);
+      if (namelen <= NAME_MAX)
 	{
-	  /* The record is very long.  It could still fit into the
-	     caller-supplied buffer if we can skip padding at the
-	     end.  */
-	  size_t namelen = _D_EXACT_NAMLEN (dp);
-	  if (namelen <= NAME_MAX)
-	    reclen = offsetof (struct dirent64, d_name) + namelen + 1;
-	  else
-	    {
-	      /* The name is too long.  Ignore this file.  */
-	      dirp->errcode = ENAMETOOLONG;
-	      dp->d_ino = 0;
-	      continue;
-	    }
+	  reclen = offsetof (struct dirent64, d_name) + namelen + 1;
+	  break;
 	}
 
-      /* Skip deleted and ignored files.  */
+      /* The name is too long.  Ignore this file.  */
+      dirp->errcode = ENAMETOOLONG;
     }
-  while (dp->d_ino == 0);
 
   if (dp != NULL)
     {
@@ -135,91 +130,37 @@ attribute_compat_text_section
 __old_readdir64_r (DIR *dirp, struct __old_dirent64 *entry,
 		   struct __old_dirent64 **result)
 {
-  struct __old_dirent64 *dp;
-  size_t reclen;
-  const int saved_errno = errno;
-  int ret;
-
-  __libc_lock_lock (dirp->lock);
-
-  do
+  while (1)
     {
-      if (dirp->offset >= dirp->size)
-	{
-	  /* We've emptied out our buffer.  Refill it.  */
-
-	  size_t maxread = dirp->allocation;
-	  ssize_t bytes;
-
-	  maxread = dirp->allocation;
-
-	  bytes = __old_getdents64 (dirp->fd, dirp->data, maxread);
-	  if (bytes <= 0)
-	    {
-	      /* On some systems getdents fails with ENOENT when the
-		 open directory has been rmdir'd already.  POSIX.1
-		 requires that we treat this condition like normal EOF.  */
-	      if (bytes < 0 && errno == ENOENT)
-		{
-		  bytes = 0;
-		  __set_errno (saved_errno);
-		}
-	      if (bytes < 0)
-		dirp->errcode = errno;
-
-	      dp = NULL;
-	      break;
-	    }
-	  dirp->size = (size_t) bytes;
+      struct dirent64 new_entry;
+      struct dirent64 *newp;
+      int ret = __readdir64_r (dirp, &new_entry, &newp);
 
-	  /* Reset the offset into the buffer.  */
-	  dirp->offset = 0;
+      if (ret != 0)
+	return ret;
+      else if (newp == NULL)
+	{
+	  *result = NULL;
+	  return 0;
 	}
-
-      dp = (struct __old_dirent64 *) &dirp->data[dirp->offset];
-
-      reclen = dp->d_reclen;
-
-      dirp->offset += reclen;
-
-      dirp->filepos = dp->d_off;
-
-      if (reclen > offsetof (struct __old_dirent64, d_name) + NAME_MAX + 1)
+      else
 	{
-	  /* The record is very long.  It could still fit into the
-	     caller-supplied buffer if we can skip padding at the
-	     end.  */
-	  size_t namelen = _D_EXACT_NAMLEN (dp);
-	  if (namelen <= NAME_MAX)
-	    reclen = offsetof (struct __old_dirent64, d_name) + namelen + 1;
-	  else
+	  entry->d_ino = newp->d_ino;
+	  if (entry->d_ino != newp->d_ino)
 	    {
-	      /* The name is too long.  Ignore this file.  */
-	      dirp->errcode = ENAMETOOLONG;
-	      dp->d_ino = 0;
+	      dirp->errcode = EOVERFLOW;
 	      continue;
 	    }
+	  size_t namelen = strlen (newp->d_name);
+	  entry->d_off = newp->d_off;
+	  entry->d_reclen = (offsetof (struct __old_dirent64, d_name)
+			     + namelen + 1);
+	  entry->d_type = newp->d_type;
+	  memcpy (entry->d_name, newp->d_name, namelen + 1);
+	  *result = entry;
+	  return 0;
 	}
-
-      /* Skip deleted and ignored files.  */
     }
-  while (dp->d_ino == 0);
-
-  if (dp != NULL)
-    {
-      *result = memcpy (entry, dp, reclen);
-      entry->d_reclen = reclen;
-      ret = 0;
-    }
-  else
-    {
-      *result = NULL;
-      ret = dirp->errcode;
-    }
-
-  __libc_lock_unlock (dirp->lock);
-
-  return ret;
 }
 
 compat_symbol (libc, __old_readdir64_r, readdir64_r, GLIBC_2_1);
diff --git a/sysdeps/unix/sysv/linux/readdir_r.c b/sysdeps/unix/sysv/linux/readdir_r.c
index ffd5262cf5..1d595688f7 100644
--- a/sysdeps/unix/sysv/linux/readdir_r.c
+++ b/sysdeps/unix/sysv/linux/readdir_r.c
@@ -25,14 +25,22 @@ __readdir_r (DIR *dirp, struct dirent *entry, struct dirent **result)
 {
   struct dirent *dp;
   size_t reclen;
+  int saved_errno = errno;
 
   __libc_lock_lock (dirp->lock);
 
   while (1)
     {
+      /* If errno is changed from 0, the NULL return value indicates
+	 an actual error.  It overrides a pending ENAMETOOLONG error.  */
+      __set_errno (0);
       dp = __readdir_unlocked (dirp);
       if (dp == NULL)
-	break;
+	{
+	  if (errno != 0)
+	    dirp->errcode = errno;
+	  break;
+	}
 
       reclen = dp->d_reclen;
       if (reclen <= offsetof (struct dirent, d_name) + NAME_MAX + 1)
@@ -61,6 +69,7 @@ __readdir_r (DIR *dirp, struct dirent *entry, struct dirent **result)
 
   __libc_lock_unlock (dirp->lock);
 
+  __set_errno (saved_errno);
   return dp != NULL ? 0 : dirp->errcode;
 }
 
diff --git a/sysdeps/unix/sysv/linux/riscv/rv32/arch-syscall.h b/sysdeps/unix/sysv/linux/riscv/rv32/arch-syscall.h
index 7315d164d6..eb9e57b028 100644
--- a/sysdeps/unix/sysv/linux/riscv/rv32/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/riscv/rv32/arch-syscall.h
@@ -153,6 +153,7 @@
 #define __NR_mq_timedsend_time64 418
 #define __NR_mq_unlink 181
 #define __NR_mremap 216
+#define __NR_mseal 462
 #define __NR_msgctl 187
 #define __NR_msgget 186
 #define __NR_msgrcv 188
diff --git a/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist b/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist
index 6397a9cb91..4187241f50 100644
--- a/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist
+++ b/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist
@@ -2512,3 +2512,5 @@ GLIBC_2.39 stdc_trailing_zeros_ul F
 GLIBC_2.39 stdc_trailing_zeros_ull F
 GLIBC_2.39 stdc_trailing_zeros_us F
 GLIBC_2.40 __riscv_hwprobe F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
diff --git a/sysdeps/unix/sysv/linux/riscv/rv64/arch-syscall.h b/sysdeps/unix/sysv/linux/riscv/rv64/arch-syscall.h
index 31a1130db9..1eac18e582 100644
--- a/sysdeps/unix/sysv/linux/riscv/rv64/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/riscv/rv64/arch-syscall.h
@@ -158,6 +158,7 @@
 #define __NR_mq_timedsend 182
 #define __NR_mq_unlink 181
 #define __NR_mremap 216
+#define __NR_mseal 462
 #define __NR_msgctl 187
 #define __NR_msgget 186
 #define __NR_msgrcv 188
diff --git a/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist b/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist
index 71bbf94f66..8935beccac 100644
--- a/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist
+++ b/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist
@@ -2712,3 +2712,5 @@ GLIBC_2.39 stdc_trailing_zeros_ul F
 GLIBC_2.39 stdc_trailing_zeros_ull F
 GLIBC_2.39 stdc_trailing_zeros_us F
 GLIBC_2.40 __riscv_hwprobe F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
diff --git a/sysdeps/unix/sysv/linux/riscv/syscall_cancel.S b/sysdeps/unix/sysv/linux/riscv/syscall_cancel.S
new file mode 100644
index 0000000000..742c748d09
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/riscv/syscall_cancel.S
@@ -0,0 +1,67 @@
+/* Cancellable syscall wrapper.  Linux/riscv version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <descr-const.h>
+
+/* long int __syscall_cancel_arch (int *cancelhandling,
+				   __syscall_arg_t nr,
+				   __syscall_arg_t arg1,
+				   __syscall_arg_t arg2,
+				   __syscall_arg_t arg3,
+				   __syscall_arg_t arg4,
+				   __syscall_arg_t arg5,
+				   __syscall_arg_t arg6)  */
+
+#ifdef SHARED
+	.option pic
+#else
+	.option nopic
+#endif
+
+ENTRY (__syscall_cancel_arch)
+
+	.globl __syscall_cancel_arch_start
+__syscall_cancel_arch_start:
+	lw	t1, 0(a0)		/* t1 = *cancelhandling.  */
+	/* if (*cancelhandling & CANCELED_BITMASK) goto 1f (cancel path).  */
+	andi	t1, t1, TCB_CANCELED_BITMASK
+	bne	t1, zero, 1f
+
+	mv	t3, a1			/* Keep nr: a1 is overwritten below.  */
+	mv	a0, a2			/* Shift arg1..arg6 from a2..a7 ...  */
+	mv	a1, a3
+	mv	a2, a4
+	mv	a3, a5
+	mv	a4, a6
+	mv	a5, a7			/* ... down into a0..a5.  */
+	mv	a7, t3			/* a7 = nr, the syscall number.  */
+	scall
+
+	.globl __syscall_cancel_arch_end
+__syscall_cancel_arch_end:
+	ret				/* No post-processing after scall.  */
+
+1:
+	addi	sp, sp, -16		/* Open a 16-byte stack frame.  */
+	cfi_def_cfa_offset (16)
+	REG_S	ra, (16-SZREG)(sp)	/* Save ra in the frame's top slot.  */
+	cfi_offset (ra, -SZREG)
+	call	__syscall_do_cancel	/* Presumably noreturn (TODO confirm).  */
+
+END (__syscall_cancel_arch)
diff --git a/sysdeps/unix/sysv/linux/s390/s390-32/arch-syscall.h b/sysdeps/unix/sysv/linux/s390/s390-32/arch-syscall.h
index cf8569304d..464eca58b2 100644
--- a/sysdeps/unix/sysv/linux/s390/s390-32/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/s390/s390-32/arch-syscall.h
@@ -214,6 +214,7 @@
 #define __NR_mq_timedsend_time64 418
 #define __NR_mq_unlink 272
 #define __NR_mremap 163
+#define __NR_mseal 462
 #define __NR_msgctl 402
 #define __NR_msgget 399
 #define __NR_msgrcv 401
diff --git a/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist b/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist
index a7467e2850..e69dc7ccf6 100644
--- a/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist
+++ b/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist
@@ -3136,6 +3136,8 @@ GLIBC_2.4 wcstold F
 GLIBC_2.4 wcstold_l F
 GLIBC_2.4 wprintf F
 GLIBC_2.4 wscanf F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
 GLIBC_2.5 __readlinkat_chk F
 GLIBC_2.5 inet6_opt_append F
 GLIBC_2.5 inet6_opt_find F
diff --git a/sysdeps/unix/sysv/linux/s390/s390-32/syscall_cancel.S b/sysdeps/unix/sysv/linux/s390/s390-32/syscall_cancel.S
new file mode 100644
index 0000000000..9e0ad2a635
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/s390/s390-32/syscall_cancel.S
@@ -0,0 +1,62 @@
+/* Cancellable syscall wrapper.  Linux/s390 version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <descr-const.h>
+
+/* long int __syscall_cancel_arch (int *cancelhandling,
+				   __syscall_arg_t nr,
+				   __syscall_arg_t arg1,
+				   __syscall_arg_t arg2,
+				   __syscall_arg_t arg3,
+				   __syscall_arg_t arg4,
+				   __syscall_arg_t arg5,
+				   __syscall_arg_t arg6)  */
+
+ENTRY (__syscall_cancel_arch)
+	stm	%r6,%r7,24(%r15)	/* Save %r6/%r7; lm below overwrites them.  */
+	cfi_offset (%r6, -72)
+	cfi_offset (%r7, -68)
+
+	.globl __syscall_cancel_arch_start
+__syscall_cancel_arch_start:
+	/* if (*cancelhandling & CANCELED_BITMASK)
+	     __syscall_do_cancel()  */
+	tm	3(%r2),TCB_CANCELED_BITMASK	/* Byte 3: low byte of the int.  */
+	jne	1f
+
+	/* Issue a 6 argument syscall, the nr [%r1] being the syscall
+	   number.  */
+	lr	%r1,%r3			/* %r1 = nr.  */
+	lr	%r2,%r4			/* %r2 = arg1.  */
+	lr	%r3,%r5			/* %r3 = arg2.  */
+	lr	%r4,%r6			/* %r4 = arg3.  */
+	lm	%r5,%r7,96(%r15)	/* %r5..%r7 = arg4..arg6 (stack; TODO confirm).  */
+	svc	0
+
+	.globl __syscall_cancel_arch_end
+__syscall_cancel_arch_end:
+	lm	%r6,%r7,24(%r15)	/* Reload what the stm at entry saved.  */
+	cfi_remember_state
+	cfi_restore (%r7)
+	cfi_restore (%r6)
+	br	%r14			/* Return to the caller.  */
+1:
+	cfi_restore_state
+	jg	__syscall_do_cancel	/* Jump; %r6/%r7 not yet modified here.  */
+END (__syscall_cancel_arch)
diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/arch-syscall.h b/sysdeps/unix/sysv/linux/s390/s390-64/arch-syscall.h
index f3536ed03f..57842702fd 100644
--- a/sysdeps/unix/sysv/linux/s390/s390-64/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/s390/s390-64/arch-syscall.h
@@ -185,6 +185,7 @@
 #define __NR_mq_timedsend 273
 #define __NR_mq_unlink 272
 #define __NR_mremap 163
+#define __NR_mseal 462
 #define __NR_msgctl 402
 #define __NR_msgget 399
 #define __NR_msgrcv 401
diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist b/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist
index fd1cb2972d..7d860001d8 100644
--- a/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist
+++ b/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist
@@ -2929,6 +2929,8 @@ GLIBC_2.4 wcstold F
 GLIBC_2.4 wcstold_l F
 GLIBC_2.4 wprintf F
 GLIBC_2.4 wscanf F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
 GLIBC_2.5 __readlinkat_chk F
 GLIBC_2.5 inet6_opt_append F
 GLIBC_2.5 inet6_opt_find F
diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/syscall_cancel.S b/sysdeps/unix/sysv/linux/s390/s390-64/syscall_cancel.S
new file mode 100644
index 0000000000..e1620add6a
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/s390/s390-64/syscall_cancel.S
@@ -0,0 +1,62 @@
+/* Cancellable syscall wrapper.  Linux/s390x version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <descr-const.h>
+
+/* long int __syscall_cancel_arch (int *cancelhandling,
+				   __syscall_arg_t nr,
+				   __syscall_arg_t arg1,
+				   __syscall_arg_t arg2,
+				   __syscall_arg_t arg3,
+				   __syscall_arg_t arg4,
+				   __syscall_arg_t arg5,
+				   __syscall_arg_t arg6)  */
+
+ENTRY (__syscall_cancel_arch)
+	stmg	%r6,%r7,48(%r15)	/* Save %r6/%r7; lmg below overwrites them.  */
+	cfi_offset (%r6, -112)
+	cfi_offset (%r7, -104)
+
+	.globl __syscall_cancel_arch_start
+__syscall_cancel_arch_start:
+	/* if (*cancelhandling & CANCELED_BITMASK)
+	     __syscall_do_cancel()  */
+	tm	3(%r2),TCB_CANCELED_BITMASK	/* Byte 3: low byte of the int.  */
+	jne	1f
+
+	/* Issue a 6 argument syscall, the nr [%r1] being the syscall
+	   number.  */
+	lgr	%r1,%r3			/* %r1 = nr.  */
+	lgr	%r2,%r4			/* %r2 = arg1.  */
+	lgr	%r3,%r5			/* %r3 = arg2.  */
+	lgr	%r4,%r6			/* %r4 = arg3.  */
+	lmg	%r5,%r7,160(%r15)	/* %r5..%r7 = arg4..arg6 (stack; TODO confirm).  */
+	svc	0
+
+	.globl __syscall_cancel_arch_end
+__syscall_cancel_arch_end:
+	lmg	%r6,%r7,48(%r15)	/* Reload what the stmg at entry saved.  */
+	cfi_remember_state
+	cfi_restore (%r7)
+	cfi_restore (%r6)
+	br	%r14			/* Return to the caller.  */
+1:
+	cfi_restore_state
+	jg	__syscall_do_cancel	/* Jump; %r6/%r7 not yet modified here.  */
+END (__syscall_cancel_arch)
diff --git a/sysdeps/unix/sysv/linux/sched_getattr.c b/sysdeps/unix/sysv/linux/sched_getattr.c
new file mode 100644
index 0000000000..e6b9970fcb
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/sched_getattr.c
@@ -0,0 +1,31 @@
+/* Reading scheduling policy and attributes.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sched.h>
+#include <sysdep.h>
+
+/* Pass PID, ATTR, SIZE and FLAGS through unchanged to the
+   sched_getattr system call and return what INLINE_SYSCALL_CALL yields.
+   NOTE(review): presumably -1 with errno set on failure, as for other
+   INLINE_SYSCALL_CALL users; confirm against sysdep.h.  */
+int
+sched_getattr (pid_t pid, struct sched_attr *attr, unsigned int size,
+               unsigned int flags)
+{
+  return INLINE_SYSCALL_CALL (sched_getattr, pid, attr, size, flags);
+}
diff --git a/sysdeps/unix/sysv/linux/sched_setattr.c b/sysdeps/unix/sysv/linux/sched_setattr.c
new file mode 100644
index 0000000000..25403fb454
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/sched_setattr.c
@@ -0,0 +1,30 @@
+/* Setting scheduling policy and attributes.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sched.h>
+#include <sysdep.h>
+
+/* Pass PID, ATTR and FLAGS through unchanged to the sched_setattr
+   system call and return what INLINE_SYSCALL_CALL yields.
+   NOTE(review): presumably -1 with errno set on failure, as for other
+   INLINE_SYSCALL_CALL users; confirm against sysdep.h.  */
+int
+sched_setattr (pid_t pid, struct sched_attr *attr, unsigned int flags)
+{
+  return INLINE_SYSCALL_CALL (sched_setattr, pid, attr, flags);
+}
diff --git a/sysdeps/unix/sysv/linux/sh/arch-syscall.h b/sysdeps/unix/sysv/linux/sh/arch-syscall.h
index 0c88bf10c7..165ba017c7 100644
--- a/sysdeps/unix/sysv/linux/sh/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/sh/arch-syscall.h
@@ -206,6 +206,7 @@
 #define __NR_mq_timedsend_time64 418
 #define __NR_mq_unlink 278
 #define __NR_mremap 163
+#define __NR_mseal 462
 #define __NR_msgctl 402
 #define __NR_msgget 399
 #define __NR_msgrcv 401
@@ -378,6 +379,7 @@
 #define __NR_symlinkat 304
 #define __NR_sync 36
 #define __NR_sync_file_range 314
+#define __NR_sync_file_range2 388
 #define __NR_syncfs 362
 #define __NR_sysfs 135
 #define __NR_sysinfo 116
diff --git a/sysdeps/unix/sysv/linux/sh/be/libc.abilist b/sysdeps/unix/sysv/linux/sh/be/libc.abilist
index ff6e6b1a13..fcb8161841 100644
--- a/sysdeps/unix/sysv/linux/sh/be/libc.abilist
+++ b/sysdeps/unix/sysv/linux/sh/be/libc.abilist
@@ -2828,6 +2828,8 @@ GLIBC_2.4 sys_errlist D 0x210
 GLIBC_2.4 sys_nerr D 0x4
 GLIBC_2.4 unlinkat F
 GLIBC_2.4 unshare F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
 GLIBC_2.5 __readlinkat_chk F
 GLIBC_2.5 inet6_opt_append F
 GLIBC_2.5 inet6_opt_find F
diff --git a/sysdeps/unix/sysv/linux/sh/le/libc.abilist b/sysdeps/unix/sysv/linux/sh/le/libc.abilist
index 449d92bbc5..3fd078d125 100644
--- a/sysdeps/unix/sysv/linux/sh/le/libc.abilist
+++ b/sysdeps/unix/sysv/linux/sh/le/libc.abilist
@@ -2825,6 +2825,8 @@ GLIBC_2.4 sys_errlist D 0x210
 GLIBC_2.4 sys_nerr D 0x4
 GLIBC_2.4 unlinkat F
 GLIBC_2.4 unshare F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
 GLIBC_2.5 __readlinkat_chk F
 GLIBC_2.5 inet6_opt_append F
 GLIBC_2.5 inet6_opt_find F
diff --git a/sysdeps/unix/sysv/linux/sh/syscall_cancel.S b/sysdeps/unix/sysv/linux/sh/syscall_cancel.S
new file mode 100644
index 0000000000..2afd23928d
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/sh/syscall_cancel.S
@@ -0,0 +1,126 @@
+/* Cancellable syscall wrapper.  Linux/sh version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <descr-const.h>
+
+/* long int __syscall_cancel_arch (int *cancelhandling,
+				   long int nr,
+				   long int arg1,
+				   long int arg2,
+				   long int arg3,
+				   long int arg4,
+				   long int arg5,
+				   long int arg6)  */
+
+ENTRY (__syscall_cancel_arch)
+
+#ifdef SHARED
+	mov.l	r12,@-r15		/* Push r12.  */
+	cfi_def_cfa_offset (4)
+	cfi_offset (12, -4)
+	mova	L(GT),r0		/* r0 = address of L(GT).  */
+	mov.l	L(GT),r12		/* r12 = word stored at L(GT).  */
+	sts.l	pr,@-r15		/* Push pr.  */
+	cfi_def_cfa_offset (8)
+	cfi_offset (17, -8)
+	add	r0,r12			/* r12 += r0; presumably the GOT address.  */
+#else
+	sts.l	pr,@-r15		/* Push pr.  */
+	cfi_def_cfa_offset (4)
+	cfi_offset (17, -4)
+#endif
+
+	.globl __syscall_cancel_arch_start
+__syscall_cancel_arch_start:
+
+	/* if (*cancelhandling & CANCELED_BITMASK)
+	     __syscall_do_cancel()  */
+	mov.l	@r4,r0			/* r0 = *cancelhandling.  */
+	tst	#TCB_CANCELED_BITMASK,r0
+	bf/s	1f			/* Delayed branch, taken if the bit is set.  */
+
+	/* Issue a 6 argument syscall.  */
+	mov	r5,r3			/* bf/s delay slot: r3 = nr.  */
+	mov	r6,r4			/* r4 = arg1.  */
+	mov	r7,r5			/* r5 = arg2.  */
+#ifdef SHARED
+	mov.l	@(8,r15),r6		/* Offsets skip the 8 bytes pushed above.  */
+	mov.l	@(12,r15),r7
+	mov.l	@(16,r15),r0
+	mov.l	@(20,r15),r1
+#else
+	mov.l	@(4,r15),r6		/* Offsets skip the 4 bytes pushed above.  */
+	mov.l	@(8,r15),r7
+	mov.l	@(12,r15),r0
+	mov.l	@(16,r15),r1
+#endif
+	trapa	#0x16
+
+	.globl __syscall_cancel_arch_end
+__syscall_cancel_arch_end:
+
+	/* The additional or is a workaround for a hardware issue:
+	   http://documentation.renesas.com/eng/products/mpumcu/tu/tnsh7456ae.pdf
+	 */
+	or	r0,r0
+	or	r0,r0
+	or	r0,r0
+	or	r0,r0
+	or	r0,r0
+
+	lds.l	@r15+,pr		/* Pop pr.  */
+	cfi_remember_state
+	cfi_restore (17)
+#ifdef SHARED
+	cfi_def_cfa_offset (4)
+	rts
+	mov.l	@r15+,r12		/* rts delay slot: pop r12.  */
+	cfi_def_cfa_offset (0)
+	cfi_restore (12)
+	.align 1
+1:
+	cfi_restore_state
+	mov.l	L(SC),r1		/* r1 = offset stored at L(SC).  */
+	bsrf	r1			/* Call __syscall_do_cancel via that offset.  */
+L(M):
+	nop
+
+	.align 2
+L(GT):
+	.long	_GLOBAL_OFFSET_TABLE_
+L(SC):
+	.long	__syscall_do_cancel-(L(M)+2)
+#else
+	cfi_def_cfa_offset (0)
+	rts
+	nop
+
+	.align 1
+1:
+	cfi_restore_state
+	mov.l	2f,r1			/* r1 = address stored in the literal at 2.  */
+	jsr	@r1			/* Call __syscall_do_cancel.  */
+	nop
+
+	.align 2
+2:
+	.long	__syscall_do_cancel
+#endif
+
+END (__syscall_cancel_arch)
diff --git a/sysdeps/unix/sysv/linux/socketcall.h b/sysdeps/unix/sysv/linux/socketcall.h
index 537fa43678..0efa5ee9e4 100644
--- a/sysdeps/unix/sysv/linux/socketcall.h
+++ b/sysdeps/unix/sysv/linux/socketcall.h
@@ -88,14 +88,33 @@
     sc_ret;								\
   })
 
-
-#define SOCKETCALL_CANCEL(name, args...)				\
-  ({									\
-    int oldtype = LIBC_CANCEL_ASYNC ();					\
-    long int sc_ret = __SOCKETCALL (SOCKOP_##name, args);		\
-    LIBC_CANCEL_RESET (oldtype);					\
-    sc_ret;								\
-  })
+#define __SOCKETCALL_CANCEL1(__name, __a1) \
+  SYSCALL_CANCEL (socketcall, __name, \
+     ((long int [1]) { (long int) __a1 }))
+#define __SOCKETCALL_CANCEL2(__name, __a1, __a2) \
+  SYSCALL_CANCEL (socketcall, __name, \
+     ((long int [2]) { (long int) __a1, (long int) __a2 }))
+#define __SOCKETCALL_CANCEL3(__name, __a1, __a2, __a3) \
+  SYSCALL_CANCEL (socketcall, __name, \
+     ((long int [3]) { (long int) __a1, (long int) __a2, (long int) __a3 }))
+#define __SOCKETCALL_CANCEL4(__name, __a1, __a2, __a3, __a4) \
+  SYSCALL_CANCEL (socketcall, __name, \
+     ((long int [4]) { (long int) __a1, (long int) __a2, (long int) __a3, \
+                       (long int) __a4 }))
+#define __SOCKETCALL_CANCEL5(__name, __a1, __a2, __a3, __a4, __a5) \
+  SYSCALL_CANCEL (socketcall, __name, \
+     ((long int [5]) { (long int) __a1, (long int) __a2, (long int) __a3, \
+                       (long int) __a4, (long int) __a5 }))
+#define __SOCKETCALL_CANCEL6(__name, __a1, __a2, __a3, __a4, __a5, __a6) \
+  SYSCALL_CANCEL (socketcall, __name, \
+     ((long int [6]) { (long int) __a1, (long int) __a2, (long int) __a3, \
+                       (long int) __a4, (long int) __a5, (long int) __a6 }))
+
+#define __SOCKETCALL_CANCEL(...) __SOCKETCALL_DISP (__SOCKETCALL_CANCEL,\
+						    __VA_ARGS__)
+
+#define SOCKETCALL_CANCEL(name, args...) \
+   __SOCKETCALL_CANCEL (SOCKOP_##name, args)
 
 
 #endif /* sys/socketcall.h */
diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/arch-syscall.h b/sysdeps/unix/sysv/linux/sparc/sparc32/arch-syscall.h
index 19fa614624..3bad6f102f 100644
--- a/sysdeps/unix/sysv/linux/sparc/sparc32/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/sparc/sparc32/arch-syscall.h
@@ -211,6 +211,7 @@
 #define __NR_mq_timedsend_time64 418
 #define __NR_mq_unlink 274
 #define __NR_mremap 250
+#define __NR_mseal 462
 #define __NR_msgctl 402
 #define __NR_msgget 399
 #define __NR_msgrcv 401
diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S b/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S
index 748d25fcfe..c84244f56b 100644
--- a/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S
+++ b/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S
@@ -28,6 +28,13 @@
 	.text
 ENTRY (__clone)
 	save	%sp,-96,%sp
+	save	%sp,-96,%sp
+#ifdef __sparcv9
+	flushw
+#else
+	ta 3
+#endif
+	restore
 	cfi_def_cfa_register(%fp)
 	cfi_window_save
 	cfi_register(%o7, %i7)
diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist b/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist
index e615be759a..1ce1fe9da7 100644
--- a/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist
+++ b/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist
@@ -3157,6 +3157,8 @@ GLIBC_2.4 wcstold F
 GLIBC_2.4 wcstold_l F
 GLIBC_2.4 wprintf F
 GLIBC_2.4 wscanf F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
 GLIBC_2.5 __readlinkat_chk F
 GLIBC_2.5 inet6_opt_append F
 GLIBC_2.5 inet6_opt_find F
diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/syscall_cancel.S b/sysdeps/unix/sysv/linux/sparc/sparc32/syscall_cancel.S
new file mode 100644
index 0000000000..45c6ae7a86
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/sparc/sparc32/syscall_cancel.S
@@ -0,0 +1,80 @@
+/* Cancellable syscall wrapper.  Linux/sparc32 version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <descr-const.h>
+
+/* long int __syscall_cancel_arch (int *cancelhandling,
+				   long int nr,
+				   long int arg1,
+				   long int arg2,
+				   long int arg3,
+				   long int arg4,
+				   long int arg5,
+				   long int arg6)  */
+
+ENTRY (__syscall_cancel_arch)
+	save	%sp, -96, %sp		/* New register window, 96-byte frame.  */
+
+	cfi_window_save
+	cfi_register (%o7, %i7)
+	cfi_def_cfa_register (%fp)
+
+	.globl __syscall_cancel_arch_start
+__syscall_cancel_arch_start:
+
+	/* if (*cancelhandling & CANCELED_BITMASK)
+	     __syscall_do_cancel()  */
+	ld	[%i0], %g2		/* %g2 = *cancelhandling.  */
+	andcc	%g2, TCB_CANCELED_BITMASK, %g0
+#ifdef __sparcv9
+	bne,pn	%icc, 2f
+#else
+	bne	2f
+#endif
+	/* Issue a 6 argument syscall (first mov is the branch delay slot).  */
+	 mov	%i2, %o0		/* %o0 = arg1.  */
+	mov	%i3, %o1		/* %o1 = arg2.  */
+	mov	%i4, %o2		/* %o2 = arg3.  */
+	mov	%i5, %o3		/* %o3 = arg4.  */
+	ld	[%fp+92], %o4		/* %o4 = arg5, read from the stack.  */
+	ld	[%fp+96], %o5		/* %o5 = arg6, read from the stack.  */
+	 mov	%i1, %g1		/* %g1 = nr, the syscall number.  */
+	ta	0x10
+
+	.globl __syscall_cancel_arch_end
+__syscall_cancel_arch_end:
+	bcc	1f			/* Carry clear: keep %o0 as is.  */
+	 nop
+	sub	%g0, %o0, %o0		/* Carry set: negate %o0.  */
+1:
+	mov	%o0, %i0		/* Result goes to %i0 for the caller.  */
+#ifdef __sparcv9
+	return	%i7+8
+	 nop
+#else
+	jmp	%i7+8
+	 restore
+#endif
+
+2:
+	call	__syscall_do_cancel, 0	/* Presumably noreturn (TODO confirm).  */
+	 nop
+	nop
+
+END (__syscall_cancel_arch)
diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h b/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h
index d2d68f5312..c2ffbb5c8f 100644
--- a/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h
+++ b/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h
@@ -107,6 +107,7 @@ ENTRY(name);					\
 #else  /* __ASSEMBLER__ */
 
 #define __SYSCALL_STRING						\
+	"mov	%[scn], %%g1;"						\
 	"ta	0x10;"							\
 	"bcc	1f;"							\
 	" nop;"								\
@@ -114,7 +115,7 @@ ENTRY(name);					\
 	"1:"
 
 #define __SYSCALL_CLOBBERS						\
-	"f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7",			\
+	"g1", "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7",		\
 	"f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15",		\
 	"f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",		\
 	"f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31",		\
diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/arch-syscall.h b/sysdeps/unix/sysv/linux/sparc/sparc64/arch-syscall.h
index 18516f20cb..98e1437920 100644
--- a/sysdeps/unix/sysv/linux/sparc/sparc64/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/sparc/sparc64/arch-syscall.h
@@ -192,6 +192,7 @@
 #define __NR_mq_timedsend 275
 #define __NR_mq_unlink 274
 #define __NR_mremap 250
+#define __NR_mseal 462
 #define __NR_msgctl 402
 #define __NR_msgget 399
 #define __NR_msgrcv 401
diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/clone.S b/sysdeps/unix/sysv/linux/sparc/sparc64/clone.S
index e5ff2cf1a0..370d51fda2 100644
--- a/sysdeps/unix/sysv/linux/sparc/sparc64/clone.S
+++ b/sysdeps/unix/sysv/linux/sparc/sparc64/clone.S
@@ -32,6 +32,9 @@
 
 ENTRY (__clone)
 	save	%sp, -192, %sp
+	save	%sp, -192, %sp
+	flushw
+	restore
 	cfi_def_cfa_register(%fp)
 	cfi_window_save
 	cfi_register(%o7, %i7)
diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist b/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist
index bd36431dd7..07507b86f6 100644
--- a/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist
+++ b/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist
@@ -2793,6 +2793,8 @@ GLIBC_2.4 sys_errlist D 0x430
 GLIBC_2.4 sys_nerr D 0x4
 GLIBC_2.4 unlinkat F
 GLIBC_2.4 unshare F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
 GLIBC_2.5 __readlinkat_chk F
 GLIBC_2.5 inet6_opt_append F
 GLIBC_2.5 inet6_opt_find F
diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/syscall_cancel.S b/sysdeps/unix/sysv/linux/sparc/sparc64/syscall_cancel.S
new file mode 100644
index 0000000000..6c8d1330cb
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/sparc/sparc64/syscall_cancel.S
@@ -0,0 +1,74 @@
+/* Cancellable syscall wrapper.  Linux/sparc64 version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <descr-const.h>
+
+	.register       %g2, #scratch
+
+/* long int __syscall_cancel_arch (int *cancelhandling,
+				   long int nr,
+				   long int arg1,
+				   long int arg2,
+				   long int arg3,
+				   long int arg4,
+				   long int arg5,
+				   long int arg6)  */
+
+ENTRY (__syscall_cancel_arch)
+	save	%sp, -176, %sp		/* New register window, 176-byte frame.  */
+
+	cfi_window_save
+	cfi_register (%o7, %i7)
+	cfi_def_cfa_register (%fp)
+
+	.globl __syscall_cancel_arch_start
+__syscall_cancel_arch_start:
+
+	/* if (*cancelhandling & CANCELED_BITMASK)
+	     __syscall_do_cancel()  */
+	lduw	[%i0], %g2		/* %g2 = *cancelhandling (32-bit load).  */
+	andcc	%g2, TCB_CANCELED_BITMASK, %g0
+	bne,pn	%xcc, 2f
+	/* Issue a 6 argument syscall (first mov is the branch delay slot).  */
+	 mov	%i2, %o0		/* %o0 = arg1.  */
+	mov	%i3, %o1		/* %o1 = arg2.  */
+	mov	%i4, %o2		/* %o2 = arg3.  */
+	mov	%i5, %o3		/* %o3 = arg4.  */
+	ldx	[%fp + STACK_BIAS + 176], %o4	/* %o4 = arg5, from the stack.  */
+	ldx	[%fp + STACK_BIAS + 184], %o5	/* %o5 = arg6, from the stack.  */
+	mov	%i1, %g1		/* %g1 = nr, the syscall number.  */
+	ta	0x6d
+
+	.global __syscall_cancel_arch_end
+__syscall_cancel_arch_end:
+
+	bcc,pt	%xcc, 1f		/* Carry clear: keep %o0 as is.  */
+	 nop
+	sub	%g0, %o0, %o0		/* Carry set: negate %o0.  */
+1:
+	mov	%o0, %i0		/* Result goes to %i0 for the caller.  */
+	return	%i7+8
+	 nop
+
+2:
+	call	__syscall_do_cancel, 0	/* Presumably noreturn (TODO confirm).  */
+	 nop
+	nop
+
+END (__syscall_cancel_arch)
diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h b/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h
index 96047424e9..5598fab08a 100644
--- a/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h
+++ b/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h
@@ -106,6 +106,7 @@ ENTRY(name);					\
 #else  /* __ASSEMBLER__ */
 
 #define __SYSCALL_STRING						\
+	"mov	%[scn], %%g1;"						\
 	"ta	0x6d;"							\
 	"bcc,pt	%%xcc, 1f;"						\
 	" nop;"								\
@@ -113,7 +114,7 @@ ENTRY(name);					\
 	"1:"
 
 #define __SYSCALL_CLOBBERS						\
-	"f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7",			\
+	"g1", "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7",		\
 	"f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15",		\
 	"f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",		\
 	"f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31",		\
diff --git a/sysdeps/unix/sysv/linux/sparc/sysdep.h b/sysdeps/unix/sysv/linux/sparc/sysdep.h
index dcabb57fe2..c287740a8c 100644
--- a/sysdeps/unix/sysv/linux/sparc/sysdep.h
+++ b/sysdeps/unix/sysv/linux/sparc/sysdep.h
@@ -50,97 +50,109 @@
 
 #undef INTERNAL_SYSCALL_NCS
 #define INTERNAL_SYSCALL_NCS(name, nr, args...) \
-  internal_syscall##nr(__SYSCALL_STRING, name, args)
+  _internal_syscall##nr(__SYSCALL_STRING, "p", name, args)
 
-#define internal_syscall0(string,name,dummy...)			\
+#define _internal_syscall0(string,nc,name,dummy...)	\
 ({									\
-	register long int __g1 __asm__ ("g1") = (name);			\
 	register long __o0 __asm__ ("o0");				\
+	long int _name = (long int) (name);				\
 	__asm __volatile (string : "=r" (__o0) :			\
-			  "r" (__g1) :					\
+			  [scn] nc (_name) :				\
 			  __SYSCALL_CLOBBERS);				\
 	__o0;								\
 })
+#define internal_syscall0(string,name,args...)				\
+  _internal_syscall0(string, "i", name, args)
 
-#define internal_syscall1(string,name,arg1)				\
+#define _internal_syscall1(string,nc,name,arg1)				\
 ({									\
 	long int _arg1 = (long int) (arg1);				\
-	register long int __g1 __asm__("g1") = (name);			\
+	long int _name = (long int) (name);				\
 	register long int  __o0 __asm__ ("o0") = _arg1;			\
-	__asm __volatile (string : "=r" (__o0) :			\
-			  "r" (__g1), "0" (__o0) :			\
+	__asm __volatile (string : "+r" (__o0) :			\
+			  [scn] nc (_name) :				\
 			  __SYSCALL_CLOBBERS);				\
 	__o0;								\
 })
+#define internal_syscall1(string,name,args...)				\
+  _internal_syscall1(string, "i", name, args)
 
-#define internal_syscall2(string,name,arg1,arg2)			\
+#define _internal_syscall2(string,nc,name,arg1,arg2)			\
 ({									\
 	long int _arg1 = (long int) (arg1);				\
 	long int _arg2 = (long int) (arg2);				\
-	register long int __g1 __asm__("g1") = (name);			\
+	long int _name = (long int) (name);				\
 	register long int __o0 __asm__ ("o0") = _arg1;			\
 	register long int __o1 __asm__ ("o1") = _arg2;			\
-	__asm __volatile (string : "=r" (__o0) :			\
-			  "r" (__g1), "0" (__o0), "r" (__o1) :		\
+	__asm __volatile (string : "+r" (__o0) :			\
+			  [scn] nc (_name), "r" (__o1) :		\
 			  __SYSCALL_CLOBBERS);				\
 	__o0;								\
 })
+#define internal_syscall2(string,name,args...)				\
+  _internal_syscall2(string, "i", name, args)
 
-#define internal_syscall3(string,name,arg1,arg2,arg3)			\
+#define _internal_syscall3(string,nc,name,arg1,arg2,arg3)		\
 ({									\
 	long int _arg1 = (long int) (arg1);				\
 	long int _arg2 = (long int) (arg2);				\
 	long int _arg3 = (long int) (arg3);				\
-	register long int __g1 __asm__("g1") = (name);			\
+	long int _name = (long int) (name);				\
 	register long int __o0 __asm__ ("o0") = _arg1;			\
 	register long int __o1 __asm__ ("o1") = _arg2;			\
 	register long int __o2 __asm__ ("o2") = _arg3;			\
-	__asm __volatile (string : "=r" (__o0) :			\
-			  "r" (__g1), "0" (__o0), "r" (__o1),		\
+	__asm __volatile (string : "+r" (__o0) :			\
+			  [scn] nc (_name), "r" (__o1),			\
 			  "r" (__o2) :					\
 			  __SYSCALL_CLOBBERS);				\
 	__o0;								\
 })
+#define internal_syscall3(string,name,args...)				\
+  _internal_syscall3(string, "i", name, args)
 
-#define internal_syscall4(string,name,arg1,arg2,arg3,arg4)		\
+#define _internal_syscall4(string,nc,name,arg1,arg2,arg3,arg4)		\
 ({									\
 	long int _arg1 = (long int) (arg1);				\
 	long int _arg2 = (long int) (arg2);				\
 	long int _arg3 = (long int) (arg3);				\
 	long int _arg4 = (long int) (arg4);				\
-	register long int __g1 __asm__("g1") = (name);			\
+	long int _name = (long int) (name);				\
 	register long int __o0 __asm__ ("o0") = _arg1;			\
 	register long int __o1 __asm__ ("o1") = _arg2;			\
 	register long int __o2 __asm__ ("o2") = _arg3;			\
 	register long int __o3 __asm__ ("o3") = _arg4;			\
-	__asm __volatile (string : "=r" (__o0) :			\
-			  "r" (__g1), "0" (__o0), "r" (__o1),		\
+	__asm __volatile (string : "+r" (__o0) :			\
+			  [scn] nc (_name), "r" (__o1),			\
 			  "r" (__o2), "r" (__o3) :			\
 			  __SYSCALL_CLOBBERS);				\
 	__o0;								\
 })
+#define internal_syscall4(string,name,args...)				\
+  _internal_syscall4(string, "i", name, args)
 
-#define internal_syscall5(string,name,arg1,arg2,arg3,arg4,arg5)		\
+#define _internal_syscall5(string,nc,name,arg1,arg2,arg3,arg4,arg5)	\
 ({									\
 	long int _arg1 = (long int) (arg1);				\
 	long int _arg2 = (long int) (arg2);				\
 	long int _arg3 = (long int) (arg3);				\
 	long int _arg4 = (long int) (arg4);				\
 	long int _arg5 = (long int) (arg5);				\
-	register long int __g1 __asm__("g1") = (name);			\
+	long int _name = (long int) (name);				\
 	register long int __o0 __asm__ ("o0") = _arg1;			\
 	register long int __o1 __asm__ ("o1") = _arg2;			\
 	register long int __o2 __asm__ ("o2") = _arg3;			\
 	register long int __o3 __asm__ ("o3") = _arg4;			\
 	register long int __o4 __asm__ ("o4") = _arg5;			\
-	__asm __volatile (string : "=r" (__o0) :			\
-			  "r" (__g1), "0" (__o0), "r" (__o1),		\
+	__asm __volatile (string : "+r" (__o0) :			\
+			  [scn] nc (_name), "r" (__o1),			\
 			  "r" (__o2), "r" (__o3), "r" (__o4) :		\
 			  __SYSCALL_CLOBBERS);				\
 	__o0;								\
 })
+#define internal_syscall5(string,name,args...)				\
+  _internal_syscall5(string, "i", name, args)
 
-#define internal_syscall6(string,name,arg1,arg2,arg3,arg4,arg5,arg6)	\
+#define _internal_syscall6(string,nc,name,arg1,arg2,arg3,arg4,arg5,arg6)\
 ({									\
 	long int _arg1 = (long int) (arg1);				\
 	long int _arg2 = (long int) (arg2);				\
@@ -148,20 +160,22 @@
 	long int _arg4 = (long int) (arg4);				\
 	long int _arg5 = (long int) (arg5);				\
 	long int _arg6 = (long int) (arg6);				\
-	register long int __g1 __asm__("g1") = (name);			\
+	long int _name = (long int) (name);				\
 	register long int __o0 __asm__ ("o0") = _arg1;			\
 	register long int __o1 __asm__ ("o1") = _arg2;			\
 	register long int __o2 __asm__ ("o2") = _arg3;			\
 	register long int __o3 __asm__ ("o3") = _arg4;			\
 	register long int __o4 __asm__ ("o4") = _arg5;			\
 	register long int __o5 __asm__ ("o5") = _arg6;			\
-	__asm __volatile (string : "=r" (__o0) :			\
-			  "r" (__g1), "0" (__o0), "r" (__o1),		\
+	__asm __volatile (string : "+r" (__o0) :			\
+			  [scn] nc (_name), "r" (__o1),			\
 			  "r" (__o2), "r" (__o3), "r" (__o4),		\
 			  "r" (__o5) :					\
 			  __SYSCALL_CLOBBERS);				\
 	__o0;								\
 })
+#define internal_syscall6(string,name,args...)				\
+  _internal_syscall6(string, "i", name, args)
 
 #define INLINE_CLONE_SYSCALL(arg1,arg2,arg3,arg4,arg5)			\
 ({									\
@@ -170,15 +184,15 @@
 	long int _arg3 = (long int) (arg3);				\
 	long int _arg4 = (long int) (arg4);				\
 	long int _arg5 = (long int) (arg5);				\
+	long int _name = __NR_clone;					\
 	register long int __o0 __asm__ ("o0") = _arg1;			\
 	register long int __o1 __asm__ ("o1") = _arg2;			\
 	register long int __o2 __asm__ ("o2") = _arg3;			\
 	register long int __o3 __asm__ ("o3") = _arg4;			\
 	register long int __o4 __asm__ ("o4") = _arg5;			\
-	register long int __g1 __asm__ ("g1") = __NR_clone;		\
 	__asm __volatile (__SYSCALL_STRING :				\
 			  "=r" (__o0), "=r" (__o1) :			\
-			  "r" (__g1), "0" (__o0), "1" (__o1),		\
+			  [scn] "i" (_name), "0" (__o0), "1" (__o1),	\
 			  "r" (__o2), "r" (__o3), "r" (__o4) :		\
 			  __SYSCALL_CLOBBERS);				\
 	if (__glibc_unlikely ((unsigned long int) (__o0) > -4096UL))	\
diff --git a/sysdeps/unix/sysv/linux/spawni.c b/sysdeps/unix/sysv/linux/spawni.c
index f57e92815e..1edf62c6d7 100644
--- a/sysdeps/unix/sysv/linux/spawni.c
+++ b/sysdeps/unix/sysv/linux/spawni.c
@@ -383,7 +383,11 @@ __spawnix (int *pid, const char *file,
   args.pidfd = 0;
   args.xflags = xflags;
 
-  internal_signal_block_all (&args.oldmask);
+  /* Avoid the potential issues if caller sets a SIG_IGN for SIGABRT, calls
+     abort, and another thread issues posix_spawn just after the sigaction
+     returns.  With default options (not setting POSIX_SPAWN_SETSIGDEF), the
+     process can still see SIG_DFL for SIGABRT, where it should be SIG_IGN.  */
+  __abort_lock_rdlock (&args.oldmask);
 
   /* The clone flags used will create a new child that will run in the same
      memory space (CLONE_VM) and the execution of calling thread will be
@@ -474,7 +478,7 @@ __spawnix (int *pid, const char *file,
   if ((ec == 0) && (pid != NULL))
     *pid = use_pidfd ? args.pidfd : new_pid;
 
-  internal_signal_restore_set (&args.oldmask);
+  __abort_lock_unlock (&args.oldmask);
 
   __pthread_setcancelstate (state, NULL);
 
diff --git a/sysdeps/unix/sysv/linux/sys/pidfd.h b/sysdeps/unix/sysv/linux/sys/pidfd.h
index 9f88d297e8..85d976939b 100644
--- a/sysdeps/unix/sysv/linux/sys/pidfd.h
+++ b/sysdeps/unix/sysv/linux/sys/pidfd.h
@@ -20,6 +20,7 @@
 
 #include <fcntl.h>
 #include <bits/types/siginfo_t.h>
+#include <sys/ioctl.h>
 
 #define PIDFD_NONBLOCK O_NONBLOCK
 #define PIDFD_THREAD O_EXCL
@@ -28,6 +29,19 @@
 #define PIDFD_SIGNAL_THREAD_GROUP (1UL << 1)
 #define PIDFD_SIGNAL_PROCESS_GROUP (1UL << 2)
 
+#define PIDFS_IOCTL_MAGIC 0xFF
+
+#define PIDFD_GET_CGROUP_NAMESPACE            _IO(PIDFS_IOCTL_MAGIC, 1)
+#define PIDFD_GET_IPC_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 2)
+#define PIDFD_GET_MNT_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 3)
+#define PIDFD_GET_NET_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 4)
+#define PIDFD_GET_PID_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 5)
+#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE  _IO(PIDFS_IOCTL_MAGIC, 6)
+#define PIDFD_GET_TIME_NAMESPACE              _IO(PIDFS_IOCTL_MAGIC, 7)
+#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8)
+#define PIDFD_GET_USER_NAMESPACE              _IO(PIDFS_IOCTL_MAGIC, 9)
+#define PIDFD_GET_UTS_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 10)
+
 /* Returns a file descriptor that refers to the process PID.  The
    close-on-exec is set on the file descriptor.  */
 extern int pidfd_open (__pid_t __pid, unsigned int __flags) __THROW;
diff --git a/sysdeps/unix/sysv/linux/syscall-names.list b/sysdeps/unix/sysv/linux/syscall-names.list
index 672d39eaad..aa5b479e2a 100644
--- a/sysdeps/unix/sysv/linux/syscall-names.list
+++ b/sysdeps/unix/sysv/linux/syscall-names.list
@@ -21,8 +21,8 @@
 # This file can list all potential system calls.  The names are only
 # used if the installed kernel headers also provide them.
 
-# The list of system calls is current as of Linux 6.9.
-kernel 6.9
+# The list of system calls is current as of Linux 6.11.
+kernel 6.11
 
 FAST_atomic_update
 FAST_cmpxchg
@@ -287,6 +287,7 @@ mq_timedsend
 mq_timedsend_time64
 mq_unlink
 mremap
+mseal
 msgctl
 msgget
 msgrcv
@@ -652,6 +653,7 @@ uname
 unlink
 unlinkat
 unshare
+uretprobe
 uselib
 userfaultfd
 usr26
diff --git a/sysdeps/unix/sysv/linux/syscall_cancel.c b/sysdeps/unix/sysv/linux/syscall_cancel.c
new file mode 100644
index 0000000000..5fa0706486
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/syscall_cancel.c
@@ -0,0 +1,73 @@
+/* Pthread cancellation syscall bridge.  Default Linux version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <pthreadP.h>
+
+#warning "This implementation should be use just as reference or for bootstrapping"
+
+/* This is the generic version of the cancellable syscall code which
+   adds the label guards (__syscall_cancel_arch_{start,end}) used by the
+   SIGCANCEL handler to check if the cancelled syscall has side-effects that
+   need to be returned to the caller.
+
+   This implementation should be used as a reference one to document the
+   implementation constraints:
+
+     1. The __syscall_cancel_arch_start should point just before the test
+        that thread is already cancelled,
+     2. The __syscall_cancel_arch_end should point to the immediate next
+        instruction after the syscall one.
+     3. It should return the syscall value or a negative result if it has
+        failed, similar to INTERNAL_SYSCALL_CALL.
+
+   The __syscall_cancel_arch_end one is because the kernel will signal
+   interrupted syscall with side effects by setting the signal frame program
+   counter (on the ucontext_t third argument from SA_SIGINFO signal handler)
+   right after the syscall instruction.
+
+   For some architectures, the INTERNAL_SYSCALL_NCS macro uses more
+   instructions to get the error condition from the kernel (as for powerpc
+   and sparc, which check the condition register), or uses an out-of-line
+   helper (ARM thumb), or uses a kernel helper gate (i686 or ia64).  In this
+   case the architecture should either adjust the macro or provide a custom
+   __syscall_cancel_arch implementation.  */
+
+long int
+__syscall_cancel_arch (volatile int *ch, __syscall_arg_t nr,
+		       __syscall_arg_t a1, __syscall_arg_t a2,
+		       __syscall_arg_t a3, __syscall_arg_t a4,
+		       __syscall_arg_t a5, __syscall_arg_t a6
+		       __SYSCALL_CANCEL7_ARG_DEF)
+{
+#define ADD_LABEL(__label)		\
+  asm volatile (			\
+    ".global " __label "\t\n"		\
+    __label ":\n");
+
+  ADD_LABEL ("__syscall_cancel_arch_start");
+  if (__glibc_unlikely (*ch & CANCELED_BITMASK))
+    __syscall_do_cancel();
+
+  long int result = INTERNAL_SYSCALL_NCS_CALL (nr, a1, a2, a3, a4, a5, a6
+					       __SYSCALL_CANCEL7_ARG7);
+  ADD_LABEL ("__syscall_cancel_arch_end");
+  if (__glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (result)))
+    return -INTERNAL_SYSCALL_ERRNO (result);
+  return result;
+}
diff --git a/sysdeps/unix/sysv/linux/syscalls.list b/sysdeps/unix/sysv/linux/syscalls.list
index 9ac42c3436..f1cfe8dc13 100644
--- a/sysdeps/unix/sysv/linux/syscalls.list
+++ b/sysdeps/unix/sysv/linux/syscalls.list
@@ -102,8 +102,8 @@ name_to_handle_at EXTRA	name_to_handle_at i:isppi name_to_handle_at
 setns		EXTRA	setns		i:ii	setns
 
 memfd_create    EXTRA	memfd_create	i:si    memfd_create
-pkey_alloc	EXTRA	pkey_alloc	i:ii	pkey_alloc
-pkey_free	EXTRA	pkey_free	i:i	pkey_free
+pkey_alloc	EXTRA	pkey_alloc	i:ii	__pkey_alloc	pkey_alloc
+pkey_free	EXTRA	pkey_free	i:i	__pkey_free	pkey_free
 gettid          EXTRA   gettid          Ei:     __gettid	gettid
 tgkill          EXTRA   tgkill          i:iii   __tgkill	tgkill
 close_range     -       close_range     i:iii   __close_range   close_range
diff --git a/sysdeps/unix/sysv/linux/sysdep-cancel.h b/sysdeps/unix/sysv/linux/sysdep-cancel.h
index c48a50fa88..3f1543fec2 100644
--- a/sysdeps/unix/sysv/linux/sysdep-cancel.h
+++ b/sysdeps/unix/sysv/linux/sysdep-cancel.h
@@ -21,17 +21,5 @@
 #define _SYSDEP_CANCEL_H
 
 #include <sysdep.h>
-#include <tls.h>
-#include <errno.h>
-
-/* Set cancellation mode to asynchronous.  */
-extern int __pthread_enable_asynccancel (void);
-libc_hidden_proto (__pthread_enable_asynccancel)
-#define LIBC_CANCEL_ASYNC() __pthread_enable_asynccancel ()
-
-/* Reset to previous cancellation mode.  */
-extern void __pthread_disable_asynccancel (int oldtype);
-libc_hidden_proto (__pthread_disable_asynccancel)
-#define LIBC_CANCEL_RESET(oldtype) __pthread_disable_asynccancel (oldtype)
 
 #endif
diff --git a/sysdeps/unix/sysv/linux/time.c b/sysdeps/unix/sysv/linux/time.c
index f8b0cee41c..a56ef6fa30 100644
--- a/sysdeps/unix/sysv/linux/time.c
+++ b/sysdeps/unix/sysv/linux/time.c
@@ -33,11 +33,10 @@ time_syscall (time_t *t)
 }
 
 # undef INIT_ARCH
-# define INIT_ARCH() \
-  void *vdso_time = dl_vdso_vsym (HAVE_TIME_VSYSCALL);
+# define INIT_ARCH()
 libc_ifunc (time,
-	    vdso_time ? VDSO_IFUNC_RET (vdso_time)
-		      : (void *) time_syscall);
+	    GLRO(dl_vdso_time) != NULL ? VDSO_IFUNC_RET (GLRO(dl_vdso_time))
+				       : (void *) time_syscall);
 
 # else
 time_t
diff --git a/sysdeps/unix/sysv/linux/tst-linux-mremap1.c b/sysdeps/unix/sysv/linux/tst-linux-mremap1.c
new file mode 100644
index 0000000000..408e8af2ab
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/tst-linux-mremap1.c
@@ -0,0 +1,63 @@
+/* Test mremap with MREMAP_DONTUNMAP.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <errno.h>
+#include <sys/mman.h>
+#include <support/xstdlib.h>
+#include <support/xunistd.h>
+#include <support/check.h>
+#include <support/test-driver.h>
+#include <mremap-failure.h>
+
+static int
+do_test (void)
+{
+  size_t old_size = getpagesize ();
+  size_t new_size = old_size;
+  char *old_addr = xmmap (NULL, old_size, PROT_READ | PROT_WRITE,
+			  MAP_PRIVATE | MAP_ANONYMOUS, -1);
+  old_addr[0] = 1;
+  old_addr[old_size - 1] = 2;
+
+  /* Create an available 64-page mmap region.  */
+  size_t fixed_size = old_size * 64;
+  char *fixed_addr = xmmap (NULL, fixed_size, PROT_READ | PROT_WRITE,
+			    MAP_PRIVATE | MAP_ANONYMOUS, -1);
+  xmunmap (fixed_addr, fixed_size);
+
+  /* Add 3 * pagesize.  */
+  fixed_size += 3 * old_size;
+
+  /* Test MREMAP_DONTUNMAP.  It should return FIXED_ADDR created above.  */
+  char *new_addr = mremap (old_addr, old_size, new_size,
+			   MREMAP_DONTUNMAP | MREMAP_MAYMOVE,
+			   fixed_addr);
+  if (new_addr == MAP_FAILED)
+    return mremap_failure_exit (errno);
+  TEST_VERIFY_EXIT (fixed_addr == new_addr);
+  old_addr[0] = 3;
+  old_addr[old_size - 1] = 4;
+  new_addr[0] = 1;
+  new_addr[new_size - 1] = 2;
+  xmunmap (new_addr, new_size);
+  xmunmap (old_addr, old_size);
+
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/unix/sysv/linux/tst-mman-consts.py b/sysdeps/unix/sysv/linux/tst-mman-consts.py
index 441261c945..2191bea36f 100644
--- a/sysdeps/unix/sysv/linux/tst-mman-consts.py
+++ b/sysdeps/unix/sysv/linux/tst-mman-consts.py
@@ -33,7 +33,7 @@ def main():
                         help='C compiler (including options) to use')
     args = parser.parse_args()
     linux_version_headers = glibcsyscalls.linux_kernel_version(args.cc)
-    linux_version_glibc = (6, 9)
+    linux_version_glibc = (6, 11)
     sys.exit(glibcextract.compare_macro_consts(
         '#define _GNU_SOURCE 1\n'
         '#include <sys/mman.h>\n',
diff --git a/sysdeps/unix/sysv/linux/tst-mount-consts.py b/sysdeps/unix/sysv/linux/tst-mount-consts.py
index c4a67221c1..b71d8a489d 100755
--- a/sysdeps/unix/sysv/linux/tst-mount-consts.py
+++ b/sysdeps/unix/sysv/linux/tst-mount-consts.py
@@ -39,10 +39,10 @@ def main():
         sys.exit (77)
 
     linux_version_headers = glibcsyscalls.linux_kernel_version(args.cc)
-    # Constants in glibc were updated to match Linux v6.9.  When glibc
+    # Constants in glibc were updated to match Linux v6.11.  When glibc
     # constants are updated this value should be updated to match the
     # released kernel version from which the constants were taken.
-    linux_version_glibc = (6, 9)
+    linux_version_glibc = (6, 11)
     def check(cte, exclude=None):
         return glibcextract.compare_macro_consts(
                 '#include <sys/mount.h>\n',
diff --git a/sysdeps/unix/sysv/linux/tst-pidfd-consts.py b/sysdeps/unix/sysv/linux/tst-pidfd-consts.py
index 6f05291949..9d53102cbd 100644
--- a/sysdeps/unix/sysv/linux/tst-pidfd-consts.py
+++ b/sysdeps/unix/sysv/linux/tst-pidfd-consts.py
@@ -39,7 +39,7 @@ def main():
         sys.exit (77)
 
     linux_version_headers = glibcsyscalls.linux_kernel_version(args.cc)
-    linux_version_glibc = (6, 9)
+    linux_version_glibc = (6, 11)
     sys.exit(glibcextract.compare_macro_consts(
                 '#include <sys/pidfd.h>\n',
                 '#include <asm/fcntl.h>\n'
diff --git a/sysdeps/unix/sysv/linux/tst-rseq-nptl.c b/sysdeps/unix/sysv/linux/tst-rseq-nptl.c
index b9f498473d..2a246c4593 100644
--- a/sysdeps/unix/sysv/linux/tst-rseq-nptl.c
+++ b/sysdeps/unix/sysv/linux/tst-rseq-nptl.c
@@ -28,6 +28,11 @@
 #include <sys/rseq.h>
 #include <unistd.h>
 
+/* Set this in 'do_test' only so as to invoke the destructor test in
+   the test process only and not 'support_test_main' parent.  Otherwise
+   the test harness may hang in the destructor if something goes wrong.  */
+static int run_destructor_test;
+
 #ifdef RSEQ_SIG
 # include <array_length.h>
 # include <errno.h>
@@ -236,6 +241,9 @@ do_rseq_test (void)
 static void __attribute__ ((destructor))
 do_rseq_destructor_test (void)
 {
+  if (!run_destructor_test)
+    return;
+
   /* Cannot use deferred failure reporting after main returns.  */
   if (do_rseq_test ())
     FAIL_EXIT1 ("rseq not registered within destructor");
@@ -254,6 +262,7 @@ do_rseq_test (void)
 static int
 do_test (void)
 {
+  run_destructor_test = 1;
   return do_rseq_test ();
 }
 
diff --git a/sysdeps/unix/sysv/linux/tst-sched-consts.py b/sysdeps/unix/sysv/linux/tst-sched-consts.py
new file mode 100644
index 0000000000..f06ac400bd
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/tst-sched-consts.py
@@ -0,0 +1,56 @@
+#!/usr/bin/python3
+# Test that glibc's sched.h constants match the kernel's.
+# Copyright (C) 2018-2024 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+
+import argparse
+import sys
+
+import glibcextract
+import glibcsyscalls
+
+
+def main():
+    """The main entry point."""
+    parser = argparse.ArgumentParser(
+        description="Test that glibc's sched.h constants "
+        "match the kernel's.")
+    parser.add_argument('--cc', metavar='CC',
+                        help='C compiler (including options) to use')
+    args = parser.parse_args()
+    linux_version_headers = glibcsyscalls.linux_kernel_version(args.cc)
+    linux_version_glibc = (6, 11)
+    sys.exit(glibcextract.compare_macro_consts(
+        '#define _GNU_SOURCE 1\n'
+        '#include <sched.h>\n',
+        '#define _GNU_SOURCE 1\n'
+        '#include <linux/sched.h>\n'
+        '#include <linux/sched/types.h>\n',
+        args.cc,
+        'SCHED_.*',
+        # SCHED_ISO is reserved, but not implemented in the kernel.
+        # SCHED_OTHER is the standard name for SCHED_NORMAL.
+        # SCHED_FLAG_ALL will receive more and more flags, so
+        # exposing it to userspace does not seem useful.
+        'SCHED_ISO'
+        '|SCHED_OTHER'
+        '|SCHED_FLAG_ALL',
+        linux_version_glibc > linux_version_headers,
+        linux_version_headers > linux_version_glibc))
+
+if __name__ == '__main__':
+    main()
diff --git a/sysdeps/unix/sysv/linux/tst-sched_setattr.c b/sysdeps/unix/sysv/linux/tst-sched_setattr.c
new file mode 100644
index 0000000000..fbb73c31a7
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/tst-sched_setattr.c
@@ -0,0 +1,107 @@
+/* Tests for sched_setattr and sched_getattr.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sched.h>
+
+#include <errno.h>
+#include <stddef.h>
+#include <string.h>
+#include <support/check.h>
+#include <sys/resource.h>
+#include <unistd.h>
+
+/* Padding struct to detect unexpected writes.   */
+union
+{
+  struct sched_attr attr;
+  /* Hopefully the kernel will never need as much.  */
+  unsigned char padding[4096];
+} u;
+
+static void
+check_unused (void)
+{
+  TEST_VERIFY (u.attr.size < sizeof (u));
+  for (unsigned int i = u.attr.size; i < sizeof (u); ++i)
+    TEST_COMPARE (u.padding[i], 0xcc);
+}
+
+static int
+do_test (void)
+{
+  _Static_assert (SCHED_OTHER == SCHED_NORMAL,
+                  "SCHED_OTHER, SCHED_NORMAL values");
+  TEST_VERIFY (sizeof (struct sched_attr) < sizeof (u));
+
+  /* Check that reading and re-applying the current policy works.  */
+  memset (&u, 0xcc, sizeof (u));
+  /* Compiler barrier to bypass write access attribute.  */
+  volatile unsigned int size = sizeof (u);
+  TEST_COMPARE (sched_getattr (0, (struct sched_attr *) &u, size, 0), 0);
+  check_unused ();
+  TEST_COMPARE (sched_setattr (0, &u.attr, 0), 0); /* Apply unchanged.  */
+
+  /* Try to switch to the SCHED_OTHER policy.   */
+  memset (&u, 0, sizeof (u));
+  u.attr.size = sizeof (u); /* With padding, kernel should accept zeroes.  */
+  u.attr.sched_policy = SCHED_OTHER; /* Should be the default.  */
+  {
+    errno = 0;
+    int prio = getpriority (PRIO_PROCESS, 0);
+    if (errno != 0)
+      prio = 0;
+    u.attr.sched_nice = prio;
+  }
+  TEST_COMPARE (sched_setattr (0, &u.attr, 0), 0);
+
+  /* Non-zero values not known to the kernel result in an E2BIG error.  */
+  memset (&u, 0, sizeof (u));
+  TEST_COMPARE (sched_getattr (0, (struct sched_attr *) &u, size, 0), 0);
+  u.padding[u.attr.size] = 0xcc;
+  u.attr.size = sizeof (u);
+  errno = 0;
+  TEST_COMPARE (sched_setattr (0, &u.attr, 0), -1);
+  TEST_COMPARE (errno, E2BIG);
+
+  memset (&u, 0xcc, sizeof (u));
+  TEST_COMPARE (sched_getattr (0, (struct sched_attr *) &u, size, 0), 0);
+  TEST_COMPARE (u.attr.sched_policy, SCHED_OTHER);
+  check_unused ();
+
+  /* Raise the niceness level to 19 and observe its effect.  */
+  TEST_COMPARE (nice (19), 19);
+  TEST_COMPARE (sched_getattr (0, &u.attr, sizeof (u.attr), 0), 0);
+  TEST_COMPARE (u.attr.sched_policy, SCHED_OTHER);
+  TEST_COMPARE (u.attr.sched_nice, 19);
+  check_unused ();
+
+  /* Invalid buffer arguments result in EINVAL (not EFAULT).  */
+  {
+    errno = 0;
+    void *volatile null_pointer = NULL; /* compiler barrier.  */
+    TEST_COMPARE (sched_setattr (0, null_pointer, 0), -1);
+    TEST_COMPARE (errno, EINVAL);
+    errno = 0;
+    TEST_COMPARE (sched_getattr (0, null_pointer, size, 0), -1);
+    TEST_COMPARE (errno, EINVAL);
+  }
+
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/unix/sysv/linux/tst-skeleton-affinity.c b/sysdeps/unix/sysv/linux/tst-skeleton-affinity.c
index 31a15b3ad7..2f921ed397 100644
--- a/sysdeps/unix/sysv/linux/tst-skeleton-affinity.c
+++ b/sysdeps/unix/sysv/linux/tst-skeleton-affinity.c
@@ -157,7 +157,7 @@ test_size (const struct conf *conf, size_t size)
   if (setaffinity (kernel_size, initial_set) < 0)
     {
       printf ("error: size %zu: setaffinity: %m\n", size);
-      return true;
+      return false;
     }
 
   /* Use one-CPU set to test switching between CPUs.  */
diff --git a/sysdeps/unix/sysv/linux/tst-syscall-restart.c b/sysdeps/unix/sysv/linux/tst-syscall-restart.c
new file mode 100644
index 0000000000..84a8a41b5c
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/tst-syscall-restart.c
@@ -0,0 +1,112 @@
+/* Test if a syscall is correctly restarted.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <support/xsignal.h>
+#include <support/check.h>
+#include <support/process_state.h>
+#include <support/xunistd.h>
+#include <support/xthread.h>
+#include <sys/wait.h>
+
+static int
+check_pid (pid_t pid)
+{
+  /* Wait until the child has called pause and is blocking in the kernel.  */
+  support_process_state_wait (pid, support_process_state_sleeping);
+
+  TEST_COMPARE (kill (pid, SIGSTOP), 0);
+
+  /* Adding process_state_tracing_stop ('t') allows the test to work under
+     trace programs such as ptrace.  */
+  support_process_state_wait (pid, support_process_state_stopped
+				   | support_process_state_tracing_stop);
+
+  TEST_COMPARE (kill (pid, SIGCONT), 0);
+
+  enum support_process_state state
+    = support_process_state_wait (pid, support_process_state_sleeping
+				       | support_process_state_zombie);
+
+  TEST_COMPARE (state, support_process_state_sleeping);
+
+  TEST_COMPARE (kill (pid, SIGTERM), 0);
+
+  siginfo_t info;
+  TEST_COMPARE (waitid (P_PID, pid, &info, WEXITED), 0);
+  TEST_COMPARE (info.si_signo, SIGCHLD);
+  TEST_COMPARE (info.si_code, CLD_KILLED);
+  TEST_COMPARE (info.si_status, SIGTERM);
+  TEST_COMPARE (info.si_pid, pid);
+
+  return 0;
+}
+
+static void *
+tf (void *)
+{
+  pause ();
+  return NULL;
+}
+
+static void
+child_mt (void)
+{
+  /* Let only the created thread handle signals.  */
+  sigset_t set;
+  sigfillset (&set);
+  xpthread_sigmask (SIG_BLOCK, &set, NULL);
+
+  sigdelset (&set, SIGSTOP);
+  sigdelset (&set, SIGCONT);
+  sigdelset (&set, SIGTERM);
+
+  pthread_attr_t attr;
+  xpthread_attr_init (&attr);
+  TEST_COMPARE (pthread_attr_setsigmask_np (&attr, &set), 0);
+
+  xpthread_join (xpthread_create (&attr, tf, NULL));
+}
+
+static void
+do_test_syscall (bool multithread)
+{
+  pid_t pid = xfork ();
+  if (pid == 0)
+    {
+      if (multithread)
+	child_mt ();
+      else
+	pause ();
+      _exit (127);
+    }
+
+  check_pid (pid);
+}
+
+static int
+do_test (void)
+{
+  /* Check for both single and multi thread, since they use different syscall
+     mechanisms.  */
+  do_test_syscall (false);
+  do_test_syscall (true);
+
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/unix/sysv/linux/x86/pkey_get.c b/sysdeps/unix/sysv/linux/x86/pkey_get.c
index eb988a5c45..0fb52ba18c 100644
--- a/sysdeps/unix/sysv/linux/x86/pkey_get.c
+++ b/sysdeps/unix/sysv/linux/x86/pkey_get.c
@@ -18,9 +18,10 @@
 
 #include <arch-pkey.h>
 #include <errno.h>
+#include <sys/mman.h>
 
 int
-pkey_get (int key)
+__pkey_get (int key)
 {
   if (key < 0 || key > 15)
     {
@@ -31,3 +32,5 @@ pkey_get (int key)
   return (pkru >> (2 * key)) & 3;
   return 0;
 }
+libc_hidden_def (__pkey_get)
+weak_alias (__pkey_get, pkey_get)
diff --git a/sysdeps/unix/sysv/linux/x86/pkey_set.c b/sysdeps/unix/sysv/linux/x86/pkey_set.c
index 3fd2c95580..51e3d68d92 100644
--- a/sysdeps/unix/sysv/linux/x86/pkey_set.c
+++ b/sysdeps/unix/sysv/linux/x86/pkey_set.c
@@ -18,9 +18,10 @@
 
 #include <arch-pkey.h>
 #include <errno.h>
+#include <sys/mman.h>
 
 int
-pkey_set (int key, unsigned int rights)
+__pkey_set (int key, unsigned int rights)
 {
   if (key < 0 || key > 15 || rights > 3)
     {
@@ -33,3 +34,5 @@ pkey_set (int key, unsigned int rights)
   pkey_write (pkru);
   return 0;
 }
+libc_hidden_def (__pkey_set)
+weak_alias (__pkey_set, pkey_set)
diff --git a/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h b/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h
index b122216013..dfc10d0c7e 100644
--- a/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h
@@ -189,6 +189,7 @@
 #define __NR_mq_timedsend 242
 #define __NR_mq_unlink 241
 #define __NR_mremap 25
+#define __NR_mseal 462
 #define __NR_msgctl 71
 #define __NR_msgget 68
 #define __NR_msgrcv 70
@@ -358,6 +359,7 @@
 #define __NR_unlink 87
 #define __NR_unlinkat 263
 #define __NR_unshare 272
+#define __NR_uretprobe 335
 #define __NR_uselib 134
 #define __NR_userfaultfd 323
 #define __NR_ustat 136
diff --git a/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist b/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist
index aea7848ed6..5acf49dbe8 100644
--- a/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist
+++ b/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist
@@ -2744,6 +2744,8 @@ GLIBC_2.4 sys_errlist D 0x420
 GLIBC_2.4 sys_nerr D 0x4
 GLIBC_2.4 unlinkat F
 GLIBC_2.4 unshare F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
 GLIBC_2.5 __readlinkat_chk F
 GLIBC_2.5 inet6_opt_append F
 GLIBC_2.5 inet6_opt_find F
diff --git a/sysdeps/unix/sysv/linux/x86_64/syscall_cancel.S b/sysdeps/unix/sysv/linux/x86_64/syscall_cancel.S
new file mode 100644
index 0000000000..cda9d20a83
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86_64/syscall_cancel.S
@@ -0,0 +1,57 @@
+/* Cancellable syscall wrapper.  Linux/x86_64 version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <descr-const.h>
+
+/* long int [rax] __syscall_cancel_arch (volatile int *cancelhandling [%rdi],
+					 __syscall_arg_t nr   [%rsi],
+					 __syscall_arg_t arg1 [%rdx],
+					 __syscall_arg_t arg2 [%rcx],
+					 __syscall_arg_t arg3 [%r8],
+					 __syscall_arg_t arg4 [%r9],
+					 __syscall_arg_t arg5 [SP+8],
+					 __syscall_arg_t arg6 [SP+16])  */
+
+ENTRY (__syscall_cancel_arch)
+	.globl __syscall_cancel_arch_start
+__syscall_cancel_arch_start:
+
+	/* if (*cancelhandling & CANCELED_BITMASK)
+	     __syscall_do_cancel()  */
+	mov    (%rdi),%eax
+	testb  $TCB_CANCELED_BITMASK, (%rdi)
+	jne    __syscall_do_cancel
+
+	/* Issue a 6 argument syscall, the nr [%rax] being the syscall
+	   number.  */
+	mov    %rdi,%r11
+	mov    %rsi,%rax
+	mov    %rdx,%rdi
+	mov    %rcx,%rsi
+	mov    %r8,%rdx
+	mov    %r9,%r10
+	mov    8(%rsp),%r8
+	mov    16(%rsp),%r9
+	mov    %r11,8(%rsp)
+	syscall
+
+	.globl __syscall_cancel_arch_end
+__syscall_cancel_arch_end:
+	ret
+END (__syscall_cancel_arch)
diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h b/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h
index df3e22236d..9340daa967 100644
--- a/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h
@@ -183,6 +183,7 @@
 #define __NR_mq_timedsend 1073742066
 #define __NR_mq_unlink 1073742065
 #define __NR_mremap 1073741849
+#define __NR_mseal 1073742286
 #define __NR_msgctl 1073741895
 #define __NR_msgget 1073741892
 #define __NR_msgrcv 1073741894
@@ -350,6 +351,7 @@
 #define __NR_unlink 1073741911
 #define __NR_unlinkat 1073742087
 #define __NR_unshare 1073742096
+#define __NR_uretprobe 1073742159
 #define __NR_userfaultfd 1073742147
 #define __NR_ustat 1073741960
 #define __NR_utime 1073741956
diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist b/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist
index 4ab3681914..02d1bb97dc 100644
--- a/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist
+++ b/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist
@@ -2763,3 +2763,5 @@ GLIBC_2.39 stdc_trailing_zeros_ui F
 GLIBC_2.39 stdc_trailing_zeros_ul F
 GLIBC_2.39 stdc_trailing_zeros_ull F
 GLIBC_2.39 stdc_trailing_zeros_us F
+GLIBC_2.41 sched_getattr F
+GLIBC_2.41 sched_setattr F
diff --git a/sysdeps/generic/dl-fptr.h b/sysdeps/unix/sysv/linux/x86_64/x32/syscall_types.h
index 71cfb62411..ac2019751d 100644
--- a/sysdeps/generic/dl-fptr.h
+++ b/sysdeps/unix/sysv/linux/x86_64/x32/syscall_types.h
@@ -1,5 +1,5 @@
-/* Function descriptors. Generic version.
-   Copyright (C) 1995-2024 Free Software Foundation, Inc.
+/* Types and macros used for syscall issuing.  x86_64/x32 version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,30 +16,19 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#ifndef dl_fptr_h
-#define dl_fptr_h 1
+#ifndef _SYSCALL_TYPES_H
+#define _SYSCALL_TYPES_H
 
-/* An FDESC is a function descriptor.  */
+#include <libc-diag.h>
 
-struct fdesc
-  {
-    ElfW(Addr) ip;	/* code entry point */
-    ElfW(Addr) gp;	/* global pointer */
-  };
+typedef long long int __syscall_arg_t;
 
-struct fdesc_table
-  {
-    struct fdesc_table *next;
-    unsigned int len;			/* # of entries in fdesc table */
-    volatile unsigned int first_unused;	/* index of first available entry */
-    struct fdesc fdesc[0];
-  };
+/* Syscall arguments for x32 follow the x86_64 ABI; however, pointers are
+   32 bits and should be zero extended.  */
+#define __SSC(__x) \
+  ({					\
+    TYPEFY (__x, __tmp) = ARGIFY (__x);	\
+    (__syscall_arg_t) __tmp;		\
+  })
 
-struct link_map;
-
-extern ElfW(Addr) _dl_boot_fptr_table [];
-
-extern ElfW(Addr) _dl_make_fptr (struct link_map *, const ElfW(Sym) *,
-				 ElfW(Addr));
-
-#endif /* !dl_fptr_h */
+#endif
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index c096dd390a..1f30e237f5 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -756,6 +756,12 @@ init_cpu_features (struct cpu_features *cpu_features)
   unsigned int stepping = 0;
   enum cpu_features_kind kind;
 
+  /* Default is to avoid non-temporal memset for non Intel/AMD/Hygon hardware.
+     This is because, as of writing this, we only have benchmarks indicating
+     its profitability on Intel/AMD/Hygon.  */
+  cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset]
+      |= bit_arch_Avoid_Non_Temporal_Memset;
+
   cpu_features->cachesize_non_temporal_divisor = 4;
 #if !HAS_CPUID
   if (__get_cpuid_max (0, 0) == 0)
@@ -781,6 +787,11 @@ init_cpu_features (struct cpu_features *cpu_features)
 
       update_active (cpu_features);
 
+      /* Benchmarks indicate non-temporal memset can be profitable on Intel
+	hardware.  */
+      cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset]
+	  &= ~bit_arch_Avoid_Non_Temporal_Memset;
+
       if (family == 0x06)
 	{
 	  model += extended_model;
@@ -879,6 +890,7 @@ init_cpu_features (struct cpu_features *cpu_features)
 		     non-temporal on all Skylake servers. */
 	      cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset]
 		  |= bit_arch_Avoid_Non_Temporal_Memset;
+	      /* fallthrough */
 	    case INTEL_BIGCORE_COMETLAKE:
 	    case INTEL_BIGCORE_SKYLAKE:
 	    case INTEL_BIGCORE_KABYLAKE:
@@ -974,9 +986,8 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht
 	cpu_features->preferred[index_arch_Avoid_Short_Distance_REP_MOVSB]
 	  |= bit_arch_Avoid_Short_Distance_REP_MOVSB;
     }
-  /* This spells out "AuthenticAMD" or "HygonGenuine".  */
-  else if ((ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
-	   || (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e))
+  /* This spells out "AuthenticAMD".  */
+  else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
     {
       unsigned int extended_model;
 
@@ -991,6 +1002,11 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht
 
       ecx = cpu_features->features[CPUID_INDEX_1].cpuid.ecx;
 
+      /* Benchmarks indicate non-temporal memset can be profitable on AMD
+	hardware.  */
+      cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset]
+	  &= ~bit_arch_Avoid_Non_Temporal_Memset;
+
       if (CPU_FEATURE_USABLE_P (cpu_features, AVX))
 	{
 	  /* Since the FMA4 bit is in CPUID_INDEX_80000001 and
@@ -1073,6 +1089,7 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht
 	      /* Yongfeng and Shijidadao mircoarch tuning.  */
 	    case 0x5b:
 	      cpu_features->cachesize_non_temporal_divisor = 2;
+	      /* fallthrough */
 	    case 0x6b:
 	      cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
 		  &= ~bit_arch_AVX_Fast_Unaligned_Load;
@@ -1086,6 +1103,25 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht
 	    }
 	}
     }
+  /* This spells out "HygonGenuine".  */
+  else if (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e)
+    {
+      unsigned int extended_model;
+
+      kind = arch_kind_hygon;
+
+      get_common_indices (cpu_features, &family, &model, &extended_model,
+			  &stepping);
+
+      get_extended_indices (cpu_features);
+
+      update_active (cpu_features);
+
+      /* Benchmarks indicate non-temporal memset can be profitable on Hygon
+       hardware.  */
+      cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset]
+	    &= ~bit_arch_Avoid_Non_Temporal_Memset;
+    }
   else
     {
       kind = arch_kind_other;
@@ -1101,6 +1137,10 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht
   if (CPU_FEATURES_CPU_P (cpu_features, CMOV))
     cpu_features->preferred[index_arch_I686] |= bit_arch_I686;
 
+  /* No ERMS, we want to avoid stosb for memset.  */
+  if (!CPU_FEATURE_USABLE_P (cpu_features, ERMS))
+    cpu_features->preferred[index_arch_Avoid_STOSB] |= bit_arch_Avoid_STOSB;
+
 #if !HAS_CPUID
 no_cpuid:
 #endif
diff --git a/sysdeps/x86/cpu-tunables.c b/sysdeps/x86/cpu-tunables.c
index ae9dcd6180..cd36de2d8b 100644
--- a/sysdeps/x86/cpu-tunables.c
+++ b/sysdeps/x86/cpu-tunables.c
@@ -193,6 +193,8 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp)
 						11);
 	      CHECK_GLIBC_IFUNC_PREFERRED_BOTH (n, cpu_features, Prefer_FSRM,
 						11);
+	      CHECK_GLIBC_IFUNC_PREFERRED_BOTH (n, cpu_features, Avoid_STOSB,
+						11);
 	      CHECK_GLIBC_IFUNC_PREFERRED_NEED_BOTH (n, cpu_features,
 						     Slow_SSE4_2,
 						     SSE4_2,
@@ -248,6 +250,7 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp)
 	    CHECK_GLIBC_IFUNC_PREFERRED_BOTH (n, cpu_features,
 					      Avoid_Non_Temporal_Memset, 25);
 	  }
+	  break;
 	case 26:
 	    {
 	      CHECK_GLIBC_IFUNC_PREFERRED_NEED_BOTH
diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
index a1c03b8903..e9579505a3 100644
--- a/sysdeps/x86/dl-cacheinfo.h
+++ b/sysdeps/x86/dl-cacheinfo.h
@@ -567,6 +567,48 @@ handle_zhaoxin (int name)
   return 0;
 }
 
+/* Return the cache property NAME (a _SC_LEVEL*_CACHE_* value), decoded from
+   the CPUID leaf 0x8000001D subleaf that NAME maps to.  */
+static long int __attribute__ ((noinline))
+handle_hygon (int name)
+{
+  unsigned int eax, ebx, ecx, edx;
+  unsigned int subleaf = 0x1;
+
+  if (name >= _SC_LEVEL3_CACHE_SIZE)
+    subleaf = 0x3;
+  else if (name >= _SC_LEVEL2_CACHE_SIZE)
+    subleaf = 0x2;
+  else if (name >= _SC_LEVEL1_DCACHE_SIZE)
+    subleaf = 0x0;
+
+  __cpuid_count (0x8000001D, subleaf, eax, ebx, ecx, edx);
+
+  const unsigned int assoc = ((ebx >> 22) & 0x3ff) + 1; /* EBX[31:22] + 1.  */
+  const unsigned int linesize = (ebx & 0xfff) + 1; /* EBX[11:0] + 1.  */
+  switch (name)
+    {
+    case _SC_LEVEL1_ICACHE_ASSOC:
+    case _SC_LEVEL1_DCACHE_ASSOC:
+    case _SC_LEVEL2_CACHE_ASSOC:
+    case _SC_LEVEL3_CACHE_ASSOC:
+      return assoc;
+    case _SC_LEVEL1_ICACHE_LINESIZE:
+    case _SC_LEVEL1_DCACHE_LINESIZE:
+    case _SC_LEVEL2_CACHE_LINESIZE:
+    case _SC_LEVEL3_CACHE_LINESIZE:
+      return linesize;
+    case _SC_LEVEL1_ICACHE_SIZE:
+    case _SC_LEVEL1_DCACHE_SIZE:
+    case _SC_LEVEL2_CACHE_SIZE:
+    case _SC_LEVEL3_CACHE_SIZE:
+      return assoc * linesize * (ecx + 1);
+    default:
+      __builtin_unreachable ();
+    }
+  return -1;
+}
+
 static void
 get_common_cache_info (long int *shared_ptr, long int * shared_per_thread_ptr, unsigned int *threads_ptr,
                 long int core)
@@ -889,6 +931,24 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
 
       shared_per_thread = shared;
     }
+  else if (cpu_features->basic.kind == arch_kind_hygon)
+    {
+      data = handle_hygon (_SC_LEVEL1_DCACHE_SIZE);
+      shared = handle_hygon (_SC_LEVEL3_CACHE_SIZE);
+      shared_per_thread = shared;
+
+      level1_icache_size = handle_hygon (_SC_LEVEL1_ICACHE_SIZE);
+      level1_icache_linesize = handle_hygon (_SC_LEVEL1_ICACHE_LINESIZE);
+      level1_dcache_size = data;
+      level1_dcache_assoc = handle_hygon (_SC_LEVEL1_DCACHE_ASSOC);
+      level1_dcache_linesize = handle_hygon (_SC_LEVEL1_DCACHE_LINESIZE);
+      level2_cache_size = handle_hygon (_SC_LEVEL2_CACHE_SIZE);;
+      level2_cache_assoc = handle_hygon (_SC_LEVEL2_CACHE_ASSOC);
+      level2_cache_linesize = handle_hygon (_SC_LEVEL2_CACHE_LINESIZE);
+      level3_cache_size = shared;
+      level3_cache_assoc = handle_hygon (_SC_LEVEL3_CACHE_ASSOC);
+      level3_cache_linesize = handle_hygon (_SC_LEVEL3_CACHE_LINESIZE);
+    }
 
   cpu_features->level1_icache_size = level1_icache_size;
   cpu_features->level1_icache_linesize = level1_icache_linesize;
@@ -988,14 +1048,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
   if (CPU_FEATURE_USABLE_P (cpu_features, FSRM))
     rep_movsb_threshold = 2112;
 
-  /* Non-temporal stores are more performant on Intel and AMD hardware above
-     non_temporal_threshold. Enable this for both Intel and AMD hardware. */
-  unsigned long int memset_non_temporal_threshold = SIZE_MAX;
-  if (!CPU_FEATURES_ARCH_P (cpu_features, Avoid_Non_Temporal_Memset)
-      && (cpu_features->basic.kind == arch_kind_intel
-	  || cpu_features->basic.kind == arch_kind_amd))
-    memset_non_temporal_threshold = non_temporal_threshold;
-
   /* For AMD CPUs that support ERMS (Zen3+), REP MOVSB is in a lot of
      cases slower than the vectorized path (and for some alignments,
      it is really slow, check BZ #30994).  */
@@ -1017,6 +1069,13 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
   if (tunable_size != 0)
     shared = tunable_size;
 
+  /* Non-temporal stores are more performant on some hardware above
+     non_temporal_threshold.  Currently Avoid_Non_Temporal_Memset is cleared
+     only on Intel, AMD and Hygon hardware.  */
+  unsigned long int memset_non_temporal_threshold = SIZE_MAX;
+  if (!CPU_FEATURES_ARCH_P (cpu_features, Avoid_Non_Temporal_Memset))
+    memset_non_temporal_threshold = non_temporal_threshold;
+
   tunable_size = TUNABLE_GET (x86_non_temporal_threshold, long int, NULL);
   if (tunable_size > minimum_non_temporal_threshold
       && tunable_size <= maximum_non_temporal_threshold)
@@ -1042,18 +1101,42 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
        slightly better than ERMS.  */
     rep_stosb_threshold = SIZE_MAX;
 
+  /*
+     For memset, the non-temporal implementation is only accessed through the
+     stosb code, i.e.:
+     ```
+     if (size >= rep_stosb_thresh)
+     {
+       if (size >= non_temporal_thresh)
+       {
+         do_non_temporal ();
+       }
+       do_stosb ();
+     }
+     do_normal_vec_loop ();
+     ```
+     So if we prefer non-temporal, set `rep_stosb_thresh = non_temporal_thresh`
+     to enable the implementation. If `rep_stosb_thresh = non_temporal_thresh`,
+     `rep stosb` will never be used.
+   */
+  TUNABLE_SET_WITH_BOUNDS (x86_memset_non_temporal_threshold,
+			   memset_non_temporal_threshold,
+			   minimum_non_temporal_threshold, SIZE_MAX);
+  /* Do `rep_stosb_thresh = non_temporal_thresh` after setting/getting the
+     final value of `x86_memset_non_temporal_threshold`. In some cases this can
+     be a matter of correctness.  */
+  if (CPU_FEATURES_ARCH_P (cpu_features, Avoid_STOSB))
+    rep_stosb_threshold
+	= TUNABLE_GET (x86_memset_non_temporal_threshold, long int, NULL);
+  TUNABLE_SET_WITH_BOUNDS (x86_rep_stosb_threshold, rep_stosb_threshold, 1,
+			   SIZE_MAX);
   TUNABLE_SET_WITH_BOUNDS (x86_data_cache_size, data, 0, SIZE_MAX);
   TUNABLE_SET_WITH_BOUNDS (x86_shared_cache_size, shared, 0, SIZE_MAX);
   TUNABLE_SET_WITH_BOUNDS (x86_non_temporal_threshold, non_temporal_threshold,
 			   minimum_non_temporal_threshold,
 			   maximum_non_temporal_threshold);
-  TUNABLE_SET_WITH_BOUNDS (x86_memset_non_temporal_threshold,
-			   memset_non_temporal_threshold,
-			   minimum_non_temporal_threshold, SIZE_MAX);
   TUNABLE_SET_WITH_BOUNDS (x86_rep_movsb_threshold, rep_movsb_threshold,
 			   minimum_rep_movsb_threshold, SIZE_MAX);
-  TUNABLE_SET_WITH_BOUNDS (x86_rep_stosb_threshold, rep_stosb_threshold, 1,
-			   SIZE_MAX);
 
   unsigned long int rep_movsb_stop_threshold;
   /* Setting the upper bound of ERMS to the computed value of
diff --git a/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def b/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def
index 61bbbc2e89..2a58000147 100644
--- a/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def
+++ b/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def
@@ -34,3 +34,4 @@ BIT (MathVec_Prefer_No_AVX512)
 BIT (Prefer_FSRM)
 BIT (Avoid_Short_Distance_REP_MOVSB)
 BIT (Avoid_Non_Temporal_Memset)
+BIT (Avoid_STOSB)
diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h
index aaae44f0e1..b8e790c8fd 100644
--- a/sysdeps/x86/include/cpu-features.h
+++ b/sysdeps/x86/include/cpu-features.h
@@ -881,6 +881,7 @@ enum cpu_features_kind
   arch_kind_intel,
   arch_kind_amd,
   arch_kind_zhaoxin,
+  arch_kind_hygon,
   arch_kind_other
 };
 
diff --git a/sysdeps/x86/tst-hwcap-tunables.c b/sysdeps/x86/tst-hwcap-tunables.c
index 94307283d7..1920f5057e 100644
--- a/sysdeps/x86/tst-hwcap-tunables.c
+++ b/sysdeps/x86/tst-hwcap-tunables.c
@@ -60,7 +60,8 @@ static const struct test_t
     /* Disable everything.  */
     "-Prefer_ERMS,-Prefer_FSRM,-AVX,-AVX2,-AVX512F,-AVX512VL,"
     "-SSE4_1,-SSE4_2,-SSSE3,-Fast_Unaligned_Load,-ERMS,"
-    "-AVX_Fast_Unaligned_Load,-Avoid_Non_Temporal_Memset",
+    "-AVX_Fast_Unaligned_Load,-Avoid_Non_Temporal_Memset,"
+    "-Avoid_STOSB",
     test_1,
     array_length (test_1)
   },
@@ -68,7 +69,8 @@ static const struct test_t
     /* Same as before, but with some empty suboptions.  */
     ",-,-Prefer_ERMS,-Prefer_FSRM,-AVX,-AVX2,-AVX512F,-AVX512VL,"
     "-SSE4_1,-SSE4_2,-SSSE3,-Fast_Unaligned_Load,,-,"
-    "-ERMS,-AVX_Fast_Unaligned_Load,-Avoid_Non_Temporal_Memset,-,",
+    "-ERMS,-AVX_Fast_Unaligned_Load,-Avoid_Non_Temporal_Memset,"
+    "-Avoid_STOSB,-,",
     test_1,
     array_length (test_1)
   }
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
index 37d8998c71..80d0125642 100644
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -1430,28 +1430,28 @@ Function: "exp10_vlen8_avx2":
 float: 1
 
 Function: "exp10m1":
-double: 2
-float: 1
-float128: 1
-ldouble: 1
+double: 4
+float: 2
+float128: 3
+ldouble: 4
 
 Function: "exp10m1_downward":
-double: 1
-float: 1
-float128: 3
-ldouble: 2
+double: 3
+float: 3
+float128: 6
+ldouble: 6
 
 Function: "exp10m1_towardzero":
-double: 1
-float: 1
-float128: 3
-ldouble: 2
+double: 2
+float: 3
+float128: 6
+ldouble: 5
 
 Function: "exp10m1_upward":
-double: 3
-float: 1
-float128: 3
-ldouble: 2
+double: 5
+float: 4
+float128: 6
+ldouble: 6
 
 Function: "exp2":
 double: 1
@@ -1498,28 +1498,28 @@ Function: "exp2_vlen8_avx2":
 float: 1
 
 Function: "exp2m1":
-double: 1
-float: 1
-float128: 1
-ldouble: 1
-
-Function: "exp2m1_downward":
 double: 2
-float: 1
+float: 2
 float128: 2
 ldouble: 3
 
+Function: "exp2m1_downward":
+double: 3
+float: 3
+float128: 3
+ldouble: 6
+
 Function: "exp2m1_towardzero":
-double: 2
-float: 1
-float128: 2
-ldouble: 3
+double: 3
+float: 2
+float128: 4
+ldouble: 5
 
 Function: "exp2m1_upward":
-double: 1
-float: 1
-float128: 2
-ldouble: 3
+double: 3
+float: 3
+float128: 5
+ldouble: 6
 
 Function: "exp_downward":
 double: 1
@@ -1808,28 +1808,28 @@ Function: "log10_vlen8_avx2":
 float: 1
 
 Function: "log10p1":
-double: 1
-float: 1
+double: 2
+float: 2
 float128: 3
-ldouble: 2
+ldouble: 4
 
 Function: "log10p1_downward":
 double: 2
-float: 1
-float128: 2
-ldouble: 4
+float: 3
+float128: 4
+ldouble: 8
 
 Function: "log10p1_towardzero":
-double: 2
+double: 3
 float: 2
-float128: 2
-ldouble: 4
+float128: 3
+ldouble: 8
 
 Function: "log10p1_upward":
 double: 2
-float: 1
-float128: 3
-ldouble: 3
+float: 3
+float128: 4
+ldouble: 6
 
 Function: "log1p":
 double: 1
@@ -2263,25 +2263,21 @@ double: 1
 
 Function: "tgamma":
 double: 9
-float: 8
 float128: 4
 ldouble: 5
 
 Function: "tgamma_downward":
 double: 9
-float: 7
 float128: 5
 ldouble: 6
 
 Function: "tgamma_towardzero":
 double: 9
-float: 7
 float128: 5
 ldouble: 6
 
 Function: "tgamma_upward":
 double: 9
-float: 8
 float128: 4
 ldouble: 5
 
diff --git a/sysdeps/x86_64/fpu/multiarch/s_fma.c b/sysdeps/x86_64/fpu/multiarch/s_fma.c
index 10e6e625b3..92409a2779 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_fma.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_fma.c
@@ -9,7 +9,7 @@
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.
 
    You should have received a copy of the GNU Lesser General Public
diff --git a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c
index 85725e29c0..b5b63fd125 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c
@@ -9,7 +9,7 @@
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.
 
    You should have received a copy of the GNU Lesser General Public
diff --git a/sysdeps/x86_64/multiarch/ifunc-memset.h b/sysdeps/x86_64/multiarch/ifunc-memset.h
index 7a637ef7ca..8dc3d7ab5a 100644
--- a/sysdeps/x86_64/multiarch/ifunc-memset.h
+++ b/sysdeps/x86_64/multiarch/ifunc-memset.h
@@ -46,6 +46,13 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned_erms)
   attribute_hidden;
 
+static inline int
+prefer_erms_nt_impl (const struct cpu_features *cpu_features)
+{ /* Nonzero if ERMS is usable or non-temporal memset is not avoided.  */
+  return CPU_FEATURE_USABLE_P (cpu_features, ERMS)
+	 || !CPU_FEATURES_ARCH_P (cpu_features, Avoid_Non_Temporal_Memset);
+}
+
 static inline void *
 IFUNC_SELECTOR (void)
 {
@@ -61,7 +68,7 @@ IFUNC_SELECTOR (void)
 	  && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
 	  && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
 	{
-	  if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
+	  if (prefer_erms_nt_impl (cpu_features))
 	    return OPTIMIZE (avx512_unaligned_erms);
 
 	  return OPTIMIZE (avx512_unaligned);
@@ -76,7 +83,7 @@ IFUNC_SELECTOR (void)
 	  && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
 	  && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
 	{
-	  if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
+	  if (prefer_erms_nt_impl (cpu_features))
 	    return OPTIMIZE (evex_unaligned_erms);
 
 	  return OPTIMIZE (evex_unaligned);
@@ -84,7 +91,7 @@ IFUNC_SELECTOR (void)
 
       if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
 	{
-	  if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
+	  if (prefer_erms_nt_impl (cpu_features))
 	    return OPTIMIZE (avx2_unaligned_erms_rtm);
 
 	  return OPTIMIZE (avx2_unaligned_rtm);
@@ -93,14 +100,15 @@ IFUNC_SELECTOR (void)
       if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
 				       Prefer_No_VZEROUPPER, !))
 	{
-	  if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
+	  if (prefer_erms_nt_impl (cpu_features))
 	    return OPTIMIZE (avx2_unaligned_erms);
 
 	  return OPTIMIZE (avx2_unaligned);
 	}
     }
 
-  if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
+  if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)
+      || !CPU_FEATURES_ARCH_P (cpu_features, Avoid_Non_Temporal_Memset))
     return OPTIMIZE (sse2_unaligned_erms);
 
   return OPTIMIZE (sse2_unaligned);
diff --git a/sysdeps/x86_64/multiarch/strchr-evex-base.S b/sysdeps/x86_64/multiarch/strchr-evex-base.S
index 04e2c0e79e..3a0b7c9d64 100644
--- a/sysdeps/x86_64/multiarch/strchr-evex-base.S
+++ b/sysdeps/x86_64/multiarch/strchr-evex-base.S
@@ -124,13 +124,13 @@ L(page_cross):
 	VPCMPNE	%VMM(1), %VMM(0), %k1
 	VPTEST	%VMM(1), %VMM(1), %k0{%k1}
 	KMOV	%k0, %VRAX
-# ifdef USE_AS_WCSCHR
+	sar	%cl, %VRAX
+#ifdef USE_AS_WCSCHR
 	sub	$VEC_MATCH_MASK, %VRAX
-# else
+#else
 	inc	%VRAX
-# endif
+#endif
 	/* Ignore number of character for alignment adjustment.  */
-	shr	%cl, %VRAX
 	jz	L(align_more)
 
 	bsf	%VRAX, %VRAX
diff --git a/sysdeps/x86_64/multiarch/strcmp-evex.S b/sysdeps/x86_64/multiarch/strcmp-evex.S
index 06730ab2a1..cea034f394 100644
--- a/sysdeps/x86_64/multiarch/strcmp-evex.S
+++ b/sysdeps/x86_64/multiarch/strcmp-evex.S
@@ -209,7 +209,9 @@
    returned.  */
 
 	.section SECTION(.text), "ax", @progbits
-	.align	16
+	/* Align 64 bytes here. This is to get the L(loop) block ideally
+	   aligned for the DSB.  */
+	.align	64
 	.type	STRCMP, @function
 	.globl	STRCMP
 # ifdef USE_AS_STRCASECMP_L
@@ -509,9 +511,7 @@ L(ret4):
 	ret
 # endif
 
-	/* 32 byte align here ensures the main loop is ideally aligned
-	   for DSB.  */
-	.p2align 5
+	.p2align 4,, 4
 L(more_3x_vec):
 	/* Safe to compare 4x vectors.  */
 	VMOVU	(VEC_SIZE)(%rdi), %VMM(0)
@@ -1426,10 +1426,9 @@ L(less_32_till_page):
 L(ret_zero_page_cross_slow_case0):
 	xorl	%eax, %eax
 	ret
-# endif
-
-
+# else
 	.p2align 4,, 10
+# endif
 L(less_16_till_page):
 	cmpl	$((VEC_SIZE - 8) / SIZE_OF_CHAR), %eax
 	ja	L(less_8_till_page)
@@ -1482,8 +1481,12 @@ L(less_16_till_page):
 # endif
 	jmp	L(prepare_loop_aligned)
 
-
-
+# ifndef USE_AS_STRNCMP
+	/* Fits in aligning bytes.  */
+L(ret_zero_4_loop):
+	xorl	%eax, %eax
+	ret
+# endif
 
 	.p2align 4,, 10
 L(less_8_till_page):
@@ -1554,6 +1557,7 @@ L(ret_less_8_wcs):
 
 #  ifdef USE_AS_STRNCMP
 	.p2align 4,, 2
+L(ret_zero_4_loop):
 L(ret_zero_page_cross_slow_case1):
 	xorl	%eax, %eax
 	ret
@@ -1586,10 +1590,6 @@ L(less_4_loop):
 	subq	$-(CHAR_PER_VEC * 4), %rdx
 #  endif
 	jmp	L(prepare_loop_aligned)
-
-L(ret_zero_4_loop):
-	xorl	%eax, %eax
-	ret
 L(ret_less_4_loop):
 	xorl	%r8d, %eax
 	subl	%r8d, %eax
diff --git a/sysdeps/x86_64/multiarch/strnlen-evex-base.S b/sysdeps/x86_64/multiarch/strnlen-evex-base.S
new file mode 100644
index 0000000000..1c2cfdfe06
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strnlen-evex-base.S
@@ -0,0 +1,462 @@
+/* strnlen/wcsnlen optimized with 256/512-bit EVEX instructions.
+   Copyright (C) 2022-2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (4)
+
+# include <sysdep.h>
+
+#ifdef USE_AS_WCSLEN
+# define VPCMPEQ	vpcmpeqd
+# define VPTESTN	vptestnmd
+# define VPMINU	vpminud
+# define CHAR_SIZE	4
+#else
+# define VPCMPEQ	vpcmpeqb
+# define VPTESTN	vptestnmb
+# define VPMINU	vpminub
+# define CHAR_SIZE	1
+#endif
+
+#define XZERO	VMM_128(0)
+#define VZERO	VMM(0)
+#define PAGE_SIZE	4096
+#define CHAR_PER_VEC	(VEC_SIZE / CHAR_SIZE)
+
+#if CHAR_PER_VEC == 32
+# define SUB_SHORT(imm, reg)	subb $(imm), %VGPR_SZ(reg, 8)
+#else
+# define SUB_SHORT(imm, reg)	subl $(imm), %VGPR_SZ(reg, 32)
+#endif
+
+#ifdef USE_AS_WCSLEN
+/* For wide-character, we care more about limiting code size
+   than optimally aligning targets, so just cap nop padding
+   reasonably low.  */
+# define P2ALIGN(...)	.p2align 4,, 6
+# define P2ALIGN_CLAMPED(...)	P2ALIGN(__VA_ARGS__)
+#else
+# define P2ALIGN(x)	.p2align x
+# define P2ALIGN_CLAMPED(x, y)	.p2align x,, y
+#endif
+
+	.section SECTION(.text), "ax", @progbits
+	/* Aligning entry point to 64 byte, provides better performance for
+	   one vector length string.  */
+ENTRY_P2ALIGN(STRNLEN, 6)
+	/* rdi is pointer to array, rsi is the upper limit.  */
+
+	/* Check zero length.  */
+	test	%RSI_LP, %RSI_LP
+	jz	L(zero)
+
+#ifdef __ILP32__
+	/* Clear the upper 32 bits.  */
+	movl	%esi, %esi
+#endif
+
+	vpxorq	%XZERO, %XZERO, %XZERO
+
+	/* Check that we won't cross a page boundary with our first load.  */
+	movl	%edi, %eax
+	shll	$20, %eax
+	cmpl	$((PAGE_SIZE - VEC_SIZE) << 20), %eax
+	ja	L(crosses_page_boundary)
+
+	/* Check the first VEC_SIZE bytes.  Each bit in K0 represents a
+	   null byte.  */
+	VPCMPEQ	(%rdi), %VZERO, %k0
+	KMOV	%k0, %VRCX
+
+	/* If src (rcx) is zero, bsf does not change the result.  NB:
+	   Must use 64-bit bsf here so that upper bits of len are not
+	   cleared.  */
+	movq	%rsi, %rax
+	bsfq	%rcx, %rax
+
+	/* If rax > CHAR_PER_VEC then rcx must have been zero (no null
+	   CHAR) and rsi must be > CHAR_PER_VEC.  */
+	cmpq	$CHAR_PER_VEC, %rax
+	ja	L(more_1x_vec)
+
+	/* Check if first match in bounds.  */
+	cmpq	%rax, %rsi
+	cmovb	%esi, %eax
+	ret
+
+#if VEC_SIZE == 32
+	P2ALIGN_CLAMPED(4, 2)
+L(zero):
+L(max_0):
+	movl	%esi, %eax
+	ret
+#endif
+
+	P2ALIGN_CLAMPED(4, 10)
+L(more_1x_vec):
+L(cross_page_continue):
+	/* After this calculation, rax stores the number of elements
+	   left to be processed.  The complexity comes from the fact some
+	   elements get read twice due to alignment and we need to be
+	   sure we don't count them twice (else, it would just be rsi -
+	   CHAR_PER_VEC).  */
+
+#ifdef USE_AS_WCSLEN
+	/* Need to compute directly for wcslen as CHAR_SIZE * rsi can
+	   overflow.  */
+	movq	%rdi, %rax
+	andq	$(VEC_SIZE * -1), %rdi
+	subq	%rdi, %rax
+	sarq	$2, %rax
+	leaq	-(CHAR_PER_VEC * 1)(%rax, %rsi), %rax
+#else
+	/* Calculate ptr + N - VEC_SIZE, then mask off the low bits,
+	   then subtract ptr to get the new aligned limit value.  */
+	leaq	(VEC_SIZE * -1)(%rsi, %rdi), %rax
+	andq	$(VEC_SIZE * -1), %rdi
+	subq	%rdi, %rax
+#endif
+
+	VPCMPEQ	VEC_SIZE(%rdi), %VZERO, %k0
+
+	/* Faster for 256-bit only.  NOTE(review): guard is `== 0`; confirm.  */
+#if VEC_SIZE == 0
+	KMOV	%k0, %VRDX
+	test	%VRDX, %VRDX
+	jnz	L(last_vec_check)
+#endif
+
+	cmpq	$(CHAR_PER_VEC * 2), %rax
+	ja	L(more_2x_vec)
+
+L(last_2x_vec_or_less):
+
+	/* Faster for 512-bit only.  NOTE(review): guard is `!= 0`; confirm.  */
+#if VEC_SIZE != 0
+	KMOV	%k0, %VRDX
+	test	%VRDX, %VRDX
+	jnz	L(last_vec_check)
+#endif
+
+	/* Check for the end of data.  */
+	SUB_SHORT (CHAR_PER_VEC, rax)
+	jbe	L(max_0)
+
+	/* Check the final remaining vector.  */
+	VPCMPEQ	(VEC_SIZE * 2)(%rdi), %VZERO, %k0
+	KMOV	%k0, %VRDX
+	test	%VRDX, %VRDX
+#if VEC_SIZE == 32
+	jz	L(max_0)
+#else
+	jnz	L(last_vec_check)
+	P2ALIGN_CLAMPED(4, 2)
+L(zero):
+L(max_0):
+	movl	%esi, %eax
+	ret
+
+#endif
+	P2ALIGN_CLAMPED(4, 4)
+L(last_vec_check):
+	bsf	%VRDX, %VRDX
+	sub	%eax, %edx
+	lea	(%rsi, %rdx), %eax
+	cmovae	%esi, %eax
+	ret
+
+
+#if VEC_SIZE == 32
+	P2ALIGN_CLAMPED(4, 8)
+#endif
+L(last_4x_vec_or_less):
+	addl	$(CHAR_PER_VEC * -4), %eax
+	VPCMPEQ	(VEC_SIZE * 5)(%rdi), %VZERO, %k0
+
+#if VEC_SIZE == 64
+	KMOV	%k0, %VRDX
+	test	%VRDX, %VRDX
+	jnz	L(last_vec_check)
+#endif
+
+	subq	$(VEC_SIZE * -4), %rdi
+	cmpl	$(CHAR_PER_VEC * 2), %eax
+	jbe	L(last_2x_vec_or_less)
+
+	P2ALIGN_CLAMPED(4, 6)
+L(more_2x_vec):
+	/* Remaining length >= 2 * CHAR_PER_VEC so do VEC0/VEC1 without
+	   rechecking bounds.  */
+
+	/* Already checked in 256-bit case.  NOTE(review): guard is `!= 0`.  */
+#if VEC_SIZE != 0
+	KMOV	%k0, %VRDX
+
+	test	%VRDX, %VRDX
+	jnz	L(first_vec_x1)
+#endif
+
+	VPCMPEQ	(VEC_SIZE * 2)(%rdi), %VZERO, %k0
+	KMOV	%k0, %VRDX
+
+	test	%VRDX, %VRDX
+	jnz	L(first_vec_x2)
+
+	cmpq	$(CHAR_PER_VEC * 4), %rax
+	ja	L(more_4x_vec)
+
+
+	VPCMPEQ	(VEC_SIZE * 3)(%rdi), %VZERO, %k0
+	KMOV	%k0, %VRDX
+	addl	$(CHAR_PER_VEC * -2), %eax
+	test	%VRDX, %VRDX
+	jnz	L(last_vec_check)
+
+	subb	$(CHAR_PER_VEC), %al
+	jbe	L(max_1)
+
+	VPCMPEQ	(VEC_SIZE * 4)(%rdi), %VZERO, %k0
+	KMOV	%k0, %VRDX
+
+	test	%VRDX, %VRDX
+	jnz	L(last_vec_check)
+L(max_1):
+	movl	%esi, %eax
+	ret
+
+
+	P2ALIGN_CLAMPED(4, 14)
+L(first_vec_x2):
+#if VEC_SIZE == 64
+	/* If VEC_SIZE == 64 we can fit logic for full return label in
+	   spare bytes before next cache line.  */
+	bsf	%VRDX, %VRDX
+	sub	%eax, %esi
+	leal	(CHAR_PER_VEC * 1)(%rsi, %rdx), %eax
+	ret
+	P2ALIGN_CLAMPED(4, 6)
+#else
+	addl	$CHAR_PER_VEC, %esi
+#endif
+L(first_vec_x1):
+	bsf	%VRDX, %VRDX
+	sub	%eax, %esi
+	leal	(CHAR_PER_VEC * 0)(%rsi, %rdx), %eax
+	ret
+
+#if VEC_SIZE == 64
+	P2ALIGN_CLAMPED(4, 6)
+L(first_vec_x4):
+# if VEC_SIZE == 64
+	/* If VEC_SIZE == 64 we can fit logic for full return label in
+	   spare bytes before next cache line.  */
+	bsf	%VRDX, %VRDX
+	sub	%eax, %esi
+	leal	(CHAR_PER_VEC * 3)(%rsi, %rdx), %eax
+	ret
+	P2ALIGN_CLAMPED(4, 6)
+# else
+	addl	$CHAR_PER_VEC, %esi
+# endif
+L(first_vec_x3):
+	bsf	%VRDX, %VRDX
+	sub	%eax, %esi
+	leal	(CHAR_PER_VEC * 2)(%rsi, %rdx), %eax
+	ret
+#endif
+
+	P2ALIGN_CLAMPED(6, 20)
+L(more_4x_vec):
+	VPCMPEQ	(VEC_SIZE * 3)(%rdi), %VZERO, %k0
+	KMOV	%k0, %VRDX
+	test	%VRDX, %VRDX
+	jnz	L(first_vec_x3)
+
+	VPCMPEQ	(VEC_SIZE * 4)(%rdi), %VZERO, %k0
+	KMOV	%k0, %VRDX
+	test	%VRDX, %VRDX
+	jnz	L(first_vec_x4)
+
+	/* Check if at last VEC_SIZE * 4 length before aligning for the
+	   loop.  */
+	cmpq	$(CHAR_PER_VEC * 8), %rax
+	jbe	L(last_4x_vec_or_less)
+
+
+	/* Compute number of words checked after aligning.  */
+#ifdef USE_AS_WCSLEN
+	/* Need to compute directly for wcslen as CHAR_SIZE * rsi can
+	   overflow.  */
+	leaq	(VEC_SIZE * -3)(%rdi), %rdx
+#else
+	leaq	(VEC_SIZE * -3)(%rdi, %rax), %rax
+#endif
+
+	subq	$(VEC_SIZE * -1), %rdi
+
+	/* Align data to VEC_SIZE * 4.  */
+#if VEC_SIZE == 64
+	/* Saves code size.  No evex512 processor has partial register
+	   stalls.  If that changes, this can be replaced with `andq
+	   $-(VEC_SIZE * 4), %rdi`.  */
+	xorb	%dil, %dil
+#else
+	andq	$-(VEC_SIZE * 4), %rdi
+#endif
+
+#ifdef USE_AS_WCSLEN
+	subq	%rdi, %rdx
+	sarq	$2, %rdx
+	addq	%rdx, %rax
+#else
+	subq	%rdi, %rax
+#endif
+
+	/* rax is now the length left past (VEC_SIZE * 4)(%rdi), the first loop load.  */
+
+	P2ALIGN(6)
+L(loop):
+	/* VPMINU and VPCMP combination provide better performance as
+	   compared to alternative combinations.  */
+	VMOVA	(VEC_SIZE * 4)(%rdi), %VMM(1)
+	VPMINU	(VEC_SIZE * 5)(%rdi), %VMM(1), %VMM(2)
+	VMOVA	(VEC_SIZE * 6)(%rdi), %VMM(3)
+	VPMINU	(VEC_SIZE * 7)(%rdi), %VMM(3), %VMM(4)
+
+	VPTESTN	%VMM(2), %VMM(2), %k0
+	VPTESTN	%VMM(4), %VMM(4), %k1
+
+	subq	$-(VEC_SIZE * 4), %rdi
+	KORTEST	%k0, %k1
+
+	jnz	L(loopend)
+	subq	$(CHAR_PER_VEC * 4), %rax
+	ja	L(loop)
+	mov	%rsi, %rax
+	ret
+
+
+#if VEC_SIZE == 32
+	P2ALIGN_CLAMPED(4, 6)
+L(first_vec_x4):
+# if VEC_SIZE == 64
+	/* If VEC_SIZE == 64 we can fit logic for full return label in
+	   spare bytes before next cache line.  */
+	bsf	%VRDX, %VRDX
+	sub	%eax, %esi
+	leal	(CHAR_PER_VEC * 3)(%rsi, %rdx), %eax
+	ret
+	P2ALIGN_CLAMPED(4, 6)
+# else
+	addl	$CHAR_PER_VEC, %esi
+# endif
+L(first_vec_x3):
+	bsf	%VRDX, %VRDX
+	sub	%eax, %esi
+	leal	(CHAR_PER_VEC * 2)(%rsi, %rdx), %eax
+	ret
+#endif
+
+
+	P2ALIGN_CLAMPED(4, 11)
+L(loopend):
+	/* We found a null terminator in one of the 4 vectors.  */
+
+	/* Check the first vector.  */
+	movq	%rax, %r8
+	VPTESTN	%VMM(1), %VMM(1), %k2
+	KMOV	%k2, %VRCX
+	bsf	%rcx, %r8
+
+	cmpq	$(CHAR_PER_VEC), %r8
+	jbe	L(end_vec)
+
+	/* Check the second vector.  */
+	subq	$(CHAR_PER_VEC), %rax
+	movq	%rax, %r8
+	KMOV	%k0, %VRCX
+	bsf	%rcx, %r8
+
+	cmpq	$(CHAR_PER_VEC), %r8
+	jbe	L(end_vec)
+
+	/* Check the third vector.  */
+	subq	$(CHAR_PER_VEC), %rax
+	movq	%rax, %r8
+	VPTESTN	%VMM(3), %VMM(3), %k2
+	KMOV	%k2, %VRCX
+	bsf	%rcx, %r8
+
+	cmpq	$(CHAR_PER_VEC), %r8
+	jbe	L(end_vec)
+
+	/* It is in the fourth vector.  */
+	subq	$(CHAR_PER_VEC), %rax
+	movq	%rax, %r8
+	KMOV	%k1, %VRCX
+	bsf	%rcx, %r8
+
+	P2ALIGN_CLAMPED(4, 3)
+L(end_vec):
+	/* Get the number that has been processed.  */
+	movq	%rsi, %rcx
+	subq	%rax, %rcx
+
+	/* Add that to the offset we found the null terminator at.  */
+	leaq	(%r8, %rcx), %rax
+
+	/* Take the min of that and the limit.  */
+	cmpq	%rsi, %rax
+	cmovnb	%rsi, %rax
+	ret
+
+	P2ALIGN_CLAMPED(4, 11)
+L(crosses_page_boundary):
+	/* Align data backwards to VEC_SIZE.  */
+	shrl	$20, %eax
+	movq	%rdi, %rcx
+	andq	$-VEC_SIZE, %rcx
+	VPCMPEQ	(%rcx), %VZERO, %k0
+
+	KMOV	%k0, %VRCX
+#ifdef USE_AS_WCSLEN
+	shrl	$2, %eax
+	andl	$(CHAR_PER_VEC - 1), %eax
+#endif
+	/* By this point rax contains the number of CHARs we need to skip.  */
+	shrx	%VRAX, %VRCX, %VRCX
+
+	/* Calculates (CHAR_PER_VEC - eax) % CHAR_PER_VEC and stores in eax.  */
+	negl	%eax
+	andl	$(CHAR_PER_VEC - 1), %eax
+
+	movq	%rsi, %rdx
+	bsf	%VRCX, %VRDX
+	cmpq	%rax, %rdx
+	ja	L(cross_page_continue)
+
+	/* The vector had a null terminator or we are at the limit.  */
+	movl	%edx, %eax
+	cmpq	%rdx, %rsi
+	cmovb	%esi, %eax
+	ret
+
+END(STRNLEN)
+#endif
diff --git a/sysdeps/x86_64/multiarch/strnlen-evex.S b/sysdeps/x86_64/multiarch/strnlen-evex.S
index 91b16830eb..c41288906c 100644
--- a/sysdeps/x86_64/multiarch/strnlen-evex.S
+++ b/sysdeps/x86_64/multiarch/strnlen-evex.S
@@ -1,423 +1,7 @@
-/* strnlen/wcsnlen optimized with 256-bit EVEX instructions.
-   Copyright (C) 2022-2024 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#include <isa-level.h>
-#include <sysdep.h>
-
-#if ISA_SHOULD_BUILD (4)
-
-# ifndef VEC_SIZE
-#  include "x86-evex256-vecs.h"
-# endif
-
-
-# ifndef STRNLEN
-#  define STRNLEN	__strnlen_evex
-# endif
-
-# ifdef USE_AS_WCSLEN
-#  define VPCMPEQ	vpcmpeqd
-#  define VPCMPNEQ	vpcmpneqd
-#  define VPTESTN	vptestnmd
-#  define VPTEST	vptestmd
-#  define VPMINU	vpminud
-#  define CHAR_SIZE	4
-
-# else
-#  define VPCMPEQ	vpcmpeqb
-#  define VPCMPNEQ	vpcmpneqb
-#  define VPTESTN	vptestnmb
-#  define VPTEST	vptestmb
-#  define VPMINU	vpminub
-#  define CHAR_SIZE	1
-
-#  define REG_WIDTH	VEC_SIZE
-# endif
-
-# define CHAR_PER_VEC	(VEC_SIZE / CHAR_SIZE)
-
-# include "reg-macros.h"
-
-# if CHAR_PER_VEC == 32
-#  define SUB_SHORT(imm, reg)	subb $(imm), %VGPR_SZ(reg, 8)
-# else
-#  define SUB_SHORT(imm, reg)	subl $(imm), %VGPR_SZ(reg, 32)
-# endif
-
-
-
-# if CHAR_PER_VEC == 64
-#  define FALLTHROUGH_RETURN_OFFSET	(CHAR_PER_VEC * 3)
-# else
-#  define FALLTHROUGH_RETURN_OFFSET	(CHAR_PER_VEC * 2)
-# endif
-
-
-# define XZERO	VMM_128(0)
-# define VZERO	VMM(0)
-# define PAGE_SIZE	4096
-
-	.section SECTION(.text), "ax", @progbits
-ENTRY_P2ALIGN (STRNLEN, 6)
-	/* Check zero length.  */
-	test	%RSI_LP, %RSI_LP
-	jz	L(zero)
-# ifdef __ILP32__
-	/* Clear the upper 32 bits.  */
-	movl	%esi, %esi
-# endif
-
-	movl	%edi, %eax
-	vpxorq	%XZERO, %XZERO, %XZERO
-	andl	$(PAGE_SIZE - 1), %eax
-	cmpl	$(PAGE_SIZE - VEC_SIZE), %eax
-	ja	L(cross_page_boundary)
-
-	/* Check the first VEC_SIZE bytes.  Each bit in K0 represents a
-	   null byte.  */
-	VPCMPEQ	(%rdi), %VZERO, %k0
-
-	KMOV	%k0, %VRCX
-	movq	%rsi, %rax
-
-	/* If src (rcx) is zero, bsf does not change the result.  NB:
-	   Must use 64-bit bsf here so that upper bits of len are not
-	   cleared.  */
-	bsfq	%rcx, %rax
-	/* If rax > CHAR_PER_VEC then rcx must have been zero (no null
-	   CHAR) and rsi must be > CHAR_PER_VEC.  */
-	cmpq	$CHAR_PER_VEC, %rax
-	ja	L(more_1x_vec)
-	/* Check if first match in bounds.  */
-	cmpq	%rax, %rsi
-	cmovb	%esi, %eax
-	ret
-
-
-# if CHAR_PER_VEC != 32
-	.p2align 4,, 2
-L(zero):
-L(max_0):
-	movl	%esi, %eax
-	ret
-# endif
-
-	/* Aligned more for strnlen compares remaining length vs 2 *
-	   CHAR_PER_VEC, 4 * CHAR_PER_VEC, and 8 * CHAR_PER_VEC before
-	   going to the loop.  */
-	.p2align 4,, 10
-L(more_1x_vec):
-L(cross_page_continue):
-	/* Compute number of words checked after aligning.  */
-# ifdef USE_AS_WCSLEN
-	/* Need to compute directly for wcslen as CHAR_SIZE * rsi can
-	   overflow.  */
-	movq	%rdi, %rax
-	andq	$(VEC_SIZE * -1), %rdi
-	subq	%rdi, %rax
-	sarq	$2, %rax
-	leaq	-(CHAR_PER_VEC * 1)(%rax, %rsi), %rax
-# else
-	leaq	(VEC_SIZE * -1)(%rsi, %rdi), %rax
-	andq	$(VEC_SIZE * -1), %rdi
-	subq	%rdi, %rax
-# endif
-
-
-	VPCMPEQ	VEC_SIZE(%rdi), %VZERO, %k0
-
-	cmpq	$(CHAR_PER_VEC * 2), %rax
-	ja	L(more_2x_vec)
-
-L(last_2x_vec_or_less):
-	KMOV	%k0, %VRDX
-	test	%VRDX, %VRDX
-	jnz	L(last_vec_check)
-
-	/* Check the end of data.  */
-	SUB_SHORT (CHAR_PER_VEC, rax)
-	jbe	L(max_0)
-	VPCMPEQ	(VEC_SIZE * 2)(%rdi), %VZERO, %k0
-	KMOV	%k0, %VRDX
-	test	%VRDX, %VRDX
-	jz	L(max_0)
-	/* Best place for LAST_VEC_CHECK if ZMM.  */
-	.p2align 4,, 8
-L(last_vec_check):
-	bsf	%VRDX, %VRDX
-	sub	%eax, %edx
-	lea	(%rsi, %rdx), %eax
-	cmovae	%esi, %eax
-	ret
-
-# if CHAR_PER_VEC == 32
-	.p2align 4,, 2
-L(zero):
-L(max_0):
-	movl	%esi, %eax
-	ret
-# endif
-
-	.p2align 4,, 8
-L(last_4x_vec_or_less):
-	addl	$(CHAR_PER_VEC * -4), %eax
-	VPCMPEQ	(VEC_SIZE * 5)(%rdi), %VZERO, %k0
-	subq	$(VEC_SIZE * -4), %rdi
-	cmpl	$(CHAR_PER_VEC * 2), %eax
-	jbe	L(last_2x_vec_or_less)
-
-	.p2align 4,, 6
-L(more_2x_vec):
-	/* Remaining length >= 2 * CHAR_PER_VEC so do VEC0/VEC1 without
-	   rechecking bounds.  */
-
-	KMOV	%k0, %VRDX
-
-	test	%VRDX, %VRDX
-	jnz	L(first_vec_x1)
-
-	VPCMPEQ	(VEC_SIZE * 2)(%rdi), %VZERO, %k0
-	KMOV	%k0, %VRDX
-	test	%VRDX, %VRDX
-	jnz	L(first_vec_x2)
-
-	cmpq	$(CHAR_PER_VEC * 4), %rax
-	ja	L(more_4x_vec)
-
-
-	VPCMPEQ	(VEC_SIZE * 3)(%rdi), %VZERO, %k0
-	KMOV	%k0, %VRDX
-	addl	$(CHAR_PER_VEC * -2), %eax
-	test	%VRDX, %VRDX
-	jnz	L(last_vec_check)
-
-	subl	$(CHAR_PER_VEC), %eax
-	jbe	L(max_1)
-
-	VPCMPEQ	(VEC_SIZE * 4)(%rdi), %VZERO, %k0
-	KMOV	%k0, %VRDX
-
-	test	%VRDX, %VRDX
-	jnz	L(last_vec_check)
-L(max_1):
-	movl	%esi, %eax
-	ret
-
-	.p2align 4,, 3
-L(first_vec_x2):
-# if VEC_SIZE == 64
-	/* If VEC_SIZE == 64 we can fit logic for full return label in
-	   spare bytes before next cache line.  */
-	bsf	%VRDX, %VRDX
-	sub	%eax, %esi
-	leal	(CHAR_PER_VEC * 1)(%rsi, %rdx), %eax
-	ret
-	.p2align 4,, 6
-# else
-	addl	$CHAR_PER_VEC, %esi
-# endif
-L(first_vec_x1):
-	bsf	%VRDX, %VRDX
-	sub	%eax, %esi
-	leal	(CHAR_PER_VEC * 0)(%rsi, %rdx), %eax
-	ret
-
-
-	.p2align 4,, 6
-L(first_vec_x4):
-# if VEC_SIZE == 64
-	/* If VEC_SIZE == 64 we can fit logic for full return label in
-	   spare bytes before next cache line.  */
-	bsf	%VRDX, %VRDX
-	sub	%eax, %esi
-	leal	(CHAR_PER_VEC * 3)(%rsi, %rdx), %eax
-	ret
-	.p2align 4,, 6
-# else
-	addl	$CHAR_PER_VEC, %esi
-# endif
-L(first_vec_x3):
-	bsf	%VRDX, %VRDX
-	sub	%eax, %esi
-	leal	(CHAR_PER_VEC * 2)(%rsi, %rdx), %eax
-	ret
-
-	.p2align 4,, 5
-L(more_4x_vec):
-	VPCMPEQ	(VEC_SIZE * 3)(%rdi), %VZERO, %k0
-	KMOV	%k0, %VRDX
-	test	%VRDX, %VRDX
-	jnz	L(first_vec_x3)
-
-	VPCMPEQ	(VEC_SIZE * 4)(%rdi), %VZERO, %k0
-	KMOV	%k0, %VRDX
-	test	%VRDX, %VRDX
-	jnz	L(first_vec_x4)
-
-	/* Check if at last VEC_SIZE * 4 length before aligning for the
-	   loop.  */
-	cmpq	$(CHAR_PER_VEC * 8), %rax
-	jbe	L(last_4x_vec_or_less)
-
-
-	/* Compute number of words checked after aligning.  */
-# ifdef USE_AS_WCSLEN
-	/* Need to compute directly for wcslen as CHAR_SIZE * rsi can
-	   overflow.  */
-	leaq	(VEC_SIZE * -3)(%rdi), %rdx
-# else
-	leaq	(VEC_SIZE * -3)(%rdi, %rax), %rax
-# endif
-
-	subq	$(VEC_SIZE * -1), %rdi
-
-	/* Align data to VEC_SIZE * 4.  */
-# if VEC_SIZE == 64
-	/* Saves code size.  No evex512 processor has partial register
-	   stalls.  If that change this can be replaced with `andq
-	   $-(VEC_SIZE * 4), %rdi`.  */
-	xorb	%dil, %dil
-# else
-	andq	$-(VEC_SIZE * 4), %rdi
-# endif
-
-# ifdef USE_AS_WCSLEN
-	subq	%rdi, %rdx
-	sarq	$2, %rdx
-	addq	%rdx, %rax
-# else
-	subq	%rdi, %rax
-# endif
-	/* Compare 4 * VEC at a time forward.  */
-	.p2align 4,, 11
-L(loop_4x_vec):
-	VMOVA	(VEC_SIZE * 4)(%rdi), %VMM(1)
-	VPMINU	(VEC_SIZE * 5)(%rdi), %VMM(1), %VMM(2)
-	VMOVA	(VEC_SIZE * 6)(%rdi), %VMM(3)
-	VPMINU	(VEC_SIZE * 7)(%rdi), %VMM(3), %VMM(4)
-	VPTESTN	%VMM(2), %VMM(2), %k0
-	VPTESTN	%VMM(4), %VMM(4), %k2
-	subq	$-(VEC_SIZE * 4), %rdi
-	/* Break if at end of length.  */
-	subq	$(CHAR_PER_VEC * 4), %rax
-	jbe	L(loop_len_end)
-
-
-	KORTEST %k0, %k2
-	jz	L(loop_4x_vec)
-
-
-L(loop_last_4x_vec):
-	movq	%rsi, %rcx
-	subq	%rax, %rsi
-	VPTESTN	%VMM(1), %VMM(1), %k1
-	KMOV	%k1, %VRDX
-	test	%VRDX, %VRDX
-	jnz	L(last_vec_x0)
-
-	KMOV	%k0, %VRDX
-	test	%VRDX, %VRDX
-	jnz	L(last_vec_x1)
-
-	VPTESTN	%VMM(3), %VMM(3), %k0
-
-	/* Separate logic for VEC_SIZE == 64 and VEC_SIZE == 32 for
-	   returning last 2x VEC. For VEC_SIZE == 64 we test each VEC
-	   individually, for VEC_SIZE == 32 we combine them in a single
-	   64-bit GPR.  */
-# if CHAR_PER_VEC == 64
-	KMOV	%k0, %VRDX
-	test	%VRDX, %VRDX
-	jnz	L(last_vec_x2)
-	KMOV	%k2, %VRDX
-# else
-	/* We can only combine last 2x VEC masks if CHAR_PER_VEC <= 32.
-	 */
-	kmovd	%k2, %edx
-	kmovd	%k0, %eax
-	salq	$CHAR_PER_VEC, %rdx
-	orq	%rax, %rdx
-# endif
-
-	/* first_vec_x3 for strlen-ZMM and first_vec_x2 for strlen-YMM.
-	 */
-	bsfq	%rdx, %rdx
-	leaq	(FALLTHROUGH_RETURN_OFFSET - CHAR_PER_VEC * 4)(%rsi, %rdx), %rax
-	cmpq	%rax, %rcx
-	cmovb	%rcx, %rax
-	ret
-
-	/* Handle last 4x VEC after loop. All VECs have been loaded.  */
-	.p2align 4,, 4
-L(loop_len_end):
-	KORTEST %k0, %k2
-	jnz	L(loop_last_4x_vec)
-	movq	%rsi, %rax
-	ret
-
-
-# if CHAR_PER_VEC == 64
-	/* Since we can't combine the last 2x VEC for VEC_SIZE == 64
-	   need return label for it.  */
-	.p2align 4,, 8
-L(last_vec_x2):
-	bsf	%VRDX, %VRDX
-	leaq	(CHAR_PER_VEC * -2)(%rsi, %rdx), %rax
-	cmpq	%rax, %rcx
-	cmovb	%rcx, %rax
-	ret
-# endif
-
-
-	.p2align 4,, 10
-L(last_vec_x1):
-	addq	$CHAR_PER_VEC, %rsi
-L(last_vec_x0):
-	bsf	%VRDX, %VRDX
-	leaq	(CHAR_PER_VEC * -4)(%rsi, %rdx), %rax
-	cmpq	%rax, %rcx
-	cmovb	%rcx, %rax
-	ret
-
-
-	.p2align 4,, 8
-L(cross_page_boundary):
-	/* Align data to VEC_SIZE.  */
-	movq	%rdi, %rcx
-	andq	$-VEC_SIZE, %rcx
-	VPCMPEQ	(%rcx), %VZERO, %k0
-
-	KMOV	%k0, %VRCX
-# ifdef USE_AS_WCSLEN
-	shrl	$2, %eax
-	andl	$(CHAR_PER_VEC - 1), %eax
-# endif
-	shrx	%VRAX, %VRCX, %VRCX
-
-	negl	%eax
-	andl	$(CHAR_PER_VEC - 1), %eax
-	movq	%rsi, %rdx
-	bsf	%VRCX, %VRDX
-	cmpq	%rax, %rdx
-	ja	L(cross_page_continue)
-	movl	%edx, %eax
-	cmpq	%rdx, %rsi
-	cmovb	%esi, %eax
-	ret
-END (STRNLEN)
+#ifndef STRNLEN
+#define STRNLEN __strnlen_evex
 #endif
+/* Instantiate strnlen-evex-base.S with the 256-bit EVEX vector macros.  */
+#include "x86-evex256-vecs.h"
+#include "reg-macros.h"
+#include "strnlen-evex-base.S"
diff --git a/sysdeps/x86_64/multiarch/strnlen-evex512.S b/sysdeps/x86_64/multiarch/strnlen-evex512.S
index f8e55883bb..8ef54078f8 100644
--- a/sysdeps/x86_64/multiarch/strnlen-evex512.S
+++ b/sysdeps/x86_64/multiarch/strnlen-evex512.S
@@ -1,264 +1,7 @@
-/* Placeholder function, not used by any processor at the moment.
-   Copyright (C) 2022-2024 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
 #ifndef STRNLEN
 #define STRNLEN __strnlen_evex512
 #endif
 
 #include "x86-evex512-vecs.h"
 #include "reg-macros.h"
-
-#include <isa-level.h>
-
-#if ISA_SHOULD_BUILD (4)
-
-# include <sysdep.h>
-
-# ifdef USE_AS_WCSLEN
-#  define VPCMPEQ	vpcmpeqd
-#  define VPTESTN	vptestnmd
-#  define VPMINU	vpminud
-#  define CHAR_SIZE	4
-# else
-#  define VPCMPEQ	vpcmpeqb
-#  define VPTESTN	vptestnmb
-#  define VPMINU	vpminub
-#  define CHAR_SIZE	1
-# endif
-
-# define PAGE_SIZE	4096
-# define CHAR_PER_VEC	(VEC_SIZE / CHAR_SIZE)
-
-	.section SECTION(.text),"ax",@progbits
-/* Aligning entry point to 64 byte, provides better performance for
-   one vector length string.  */
-ENTRY_P2ALIGN (STRNLEN, 6)
-	/* Check zero length.  */
-	test	%RSI_LP, %RSI_LP
-	jz	L(ret_max)
-#  ifdef __ILP32__
-	/* Clear the upper 32 bits.  */
-	movl	%esi, %esi
-#  endif
-
-	movl	%edi, %eax
-	vpxorq	%VMM_128(0), %VMM_128(0), %VMM_128(0)
-	sall	$20, %eax
-	cmpl	$((PAGE_SIZE - VEC_SIZE) << 20), %eax
-	ja	L(page_cross)
-
-	/* Compare [w]char for null, mask bit will be set for match.  */
-	VPCMPEQ	(%rdi), %VMM(0), %k0
-	KMOV	%k0, %VRCX
-	/* Store max length in rax.  */
-	mov	%rsi, %rax
-	/* If rcx is 0, rax will have max length.  We can not use VRCX
-	   and VRAX here for evex256 because, upper 32 bits may be
-	   undefined for ecx and eax.  */
-	bsfq	%rcx, %rax
-	cmp	$CHAR_PER_VEC, %rax
-	ja	L(align_more)
-	cmpq	%rax, %rsi
-	cmovb	%esi, %eax
-	ret
-
-	/* At this point vector max length reached.  */
-	.p2align 4,,3
-L(ret_max):
-	movq	%rsi, %rax
-	ret
-
-L(align_more):
-	mov	%rdi, %rax
-	/* Align rax to VEC_SIZE.  */
-	andq	$-VEC_SIZE, %rax
-	movq	%rdi, %rdx
-	subq	%rax, %rdx
-#  ifdef USE_AS_WCSLEN
-	shr	$2, %VRDX
-#  endif
-	/* At this point rdx contains [w]chars already compared.  */
-	leaq	-CHAR_PER_VEC(%rsi, %rdx), %rdx
-	/* At this point rdx contains number of w[char] needs to go.
-	   Now onwards rdx will keep decrementing with each compare.  */
-
-	/* Loop unroll 4 times for 4 vector loop.  */
-	VPCMPEQ	VEC_SIZE(%rax), %VMM(0), %k0
-	subq	$-VEC_SIZE, %rax
-	KMOV	%k0, %VRCX
-	test	%VRCX, %VRCX
-	jnz	L(ret_vec_x1)
-
-	subq	$CHAR_PER_VEC, %rdx
-	jbe	L(ret_max)
-
-	VPCMPEQ	VEC_SIZE(%rax), %VMM(0), %k0
-	KMOV	%k0, %VRCX
-	test	%VRCX, %VRCX
-	jnz	L(ret_vec_x2)
-
-	subq	$CHAR_PER_VEC, %rdx
-	jbe	L(ret_max)
-
-	VPCMPEQ	(VEC_SIZE * 2)(%rax), %VMM(0), %k0
-	KMOV	%k0, %VRCX
-	test	%VRCX, %VRCX
-	jnz	L(ret_vec_x3)
-
-	subq	$CHAR_PER_VEC, %rdx
-	jbe	L(ret_max)
-
-	VPCMPEQ	(VEC_SIZE * 3)(%rax), %VMM(0), %k0
-	KMOV	%k0, %VRCX
-	test	%VRCX, %VRCX
-	jnz	L(ret_vec_x4)
-
-	subq	$CHAR_PER_VEC, %rdx
-	jbe	L(ret_max)
-	/* Save pointer before 4 x VEC_SIZE alignment.  */
-	movq	%rax, %rcx
-
-	/* Align address to VEC_SIZE * 4 for loop.  */
-	andq	$-(VEC_SIZE * 4), %rax
-
-	subq	%rax, %rcx
-#  ifdef USE_AS_WCSLEN
-	shr	$2, %VRCX
-#  endif
-	/* rcx contains number of [w]char will be recompared due to
-	   alignment fixes.  rdx must be incremented by rcx to offset
-	   alignment adjustment.  */
-	addq	%rcx, %rdx
-	/* Need jump as we don't want to add/subtract rdx for first
-	   iteration of 4 x VEC_SIZE aligned loop.  */
-
-	.p2align 4,,11
-L(loop):
-	/* VPMINU and VPCMP combination provide better performance as
-	   compared to alternative combinations.  */
-	VMOVA	(VEC_SIZE * 4)(%rax), %VMM(1)
-	VPMINU	(VEC_SIZE * 5)(%rax), %VMM(1), %VMM(2)
-	VMOVA	(VEC_SIZE * 6)(%rax), %VMM(3)
-	VPMINU	(VEC_SIZE * 7)(%rax), %VMM(3), %VMM(4)
-
-	VPTESTN	%VMM(2), %VMM(2), %k0
-	VPTESTN	%VMM(4), %VMM(4), %k1
-
-	subq	$-(VEC_SIZE * 4), %rax
-	KORTEST	%k0, %k1
-
-	jnz	L(loopend)
-	subq	$(CHAR_PER_VEC * 4), %rdx
-	ja	L(loop)
-	mov	%rsi, %rax
-	ret
-
-L(loopend):
-
-	VPTESTN	%VMM(1), %VMM(1), %k2
-	KMOV	%k2, %VRCX
-	test	%VRCX, %VRCX
-	jnz	L(ret_vec_x1)
-
-	KMOV	%k0, %VRCX
-	/* At this point, if k0 is non zero, null char must be in the
-	   second vector.  */
-	test	%VRCX, %VRCX
-	jnz	L(ret_vec_x2)
-
-	VPTESTN	%VMM(3), %VMM(3), %k3
-	KMOV	%k3, %VRCX
-	test	%VRCX, %VRCX
-	jnz	L(ret_vec_x3)
-	/* At this point null [w]char must be in the fourth vector so no
-	   need to check.  */
-	KMOV	%k1, %VRCX
-
-	/* Fourth, third, second vector terminating are pretty much
-	   same, implemented this way to avoid branching and reuse code
-	   from pre loop exit condition.  */
-L(ret_vec_x4):
-	bsf	%VRCX, %VRCX
-	subq	%rdi, %rax
-# ifdef USE_AS_WCSLEN
-	subq	$-(VEC_SIZE * 3), %rax
-	shrq	$2, %rax
-	addq	%rcx, %rax
-# else
-	leaq	(VEC_SIZE * 3)(%rcx, %rax), %rax
-# endif
-
-	cmpq	%rsi, %rax
-	cmovnb	%rsi, %rax
-	ret
-
-L(ret_vec_x3):
-	bsf	%VRCX, %VRCX
-	subq	%rdi, %rax
-# ifdef USE_AS_WCSLEN
-	subq	$-(VEC_SIZE * 2), %rax
-	shrq	$2, %rax
-	addq	%rcx, %rax
-# else
-	leaq	(VEC_SIZE * 2)(%rcx, %rax), %rax
-# endif
-	cmpq	%rsi, %rax
-	cmovnb	%rsi, %rax
-	ret
-
-L(ret_vec_x2):
-	subq	$-VEC_SIZE, %rax
-L(ret_vec_x1):
-	bsf	%VRCX, %VRCX
-	subq	%rdi, %rax
-# ifdef USE_AS_WCSLEN
-	shrq	$2, %rax
-# endif
-	addq	%rcx, %rax
-	cmpq	%rsi, %rax
-	cmovnb	%rsi, %rax
-	ret
-
-L(page_cross):
-	mov	%rdi, %rax
-	movl	%edi, %ecx
-	andl	$(VEC_SIZE - 1), %ecx
-# ifdef USE_AS_WCSLEN
-	sarl	$2, %ecx
-# endif
-	/* ecx contains number of w[char] to be skipped as a result
-	   of address alignment.  */
-	andq	$-VEC_SIZE, %rax
-	VPCMPEQ	(%rax), %VMM(0), %k0
-	KMOV	%k0, %VRDX
-	/* Ignore number of character for alignment adjustment.  */
-	shr	%cl, %VRDX
-	jnz	L(page_cross_end)
-	movl    $CHAR_PER_VEC, %eax
-	sub     %ecx, %eax
-	cmp	%rax, %rsi
-	ja	L(align_more)
-
-L(page_cross_end):
-	bsf	%VRDX, %VRAX
-	cmpq	%rsi, %rax
-	cmovnb	%esi, %eax
-	ret
-
-END (STRNLEN)
-#endif
+#include "strnlen-evex-base.S"	/* Shared EVEX strnlen/wcsnlen body.  */
diff --git a/sysdeps/x86_64/nptl/tcb-offsets.sym b/sysdeps/x86_64/nptl/tcb-offsets.sym
index 2bbd563a6c..988a4b8593 100644
--- a/sysdeps/x86_64/nptl/tcb-offsets.sym
+++ b/sysdeps/x86_64/nptl/tcb-offsets.sym
@@ -13,6 +13,3 @@ MULTIPLE_THREADS_OFFSET	offsetof (tcbhead_t, multiple_threads)
 POINTER_GUARD		offsetof (tcbhead_t, pointer_guard)
 FEATURE_1_OFFSET	offsetof (tcbhead_t, feature_1)
 SSP_BASE_OFFSET		offsetof (tcbhead_t, ssp_base)
-
--- Not strictly offsets, but these values are also used in the TCB.
-TCB_CANCELED_BITMASK	 CANCELED_BITMASK