diff options
Diffstat (limited to 'sysdeps')
232 files changed, 8235 insertions, 566 deletions
diff --git a/sysdeps/aarch64/cpu-features.h b/sysdeps/aarch64/cpu-features.h index 5f2da91ebb..31782b66f9 100644 --- a/sysdeps/aarch64/cpu-features.h +++ b/sysdeps/aarch64/cpu-features.h @@ -47,13 +47,6 @@ #define IS_THUNDERX2(midr) (MIDR_IMPLEMENTOR(midr) == 'C' \ && MIDR_PARTNUM(midr) == 0xaf) -#define IS_NEOVERSE_N1(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \ - && MIDR_PARTNUM(midr) == 0xd0c) -#define IS_NEOVERSE_N2(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \ - && MIDR_PARTNUM(midr) == 0xd49) -#define IS_NEOVERSE_V1(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \ - && MIDR_PARTNUM(midr) == 0xd40) - #define IS_EMAG(midr) (MIDR_IMPLEMENTOR(midr) == 'P' \ && MIDR_PARTNUM(midr) == 0x000) diff --git a/sysdeps/aarch64/fpu/Makefile b/sysdeps/aarch64/fpu/Makefile index e8af35099d..234a6c457c 100644 --- a/sysdeps/aarch64/fpu/Makefile +++ b/sysdeps/aarch64/fpu/Makefile @@ -5,6 +5,7 @@ libmvec-supported-funcs = acos \ atan \ atanh \ atan2 \ + cbrt \ cos \ cosh \ erf \ @@ -13,10 +14,12 @@ libmvec-supported-funcs = acos \ exp10 \ exp2 \ expm1 \ + hypot \ log \ log10 \ log1p \ log2 \ + pow \ sin \ sinh \ tan \ @@ -42,7 +45,10 @@ libmvec-support = $(addsuffix f_advsimd,$(float-advsimd-funcs)) \ sv_erff_data \ v_exp_tail_data \ erfc_data \ - erfcf_data + erfcf_data \ + v_pow_exp_data \ + v_pow_log_data \ + v_powf_data endif sve-cflags = -march=armv8-a+sve diff --git a/sysdeps/aarch64/fpu/Versions b/sysdeps/aarch64/fpu/Versions index 3cb1b82bd2..cc15ce2d1e 100644 --- a/sysdeps/aarch64/fpu/Versions +++ b/sysdeps/aarch64/fpu/Versions @@ -94,6 +94,11 @@ libmvec { _ZGVnN4v_atanhf; _ZGVsMxv_atanh; _ZGVsMxv_atanhf; + _ZGVnN2v_cbrt; + _ZGVnN2v_cbrtf; + _ZGVnN4v_cbrtf; + _ZGVsMxv_cbrt; + _ZGVsMxv_cbrtf; _ZGVnN2v_cosh; _ZGVnN2v_coshf; _ZGVnN4v_coshf; @@ -109,6 +114,16 @@ libmvec { _ZGVnN4v_erfcf; _ZGVsMxv_erfc; _ZGVsMxv_erfcf; + _ZGVnN4vv_hypotf; + _ZGVnN2vv_hypotf; + _ZGVnN2vv_hypot; + _ZGVsMxvv_hypotf; + _ZGVsMxvv_hypot; + _ZGVnN4vv_powf; + _ZGVnN2vv_powf; + _ZGVnN2vv_pow; + _ZGVsMxvv_powf; + 
_ZGVsMxvv_pow; _ZGVnN2v_sinh; _ZGVnN2v_sinhf; _ZGVnN4v_sinhf; diff --git a/sysdeps/aarch64/fpu/advsimd_f32_protos.h b/sysdeps/aarch64/fpu/advsimd_f32_protos.h index 383c436972..097d403ffe 100644 --- a/sysdeps/aarch64/fpu/advsimd_f32_protos.h +++ b/sysdeps/aarch64/fpu/advsimd_f32_protos.h @@ -23,6 +23,7 @@ libmvec_hidden_proto (V_NAME_F1(asin)); libmvec_hidden_proto (V_NAME_F1(asinh)); libmvec_hidden_proto (V_NAME_F1(atan)); libmvec_hidden_proto (V_NAME_F1(atanh)); +libmvec_hidden_proto (V_NAME_F1(cbrt)); libmvec_hidden_proto (V_NAME_F1(cos)); libmvec_hidden_proto (V_NAME_F1(cosh)); libmvec_hidden_proto (V_NAME_F1(erf)); @@ -31,10 +32,12 @@ libmvec_hidden_proto (V_NAME_F1(exp10)); libmvec_hidden_proto (V_NAME_F1(exp2)); libmvec_hidden_proto (V_NAME_F1(exp)); libmvec_hidden_proto (V_NAME_F1(expm1)); +libmvec_hidden_proto (V_NAME_F2(hypot)); libmvec_hidden_proto (V_NAME_F1(log10)); libmvec_hidden_proto (V_NAME_F1(log1p)); libmvec_hidden_proto (V_NAME_F1(log2)); libmvec_hidden_proto (V_NAME_F1(log)); +libmvec_hidden_proto (V_NAME_F2(pow)); libmvec_hidden_proto (V_NAME_F1(sin)); libmvec_hidden_proto (V_NAME_F1(sinh)); libmvec_hidden_proto (V_NAME_F1(tan)); diff --git a/sysdeps/aarch64/fpu/asinh_advsimd.c b/sysdeps/aarch64/fpu/asinh_advsimd.c index 544a52f651..6207e7da95 100644 --- a/sysdeps/aarch64/fpu/asinh_advsimd.c +++ b/sysdeps/aarch64/fpu/asinh_advsimd.c @@ -22,6 +22,7 @@ #define A(i) v_f64 (__v_log_data.poly[i]) #define N (1 << V_LOG_TABLE_BITS) +#define IndexMask (N - 1) const static struct data { @@ -63,11 +64,15 @@ struct entry static inline struct entry lookup (uint64x2_t i) { - float64x2_t e0 = vld1q_f64 ( - &__v_log_data.table[(i[0] >> (52 - V_LOG_TABLE_BITS)) & (N - 1)].invc); - float64x2_t e1 = vld1q_f64 ( - &__v_log_data.table[(i[1] >> (52 - V_LOG_TABLE_BITS)) & (N - 1)].invc); - return (struct entry){ vuzp1q_f64 (e0, e1), vuzp2q_f64 (e0, e1) }; + /* Since N is a power of 2, n % N = n & (N - 1). 
*/ + struct entry e; + uint64_t i0 = (vgetq_lane_u64 (i, 0) >> (52 - V_LOG_TABLE_BITS)) & IndexMask; + uint64_t i1 = (vgetq_lane_u64 (i, 1) >> (52 - V_LOG_TABLE_BITS)) & IndexMask; + float64x2_t e0 = vld1q_f64 (&__v_log_data.table[i0].invc); + float64x2_t e1 = vld1q_f64 (&__v_log_data.table[i1].invc); + e.invc = vuzp1q_f64 (e0, e1); + e.logc = vuzp2q_f64 (e0, e1); + return e; } static inline float64x2_t diff --git a/sysdeps/aarch64/fpu/atan2_advsimd.c b/sysdeps/aarch64/fpu/atan2_advsimd.c index 2fd6164134..b1e7a9b8fc 100644 --- a/sysdeps/aarch64/fpu/atan2_advsimd.c +++ b/sysdeps/aarch64/fpu/atan2_advsimd.c @@ -17,6 +17,7 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ +#include "math_config.h" #include "v_math.h" #include "poly_advsimd_f64.h" diff --git a/sysdeps/aarch64/fpu/atan2_sve.c b/sysdeps/aarch64/fpu/atan2_sve.c index 04fa71fa37..ed9f683436 100644 --- a/sysdeps/aarch64/fpu/atan2_sve.c +++ b/sysdeps/aarch64/fpu/atan2_sve.c @@ -17,6 +17,7 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. 
*/ +#include "math_config.h" #include "sv_math.h" #include "poly_sve_f64.h" diff --git a/sysdeps/aarch64/fpu/bits/math-vector.h b/sysdeps/aarch64/fpu/bits/math-vector.h index e29b2d1c09..7484150131 100644 --- a/sysdeps/aarch64/fpu/bits/math-vector.h +++ b/sysdeps/aarch64/fpu/bits/math-vector.h @@ -57,6 +57,10 @@ # define __DECL_SIMD_atan2 __DECL_SIMD_aarch64 # undef __DECL_SIMD_atan2f # define __DECL_SIMD_atan2f __DECL_SIMD_aarch64 +# undef __DECL_SIMD_cbrt +# define __DECL_SIMD_cbrt __DECL_SIMD_aarch64 +# undef __DECL_SIMD_cbrtf +# define __DECL_SIMD_cbrtf __DECL_SIMD_aarch64 # undef __DECL_SIMD_cos # define __DECL_SIMD_cos __DECL_SIMD_aarch64 # undef __DECL_SIMD_cosf @@ -89,6 +93,10 @@ # define __DECL_SIMD_expm1 __DECL_SIMD_aarch64 # undef __DECL_SIMD_expm1f # define __DECL_SIMD_expm1f __DECL_SIMD_aarch64 +# undef __DECL_SIMD_hypot +# define __DECL_SIMD_hypot __DECL_SIMD_aarch64 +# undef __DECL_SIMD_hypotf +# define __DECL_SIMD_hypotf __DECL_SIMD_aarch64 # undef __DECL_SIMD_log # define __DECL_SIMD_log __DECL_SIMD_aarch64 # undef __DECL_SIMD_logf @@ -105,6 +113,10 @@ # define __DECL_SIMD_log2 __DECL_SIMD_aarch64 # undef __DECL_SIMD_log2f # define __DECL_SIMD_log2f __DECL_SIMD_aarch64 +# undef __DECL_SIMD_pow +# define __DECL_SIMD_pow __DECL_SIMD_aarch64 +# undef __DECL_SIMD_powf +# define __DECL_SIMD_powf __DECL_SIMD_aarch64 # undef __DECL_SIMD_sin # define __DECL_SIMD_sin __DECL_SIMD_aarch64 # undef __DECL_SIMD_sinf @@ -154,6 +166,7 @@ __vpcs __f32x4_t _ZGVnN4v_asinf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_asinhf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_atanf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_atanhf (__f32x4_t); +__vpcs __f32x4_t _ZGVnN4v_cbrtf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_coshf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_erff (__f32x4_t); @@ -162,10 +175,12 @@ __vpcs __f32x4_t _ZGVnN4v_expf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_exp10f (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_exp2f (__f32x4_t); __vpcs __f32x4_t 
_ZGVnN4v_expm1f (__f32x4_t); +__vpcs __f32x4_t _ZGVnN4vv_hypotf (__f32x4_t, __f32x4_t); __vpcs __f32x4_t _ZGVnN4v_logf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_log10f (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_log1pf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_log2f (__f32x4_t); +__vpcs __f32x4_t _ZGVnN4vv_powf (__f32x4_t, __f32x4_t); __vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_sinhf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_tanf (__f32x4_t); @@ -178,6 +193,7 @@ __vpcs __f64x2_t _ZGVnN2v_asin (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_asinh (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_atan (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_atanh (__f64x2_t); +__vpcs __f64x2_t _ZGVnN2v_cbrt (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_cosh (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_erf (__f64x2_t); @@ -186,10 +202,12 @@ __vpcs __f64x2_t _ZGVnN2v_exp (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_exp10 (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_exp2 (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_expm1 (__f64x2_t); +__vpcs __f64x2_t _ZGVnN2vv_hypot (__f64x2_t, __f64x2_t); __vpcs __f64x2_t _ZGVnN2v_log (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_log10 (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_log1p (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_log2 (__f64x2_t); +__vpcs __f64x2_t _ZGVnN2vv_pow (__f64x2_t, __f64x2_t); __vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_sinh (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_tan (__f64x2_t); @@ -207,6 +225,7 @@ __sv_f32_t _ZGVsMxv_asinf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_asinhf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_atanf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_atanhf (__sv_f32_t, __sv_bool_t); +__sv_f32_t _ZGVsMxv_cbrtf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_cosf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_coshf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_erff (__sv_f32_t, __sv_bool_t); @@ -215,10 +234,12 @@ __sv_f32_t _ZGVsMxv_expf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_exp10f 
(__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_exp2f (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_expm1f (__sv_f32_t, __sv_bool_t); +__sv_f32_t _ZGVsMxvv_hypotf (__sv_f32_t, __sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_logf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_log10f (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_log1pf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_log2f (__sv_f32_t, __sv_bool_t); +__sv_f32_t _ZGVsMxvv_powf (__sv_f32_t, __sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_sinf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_sinhf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_tanf (__sv_f32_t, __sv_bool_t); @@ -231,6 +252,7 @@ __sv_f64_t _ZGVsMxv_asin (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_asinh (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_atan (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_atanh (__sv_f64_t, __sv_bool_t); +__sv_f64_t _ZGVsMxv_cbrt (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_cos (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_cosh (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_erf (__sv_f64_t, __sv_bool_t); @@ -239,10 +261,12 @@ __sv_f64_t _ZGVsMxv_exp (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_exp10 (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_exp2 (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_expm1 (__sv_f64_t, __sv_bool_t); +__sv_f64_t _ZGVsMxvv_hypot (__sv_f64_t, __sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_log (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_log10 (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_log1p (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_log2 (__sv_f64_t, __sv_bool_t); +__sv_f64_t _ZGVsMxvv_pow (__sv_f64_t, __sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_sin (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_sinh (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_tan (__sv_f64_t, __sv_bool_t); diff --git a/sysdeps/aarch64/fpu/cbrt_advsimd.c b/sysdeps/aarch64/fpu/cbrt_advsimd.c new file mode 100644 index 0000000000..adfbb60cd3 --- /dev/null +++ b/sysdeps/aarch64/fpu/cbrt_advsimd.c @@ -0,0 
+1,121 @@ +/* Double-precision vector (AdvSIMD) cbrt function + + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include "v_math.h" +#include "poly_advsimd_f64.h" + +const static struct data +{ + float64x2_t poly[4], one_third, shift; + int64x2_t exp_bias; + uint64x2_t abs_mask, tiny_bound; + uint32x4_t thresh; + double table[5]; +} data = { + .shift = V2 (0x1.8p52), + .poly = { /* Generated with fpminimax in [0.5, 1]. */ + V2 (0x1.c14e8ee44767p-2), V2 (0x1.dd2d3f99e4c0ep-1), + V2 (-0x1.08e83026b7e74p-1), V2 (0x1.2c74eaa3ba428p-3) }, + .exp_bias = V2 (1022), + .abs_mask = V2(0x7fffffffffffffff), + .tiny_bound = V2(0x0010000000000000), /* Smallest normal. */ + .thresh = V4(0x7fe00000), /* asuint64 (infinity) - tiny_bound. */ + .one_third = V2(0x1.5555555555555p-2), + .table = { /* table[i] = 2^((i - 2) / 3). */ + 0x1.428a2f98d728bp-1, 0x1.965fea53d6e3dp-1, 0x1p0, + 0x1.428a2f98d728bp0, 0x1.965fea53d6e3dp0 } +}; + +#define MantissaMask v_u64 (0x000fffffffffffff) + +static float64x2_t NOINLINE VPCS_ATTR +special_case (float64x2_t x, float64x2_t y, uint32x2_t special) +{ + return v_call_f64 (cbrt, x, y, vmovl_u32 (special)); +} + +/* Approximation for double-precision vector cbrt(x), using low-order polynomial + and two Newton iterations. 
Greatest observed error is 1.79 ULP. Errors repeat + according to the exponent, for instance an error observed for double value + m * 2^e will be observed for any input m * 2^(e + 3*i), where i is an + integer. + __v_cbrt(0x1.fffff403f0bc6p+1) got 0x1.965fe72821e9bp+0 + want 0x1.965fe72821e99p+0. */ +VPCS_ATTR float64x2_t V_NAME_D1 (cbrt) (float64x2_t x) +{ + const struct data *d = ptr_barrier (&data); + uint64x2_t iax = vreinterpretq_u64_f64 (vabsq_f64 (x)); + + /* Subnormal, +/-0 and special values. */ + uint32x2_t special + = vcge_u32 (vsubhn_u64 (iax, d->tiny_bound), vget_low_u32 (d->thresh)); + + /* Decompose |x| into m * 2^e, where m is in [0.5, 1.0]. This is a vector + version of frexp, which gets subnormal values wrong - these have to be + special-cased as a result. */ + float64x2_t m = vbslq_f64 (MantissaMask, x, v_f64 (0.5)); + int64x2_t exp_bias = d->exp_bias; + uint64x2_t ia12 = vshrq_n_u64 (iax, 52); + int64x2_t e = vsubq_s64 (vreinterpretq_s64_u64 (ia12), exp_bias); + + /* Calculate rough approximation for cbrt(m) in [0.5, 1.0], starting point for + Newton iterations. */ + float64x2_t p = v_pairwise_poly_3_f64 (m, vmulq_f64 (m, m), d->poly); + float64x2_t one_third = d->one_third; + /* Two iterations of Newton's method for iteratively approximating cbrt. */ + float64x2_t m_by_3 = vmulq_f64 (m, one_third); + float64x2_t two_thirds = vaddq_f64 (one_third, one_third); + float64x2_t a + = vfmaq_f64 (vdivq_f64 (m_by_3, vmulq_f64 (p, p)), two_thirds, p); + a = vfmaq_f64 (vdivq_f64 (m_by_3, vmulq_f64 (a, a)), two_thirds, a); + + /* Assemble the result by the following: + + cbrt(x) = cbrt(m) * 2 ^ (e / 3). + + We can get 2 ^ round(e / 3) using ldexp and integer divide, but since e is + not necessarily a multiple of 3 we lose some information. + + Let q = 2 ^ round(e / 3), then t = 2 ^ (e / 3) / q. + + Then we know t = 2 ^ (i / 3), where i is the remainder from e / 3, which is + an integer in [-2, 2], and can be looked up in the table T. 
Hence the + result is assembled as: + + cbrt(x) = cbrt(m) * t * 2 ^ round(e / 3) * sign. */ + + float64x2_t ef = vcvtq_f64_s64 (e); + float64x2_t eb3f = vrndnq_f64 (vmulq_f64 (ef, one_third)); + int64x2_t em3 = vcvtq_s64_f64 (vfmsq_f64 (ef, eb3f, v_f64 (3))); + int64x2_t ey = vcvtq_s64_f64 (eb3f); + + float64x2_t my = (float64x2_t){ d->table[em3[0] + 2], d->table[em3[1] + 2] }; + my = vmulq_f64 (my, a); + + /* Vector version of ldexp. */ + float64x2_t y = vreinterpretq_f64_s64 ( + vshlq_n_s64 (vaddq_s64 (ey, vaddq_s64 (exp_bias, v_s64 (1))), 52)); + y = vmulq_f64 (y, my); + + if (__glibc_unlikely (v_any_u32h (special))) + return special_case (x, vbslq_f64 (d->abs_mask, y, x), special); + + /* Copy sign. */ + return vbslq_f64 (d->abs_mask, y, x); +} diff --git a/sysdeps/aarch64/fpu/cbrt_sve.c b/sysdeps/aarch64/fpu/cbrt_sve.c new file mode 100644 index 0000000000..fc976eda2a --- /dev/null +++ b/sysdeps/aarch64/fpu/cbrt_sve.c @@ -0,0 +1,128 @@ +/* Double-precision vector (SVE) cbrt function + + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include "sv_math.h" +#include "poly_sve_f64.h" + +const static struct data +{ + float64_t poly[4]; + float64_t table[5]; + float64_t one_third, two_thirds, shift; + int64_t exp_bias; + uint64_t tiny_bound, thresh; +} data = { + /* Generated with FPMinimax in [0.5, 1]. */ + .poly = { 0x1.c14e8ee44767p-2, 0x1.dd2d3f99e4c0ep-1, -0x1.08e83026b7e74p-1, + 0x1.2c74eaa3ba428p-3, }, + /* table[i] = 2^((i - 2) / 3). */ + .table = { 0x1.428a2f98d728bp-1, 0x1.965fea53d6e3dp-1, 0x1p0, + 0x1.428a2f98d728bp0, 0x1.965fea53d6e3dp0, }, + .one_third = 0x1.5555555555555p-2, + .two_thirds = 0x1.5555555555555p-1, + .shift = 0x1.8p52, + .exp_bias = 1022, + .tiny_bound = 0x0010000000000000, /* Smallest normal. */ + .thresh = 0x7fe0000000000000, /* asuint64 (infinity) - tiny_bound. */ +}; + +#define MantissaMask 0x000fffffffffffff +#define HalfExp 0x3fe0000000000000 + +static svfloat64_t NOINLINE +special_case (svfloat64_t x, svfloat64_t y, svbool_t special) +{ + return sv_call_f64 (cbrt, x, y, special); +} + +static inline svfloat64_t +shifted_lookup (const svbool_t pg, const float64_t *table, svint64_t i) +{ + return svld1_gather_index (pg, table, svadd_x (pg, i, 2)); +} + +/* Approximation for double-precision vector cbrt(x), using low-order + polynomial and two Newton iterations. Greatest observed error is 1.79 ULP. + Errors repeat according to the exponent, for instance an error observed for + double value m * 2^e will be observed for any input m * 2^(e + 3*i), where i + is an integer. + _ZGVsMxv_cbrt (0x0.3fffb8d4413f3p-1022) got 0x1.965f53b0e5d97p-342 + want 0x1.965f53b0e5d95p-342. */ +svfloat64_t SV_NAME_D1 (cbrt) (svfloat64_t x, const svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + + svfloat64_t ax = svabs_x (pg, x); + svuint64_t iax = svreinterpret_u64 (ax); + svuint64_t sign = sveor_x (pg, svreinterpret_u64 (x), iax); + + /* Subnormal, +/-0 and special values. 
*/ + svbool_t special = svcmpge (pg, svsub_x (pg, iax, d->tiny_bound), d->thresh); + + /* Decompose |x| into m * 2^e, where m is in [0.5, 1.0]. This is a vector + version of frexp, which gets subnormal values wrong - these have to be + special-cased as a result. */ + svfloat64_t m = svreinterpret_f64 (svorr_x ( + pg, svand_x (pg, svreinterpret_u64 (x), MantissaMask), HalfExp)); + svint64_t e + = svsub_x (pg, svreinterpret_s64 (svlsr_x (pg, iax, 52)), d->exp_bias); + + /* Calculate rough approximation for cbrt(m) in [0.5, 1.0], starting point + for Newton iterations. */ + svfloat64_t p + = sv_pairwise_poly_3_f64_x (pg, m, svmul_x (pg, m, m), d->poly); + + /* Two iterations of Newton's method for iteratively approximating cbrt. */ + svfloat64_t m_by_3 = svmul_x (pg, m, d->one_third); + svfloat64_t a = svmla_x (pg, svdiv_x (pg, m_by_3, svmul_x (pg, p, p)), p, + d->two_thirds); + a = svmla_x (pg, svdiv_x (pg, m_by_3, svmul_x (pg, a, a)), a, d->two_thirds); + + /* Assemble the result by the following: + + cbrt(x) = cbrt(m) * 2 ^ (e / 3). + + We can get 2 ^ round(e / 3) using ldexp and integer divide, but since e is + not necessarily a multiple of 3 we lose some information. + + Let q = 2 ^ round(e / 3), then t = 2 ^ (e / 3) / q. + + Then we know t = 2 ^ (i / 3), where i is the remainder from e / 3, which + is an integer in [-2, 2], and can be looked up in the table T. Hence the + result is assembled as: + + cbrt(x) = cbrt(m) * t * 2 ^ round(e / 3) * sign. */ + svfloat64_t eb3f = svmul_x (pg, svcvt_f64_x (pg, e), d->one_third); + svint64_t ey = svcvt_s64_x (pg, eb3f); + svint64_t em3 = svmls_x (pg, e, ey, 3); + + svfloat64_t my = shifted_lookup (pg, d->table, em3); + my = svmul_x (pg, my, a); + + /* Vector version of ldexp. */ + svfloat64_t y = svscale_x (pg, my, ey); + + if (__glibc_unlikely (svptest_any (pg, special))) + return special_case ( + x, svreinterpret_f64 (svorr_x (pg, svreinterpret_u64 (y), sign)), + special); + + /* Copy sign. 
*/ + return svreinterpret_f64 (svorr_x (pg, svreinterpret_u64 (y), sign)); +} diff --git a/sysdeps/aarch64/fpu/cbrtf_advsimd.c b/sysdeps/aarch64/fpu/cbrtf_advsimd.c new file mode 100644 index 0000000000..27debb8b57 --- /dev/null +++ b/sysdeps/aarch64/fpu/cbrtf_advsimd.c @@ -0,0 +1,123 @@ +/* Single-precision vector (AdvSIMD) cbrt function + + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include "v_math.h" +#include "poly_advsimd_f32.h" + +const static struct data +{ + float32x4_t poly[4], one_third; + float table[5]; +} data = { + .poly = { /* Very rough approximation of cbrt(x) in [0.5, 1], generated with + FPMinimax. */ + V4 (0x1.c14e96p-2), V4 (0x1.dd2d3p-1), V4 (-0x1.08e81ap-1), + V4 (0x1.2c74c2p-3) }, + .table = { /* table[i] = 2^((i - 2) / 3). */ + 0x1.428a3p-1, 0x1.965feap-1, 0x1p0, 0x1.428a3p0, 0x1.965feap0 }, + .one_third = V4 (0x1.555556p-2f), +}; + +#define SignMask v_u32 (0x80000000) +#define SmallestNormal v_u32 (0x00800000) +#define Thresh vdup_n_u16 (0x7f00) /* asuint(INFINITY) - SmallestNormal. 
*/ +#define MantissaMask v_u32 (0x007fffff) +#define HalfExp v_u32 (0x3f000000) + +static float32x4_t VPCS_ATTR NOINLINE +special_case (float32x4_t x, float32x4_t y, uint16x4_t special) +{ + return v_call_f32 (cbrtf, x, y, vmovl_u16 (special)); +} + +static inline float32x4_t +shifted_lookup (const float *table, int32x4_t i) +{ + return (float32x4_t){ table[i[0] + 2], table[i[1] + 2], table[i[2] + 2], + table[i[3] + 2] }; +} + +/* Approximation for vector single-precision cbrt(x) using Newton iteration + with initial guess obtained by a low-order polynomial. Greatest error + is 1.64 ULP. This is observed for every value where the mantissa is + 0x1.85a2aa and the exponent is a multiple of 3, for example: + _ZGVnN4v_cbrtf(0x1.85a2aap+3) got 0x1.267936p+1 + want 0x1.267932p+1. */ +VPCS_ATTR float32x4_t V_NAME_F1 (cbrt) (float32x4_t x) +{ + const struct data *d = ptr_barrier (&data); + uint32x4_t iax = vreinterpretq_u32_f32 (vabsq_f32 (x)); + + /* Subnormal, +/-0 and special values. */ + uint16x4_t special = vcge_u16 (vsubhn_u32 (iax, SmallestNormal), Thresh); + + /* Decompose |x| into m * 2^e, where m is in [0.5, 1.0]. This is a vector + version of frexpf, which gets subnormal values wrong - these have to be + special-cased as a result. */ + float32x4_t m = vbslq_f32 (MantissaMask, x, v_f32 (0.5)); + int32x4_t e + = vsubq_s32 (vreinterpretq_s32_u32 (vshrq_n_u32 (iax, 23)), v_s32 (126)); + + /* p is a rough approximation for cbrt(m) in [0.5, 1.0]. The better this is, + the less accurate the next stage of the algorithm needs to be. An order-4 + polynomial is enough for one Newton iteration. */ + float32x4_t p = v_pairwise_poly_3_f32 (m, vmulq_f32 (m, m), d->poly); + + float32x4_t one_third = d->one_third; + float32x4_t two_thirds = vaddq_f32 (one_third, one_third); + + /* One iteration of Newton's method for iteratively approximating cbrt. 
*/ + float32x4_t m_by_3 = vmulq_f32 (m, one_third); + float32x4_t a + = vfmaq_f32 (vdivq_f32 (m_by_3, vmulq_f32 (p, p)), two_thirds, p); + + /* Assemble the result by the following: + + cbrt(x) = cbrt(m) * 2 ^ (e / 3). + + We can get 2 ^ round(e / 3) using ldexp and integer divide, but since e is + not necessarily a multiple of 3 we lose some information. + + Let q = 2 ^ round(e / 3), then t = 2 ^ (e / 3) / q. + + Then we know t = 2 ^ (i / 3), where i is the remainder from e / 3, which + is an integer in [-2, 2], and can be looked up in the table T. Hence the + result is assembled as: + + cbrt(x) = cbrt(m) * t * 2 ^ round(e / 3) * sign. */ + float32x4_t ef = vmulq_f32 (vcvtq_f32_s32 (e), one_third); + int32x4_t ey = vcvtq_s32_f32 (ef); + int32x4_t em3 = vsubq_s32 (e, vmulq_s32 (ey, v_s32 (3))); + + float32x4_t my = shifted_lookup (d->table, em3); + my = vmulq_f32 (my, a); + + /* Vector version of ldexpf. */ + float32x4_t y + = vreinterpretq_f32_s32 (vshlq_n_s32 (vaddq_s32 (ey, v_s32 (127)), 23)); + y = vmulq_f32 (y, my); + + if (__glibc_unlikely (v_any_u16h (special))) + return special_case (x, vbslq_f32 (SignMask, x, y), special); + + /* Copy sign. */ + return vbslq_f32 (SignMask, x, y); +} +libmvec_hidden_def (V_NAME_F1 (cbrt)) +HALF_WIDTH_ALIAS_F1 (cbrt) diff --git a/sysdeps/aarch64/fpu/cbrtf_sve.c b/sysdeps/aarch64/fpu/cbrtf_sve.c new file mode 100644 index 0000000000..23c220c202 --- /dev/null +++ b/sysdeps/aarch64/fpu/cbrtf_sve.c @@ -0,0 +1,122 @@ +/* Single-precision vector (SVE) cbrt function + + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include "sv_math.h" +#include "poly_sve_f32.h" + +const static struct data +{ + float32_t poly[4]; + float32_t table[5]; + float32_t one_third, two_thirds; +} data = { + /* Very rough approximation of cbrt(x) in [0.5, 1], generated with FPMinimax. + */ + .poly = { 0x1.c14e96p-2, 0x1.dd2d3p-1, -0x1.08e81ap-1, + 0x1.2c74c2p-3, }, + /* table[i] = 2^((i - 2) / 3). */ + .table = { 0x1.428a3p-1, 0x1.965feap-1, 0x1p0, 0x1.428a3p0, 0x1.965feap0 }, + .one_third = 0x1.555556p-2f, + .two_thirds = 0x1.555556p-1f, +}; + +#define SmallestNormal 0x00800000 +#define Thresh 0x7f000000 /* asuint(INFINITY) - SmallestNormal. */ +#define MantissaMask 0x007fffff +#define HalfExp 0x3f000000 + +static svfloat32_t NOINLINE +special_case (svfloat32_t x, svfloat32_t y, svbool_t special) +{ + return sv_call_f32 (cbrtf, x, y, special); +} + +static inline svfloat32_t +shifted_lookup (const svbool_t pg, const float32_t *table, svint32_t i) +{ + return svld1_gather_index (pg, table, svadd_x (pg, i, 2)); +} + +/* Approximation for vector single-precision cbrt(x) using Newton iteration + with initial guess obtained by a low-order polynomial. Greatest error + is 1.64 ULP. This is observed for every value where the mantissa is + 0x1.85a2aa and the exponent is a multiple of 3, for example: + _ZGVsMxv_cbrtf (0x1.85a2aap+3) got 0x1.267936p+1 + want 0x1.267932p+1. 
*/ +svfloat32_t SV_NAME_F1 (cbrt) (svfloat32_t x, const svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + + svfloat32_t ax = svabs_x (pg, x); + svuint32_t iax = svreinterpret_u32 (ax); + svuint32_t sign = sveor_x (pg, svreinterpret_u32 (x), iax); + + /* Subnormal, +/-0 and special values. */ + svbool_t special = svcmpge (pg, svsub_x (pg, iax, SmallestNormal), Thresh); + + /* Decompose |x| into m * 2^e, where m is in [0.5, 1.0]. This is a vector + version of frexpf, which gets subnormal values wrong - these have to be + special-cased as a result. */ + svfloat32_t m = svreinterpret_f32 (svorr_x ( + pg, svand_x (pg, svreinterpret_u32 (x), MantissaMask), HalfExp)); + svint32_t e = svsub_x (pg, svreinterpret_s32 (svlsr_x (pg, iax, 23)), 126); + + /* p is a rough approximation for cbrt(m) in [0.5, 1.0]. The better this is, + the less accurate the next stage of the algorithm needs to be. An order-4 + polynomial is enough for one Newton iteration. */ + svfloat32_t p + = sv_pairwise_poly_3_f32_x (pg, m, svmul_x (pg, m, m), d->poly); + + /* One iteration of Newton's method for iteratively approximating cbrt. */ + svfloat32_t m_by_3 = svmul_x (pg, m, d->one_third); + svfloat32_t a = svmla_x (pg, svdiv_x (pg, m_by_3, svmul_x (pg, p, p)), p, + d->two_thirds); + + /* Assemble the result by the following: + + cbrt(x) = cbrt(m) * 2 ^ (e / 3). + + We can get 2 ^ round(e / 3) using ldexp and integer divide, but since e is + not necessarily a multiple of 3 we lose some information. + + Let q = 2 ^ round(e / 3), then t = 2 ^ (e / 3) / q. + + Then we know t = 2 ^ (i / 3), where i is the remainder from e / 3, which + is an integer in [-2, 2], and can be looked up in the table T. Hence the + result is assembled as: + + cbrt(x) = cbrt(m) * t * 2 ^ round(e / 3) * sign. 
*/ + svfloat32_t ef = svmul_x (pg, svcvt_f32_x (pg, e), d->one_third); + svint32_t ey = svcvt_s32_x (pg, ef); + svint32_t em3 = svmls_x (pg, e, ey, 3); + + svfloat32_t my = shifted_lookup (pg, d->table, em3); + my = svmul_x (pg, my, a); + + /* Vector version of ldexpf. */ + svfloat32_t y = svscale_x (pg, my, ey); + + if (__glibc_unlikely (svptest_any (pg, special))) + return special_case ( + x, svreinterpret_f32 (svorr_x (pg, svreinterpret_u32 (y), sign)), + special); + + /* Copy sign. */ + return svreinterpret_f32 (svorr_x (pg, svreinterpret_u32 (y), sign)); +} diff --git a/sysdeps/aarch64/fpu/cosh_advsimd.c b/sysdeps/aarch64/fpu/cosh_advsimd.c index ec7b59637e..4bee734f00 100644 --- a/sysdeps/aarch64/fpu/cosh_advsimd.c +++ b/sysdeps/aarch64/fpu/cosh_advsimd.c @@ -22,7 +22,9 @@ static const struct data { float64x2_t poly[3]; - float64x2_t inv_ln2, ln2, shift, thres; + float64x2_t inv_ln2; + double ln2[2]; + float64x2_t shift, thres; uint64x2_t index_mask, special_bound; } data = { .poly = { V2 (0x1.fffffffffffd4p-2), V2 (0x1.5555571d6b68cp-3), @@ -58,8 +60,9 @@ exp_inline (float64x2_t x) float64x2_t n = vsubq_f64 (z, d->shift); /* r = x - n*ln2/N. 
*/ - float64x2_t r = vfmaq_laneq_f64 (x, n, d->ln2, 0); - r = vfmaq_laneq_f64 (r, n, d->ln2, 1); + float64x2_t ln2 = vld1q_f64 (d->ln2); + float64x2_t r = vfmaq_laneq_f64 (x, n, ln2, 0); + r = vfmaq_laneq_f64 (r, n, ln2, 1); uint64x2_t e = vshlq_n_u64 (u, 52 - V_EXP_TAIL_TABLE_BITS); uint64x2_t i = vandq_u64 (u, d->index_mask); diff --git a/sysdeps/aarch64/fpu/erf_advsimd.c b/sysdeps/aarch64/fpu/erf_advsimd.c index 3e70cbc025..19cbb7d0f4 100644 --- a/sysdeps/aarch64/fpu/erf_advsimd.c +++ b/sysdeps/aarch64/fpu/erf_advsimd.c @@ -56,8 +56,8 @@ static inline struct entry lookup (uint64x2_t i) { struct entry e; - float64x2_t e1 = vld1q_f64 ((float64_t *) (__erf_data.tab + i[0])), - e2 = vld1q_f64 ((float64_t *) (__erf_data.tab + i[1])); + float64x2_t e1 = vld1q_f64 (&__erf_data.tab[vgetq_lane_u64 (i, 0)].erf), + e2 = vld1q_f64 (&__erf_data.tab[vgetq_lane_u64 (i, 1)].erf); e.erf = vuzp1q_f64 (e1, e2); e.scale = vuzp2q_f64 (e1, e2); return e; diff --git a/sysdeps/aarch64/fpu/erfc_advsimd.c b/sysdeps/aarch64/fpu/erfc_advsimd.c index 548f21a3d6..f1b3bfe830 100644 --- a/sysdeps/aarch64/fpu/erfc_advsimd.c +++ b/sysdeps/aarch64/fpu/erfc_advsimd.c @@ -26,7 +26,7 @@ static const struct data float64x2_t max, shift; float64x2_t p20, p40, p41, p42; float64x2_t p51, p52; - float64x2_t qr5, qr6, qr7, qr8, qr9; + double qr5[2], qr6[2], qr7[2], qr8[2], qr9[2]; #if WANT_SIMD_EXCEPT float64x2_t uflow_bound; #endif @@ -68,8 +68,10 @@ static inline struct entry lookup (uint64x2_t i) { struct entry e; - float64x2_t e1 = vld1q_f64 ((float64_t *) (__erfc_data.tab - Off + i[0])), - e2 = vld1q_f64 ((float64_t *) (__erfc_data.tab - Off + i[1])); + float64x2_t e1 + = vld1q_f64 (&__erfc_data.tab[vgetq_lane_u64 (i, 0) - Off].erfc); + float64x2_t e2 + = vld1q_f64 (&__erfc_data.tab[vgetq_lane_u64 (i, 1) - Off].erfc); e.erfc = vuzp1q_f64 (e1, e2); e.scale = vuzp2q_f64 (e1, e2); return e; @@ -161,16 +163,19 @@ float64x2_t V_NAME_D1 (erfc) (float64x2_t x) p5 = vmulq_f64 (r, vfmaq_f64 (vmulq_f64 (v_f64 
(0.5), dat->p20), r2, p5)); /* Compute p_i using recurrence relation: p_{i+2} = (p_i + r * Q_{i+1} * p_{i+1}) * R_{i+1}. */ - float64x2_t p6 = vfmaq_f64 (p4, p5, vmulq_laneq_f64 (r, dat->qr5, 0)); - p6 = vmulq_laneq_f64 (p6, dat->qr5, 1); - float64x2_t p7 = vfmaq_f64 (p5, p6, vmulq_laneq_f64 (r, dat->qr6, 0)); - p7 = vmulq_laneq_f64 (p7, dat->qr6, 1); - float64x2_t p8 = vfmaq_f64 (p6, p7, vmulq_laneq_f64 (r, dat->qr7, 0)); - p8 = vmulq_laneq_f64 (p8, dat->qr7, 1); - float64x2_t p9 = vfmaq_f64 (p7, p8, vmulq_laneq_f64 (r, dat->qr8, 0)); - p9 = vmulq_laneq_f64 (p9, dat->qr8, 1); - float64x2_t p10 = vfmaq_f64 (p8, p9, vmulq_laneq_f64 (r, dat->qr9, 0)); - p10 = vmulq_laneq_f64 (p10, dat->qr9, 1); + float64x2_t qr5 = vld1q_f64 (dat->qr5), qr6 = vld1q_f64 (dat->qr6), + qr7 = vld1q_f64 (dat->qr7), qr8 = vld1q_f64 (dat->qr8), + qr9 = vld1q_f64 (dat->qr9); + float64x2_t p6 = vfmaq_f64 (p4, p5, vmulq_laneq_f64 (r, qr5, 0)); + p6 = vmulq_laneq_f64 (p6, qr5, 1); + float64x2_t p7 = vfmaq_f64 (p5, p6, vmulq_laneq_f64 (r, qr6, 0)); + p7 = vmulq_laneq_f64 (p7, qr6, 1); + float64x2_t p8 = vfmaq_f64 (p6, p7, vmulq_laneq_f64 (r, qr7, 0)); + p8 = vmulq_laneq_f64 (p8, qr7, 1); + float64x2_t p9 = vfmaq_f64 (p7, p8, vmulq_laneq_f64 (r, qr8, 0)); + p9 = vmulq_laneq_f64 (p9, qr8, 1); + float64x2_t p10 = vfmaq_f64 (p8, p9, vmulq_laneq_f64 (r, qr9, 0)); + p10 = vmulq_laneq_f64 (p10, qr9, 1); /* Compute polynomial in d using pairwise Horner scheme. 
*/ float64x2_t p90 = vfmaq_f64 (p9, d, p10); float64x2_t p78 = vfmaq_f64 (p7, d, p8); diff --git a/sysdeps/aarch64/fpu/erfcf_advsimd.c b/sysdeps/aarch64/fpu/erfcf_advsimd.c index 30b9e48dd4..ca5bc3ab33 100644 --- a/sysdeps/aarch64/fpu/erfcf_advsimd.c +++ b/sysdeps/aarch64/fpu/erfcf_advsimd.c @@ -23,7 +23,8 @@ static const struct data { uint32x4_t offset, table_scale; float32x4_t max, shift; - float32x4_t coeffs, third, two_over_five, tenth; + float coeffs[4]; + float32x4_t third, two_over_five, tenth; #if WANT_SIMD_EXCEPT float32x4_t uflow_bound; #endif @@ -37,7 +38,7 @@ static const struct data .shift = V4 (0x1p17f), /* Store 1/3, 2/3 and 2/15 in a single register for use with indexed muls and fmas. */ - .coeffs = (float32x4_t){ 0x1.555556p-2f, 0x1.555556p-1f, 0x1.111112p-3f, 0 }, + .coeffs = { 0x1.555556p-2f, 0x1.555556p-1f, 0x1.111112p-3f, 0 }, .third = V4 (0x1.555556p-2f), .two_over_five = V4 (-0x1.99999ap-2f), .tenth = V4 (-0x1.99999ap-4f), @@ -60,12 +61,16 @@ static inline struct entry lookup (uint32x4_t i) { struct entry e; - float64_t t0 = *((float64_t *) (__erfcf_data.tab - Off + i[0])); - float64_t t1 = *((float64_t *) (__erfcf_data.tab - Off + i[1])); - float64_t t2 = *((float64_t *) (__erfcf_data.tab - Off + i[2])); - float64_t t3 = *((float64_t *) (__erfcf_data.tab - Off + i[3])); - float32x4_t e1 = vreinterpretq_f32_f64 ((float64x2_t){ t0, t1 }); - float32x4_t e2 = vreinterpretq_f32_f64 ((float64x2_t){ t2, t3 }); + float32x2_t t0 + = vld1_f32 (&__erfcf_data.tab[vgetq_lane_u32 (i, 0) - Off].erfc); + float32x2_t t1 + = vld1_f32 (&__erfcf_data.tab[vgetq_lane_u32 (i, 1) - Off].erfc); + float32x2_t t2 + = vld1_f32 (&__erfcf_data.tab[vgetq_lane_u32 (i, 2) - Off].erfc); + float32x2_t t3 + = vld1_f32 (&__erfcf_data.tab[vgetq_lane_u32 (i, 3) - Off].erfc); + float32x4_t e1 = vcombine_f32 (t0, t1); + float32x4_t e2 = vcombine_f32 (t2, t3); e.erfc = vuzp1q_f32 (e1, e2); e.scale = vuzp2q_f32 (e1, e2); return e; @@ -140,10 +145,11 @@ float32x4_t NOINLINE V_NAME_F1 
(erfc) (float32x4_t x) float32x4_t r2 = vmulq_f32 (r, r); float32x4_t p1 = r; - float32x4_t p2 = vfmsq_laneq_f32 (dat->third, r2, dat->coeffs, 1); + float32x4_t coeffs = vld1q_f32 (dat->coeffs); + float32x4_t p2 = vfmsq_laneq_f32 (dat->third, r2, coeffs, 1); float32x4_t p3 - = vmulq_f32 (r, vfmaq_laneq_f32 (v_f32 (-0.5), r2, dat->coeffs, 0)); - float32x4_t p4 = vfmaq_laneq_f32 (dat->two_over_five, r2, dat->coeffs, 2); + = vmulq_f32 (r, vfmaq_laneq_f32 (v_f32 (-0.5), r2, coeffs, 0)); + float32x4_t p4 = vfmaq_laneq_f32 (dat->two_over_five, r2, coeffs, 2); p4 = vfmsq_f32 (dat->tenth, r2, p4); float32x4_t y = vfmaq_f32 (p3, d, p4); diff --git a/sysdeps/aarch64/fpu/erff_advsimd.c b/sysdeps/aarch64/fpu/erff_advsimd.c index c44644a71c..f2fe6ff236 100644 --- a/sysdeps/aarch64/fpu/erff_advsimd.c +++ b/sysdeps/aarch64/fpu/erff_advsimd.c @@ -47,12 +47,12 @@ static inline struct entry lookup (uint32x4_t i) { struct entry e; - float64_t t0 = *((float64_t *) (__erff_data.tab + i[0])); - float64_t t1 = *((float64_t *) (__erff_data.tab + i[1])); - float64_t t2 = *((float64_t *) (__erff_data.tab + i[2])); - float64_t t3 = *((float64_t *) (__erff_data.tab + i[3])); - float32x4_t e1 = vreinterpretq_f32_f64 ((float64x2_t){ t0, t1 }); - float32x4_t e2 = vreinterpretq_f32_f64 ((float64x2_t){ t2, t3 }); + float32x2_t t0 = vld1_f32 (&__erff_data.tab[vgetq_lane_u32 (i, 0)].erf); + float32x2_t t1 = vld1_f32 (&__erff_data.tab[vgetq_lane_u32 (i, 1)].erf); + float32x2_t t2 = vld1_f32 (&__erff_data.tab[vgetq_lane_u32 (i, 2)].erf); + float32x2_t t3 = vld1_f32 (&__erff_data.tab[vgetq_lane_u32 (i, 3)].erf); + float32x4_t e1 = vcombine_f32 (t0, t1); + float32x4_t e2 = vcombine_f32 (t2, t3); e.erf = vuzp1q_f32 (e1, e2); e.scale = vuzp2q_f32 (e1, e2); return e; diff --git a/sysdeps/aarch64/fpu/exp10f_advsimd.c b/sysdeps/aarch64/fpu/exp10f_advsimd.c index ab117b69da..cf53e73290 100644 --- a/sysdeps/aarch64/fpu/exp10f_advsimd.c +++ b/sysdeps/aarch64/fpu/exp10f_advsimd.c @@ -25,7 +25,8 @@ static const 
struct data { float32x4_t poly[5]; - float32x4_t log10_2_and_inv, shift; + float log10_2_and_inv[4]; + float32x4_t shift; #if !WANT_SIMD_EXCEPT float32x4_t scale_thresh; @@ -111,10 +112,11 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp10) (float32x4_t x) /* exp10(x) = 2^n * 10^r = 2^n * (1 + poly (r)), with poly(r) in [1/sqrt(2), sqrt(2)] and x = r + n * log10 (2), with r in [-log10(2)/2, log10(2)/2]. */ - float32x4_t z = vfmaq_laneq_f32 (d->shift, x, d->log10_2_and_inv, 0); + float32x4_t log10_2_and_inv = vld1q_f32 (d->log10_2_and_inv); + float32x4_t z = vfmaq_laneq_f32 (d->shift, x, log10_2_and_inv, 0); float32x4_t n = vsubq_f32 (z, d->shift); - float32x4_t r = vfmsq_laneq_f32 (x, n, d->log10_2_and_inv, 1); - r = vfmsq_laneq_f32 (r, n, d->log10_2_and_inv, 2); + float32x4_t r = vfmsq_laneq_f32 (x, n, log10_2_and_inv, 1); + r = vfmsq_laneq_f32 (r, n, log10_2_and_inv, 2); uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23); float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, ExponentBias)); diff --git a/sysdeps/aarch64/fpu/expm1_advsimd.c b/sysdeps/aarch64/fpu/expm1_advsimd.c index 3628398674..3db3b80c49 100644 --- a/sysdeps/aarch64/fpu/expm1_advsimd.c +++ b/sysdeps/aarch64/fpu/expm1_advsimd.c @@ -23,7 +23,9 @@ static const struct data { float64x2_t poly[11]; - float64x2_t invln2, ln2, shift; + float64x2_t invln2; + double ln2[2]; + float64x2_t shift; int64x2_t exponent_bias; #if WANT_SIMD_EXCEPT uint64x2_t thresh, tiny_bound; @@ -92,8 +94,9 @@ float64x2_t VPCS_ATTR V_NAME_D1 (expm1) (float64x2_t x) where 2^i is exact because i is an integer. */ float64x2_t n = vsubq_f64 (vfmaq_f64 (d->shift, d->invln2, x), d->shift); int64x2_t i = vcvtq_s64_f64 (n); - float64x2_t f = vfmsq_laneq_f64 (x, n, d->ln2, 0); - f = vfmsq_laneq_f64 (f, n, d->ln2, 1); + float64x2_t ln2 = vld1q_f64 (&d->ln2[0]); + float64x2_t f = vfmsq_laneq_f64 (x, n, ln2, 0); + f = vfmsq_laneq_f64 (f, n, ln2, 1); /* Approximate expm1(f) using polynomial. 
Taylor expansion for expm1(x) has the form: diff --git a/sysdeps/aarch64/fpu/expm1f_advsimd.c b/sysdeps/aarch64/fpu/expm1f_advsimd.c index 93db200f61..a0616ec754 100644 --- a/sysdeps/aarch64/fpu/expm1f_advsimd.c +++ b/sysdeps/aarch64/fpu/expm1f_advsimd.c @@ -23,7 +23,7 @@ static const struct data { float32x4_t poly[5]; - float32x4_t invln2_and_ln2; + float invln2_and_ln2[4]; float32x4_t shift; int32x4_t exponent_bias; #if WANT_SIMD_EXCEPT @@ -88,11 +88,12 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (expm1) (float32x4_t x) and f = x - i * ln2, then f is in [-ln2/2, ln2/2]. exp(x) - 1 = 2^i * (expm1(f) + 1) - 1 where 2^i is exact because i is an integer. */ - float32x4_t j = vsubq_f32 ( - vfmaq_laneq_f32 (d->shift, x, d->invln2_and_ln2, 0), d->shift); + float32x4_t invln2_and_ln2 = vld1q_f32 (d->invln2_and_ln2); + float32x4_t j + = vsubq_f32 (vfmaq_laneq_f32 (d->shift, x, invln2_and_ln2, 0), d->shift); int32x4_t i = vcvtq_s32_f32 (j); - float32x4_t f = vfmsq_laneq_f32 (x, j, d->invln2_and_ln2, 1); - f = vfmsq_laneq_f32 (f, j, d->invln2_and_ln2, 2); + float32x4_t f = vfmsq_laneq_f32 (x, j, invln2_and_ln2, 1); + f = vfmsq_laneq_f32 (f, j, invln2_and_ln2, 2); /* Approximate expm1(f) using polynomial. Taylor expansion for expm1(x) has the form: diff --git a/sysdeps/aarch64/fpu/finite_pow.h b/sysdeps/aarch64/fpu/finite_pow.h new file mode 100644 index 0000000000..84c93d4048 --- /dev/null +++ b/sysdeps/aarch64/fpu/finite_pow.h @@ -0,0 +1,373 @@ +/* Double-precision x^y function. + + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include "math_config.h" + +/* Scalar version of pow used for fallbacks in vector implementations. */ + +/* Data is defined in v_pow_log_data.c. */ +#define N_LOG (1 << V_POW_LOG_TABLE_BITS) +#define Off 0x3fe6955500000000 +#define As __v_pow_log_data.poly + +/* Data is defined in v_pow_exp_data.c. */ +#define N_EXP (1 << V_POW_EXP_TABLE_BITS) +#define SignBias (0x800 << V_POW_EXP_TABLE_BITS) +#define SmallExp 0x3c9 /* top12(0x1p-54). */ +#define BigExp 0x408 /* top12(512.0). */ +#define ThresExp 0x03f /* BigExp - SmallExp. */ +#define InvLn2N __v_pow_exp_data.n_over_ln2 +#define Ln2HiN __v_pow_exp_data.ln2_over_n_hi +#define Ln2LoN __v_pow_exp_data.ln2_over_n_lo +#define SBits __v_pow_exp_data.sbits +#define Cs __v_pow_exp_data.poly + +/* Constants associated with pow. */ +#define SmallPowX 0x001 /* top12(0x1p-126). */ +#define BigPowX 0x7ff /* top12(INFINITY). */ +#define ThresPowX 0x7fe /* BigPowX - SmallPowX. */ +#define SmallPowY 0x3be /* top12(0x1.e7b6p-65). */ +#define BigPowY 0x43e /* top12(0x1.749p62). */ +#define ThresPowY 0x080 /* BigPowY - SmallPowY. */ + +/* Top 12 bits of a double (sign and exponent bits). */ +static inline uint32_t +top12 (double x) +{ + return asuint64 (x) >> 52; +} + +/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about + additional 15 bits precision. IX is the bit representation of x, but + normalized in the subnormal range using the sign bit for the exponent. */ +static inline double +log_inline (uint64_t ix, double *tail) +{ + /* x = 2^k z; where z is in range [Off,2*Off) and exact. 
+ The range is split into N subintervals. + The ith subinterval contains z and c is near its center. */ + uint64_t tmp = ix - Off; + int i = (tmp >> (52 - V_POW_LOG_TABLE_BITS)) & (N_LOG - 1); + int k = (int64_t) tmp >> 52; /* arithmetic shift. */ + uint64_t iz = ix - (tmp & 0xfffULL << 52); + double z = asdouble (iz); + double kd = (double) k; + + /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */ + double invc = __v_pow_log_data.invc[i]; + double logc = __v_pow_log_data.logc[i]; + double logctail = __v_pow_log_data.logctail[i]; + + /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and + |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. */ + double r = fma (z, invc, -1.0); + + /* k*Ln2 + log(c) + r. */ + double t1 = kd * __v_pow_log_data.ln2_hi + logc; + double t2 = t1 + r; + double lo1 = kd * __v_pow_log_data.ln2_lo + logctail; + double lo2 = t1 - t2 + r; + + /* Evaluation is optimized assuming superscalar pipelined execution. */ + double ar = As[0] * r; + double ar2 = r * ar; + double ar3 = r * ar2; + /* k*Ln2 + log(c) + r + A[0]*r*r. */ + double hi = t2 + ar2; + double lo3 = fma (ar, r, -ar2); + double lo4 = t2 - hi + ar2; + /* p = log1p(r) - r - A[0]*r*r. */ + double p = (ar3 + * (As[1] + r * As[2] + + ar2 * (As[3] + r * As[4] + ar2 * (As[5] + r * As[6])))); + double lo = lo1 + lo2 + lo3 + lo4 + p; + double y = hi + lo; + *tail = hi - y + lo; + return y; +} + +/* Handle cases that may overflow or underflow when computing the result that + is scale*(1+TMP) without intermediate rounding. The bit representation of + scale is in SBITS, however it has a computed exponent that may have + overflown into the sign bit so that needs to be adjusted before using it as + a double. (int32_t)KI is the k used in the argument reduction and exponent + adjustment of scale, positive k here means the result may overflow and + negative k means the result may underflow. 
*/ +static inline double +special_case (double tmp, uint64_t sbits, uint64_t ki) +{ + double scale, y; + + if ((ki & 0x80000000) == 0) + { + /* k > 0, the exponent of scale might have overflowed by <= 460. */ + sbits -= 1009ull << 52; + scale = asdouble (sbits); + y = 0x1p1009 * (scale + scale * tmp); + return y; + } + /* k < 0, need special care in the subnormal range. */ + sbits += 1022ull << 52; + /* Note: sbits is signed scale. */ + scale = asdouble (sbits); + y = scale + scale * tmp; +#if WANT_SIMD_EXCEPT + if (fabs (y) < 1.0) + { + /* Round y to the right precision before scaling it into the subnormal + range to avoid double rounding that can cause 0.5+E/2 ulp error where + E is the worst-case ulp error outside the subnormal range. So this + is only useful if the goal is better than 1 ulp worst-case error. */ + double hi, lo, one = 1.0; + if (y < 0.0) + one = -1.0; + lo = scale - y + scale * tmp; + hi = one + y; + lo = one - hi + y + lo; + y = (hi + lo) - one; + /* Fix the sign of 0. */ + if (y == 0.0) + y = asdouble (sbits & 0x8000000000000000); + /* The underflow exception needs to be signaled explicitly. */ + force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022); + } +#endif + y = 0x1p-1022 * y; + return y; +} + +/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|. + The sign_bias argument is SignBias or 0 and sets the sign to -1 or 1. */ +static inline double +exp_inline (double x, double xtail, uint32_t sign_bias) +{ + uint32_t abstop = top12 (x) & 0x7ff; + if (__glibc_unlikely (abstop - SmallExp >= ThresExp)) + { + if (abstop - SmallExp >= 0x80000000) + { + /* Avoid spurious underflow for tiny x. */ + /* Note: 0 is common input. */ + return sign_bias ? -1.0 : 1.0; + } + if (abstop >= top12 (1024.0)) + { + /* Note: inf and nan are already handled. */ + /* Skip errno handling. */ +#if WANT_SIMD_EXCEPT + return asuint64 (x) >> 63 ? 
__math_uflow (sign_bias) + : __math_oflow (sign_bias); +#else + double res_uoflow = asuint64 (x) >> 63 ? 0.0 : INFINITY; + return sign_bias ? -res_uoflow : res_uoflow; +#endif + } + /* Large x is special cased below. */ + abstop = 0; + } + + /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ + /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */ + double z = InvLn2N * x; + double kd = round (z); + uint64_t ki = lround (z); + double r = x - kd * Ln2HiN - kd * Ln2LoN; + /* The code assumes 2^-200 < |xtail| < 2^-8/N. */ + r += xtail; + /* 2^(k/N) ~= scale. */ + uint64_t idx = ki & (N_EXP - 1); + uint64_t top = (ki + sign_bias) << (52 - V_POW_EXP_TABLE_BITS); + /* This is only a valid scale when -1023*N < k < 1024*N. */ + uint64_t sbits = SBits[idx] + top; + /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (exp(r) - 1). */ + /* Evaluation is optimized assuming superscalar pipelined execution. */ + double r2 = r * r; + double tmp = r + r2 * Cs[0] + r * r2 * (Cs[1] + r * Cs[2]); + if (__glibc_unlikely (abstop == 0)) + return special_case (tmp, sbits, ki); + double scale = asdouble (sbits); + /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there + is no spurious underflow here even without fma. */ + return scale + scale * tmp; +} + +/* Computes exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|. + A version of exp_inline that is not inlined and for which sign_bias is + equal to 0. */ +static double NOINLINE +exp_nosignbias (double x, double xtail) +{ + uint32_t abstop = top12 (x) & 0x7ff; + if (__glibc_unlikely (abstop - SmallExp >= ThresExp)) + { + /* Avoid spurious underflow for tiny x. */ + if (abstop - SmallExp >= 0x80000000) + return 1.0; + /* Note: inf and nan are already handled. */ + if (abstop >= top12 (1024.0)) +#if WANT_SIMD_EXCEPT + return asuint64 (x) >> 63 ? __math_uflow (0) : __math_oflow (0); +#else + return asuint64 (x) >> 63 ? 0.0 : INFINITY; +#endif + /* Large x is special cased below. 
*/ + abstop = 0; + } + + /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ + /* x = ln2/N*k + r, with k integer and r in [-ln2/2N, ln2/2N]. */ + double z = InvLn2N * x; + double kd = round (z); + uint64_t ki = lround (z); + double r = x - kd * Ln2HiN - kd * Ln2LoN; + /* The code assumes 2^-200 < |xtail| < 2^-8/N. */ + r += xtail; + /* 2^(k/N) ~= scale. */ + uint64_t idx = ki & (N_EXP - 1); + uint64_t top = ki << (52 - V_POW_EXP_TABLE_BITS); + /* This is only a valid scale when -1023*N < k < 1024*N. */ + uint64_t sbits = SBits[idx] + top; + /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */ + double r2 = r * r; + double tmp = r + r2 * Cs[0] + r * r2 * (Cs[1] + r * Cs[2]); + if (__glibc_unlikely (abstop == 0)) + return special_case (tmp, sbits, ki); + double scale = asdouble (sbits); + /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there + is no spurious underflow here even without fma. */ + return scale + scale * tmp; +} + +/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is + the bit representation of a non-zero finite floating-point value. */ +static inline int +checkint (uint64_t iy) +{ + int e = iy >> 52 & 0x7ff; + if (e < 0x3ff) + return 0; + if (e > 0x3ff + 52) + return 2; + if (iy & ((1ULL << (0x3ff + 52 - e)) - 1)) + return 0; + if (iy & (1ULL << (0x3ff + 52 - e))) + return 1; + return 2; +} + +/* Returns 1 if input is the bit representation of 0, infinity or nan. 
*/ +static inline int +zeroinfnan (uint64_t i) +{ + return 2 * i - 1 >= 2 * asuint64 (INFINITY) - 1; +} + +static double NOINLINE +pow_scalar_special_case (double x, double y) +{ + uint32_t sign_bias = 0; + uint64_t ix, iy; + uint32_t topx, topy; + + ix = asuint64 (x); + iy = asuint64 (y); + topx = top12 (x); + topy = top12 (y); + if (__glibc_unlikely (topx - SmallPowX >= ThresPowX + || (topy & 0x7ff) - SmallPowY >= ThresPowY)) + { + /* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0 + and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */ + /* Special cases: (x < 0x1p-126 or inf or nan) or + (|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */ + if (__glibc_unlikely (zeroinfnan (iy))) + { + if (2 * iy == 0) + return issignaling_inline (x) ? x + y : 1.0; + if (ix == asuint64 (1.0)) + return issignaling_inline (y) ? x + y : 1.0; + if (2 * ix > 2 * asuint64 (INFINITY) + || 2 * iy > 2 * asuint64 (INFINITY)) + return x + y; + if (2 * ix == 2 * asuint64 (1.0)) + return 1.0; + if ((2 * ix < 2 * asuint64 (1.0)) == !(iy >> 63)) + return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */ + return y * y; + } + if (__glibc_unlikely (zeroinfnan (ix))) + { + double x2 = x * x; + if (ix >> 63 && checkint (iy) == 1) + { + x2 = -x2; + sign_bias = 1; + } +#if WANT_SIMD_EXCEPT + if (2 * ix == 0 && iy >> 63) + return __math_divzero (sign_bias); +#endif + return iy >> 63 ? 1 / x2 : x2; + } + /* Here x and y are non-zero finite. */ + if (ix >> 63) + { + /* Finite x < 0. */ + int yint = checkint (iy); + if (yint == 0) +#if WANT_SIMD_EXCEPT + return __math_invalid (x); +#else + return __builtin_nan (""); +#endif + if (yint == 1) + sign_bias = SignBias; + ix &= 0x7fffffffffffffff; + topx &= 0x7ff; + } + if ((topy & 0x7ff) - SmallPowY >= ThresPowY) + { + /* Note: sign_bias == 0 here because y is not odd. */ + if (ix == asuint64 (1.0)) + return 1.0; + /* |y| < 2^-65, x^y ~= 1 + y*log(x). 
*/ + if ((topy & 0x7ff) < SmallPowY) + return 1.0; +#if WANT_SIMD_EXCEPT + return (ix > asuint64 (1.0)) == (topy < 0x800) ? __math_oflow (0) + : __math_uflow (0); +#else + return (ix > asuint64 (1.0)) == (topy < 0x800) ? INFINITY : 0; +#endif + } + if (topx == 0) + { + /* Normalize subnormal x so exponent becomes negative. */ + ix = asuint64 (x * 0x1p52); + ix &= 0x7fffffffffffffff; + ix -= 52ULL << 52; + } + } + + double lo; + double hi = log_inline (ix, &lo); + double ehi = y * hi; + double elo = y * lo + fma (y, hi, -ehi); + return exp_inline (ehi, elo, sign_bias); +} diff --git a/sysdeps/aarch64/fpu/hypot_advsimd.c b/sysdeps/aarch64/fpu/hypot_advsimd.c new file mode 100644 index 0000000000..e4e279fa0c --- /dev/null +++ b/sysdeps/aarch64/fpu/hypot_advsimd.c @@ -0,0 +1,97 @@ +/* Double-precision vector (Advanced SIMD) hypot function + + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include "v_math.h" + +#if WANT_SIMD_EXCEPT +static const struct data +{ + uint64x2_t tiny_bound, thres; +} data = { + .tiny_bound = V2 (0x2000000000000000), /* asuint (0x1p-511). */ + .thres = V2 (0x3fe0000000000000), /* asuint (0x1p511) - tiny_bound. 
*/ +}; +#else +static const struct data +{ + uint64x2_t tiny_bound; + uint32x4_t thres; +} data = { + .tiny_bound = V2 (0x0360000000000000), /* asuint (0x1p-969). */ + .thres = V4 (0x7c900000), /* asuint (inf) - tiny_bound. */ +}; +#endif + +static float64x2_t VPCS_ATTR NOINLINE +special_case (float64x2_t x, float64x2_t y, float64x2_t sqsum, + uint32x2_t special) +{ + return v_call2_f64 (hypot, x, y, vsqrtq_f64 (sqsum), vmovl_u32 (special)); +} + +/* Vector implementation of double-precision hypot. + Maximum error observed is 1.21 ULP: + _ZGVnN2vv_hypot (0x1.6a1b193ff85b5p-204, 0x1.bc50676c2a447p-222) + got 0x1.6a1b19400964ep-204 + want 0x1.6a1b19400964dp-204. */ +#if WANT_SIMD_EXCEPT + +float64x2_t VPCS_ATTR V_NAME_D2 (hypot) (float64x2_t x, float64x2_t y) +{ + const struct data *d = ptr_barrier (&data); + + float64x2_t ax = vabsq_f64 (x); + float64x2_t ay = vabsq_f64 (y); + + uint64x2_t ix = vreinterpretq_u64_f64 (ax); + uint64x2_t iy = vreinterpretq_u64_f64 (ay); + + /* Extreme values, NaNs, and infinities should be handled by the scalar + fallback for correct flag handling. 
*/ + uint64x2_t specialx = vcgeq_u64 (vsubq_u64 (ix, d->tiny_bound), d->thres); + uint64x2_t specialy = vcgeq_u64 (vsubq_u64 (iy, d->tiny_bound), d->thres); + ax = v_zerofy_f64 (ax, specialx); + ay = v_zerofy_f64 (ay, specialy); + uint32x2_t special = vaddhn_u64 (specialx, specialy); + + float64x2_t sqsum = vfmaq_f64 (vmulq_f64 (ax, ax), ay, ay); + + if (__glibc_unlikely (v_any_u32h (special))) + return special_case (x, y, sqsum, special); + + return vsqrtq_f64 (sqsum); +} +#else + +float64x2_t VPCS_ATTR V_NAME_D2 (hypot) (float64x2_t x, float64x2_t y) +{ + const struct data *d = ptr_barrier (&data); + + float64x2_t sqsum = vfmaq_f64 (vmulq_f64 (x, x), y, y); + + uint32x2_t special = vcge_u32 ( + vsubhn_u64 (vreinterpretq_u64_f64 (sqsum), d->tiny_bound), + vget_low_u32 (d->thres)); + + if (__glibc_unlikely (v_any_u32h (special))) + return special_case (x, y, sqsum, special); + + return vsqrtq_f64 (sqsum); +} +#endif diff --git a/sysdeps/aarch64/fpu/hypot_sve.c b/sysdeps/aarch64/fpu/hypot_sve.c new file mode 100644 index 0000000000..74417040ac --- /dev/null +++ b/sysdeps/aarch64/fpu/hypot_sve.c @@ -0,0 +1,54 @@ +/* Double-precision vector (SVE) hypot function + + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include "sv_math.h" + +static const struct data +{ + uint64_t tiny_bound, thres; +} data = { + .tiny_bound = 0x0c80000000000000, /* asuint (0x1p-102). */ + .thres = 0x7300000000000000, /* asuint (inf) - tiny_bound. */ +}; + +static svfloat64_t NOINLINE +special_case (svfloat64_t sqsum, svfloat64_t x, svfloat64_t y, svbool_t pg, + svbool_t special) +{ + return sv_call2_f64 (hypot, x, y, svsqrt_x (pg, sqsum), special); +} + +/* SVE implementation of double-precision hypot. + Maximum error observed is 1.21 ULP: + _ZGVsMxvv_hypot (-0x1.6a22d0412cdd3p+352, 0x1.d3d89bd66fb1ap+330) + got 0x1.6a22d0412cfp+352 + want 0x1.6a22d0412cf01p+352. */ +svfloat64_t SV_NAME_D2 (hypot) (svfloat64_t x, svfloat64_t y, svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + + svfloat64_t sqsum = svmla_x (pg, svmul_x (pg, x, x), y, y); + + svbool_t special = svcmpge ( + pg, svsub_x (pg, svreinterpret_u64 (sqsum), d->tiny_bound), d->thres); + + if (__glibc_unlikely (svptest_any (pg, special))) + return special_case (sqsum, x, y, pg, special); + return svsqrt_x (pg, sqsum); +} diff --git a/sysdeps/aarch64/fpu/hypotf_advsimd.c b/sysdeps/aarch64/fpu/hypotf_advsimd.c new file mode 100644 index 0000000000..34818b021a --- /dev/null +++ b/sysdeps/aarch64/fpu/hypotf_advsimd.c @@ -0,0 +1,98 @@ +/* Single-precision vector (Advanced SIMD) hypot function + + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include "v_math.h" + +#if WANT_SIMD_EXCEPT +static const struct data +{ + uint32x4_t tiny_bound, thres; +} data = { + .tiny_bound = V4 (0x20000000), /* asuint (0x1p-63). */ + .thres = V4 (0x3f000000), /* asuint (0x1p63) - tiny_bound. */ +}; +#else +static const struct data +{ + uint32x4_t tiny_bound; + uint16x8_t thres; +} data = { + .tiny_bound = V4 (0x0C800000), /* asuint (0x1p-102). */ + .thres = V8 (0x7300), /* asuint (inf) - tiny_bound. */ +}; +#endif + +static float32x4_t VPCS_ATTR NOINLINE +special_case (float32x4_t x, float32x4_t y, float32x4_t sqsum, + uint16x4_t special) +{ + return v_call2_f32 (hypotf, x, y, vsqrtq_f32 (sqsum), vmovl_u16 (special)); +} + +/* Vector implementation of single-precision hypot. + Maximum error observed is 1.21 ULP: + _ZGVnN4vv_hypotf (0x1.6a419cp-13, 0x1.82a852p-22) got 0x1.6a41d2p-13 + want 0x1.6a41dp-13. */ +#if WANT_SIMD_EXCEPT + +float32x4_t VPCS_ATTR V_NAME_F2 (hypot) (float32x4_t x, float32x4_t y) +{ + const struct data *d = ptr_barrier (&data); + + float32x4_t ax = vabsq_f32 (x); + float32x4_t ay = vabsq_f32 (y); + + uint32x4_t ix = vreinterpretq_u32_f32 (ax); + uint32x4_t iy = vreinterpretq_u32_f32 (ay); + + /* Extreme values, NaNs, and infinities should be handled by the scalar + fallback for correct flag handling. 
*/ + uint32x4_t specialx = vcgeq_u32 (vsubq_u32 (ix, d->tiny_bound), d->thres); + uint32x4_t specialy = vcgeq_u32 (vsubq_u32 (iy, d->tiny_bound), d->thres); + ax = v_zerofy_f32 (ax, specialx); + ay = v_zerofy_f32 (ay, specialy); + uint16x4_t special = vaddhn_u32 (specialx, specialy); + + float32x4_t sqsum = vfmaq_f32 (vmulq_f32 (ax, ax), ay, ay); + + if (__glibc_unlikely (v_any_u16h (special))) + return special_case (x, y, sqsum, special); + + return vsqrtq_f32 (sqsum); +} +#else + +float32x4_t VPCS_ATTR V_NAME_F2 (hypot) (float32x4_t x, float32x4_t y) +{ + const struct data *d = ptr_barrier (&data); + + float32x4_t sqsum = vfmaq_f32 (vmulq_f32 (x, x), y, y); + + uint16x4_t special = vcge_u16 ( + vsubhn_u32 (vreinterpretq_u32_f32 (sqsum), d->tiny_bound), + vget_low_u16 (d->thres)); + + if (__glibc_unlikely (v_any_u16h (special))) + return special_case (x, y, sqsum, special); + + return vsqrtq_f32 (sqsum); +} +#endif +libmvec_hidden_def (V_NAME_F2 (hypot)) +HALF_WIDTH_ALIAS_F2(hypot) diff --git a/sysdeps/aarch64/fpu/hypotf_sve.c b/sysdeps/aarch64/fpu/hypotf_sve.c new file mode 100644 index 0000000000..3a403de66e --- /dev/null +++ b/sysdeps/aarch64/fpu/hypotf_sve.c @@ -0,0 +1,48 @@ +/* Single-precision vector (SVE) hypot function + + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include "sv_math.h" + +#define TinyBound 0x0c800000 /* asuint (0x1p-102). */ +#define Thres 0x73000000 /* 0x70000000 - TinyBound. */ + +static svfloat32_t NOINLINE +special_case (svfloat32_t sqsum, svfloat32_t x, svfloat32_t y, svbool_t pg, + svbool_t special) +{ + return sv_call2_f32 (hypotf, x, y, svsqrt_x (pg, sqsum), special); +} + +/* SVE implementation of single-precision hypot. + Maximum error observed is 1.21 ULP: + _ZGVsMxvv_hypotf (0x1.6a213cp-19, -0x1.32b982p-26) got 0x1.6a2346p-19 + want 0x1.6a2344p-19. */ +svfloat32_t SV_NAME_F2 (hypot) (svfloat32_t x, svfloat32_t y, + const svbool_t pg) +{ + svfloat32_t sqsum = svmla_x (pg, svmul_x (pg, x, x), y, y); + + svbool_t special = svcmpge ( + pg, svsub_x (pg, svreinterpret_u32 (sqsum), TinyBound), Thres); + + if (__glibc_unlikely (svptest_any (pg, special))) + return special_case (sqsum, x, y, pg, special); + + return svsqrt_x (pg, sqsum); +} diff --git a/sysdeps/aarch64/fpu/log10_advsimd.c b/sysdeps/aarch64/fpu/log10_advsimd.c index 1e5ef99e89..c065aaebae 100644 --- a/sysdeps/aarch64/fpu/log10_advsimd.c +++ b/sysdeps/aarch64/fpu/log10_advsimd.c @@ -58,8 +58,10 @@ static inline struct entry lookup (uint64x2_t i) { struct entry e; - uint64_t i0 = (i[0] >> (52 - V_LOG10_TABLE_BITS)) & IndexMask; - uint64_t i1 = (i[1] >> (52 - V_LOG10_TABLE_BITS)) & IndexMask; + uint64_t i0 + = (vgetq_lane_u64 (i, 0) >> (52 - V_LOG10_TABLE_BITS)) & IndexMask; + uint64_t i1 + = (vgetq_lane_u64 (i, 1) >> (52 - V_LOG10_TABLE_BITS)) & IndexMask; float64x2_t e0 = vld1q_f64 (&__v_log10_data.table[i0].invc); float64x2_t e1 = vld1q_f64 (&__v_log10_data.table[i1].invc); e.invc = vuzp1q_f64 (e0, e1); diff --git a/sysdeps/aarch64/fpu/log2_advsimd.c b/sysdeps/aarch64/fpu/log2_advsimd.c index a34978f6cf..4057c552d8 100644 --- a/sysdeps/aarch64/fpu/log2_advsimd.c +++ 
b/sysdeps/aarch64/fpu/log2_advsimd.c @@ -55,8 +55,10 @@ static inline struct entry lookup (uint64x2_t i) { struct entry e; - uint64_t i0 = (i[0] >> (52 - V_LOG2_TABLE_BITS)) & IndexMask; - uint64_t i1 = (i[1] >> (52 - V_LOG2_TABLE_BITS)) & IndexMask; + uint64_t i0 + = (vgetq_lane_u64 (i, 0) >> (52 - V_LOG2_TABLE_BITS)) & IndexMask; + uint64_t i1 + = (vgetq_lane_u64 (i, 1) >> (52 - V_LOG2_TABLE_BITS)) & IndexMask; float64x2_t e0 = vld1q_f64 (&__v_log2_data.table[i0].invc); float64x2_t e1 = vld1q_f64 (&__v_log2_data.table[i1].invc); e.invc = vuzp1q_f64 (e0, e1); diff --git a/sysdeps/aarch64/fpu/log_advsimd.c b/sysdeps/aarch64/fpu/log_advsimd.c index 21df61728c..015a6da7d7 100644 --- a/sysdeps/aarch64/fpu/log_advsimd.c +++ b/sysdeps/aarch64/fpu/log_advsimd.c @@ -54,17 +54,12 @@ lookup (uint64x2_t i) { /* Since N is a power of 2, n % N = n & (N - 1). */ struct entry e; - uint64_t i0 = (i[0] >> (52 - V_LOG_TABLE_BITS)) & IndexMask; - uint64_t i1 = (i[1] >> (52 - V_LOG_TABLE_BITS)) & IndexMask; + uint64_t i0 = (vgetq_lane_u64 (i, 0) >> (52 - V_LOG_TABLE_BITS)) & IndexMask; + uint64_t i1 = (vgetq_lane_u64 (i, 1) >> (52 - V_LOG_TABLE_BITS)) & IndexMask; float64x2_t e0 = vld1q_f64 (&__v_log_data.table[i0].invc); float64x2_t e1 = vld1q_f64 (&__v_log_data.table[i1].invc); -#if __BYTE_ORDER == __LITTLE_ENDIAN e.invc = vuzp1q_f64 (e0, e1); e.logc = vuzp2q_f64 (e0, e1); -#else - e.invc = vuzp1q_f64 (e1, e0); - e.logc = vuzp2q_f64 (e1, e0); -#endif return e; } diff --git a/sysdeps/aarch64/fpu/pow_advsimd.c b/sysdeps/aarch64/fpu/pow_advsimd.c new file mode 100644 index 0000000000..3c91e3e183 --- /dev/null +++ b/sysdeps/aarch64/fpu/pow_advsimd.c @@ -0,0 +1,249 @@ +/* Double-precision vector (AdvSIMD) pow function + + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include "v_math.h" + +/* Defines parameters of the approximation and scalar fallback. */ +#include "finite_pow.h" + +#define VecSmallExp v_u64 (SmallExp) +#define VecThresExp v_u64 (ThresExp) + +#define VecSmallPowX v_u64 (SmallPowX) +#define VecThresPowX v_u64 (ThresPowX) +#define VecSmallPowY v_u64 (SmallPowY) +#define VecThresPowY v_u64 (ThresPowY) + +static const struct data +{ + float64x2_t log_poly[6]; + float64x2_t exp_poly[3]; + float64x2_t ln2_hi, ln2_lo; + float64x2_t shift, inv_ln2_n, ln2_hi_n, ln2_lo_n, small_powx; + uint64x2_t inf; +} data = { + /* Coefficients copied from v_pow_log_data.c + relative error: 0x1.11922ap-70 in [-0x1.6bp-8, 0x1.6bp-8] + Coefficients are scaled to match the scaling during evaluation. */ + .log_poly + = { V2 (0x1.555555555556p-2 * -2), V2 (-0x1.0000000000006p-2 * -2), + V2 (0x1.999999959554ep-3 * 4), V2 (-0x1.555555529a47ap-3 * 4), + V2 (0x1.2495b9b4845e9p-3 * -8), V2 (-0x1.0002b8b263fc3p-3 * -8) }, + .ln2_hi = V2 (0x1.62e42fefa3800p-1), + .ln2_lo = V2 (0x1.ef35793c76730p-45), + /* Polynomial coefficients: abs error: 1.43*2^-58, ulp error: 0.549 + (0.550 without fma) if |x| < ln2/512. */ + .exp_poly = { V2 (0x1.fffffffffffd4p-2), V2 (0x1.5555571d6ef9p-3), + V2 (0x1.5555576a5adcep-5) }, + .shift = V2 (0x1.8p52), /* round to nearest int. without intrinsics. 
*/ + .inv_ln2_n = V2 (0x1.71547652b82fep8), /* N/ln2. */ + .ln2_hi_n = V2 (0x1.62e42fefc0000p-9), /* ln2/N. */ + .ln2_lo_n = V2 (-0x1.c610ca86c3899p-45), + .small_powx = V2 (0x1p-126), + .inf = V2 (0x7ff0000000000000) +}; + +#define A(i) data.log_poly[i] +#define C(i) data.exp_poly[i] + +/* This version implements an algorithm close to scalar pow but + - does not implement the trick in the exp's specialcase subroutine to avoid + double-rounding, + - does not use a tail in the exponential core computation, + - and pow's exp polynomial order and table bits might differ. + + Maximum measured error is 1.04 ULPs: + _ZGVnN2vv_pow(0x1.024a3e56b3c3p-136, 0x1.87910248b58acp-13) + got 0x1.f71162f473251p-1 + want 0x1.f71162f473252p-1. */ + +static inline float64x2_t +v_masked_lookup_f64 (const double *table, uint64x2_t i) +{ + return (float64x2_t){ + table[(i[0] >> (52 - V_POW_LOG_TABLE_BITS)) & (N_LOG - 1)], + table[(i[1] >> (52 - V_POW_LOG_TABLE_BITS)) & (N_LOG - 1)] + }; +} + +/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about + additional 15 bits precision. IX is the bit representation of x, but + normalized in the subnormal range using the sign bit for the exponent. */ +static inline float64x2_t +v_log_inline (uint64x2_t ix, float64x2_t *tail, const struct data *d) +{ + /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. + The range is split into N subintervals. + The ith subinterval contains z and c is near its center. */ + uint64x2_t tmp = vsubq_u64 (ix, v_u64 (Off)); + int64x2_t k + = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52); /* arithmetic shift. */ + uint64x2_t iz = vsubq_u64 (ix, vandq_u64 (tmp, v_u64 (0xfffULL << 52))); + float64x2_t z = vreinterpretq_f64_u64 (iz); + float64x2_t kd = vcvtq_f64_s64 (k); + /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). 
*/ + float64x2_t invc = v_masked_lookup_f64 (__v_pow_log_data.invc, tmp); + float64x2_t logc = v_masked_lookup_f64 (__v_pow_log_data.logc, tmp); + float64x2_t logctail = v_masked_lookup_f64 (__v_pow_log_data.logctail, tmp); + /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and + |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. */ + float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, invc); + /* k*Ln2 + log(c) + r. */ + float64x2_t t1 = vfmaq_f64 (logc, kd, d->ln2_hi); + float64x2_t t2 = vaddq_f64 (t1, r); + float64x2_t lo1 = vfmaq_f64 (logctail, kd, d->ln2_lo); + float64x2_t lo2 = vaddq_f64 (vsubq_f64 (t1, t2), r); + /* Evaluation is optimized assuming superscalar pipelined execution. */ + float64x2_t ar = vmulq_f64 (v_f64 (-0.5), r); + float64x2_t ar2 = vmulq_f64 (r, ar); + float64x2_t ar3 = vmulq_f64 (r, ar2); + /* k*Ln2 + log(c) + r + A[0]*r*r. */ + float64x2_t hi = vaddq_f64 (t2, ar2); + float64x2_t lo3 = vfmaq_f64 (vnegq_f64 (ar2), ar, r); + float64x2_t lo4 = vaddq_f64 (vsubq_f64 (t2, hi), ar2); + /* p = log1p(r) - r - A[0]*r*r. */ + float64x2_t a56 = vfmaq_f64 (A (4), r, A (5)); + float64x2_t a34 = vfmaq_f64 (A (2), r, A (3)); + float64x2_t a12 = vfmaq_f64 (A (0), r, A (1)); + float64x2_t p = vfmaq_f64 (a34, ar2, a56); + p = vfmaq_f64 (a12, ar2, p); + p = vmulq_f64 (ar3, p); + float64x2_t lo + = vaddq_f64 (vaddq_f64 (vaddq_f64 (vaddq_f64 (lo1, lo2), lo3), lo4), p); + float64x2_t y = vaddq_f64 (hi, lo); + *tail = vaddq_f64 (vsubq_f64 (hi, y), lo); + return y; +} + +static float64x2_t VPCS_ATTR NOINLINE +exp_special_case (float64x2_t x, float64x2_t xtail) +{ + return (float64x2_t){ exp_nosignbias (x[0], xtail[0]), + exp_nosignbias (x[1], xtail[1]) }; +} + +/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|. */ +static inline float64x2_t +v_exp_inline (float64x2_t x, float64x2_t xtail, const struct data *d) +{ + /* Fallback to scalar exp_inline for all lanes if any lane + contains value of x s.t. |x| <= 2^-54 or >= 512. 
*/ + uint64x2_t abstop + = vshrq_n_u64 (vandq_u64 (vreinterpretq_u64_f64 (x), d->inf), 52); + uint64x2_t uoflowx + = vcgeq_u64 (vsubq_u64 (abstop, VecSmallExp), VecThresExp); + if (__glibc_unlikely (v_any_u64 (uoflowx))) + return exp_special_case (x, xtail); + + /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ + /* x = ln2/N*k + r, with k integer and r in [-ln2/2N, ln2/2N]. */ + float64x2_t z = vmulq_f64 (d->inv_ln2_n, x); + /* z - kd is in [-1, 1] in non-nearest rounding modes. */ + float64x2_t kd = vaddq_f64 (z, d->shift); + uint64x2_t ki = vreinterpretq_u64_f64 (kd); + kd = vsubq_f64 (kd, d->shift); + float64x2_t r = vfmsq_f64 (x, kd, d->ln2_hi_n); + r = vfmsq_f64 (r, kd, d->ln2_lo_n); + /* The code assumes 2^-200 < |xtail| < 2^-8/N. */ + r = vaddq_f64 (r, xtail); + /* 2^(k/N) ~= scale. */ + uint64x2_t idx = vandq_u64 (ki, v_u64 (N_EXP - 1)); + uint64x2_t top = vshlq_n_u64 (ki, 52 - V_POW_EXP_TABLE_BITS); + /* This is only a valid scale when -1023*N < k < 1024*N. */ + uint64x2_t sbits = v_lookup_u64 (SBits, idx); + sbits = vaddq_u64 (sbits, top); + /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (exp(r) - 1). */ + float64x2_t r2 = vmulq_f64 (r, r); + float64x2_t tmp = vfmaq_f64 (C (1), r, C (2)); + tmp = vfmaq_f64 (C (0), r, tmp); + tmp = vfmaq_f64 (r, r2, tmp); + float64x2_t scale = vreinterpretq_f64_u64 (sbits); + /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there + is no spurious underflow here even without fma. */ + return vfmaq_f64 (scale, scale, tmp); +} + +static float64x2_t NOINLINE VPCS_ATTR +scalar_fallback (float64x2_t x, float64x2_t y) +{ + return (float64x2_t){ pow_scalar_special_case (x[0], y[0]), + pow_scalar_special_case (x[1], y[1]) }; +} + +float64x2_t VPCS_ATTR V_NAME_D2 (pow) (float64x2_t x, float64x2_t y) +{ + const struct data *d = ptr_barrier (&data); + /* Case of x <= 0 is too complicated to be vectorised efficiently here, + fallback to scalar pow for all lanes if any x < 0 detected. 
*/ + if (v_any_u64 (vclezq_s64 (vreinterpretq_s64_f64 (x)))) + return scalar_fallback (x, y); + + uint64x2_t vix = vreinterpretq_u64_f64 (x); + uint64x2_t viy = vreinterpretq_u64_f64 (y); + uint64x2_t iay = vandq_u64 (viy, d->inf); + + /* Special cases of x or y. */ +#if WANT_SIMD_EXCEPT + /* Small or large. */ + uint64x2_t vtopx = vshrq_n_u64 (vix, 52); + uint64x2_t vabstopy = vshrq_n_u64 (iay, 52); + uint64x2_t specialx + = vcgeq_u64 (vsubq_u64 (vtopx, VecSmallPowX), VecThresPowX); + uint64x2_t specialy + = vcgeq_u64 (vsubq_u64 (vabstopy, VecSmallPowY), VecThresPowY); +#else + /* The case y==0 does not trigger a special case, since in this case it is + necessary to fix the result only if x is a signalling nan, which already + triggers a special case. We test y==0 directly in the scalar fallback. */ + uint64x2_t iax = vandq_u64 (vix, d->inf); + uint64x2_t specialx = vcgeq_u64 (iax, d->inf); + uint64x2_t specialy = vcgeq_u64 (iay, d->inf); +#endif + uint64x2_t special = vorrq_u64 (specialx, specialy); + /* Fallback to scalar on all lanes if any lane is inf or nan. */ + if (__glibc_unlikely (v_any_u64 (special))) + return scalar_fallback (x, y); + + /* Small cases of x: |x| < 0x1p-126. */ + uint64x2_t smallx = vcaltq_f64 (x, d->small_powx); + if (__glibc_unlikely (v_any_u64 (smallx))) + { + /* Update ix if top 12 bits of x are 0. */ + uint64x2_t sub_x = vceqzq_u64 (vshrq_n_u64 (vix, 52)); + if (__glibc_unlikely (v_any_u64 (sub_x))) + { + /* Normalize subnormal x so exponent becomes negative. */ + uint64x2_t vix_norm = vreinterpretq_u64_f64 ( + vabsq_f64 (vmulq_f64 (x, vcvtq_f64_u64 (v_u64 (1ULL << 52))))); + vix_norm = vsubq_u64 (vix_norm, v_u64 (52ULL << 52)); + vix = vbslq_u64 (sub_x, vix_norm, vix); + } + } + + /* Vector Log(ix, &lo). */ + float64x2_t vlo; + float64x2_t vhi = v_log_inline (vix, &vlo, d); + + /* Vector Exp(y_loghi, y_loglo). 
*/ + float64x2_t vehi = vmulq_f64 (y, vhi); + float64x2_t velo = vmulq_f64 (y, vlo); + float64x2_t vemi = vfmsq_f64 (vehi, y, vhi); + velo = vsubq_f64 (velo, vemi); + return v_exp_inline (vehi, velo, d); +} diff --git a/sysdeps/aarch64/fpu/pow_sve.c b/sysdeps/aarch64/fpu/pow_sve.c new file mode 100644 index 0000000000..4c0bf8956c --- /dev/null +++ b/sysdeps/aarch64/fpu/pow_sve.c @@ -0,0 +1,411 @@ +/* Double-precision vector (SVE) pow function + + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +/* This version share a similar algorithm as AOR scalar pow. + + The core computation consists in computing pow(x, y) as + + exp (y * log (x)). + + The algorithms for exp and log are very similar to scalar exp and log. + The log relies on table lookup for 3 variables and an order 8 polynomial. + It returns a high and a low contribution that are then passed to the exp, + to minimise the loss of accuracy in both routines. + The exp is based on 8-bit table lookup for scale and order-4 polynomial. + The SVE algorithm drops the tail in the exp computation at the price of + a lower accuracy, slightly above 1ULP. 
+ The SVE algorithm also drops the special treatement of small (< 2^-65) and + large (> 2^63) finite values of |y|, as they only affect non-round to nearest + modes. + + Maximum measured error is 1.04 ULPs: + SV_NAME_D2 (pow) (0x1.3d2d45bc848acp+63, -0x1.a48a38b40cd43p-12) + got 0x1.f7116284221fcp-1 + want 0x1.f7116284221fdp-1. */ + +#include "math_config.h" +#include "sv_math.h" + +/* Data is defined in v_pow_log_data.c. */ +#define N_LOG (1 << V_POW_LOG_TABLE_BITS) +#define A __v_pow_log_data.poly +#define Off 0x3fe6955500000000 + +/* Data is defined in v_pow_exp_data.c. */ +#define N_EXP (1 << V_POW_EXP_TABLE_BITS) +#define SignBias (0x800 << V_POW_EXP_TABLE_BITS) +#define C __v_pow_exp_data.poly +#define SmallExp 0x3c9 /* top12(0x1p-54). */ +#define BigExp 0x408 /* top12(512.). */ +#define ThresExp 0x03f /* BigExp - SmallExp. */ +#define HugeExp 0x409 /* top12(1024.). */ + +/* Constants associated with pow. */ +#define SmallPowX 0x001 /* top12(0x1p-126). */ +#define BigPowX 0x7ff /* top12(INFINITY). */ +#define ThresPowX 0x7fe /* BigPowX - SmallPowX. */ +#define SmallPowY 0x3be /* top12(0x1.e7b6p-65). */ +#define BigPowY 0x43e /* top12(0x1.749p62). */ +#define ThresPowY 0x080 /* BigPowY - SmallPowY. */ + +/* Check if x is an integer. */ +static inline svbool_t +sv_isint (svbool_t pg, svfloat64_t x) +{ + return svcmpeq (pg, svrintz_z (pg, x), x); +} + +/* Check if x is real not integer valued. */ +static inline svbool_t +sv_isnotint (svbool_t pg, svfloat64_t x) +{ + return svcmpne (pg, svrintz_z (pg, x), x); +} + +/* Check if x is an odd integer. */ +static inline svbool_t +sv_isodd (svbool_t pg, svfloat64_t x) +{ + svfloat64_t y = svmul_x (pg, x, 0.5); + return sv_isnotint (pg, y); +} + +/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is + the bit representation of a non-zero finite floating-point value. 
*/ +static inline int +checkint (uint64_t iy) +{ + int e = iy >> 52 & 0x7ff; + if (e < 0x3ff) + return 0; + if (e > 0x3ff + 52) + return 2; + if (iy & ((1ULL << (0x3ff + 52 - e)) - 1)) + return 0; + if (iy & (1ULL << (0x3ff + 52 - e))) + return 1; + return 2; +} + +/* Top 12 bits (sign and exponent of each double float lane). */ +static inline svuint64_t +sv_top12 (svfloat64_t x) +{ + return svlsr_x (svptrue_b64 (), svreinterpret_u64 (x), 52); +} + +/* Returns 1 if input is the bit representation of 0, infinity or nan. */ +static inline int +zeroinfnan (uint64_t i) +{ + return 2 * i - 1 >= 2 * asuint64 (INFINITY) - 1; +} + +/* Returns 1 if input is the bit representation of 0, infinity or nan. */ +static inline svbool_t +sv_zeroinfnan (svbool_t pg, svuint64_t i) +{ + return svcmpge (pg, svsub_x (pg, svmul_x (pg, i, 2), 1), + 2 * asuint64 (INFINITY) - 1); +} + +/* Handle cases that may overflow or underflow when computing the result that + is scale*(1+TMP) without intermediate rounding. The bit representation of + scale is in SBITS, however it has a computed exponent that may have + overflown into the sign bit so that needs to be adjusted before using it as + a double. (int32_t)KI is the k used in the argument reduction and exponent + adjustment of scale, positive k here means the result may overflow and + negative k means the result may underflow. */ +static inline double +specialcase (double tmp, uint64_t sbits, uint64_t ki) +{ + double scale; + if ((ki & 0x80000000) == 0) + { + /* k > 0, the exponent of scale might have overflowed by <= 460. */ + sbits -= 1009ull << 52; + scale = asdouble (sbits); + return 0x1p1009 * (scale + scale * tmp); + } + /* k < 0, need special care in the subnormal range. */ + sbits += 1022ull << 52; + /* Note: sbits is signed scale. */ + scale = asdouble (sbits); + double y = scale + scale * tmp; + return 0x1p-1022 * y; +} + +/* Scalar fallback for special cases of SVE pow's exp. 
*/ +static inline svfloat64_t +sv_call_specialcase (svfloat64_t x1, svuint64_t u1, svuint64_t u2, + svfloat64_t y, svbool_t cmp) +{ + svbool_t p = svpfirst (cmp, svpfalse ()); + while (svptest_any (cmp, p)) + { + double sx1 = svclastb (p, 0, x1); + uint64_t su1 = svclastb (p, 0, u1); + uint64_t su2 = svclastb (p, 0, u2); + double elem = specialcase (sx1, su1, su2); + svfloat64_t y2 = sv_f64 (elem); + y = svsel (p, y2, y); + p = svpnext_b64 (cmp, p); + } + return y; +} + +/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about + additional 15 bits precision. IX is the bit representation of x, but + normalized in the subnormal range using the sign bit for the exponent. */ +static inline svfloat64_t +sv_log_inline (svbool_t pg, svuint64_t ix, svfloat64_t *tail) +{ + /* x = 2^k z; where z is in range [Off,2*Off) and exact. + The range is split into N subintervals. + The ith subinterval contains z and c is near its center. */ + svuint64_t tmp = svsub_x (pg, ix, Off); + svuint64_t i = svand_x (pg, svlsr_x (pg, tmp, 52 - V_POW_LOG_TABLE_BITS), + sv_u64 (N_LOG - 1)); + svint64_t k = svasr_x (pg, svreinterpret_s64 (tmp), 52); + svuint64_t iz = svsub_x (pg, ix, svand_x (pg, tmp, sv_u64 (0xfffULL << 52))); + svfloat64_t z = svreinterpret_f64 (iz); + svfloat64_t kd = svcvt_f64_x (pg, k); + + /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */ + /* SVE lookup requires 3 separate lookup tables, as opposed to scalar version + that uses array of structures. We also do the lookup earlier in the code to + make sure it finishes as early as possible. */ + svfloat64_t invc = svld1_gather_index (pg, __v_pow_log_data.invc, i); + svfloat64_t logc = svld1_gather_index (pg, __v_pow_log_data.logc, i); + svfloat64_t logctail = svld1_gather_index (pg, __v_pow_log_data.logctail, i); + + /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and + |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. 
*/ + svfloat64_t r = svmad_x (pg, z, invc, -1.0); + /* k*Ln2 + log(c) + r. */ + svfloat64_t t1 = svmla_x (pg, logc, kd, __v_pow_log_data.ln2_hi); + svfloat64_t t2 = svadd_x (pg, t1, r); + svfloat64_t lo1 = svmla_x (pg, logctail, kd, __v_pow_log_data.ln2_lo); + svfloat64_t lo2 = svadd_x (pg, svsub_x (pg, t1, t2), r); + + /* Evaluation is optimized assuming superscalar pipelined execution. */ + svfloat64_t ar = svmul_x (pg, r, -0.5); /* A[0] = -0.5. */ + svfloat64_t ar2 = svmul_x (pg, r, ar); + svfloat64_t ar3 = svmul_x (pg, r, ar2); + /* k*Ln2 + log(c) + r + A[0]*r*r. */ + svfloat64_t hi = svadd_x (pg, t2, ar2); + svfloat64_t lo3 = svmla_x (pg, svneg_x (pg, ar2), ar, r); + svfloat64_t lo4 = svadd_x (pg, svsub_x (pg, t2, hi), ar2); + /* p = log1p(r) - r - A[0]*r*r. */ + /* p = (ar3 * (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * + A[6])))). */ + svfloat64_t a56 = svmla_x (pg, sv_f64 (A[5]), r, A[6]); + svfloat64_t a34 = svmla_x (pg, sv_f64 (A[3]), r, A[4]); + svfloat64_t a12 = svmla_x (pg, sv_f64 (A[1]), r, A[2]); + svfloat64_t p = svmla_x (pg, a34, ar2, a56); + p = svmla_x (pg, a12, ar2, p); + p = svmul_x (pg, ar3, p); + svfloat64_t lo = svadd_x ( + pg, svadd_x (pg, svadd_x (pg, svadd_x (pg, lo1, lo2), lo3), lo4), p); + svfloat64_t y = svadd_x (pg, hi, lo); + *tail = svadd_x (pg, svsub_x (pg, hi, y), lo); + return y; +} + +/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|. + The sign_bias argument is SignBias or 0 and sets the sign to -1 or 1. */ +static inline svfloat64_t +sv_exp_inline (svbool_t pg, svfloat64_t x, svfloat64_t xtail, + svuint64_t sign_bias) +{ + /* 3 types of special cases: tiny (uflow and spurious uflow), huge (oflow) + and other cases of large values of x (scale * (1 + TMP) oflow). */ + svuint64_t abstop = svand_x (pg, sv_top12 (x), 0x7ff); + /* |x| is large (|x| >= 512) or tiny (|x| <= 0x1p-54). 
*/ + svbool_t uoflow = svcmpge (pg, svsub_x (pg, abstop, SmallExp), ThresExp); + + /* Conditions special, uflow and oflow are all expressed as uoflow && + something, hence do not bother computing anything if no lane in uoflow is + true. */ + svbool_t special = svpfalse_b (); + svbool_t uflow = svpfalse_b (); + svbool_t oflow = svpfalse_b (); + if (__glibc_unlikely (svptest_any (pg, uoflow))) + { + /* |x| is tiny (|x| <= 0x1p-54). */ + uflow = svcmpge (pg, svsub_x (pg, abstop, SmallExp), 0x80000000); + uflow = svand_z (pg, uoflow, uflow); + /* |x| is huge (|x| >= 1024). */ + oflow = svcmpge (pg, abstop, HugeExp); + oflow = svand_z (pg, uoflow, svbic_z (pg, oflow, uflow)); + /* For large |x| values (512 < |x| < 1024) scale * (1 + TMP) can overflow + or underflow. */ + special = svbic_z (pg, uoflow, svorr_z (pg, uflow, oflow)); + } + + /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ + /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */ + svfloat64_t z = svmul_x (pg, x, __v_pow_exp_data.n_over_ln2); + /* z - kd is in [-1, 1] in non-nearest rounding modes. */ + svfloat64_t shift = sv_f64 (__v_pow_exp_data.shift); + svfloat64_t kd = svadd_x (pg, z, shift); + svuint64_t ki = svreinterpret_u64 (kd); + kd = svsub_x (pg, kd, shift); + svfloat64_t r = x; + r = svmls_x (pg, r, kd, __v_pow_exp_data.ln2_over_n_hi); + r = svmls_x (pg, r, kd, __v_pow_exp_data.ln2_over_n_lo); + /* The code assumes 2^-200 < |xtail| < 2^-8/N. */ + r = svadd_x (pg, r, xtail); + /* 2^(k/N) ~= scale. */ + svuint64_t idx = svand_x (pg, ki, N_EXP - 1); + svuint64_t top + = svlsl_x (pg, svadd_x (pg, ki, sign_bias), 52 - V_POW_EXP_TABLE_BITS); + /* This is only a valid scale when -1023*N < k < 1024*N. */ + svuint64_t sbits = svld1_gather_index (pg, __v_pow_exp_data.sbits, idx); + sbits = svadd_x (pg, sbits, top); + /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (exp(r) - 1). 
*/ + svfloat64_t r2 = svmul_x (pg, r, r); + svfloat64_t tmp = svmla_x (pg, sv_f64 (C[1]), r, C[2]); + tmp = svmla_x (pg, sv_f64 (C[0]), r, tmp); + tmp = svmla_x (pg, r, r2, tmp); + svfloat64_t scale = svreinterpret_f64 (sbits); + /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there + is no spurious underflow here even without fma. */ + z = svmla_x (pg, scale, scale, tmp); + + /* Update result with special and large cases. */ + if (__glibc_unlikely (svptest_any (pg, special))) + z = sv_call_specialcase (tmp, sbits, ki, z, special); + + /* Handle underflow and overflow. */ + svuint64_t sign_bit = svlsr_x (pg, svreinterpret_u64 (x), 63); + svbool_t x_is_neg = svcmpne (pg, sign_bit, 0); + svuint64_t sign_mask = svlsl_x (pg, sign_bias, 52 - V_POW_EXP_TABLE_BITS); + svfloat64_t res_uoflow = svsel (x_is_neg, sv_f64 (0.0), sv_f64 (INFINITY)); + res_uoflow = svreinterpret_f64 ( + svorr_x (pg, svreinterpret_u64 (res_uoflow), sign_mask)); + z = svsel (oflow, res_uoflow, z); + /* Avoid spurious underflow for tiny x. */ + svfloat64_t res_spurious_uflow + = svreinterpret_f64 (svorr_x (pg, sign_mask, 0x3ff0000000000000)); + z = svsel (uflow, res_spurious_uflow, z); + + return z; +} + +static inline double +pow_sc (double x, double y) +{ + uint64_t ix = asuint64 (x); + uint64_t iy = asuint64 (y); + /* Special cases: |x| or |y| is 0, inf or nan. */ + if (__glibc_unlikely (zeroinfnan (iy))) + { + if (2 * iy == 0) + return issignaling_inline (x) ? x + y : 1.0; + if (ix == asuint64 (1.0)) + return issignaling_inline (y) ? x + y : 1.0; + if (2 * ix > 2 * asuint64 (INFINITY) || 2 * iy > 2 * asuint64 (INFINITY)) + return x + y; + if (2 * ix == 2 * asuint64 (1.0)) + return 1.0; + if ((2 * ix < 2 * asuint64 (1.0)) == !(iy >> 63)) + return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */ + return y * y; + } + if (__glibc_unlikely (zeroinfnan (ix))) + { + double_t x2 = x * x; + if (ix >> 63 && checkint (iy) == 1) + x2 = -x2; + return (iy >> 63) ? 
1 / x2 : x2; + } + return x; +} + +svfloat64_t SV_NAME_D2 (pow) (svfloat64_t x, svfloat64_t y, const svbool_t pg) +{ + /* This preamble handles special case conditions used in the final scalar + fallbacks. It also updates ix and sign_bias, that are used in the core + computation too, i.e., exp( y * log (x) ). */ + svuint64_t vix0 = svreinterpret_u64 (x); + svuint64_t viy0 = svreinterpret_u64 (y); + svuint64_t vtopx0 = svlsr_x (svptrue_b64 (), vix0, 52); + + /* Negative x cases. */ + svuint64_t sign_bit = svlsr_m (pg, vix0, 63); + svbool_t xisneg = svcmpeq (pg, sign_bit, 1); + + /* Set sign_bias and ix depending on sign of x and nature of y. */ + svbool_t yisnotint_xisneg = svpfalse_b (); + svuint64_t sign_bias = sv_u64 (0); + svuint64_t vix = vix0; + svuint64_t vtopx1 = vtopx0; + if (__glibc_unlikely (svptest_any (pg, xisneg))) + { + /* Determine nature of y. */ + yisnotint_xisneg = sv_isnotint (xisneg, y); + svbool_t yisint_xisneg = sv_isint (xisneg, y); + svbool_t yisodd_xisneg = sv_isodd (xisneg, y); + /* ix set to abs(ix) if y is integer. */ + vix = svand_m (yisint_xisneg, vix0, 0x7fffffffffffffff); + vtopx1 = svand_m (yisint_xisneg, vtopx0, 0x7ff); + /* Set to SignBias if x is negative and y is odd. */ + sign_bias = svsel (yisodd_xisneg, sv_u64 (SignBias), sv_u64 (0)); + } + + /* Special cases of x or y: zero, inf and nan. */ + svbool_t xspecial = sv_zeroinfnan (pg, vix0); + svbool_t yspecial = sv_zeroinfnan (pg, viy0); + svbool_t special = svorr_z (pg, xspecial, yspecial); + + /* Small cases of x: |x| < 0x1p-126. */ + svuint64_t vabstopx0 = svand_x (pg, vtopx0, 0x7ff); + svbool_t xsmall = svcmplt (pg, vabstopx0, SmallPowX); + if (__glibc_unlikely (svptest_any (pg, xsmall))) + { + /* Normalize subnormal x so exponent becomes negative. 
*/ + svbool_t topx_is_null = svcmpeq (xsmall, vtopx1, 0); + + svuint64_t vix_norm = svreinterpret_u64 (svmul_m (xsmall, x, 0x1p52)); + vix_norm = svand_m (xsmall, vix_norm, 0x7fffffffffffffff); + vix_norm = svsub_m (xsmall, vix_norm, 52ULL << 52); + vix = svsel (topx_is_null, vix_norm, vix); + } + + /* y_hi = log(ix, &y_lo). */ + svfloat64_t vlo; + svfloat64_t vhi = sv_log_inline (pg, vix, &vlo); + + /* z = exp(y_hi, y_lo, sign_bias). */ + svfloat64_t vehi = svmul_x (pg, y, vhi); + svfloat64_t velo = svmul_x (pg, y, vlo); + svfloat64_t vemi = svmls_x (pg, vehi, y, vhi); + velo = svsub_x (pg, velo, vemi); + svfloat64_t vz = sv_exp_inline (pg, vehi, velo, sign_bias); + + /* Cases of finite y and finite negative x. */ + vz = svsel (yisnotint_xisneg, sv_f64 (__builtin_nan ("")), vz); + + /* Cases of zero/inf/nan x or y. */ + if (__glibc_unlikely (svptest_any (pg, special))) + vz = sv_call2_f64 (pow_sc, x, y, vz, special); + + return vz; +} diff --git a/sysdeps/aarch64/fpu/powf_advsimd.c b/sysdeps/aarch64/fpu/powf_advsimd.c new file mode 100644 index 0000000000..8232e70436 --- /dev/null +++ b/sysdeps/aarch64/fpu/powf_advsimd.c @@ -0,0 +1,210 @@ +/* Single-precision vector (AdvSIMD) pow function + + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include "math_config.h" +#include "v_math.h" + +#define Min v_u32 (0x00800000) +#define Max v_u32 (0x7f800000) +#define Thresh v_u32 (0x7f000000) /* Max - Min. */ +#define MantissaMask v_u32 (0x007fffff) + +#define A d->log2_poly +#define C d->exp2f_poly + +/* 2.6 ulp ~ 0.5 + 2^24 (128*Ln2*relerr_log2 + relerr_exp2). */ +#define Off v_u32 (0x3f35d000) + +#define V_POWF_LOG2_TABLE_BITS 5 +#define V_EXP2F_TABLE_BITS 5 +#define Log2IdxMask ((1 << V_POWF_LOG2_TABLE_BITS) - 1) +#define Scale ((double) (1 << V_EXP2F_TABLE_BITS)) + +static const struct data +{ + struct + { + double invc, logc; + } log2_tab[1 << V_POWF_LOG2_TABLE_BITS]; + float64x2_t log2_poly[4]; + uint64_t exp2f_tab[1 << V_EXP2F_TABLE_BITS]; + float64x2_t exp2f_poly[3]; +} data = { + .log2_tab = {{0x1.6489890582816p+0, -0x1.e960f97b22702p-2 * Scale}, + {0x1.5cf19b35e3472p+0, -0x1.c993406cd4db6p-2 * Scale}, + {0x1.55aac0e956d65p+0, -0x1.aa711d9a7d0f3p-2 * Scale}, + {0x1.4eb0022977e01p+0, -0x1.8bf37bacdce9bp-2 * Scale}, + {0x1.47fcccda1dd1fp+0, -0x1.6e13b3519946ep-2 * Scale}, + {0x1.418ceabab68c1p+0, -0x1.50cb8281e4089p-2 * Scale}, + {0x1.3b5c788f1edb3p+0, -0x1.341504a237e2bp-2 * Scale}, + {0x1.3567de48e9c9ap+0, -0x1.17eaab624ffbbp-2 * Scale}, + {0x1.2fabc80fd19bap+0, -0x1.f88e708f8c853p-3 * Scale}, + {0x1.2a25200ce536bp+0, -0x1.c24b6da113914p-3 * Scale}, + {0x1.24d108e0152e3p+0, -0x1.8d02ee397cb1dp-3 * Scale}, + {0x1.1facd8ab2fbe1p+0, -0x1.58ac1223408b3p-3 * Scale}, + {0x1.1ab614a03efdfp+0, -0x1.253e6fd190e89p-3 * Scale}, + {0x1.15ea6d03af9ffp+0, -0x1.e5641882c12ffp-4 * Scale}, + {0x1.1147b994bb776p+0, -0x1.81fea712926f7p-4 * Scale}, + {0x1.0ccbf650593aap+0, -0x1.203e240de64a3p-4 * Scale}, + {0x1.0875408477302p+0, -0x1.8029b86a78281p-5 * Scale}, + {0x1.0441d42a93328p+0, -0x1.85d713190fb9p-6 * Scale}, + {0x1p+0, 0x0p+0 * Scale}, + {0x1.f1d006c855e86p-1, 0x1.4c1cc07312997p-5 * Scale}, + {0x1.e28c3341aa301p-1, 0x1.5e1848ccec948p-4 * Scale}, + {0x1.d4bdf9aa64747p-1, 0x1.04cfcb7f1196fp-3 * Scale}, + 
{0x1.c7b45a24e5803p-1, 0x1.582813d463c21p-3 * Scale}, + {0x1.bb5f5eb2ed60ap-1, 0x1.a936fa68760ccp-3 * Scale}, + {0x1.afb0bff8fe6b4p-1, 0x1.f81bc31d6cc4ep-3 * Scale}, + {0x1.a49badf7ab1f5p-1, 0x1.2279a09fae6b1p-2 * Scale}, + {0x1.9a14a111fc4c9p-1, 0x1.47ec0b6df5526p-2 * Scale}, + {0x1.901131f5b2fdcp-1, 0x1.6c71762280f1p-2 * Scale}, + {0x1.8687f73f6d865p-1, 0x1.90155070798dap-2 * Scale}, + {0x1.7d7067eb77986p-1, 0x1.b2e23b1d3068cp-2 * Scale}, + {0x1.74c2c1cf97b65p-1, 0x1.d4e21b0daa86ap-2 * Scale}, + {0x1.6c77f37cff2a1p-1, 0x1.f61e2a2f67f3fp-2 * Scale},}, + .log2_poly = { /* rel err: 1.5 * 2^-30. */ + V2 (-0x1.6ff5daa3b3d7cp-2 * Scale), + V2 (0x1.ec81d03c01aebp-2 * Scale), + V2 (-0x1.71547bb43f101p-1 * Scale), + V2 (0x1.7154764a815cbp0 * Scale)}, + .exp2f_tab = {0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, + 0x3fef9301d0125b51, 0x3fef72b83c7d517b, 0x3fef54873168b9aa, + 0x3fef387a6e756238, 0x3fef1e9df51fdee1, 0x3fef06fe0a31b715, + 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d, + 0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, + 0x3feea47eb03a5585, 0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, + 0x3feea11473eb0187, 0x3feea589994cce13, 0x3feeace5422aa0db, + 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d, + 0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, + 0x3fef3720dcef9069, 0x3fef5818dcfba487, 0x3fef7c97337b9b5f, + 0x3fefa4afa2a490da, 0x3fefd0765b6e4540,}, + .exp2f_poly = { /* rel err: 1.69 * 2^-34. */ + V2 (0x1.c6af84b912394p-5 / Scale / Scale / Scale), + V2 (0x1.ebfce50fac4f3p-3 / Scale / Scale), + V2 (0x1.62e42ff0c52d6p-1 / Scale)}}; + +static float32x4_t VPCS_ATTR NOINLINE +special_case (float32x4_t x, float32x4_t y, float32x4_t ret, uint32x4_t cmp) +{ + return v_call2_f32 (powf, x, y, ret, cmp); +} + +static inline float64x2_t +ylogx_core (const struct data *d, float64x2_t iz, float64x2_t k, + float64x2_t invc, float64x2_t logc, float64x2_t y) +{ + + /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k. 
*/ + float64x2_t r = vfmaq_f64 (v_f64 (-1.0), iz, invc); + float64x2_t y0 = vaddq_f64 (logc, k); + + /* Polynomial to approximate log1p(r)/ln2. */ + float64x2_t logx = vfmaq_f64 (A[1], r, A[0]); + logx = vfmaq_f64 (A[2], logx, r); + logx = vfmaq_f64 (A[3], logx, r); + logx = vfmaq_f64 (y0, logx, r); + + return vmulq_f64 (logx, y); +} + +static inline float64x2_t +log2_lookup (const struct data *d, uint32_t i) +{ + return vld1q_f64 ( + &d->log2_tab[(i >> (23 - V_POWF_LOG2_TABLE_BITS)) & Log2IdxMask].invc); +} + +static inline uint64x1_t +exp2f_lookup (const struct data *d, uint64_t i) +{ + return vld1_u64 (&d->exp2f_tab[i % (1 << V_EXP2F_TABLE_BITS)]); +} + +static inline float32x2_t +powf_core (const struct data *d, float64x2_t ylogx) +{ + /* N*x = k + r with r in [-1/2, 1/2]. */ + float64x2_t kd = vrndnq_f64 (ylogx); + int64x2_t ki = vcvtaq_s64_f64 (ylogx); + float64x2_t r = vsubq_f64 (ylogx, kd); + + /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1). */ + uint64x2_t t = vcombine_u64 (exp2f_lookup (d, vgetq_lane_s64 (ki, 0)), + exp2f_lookup (d, vgetq_lane_s64 (ki, 1))); + t = vaddq_u64 ( + t, vreinterpretq_u64_s64 (vshlq_n_s64 (ki, 52 - V_EXP2F_TABLE_BITS))); + float64x2_t s = vreinterpretq_f64_u64 (t); + float64x2_t p = vfmaq_f64 (C[1], r, C[0]); + p = vfmaq_f64 (C[2], r, p); + p = vfmaq_f64 (s, p, vmulq_f64 (s, r)); + return vcvt_f32_f64 (p); +} + +float32x4_t VPCS_ATTR V_NAME_F2 (pow) (float32x4_t x, float32x4_t y) +{ + const struct data *d = ptr_barrier (&data); + uint32x4_t u = vreinterpretq_u32_f32 (x); + uint32x4_t cmp = vcgeq_u32 (vsubq_u32 (u, Min), Thresh); + uint32x4_t tmp = vsubq_u32 (u, Off); + uint32x4_t top = vbicq_u32 (tmp, MantissaMask); + float32x4_t iz = vreinterpretq_f32_u32 (vsubq_u32 (u, top)); + int32x4_t k = vshrq_n_s32 (vreinterpretq_s32_u32 (top), + 23 - V_EXP2F_TABLE_BITS); /* arithmetic shift. */ + + /* Use double precision for each lane: split input vectors into lo and hi + halves and promote. 
*/ + float64x2_t tab0 = log2_lookup (d, vgetq_lane_u32 (tmp, 0)), + tab1 = log2_lookup (d, vgetq_lane_u32 (tmp, 1)), + tab2 = log2_lookup (d, vgetq_lane_u32 (tmp, 2)), + tab3 = log2_lookup (d, vgetq_lane_u32 (tmp, 3)); + + float64x2_t iz_lo = vcvt_f64_f32 (vget_low_f32 (iz)), + iz_hi = vcvt_high_f64_f32 (iz); + + float64x2_t k_lo = vcvtq_f64_s64 (vmovl_s32 (vget_low_s32 (k))), + k_hi = vcvtq_f64_s64 (vmovl_high_s32 (k)); + + float64x2_t invc_lo = vzip1q_f64 (tab0, tab1), + invc_hi = vzip1q_f64 (tab2, tab3), + logc_lo = vzip2q_f64 (tab0, tab1), + logc_hi = vzip2q_f64 (tab2, tab3); + + float64x2_t y_lo = vcvt_f64_f32 (vget_low_f32 (y)), + y_hi = vcvt_high_f64_f32 (y); + + float64x2_t ylogx_lo = ylogx_core (d, iz_lo, k_lo, invc_lo, logc_lo, y_lo); + float64x2_t ylogx_hi = ylogx_core (d, iz_hi, k_hi, invc_hi, logc_hi, y_hi); + + uint32x4_t ylogx_top = vuzp2q_u32 (vreinterpretq_u32_f64 (ylogx_lo), + vreinterpretq_u32_f64 (ylogx_hi)); + + cmp = vorrq_u32 ( + cmp, vcgeq_u32 (vandq_u32 (vshrq_n_u32 (ylogx_top, 15), v_u32 (0xffff)), + vdupq_n_u32 (asuint64 (126.0 * (1 << V_EXP2F_TABLE_BITS)) + >> 47))); + + float32x2_t p_lo = powf_core (d, ylogx_lo); + float32x2_t p_hi = powf_core (d, ylogx_hi); + + if (__glibc_unlikely (v_any_u32 (cmp))) + return special_case (x, y, vcombine_f32 (p_lo, p_hi), cmp); + return vcombine_f32 (p_lo, p_hi); +} +libmvec_hidden_def (V_NAME_F2 (pow)) +HALF_WIDTH_ALIAS_F2(pow) diff --git a/sysdeps/aarch64/fpu/powf_sve.c b/sysdeps/aarch64/fpu/powf_sve.c new file mode 100644 index 0000000000..4f6a142325 --- /dev/null +++ b/sysdeps/aarch64/fpu/powf_sve.c @@ -0,0 +1,335 @@ +/* Single-precision vector (SVE) pow function + + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include "../ieee754/flt-32/math_config.h" +#include "sv_math.h" + +/* The following data is used in the SVE pow core computation + and special case detection. */ +#define Tinvc __v_powf_data.invc +#define Tlogc __v_powf_data.logc +#define Texp __v_powf_data.scale +#define SignBias (1 << (V_POWF_EXP2_TABLE_BITS + 11)) +#define Shift 0x1.8p52 +#define Norm 0x1p23f /* 0x4b000000. */ + +/* Overall ULP error bound for pow is 2.6 ulp + ~ 0.5 + 2^24 (128*Ln2*relerr_log2 + relerr_exp2). */ +static const struct data +{ + double log_poly[4]; + double exp_poly[3]; + float uflow_bound, oflow_bound, small_bound; + uint32_t sign_bias, sign_mask, subnormal_bias, off; +} data = { + /* rel err: 1.5 * 2^-30. Each coefficients is multiplied the value of + V_POWF_EXP2_N. */ + .log_poly = { -0x1.6ff5daa3b3d7cp+3, 0x1.ec81d03c01aebp+3, + -0x1.71547bb43f101p+4, 0x1.7154764a815cbp+5 }, + /* rel err: 1.69 * 2^-34. */ + .exp_poly = { + 0x1.c6af84b912394p-20, /* A0 / V_POWF_EXP2_N^3. */ + 0x1.ebfce50fac4f3p-13, /* A1 / V_POWF_EXP2_N^2. */ + 0x1.62e42ff0c52d6p-6, /* A3 / V_POWF_EXP2_N. */ + }, + .uflow_bound = -0x1.2cp+12f, /* -150.0 * V_POWF_EXP2_N. */ + .oflow_bound = 0x1p+12f, /* 128.0 * V_POWF_EXP2_N. 
*/ + .small_bound = 0x1p-126f, + .off = 0x3f35d000, + .sign_bias = SignBias, + .sign_mask = 0x80000000, + .subnormal_bias = 0x0b800000, /* 23 << 23. */ +}; + +#define A(i) sv_f64 (d->log_poly[i]) +#define C(i) sv_f64 (d->exp_poly[i]) + +/* Check if x is an integer. */ +static inline svbool_t +svisint (svbool_t pg, svfloat32_t x) +{ + return svcmpeq (pg, svrintz_z (pg, x), x); +} + +/* Check if x is real not integer valued. */ +static inline svbool_t +svisnotint (svbool_t pg, svfloat32_t x) +{ + return svcmpne (pg, svrintz_z (pg, x), x); +} + +/* Check if x is an odd integer. */ +static inline svbool_t +svisodd (svbool_t pg, svfloat32_t x) +{ + svfloat32_t y = svmul_x (pg, x, 0.5f); + return svisnotint (pg, y); +} + +/* Check if zero, inf or nan. */ +static inline svbool_t +sv_zeroinfnan (svbool_t pg, svuint32_t i) +{ + return svcmpge (pg, svsub_x (pg, svmul_x (pg, i, 2u), 1), + 2u * 0x7f800000 - 1); +} + +/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is + the bit representation of a non-zero finite floating-point value. */ +static inline int +checkint (uint32_t iy) +{ + int e = iy >> 23 & 0xff; + if (e < 0x7f) + return 0; + if (e > 0x7f + 23) + return 2; + if (iy & ((1 << (0x7f + 23 - e)) - 1)) + return 0; + if (iy & (1 << (0x7f + 23 - e))) + return 1; + return 2; +} + +/* Check if zero, inf or nan. */ +static inline int +zeroinfnan (uint32_t ix) +{ + return 2 * ix - 1 >= 2u * 0x7f800000 - 1; +} + +/* A scalar subroutine used to fix main power special cases. Similar to the + preamble of scalar powf except that we do not update ix and sign_bias. This + is done in the preamble of the SVE powf. */ +static inline float +powf_specialcase (float x, float y, float z) +{ + uint32_t ix = asuint (x); + uint32_t iy = asuint (y); + /* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */ + if (__glibc_unlikely (zeroinfnan (iy))) + { + if (2 * iy == 0) + return issignalingf_inline (x) ? 
x + y : 1.0f; + if (ix == 0x3f800000) + return issignalingf_inline (y) ? x + y : 1.0f; + if (2 * ix > 2u * 0x7f800000 || 2 * iy > 2u * 0x7f800000) + return x + y; + if (2 * ix == 2 * 0x3f800000) + return 1.0f; + if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000)) + return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */ + return y * y; + } + if (__glibc_unlikely (zeroinfnan (ix))) + { + float_t x2 = x * x; + if (ix & 0x80000000 && checkint (iy) == 1) + x2 = -x2; + return iy & 0x80000000 ? 1 / x2 : x2; + } + /* We need a return here in case x<0 and y is integer, but all other tests + need to be run. */ + return z; +} + +/* Scalar fallback for special case routines with custom signature. */ +static inline svfloat32_t +sv_call_powf_sc (svfloat32_t x1, svfloat32_t x2, svfloat32_t y, svbool_t cmp) +{ + svbool_t p = svpfirst (cmp, svpfalse ()); + while (svptest_any (cmp, p)) + { + float sx1 = svclastb (p, 0, x1); + float sx2 = svclastb (p, 0, x2); + float elem = svclastb (p, 0, y); + elem = powf_specialcase (sx1, sx2, elem); + svfloat32_t y2 = sv_f32 (elem); + y = svsel (p, y2, y); + p = svpnext_b32 (cmp, p); + } + return y; +} + +/* Compute core for half of the lanes in double precision. */ +static inline svfloat64_t +sv_powf_core_ext (const svbool_t pg, svuint64_t i, svfloat64_t z, svint64_t k, + svfloat64_t y, svuint64_t sign_bias, svfloat64_t *pylogx, + const struct data *d) +{ + svfloat64_t invc = svld1_gather_index (pg, Tinvc, i); + svfloat64_t logc = svld1_gather_index (pg, Tlogc, i); + + /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k. */ + svfloat64_t r = svmla_x (pg, sv_f64 (-1.0), z, invc); + svfloat64_t y0 = svadd_x (pg, logc, svcvt_f64_x (pg, k)); + + /* Polynomial to approximate log1p(r)/ln2. 
*/ + svfloat64_t logx = A (0); + logx = svmla_x (pg, A (1), r, logx); + logx = svmla_x (pg, A (2), r, logx); + logx = svmla_x (pg, A (3), r, logx); + logx = svmla_x (pg, y0, r, logx); + *pylogx = svmul_x (pg, y, logx); + + /* z - kd is in [-1, 1] in non-nearest rounding modes. */ + svfloat64_t kd = svadd_x (pg, *pylogx, Shift); + svuint64_t ki = svreinterpret_u64 (kd); + kd = svsub_x (pg, kd, Shift); + + r = svsub_x (pg, *pylogx, kd); + + /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1). */ + svuint64_t t + = svld1_gather_index (pg, Texp, svand_x (pg, ki, V_POWF_EXP2_N - 1)); + svuint64_t ski = svadd_x (pg, ki, sign_bias); + t = svadd_x (pg, t, svlsl_x (pg, ski, 52 - V_POWF_EXP2_TABLE_BITS)); + svfloat64_t s = svreinterpret_f64 (t); + + svfloat64_t p = C (0); + p = svmla_x (pg, C (1), p, r); + p = svmla_x (pg, C (2), p, r); + p = svmla_x (pg, s, p, svmul_x (pg, s, r)); + + return p; +} + +/* Widen vector to double precision and compute core on both halves of the + vector. Lower cost of promotion by considering all lanes active. */ +static inline svfloat32_t +sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k, + svfloat32_t y, svuint32_t sign_bias, svfloat32_t *pylogx, + const struct data *d) +{ + const svbool_t ptrue = svptrue_b64 (); + + /* Unpack and promote input vectors (pg, y, z, i, k and sign_bias) into two in + order to perform core computation in double precision. 
*/ + const svbool_t pg_lo = svunpklo (pg); + const svbool_t pg_hi = svunpkhi (pg); + svfloat64_t y_lo = svcvt_f64_x ( + ptrue, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y)))); + svfloat64_t y_hi = svcvt_f64_x ( + ptrue, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y)))); + svfloat32_t z = svreinterpret_f32 (iz); + svfloat64_t z_lo = svcvt_f64_x ( + ptrue, svreinterpret_f32 (svunpklo (svreinterpret_u32 (z)))); + svfloat64_t z_hi = svcvt_f64_x ( + ptrue, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (z)))); + svuint64_t i_lo = svunpklo (i); + svuint64_t i_hi = svunpkhi (i); + svint64_t k_lo = svunpklo (k); + svint64_t k_hi = svunpkhi (k); + svuint64_t sign_bias_lo = svunpklo (sign_bias); + svuint64_t sign_bias_hi = svunpkhi (sign_bias); + + /* Compute each part in double precision. */ + svfloat64_t ylogx_lo, ylogx_hi; + svfloat64_t lo = sv_powf_core_ext (pg_lo, i_lo, z_lo, k_lo, y_lo, + sign_bias_lo, &ylogx_lo, d); + svfloat64_t hi = sv_powf_core_ext (pg_hi, i_hi, z_hi, k_hi, y_hi, + sign_bias_hi, &ylogx_hi, d); + + /* Convert back to single-precision and interleave. */ + svfloat32_t ylogx_lo_32 = svcvt_f32_x (ptrue, ylogx_lo); + svfloat32_t ylogx_hi_32 = svcvt_f32_x (ptrue, ylogx_hi); + *pylogx = svuzp1 (ylogx_lo_32, ylogx_hi_32); + svfloat32_t lo_32 = svcvt_f32_x (ptrue, lo); + svfloat32_t hi_32 = svcvt_f32_x (ptrue, hi); + return svuzp1 (lo_32, hi_32); +} + +/* Implementation of SVE powf. + Provides the same accuracy as AdvSIMD powf, since it relies on the same + algorithm. The theoretical maximum error is under 2.60 ULPs. + Maximum measured error is 2.56 ULPs: + SV_NAME_F2 (pow) (0x1.004118p+0, 0x1.5d14a4p+16) got 0x1.fd4bp+127 + want 0x1.fd4b06p+127. */ +svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + + svuint32_t vix0 = svreinterpret_u32 (x); + svuint32_t viy0 = svreinterpret_u32 (y); + + /* Negative x cases. 
*/ + svuint32_t sign_bit = svand_m (pg, vix0, d->sign_mask); + svbool_t xisneg = svcmpeq (pg, sign_bit, d->sign_mask); + + /* Set sign_bias and ix depending on sign of x and nature of y. */ + svbool_t yisnotint_xisneg = svpfalse_b (); + svuint32_t sign_bias = sv_u32 (0); + svuint32_t vix = vix0; + if (__glibc_unlikely (svptest_any (pg, xisneg))) + { + /* Determine nature of y. */ + yisnotint_xisneg = svisnotint (xisneg, y); + svbool_t yisint_xisneg = svisint (xisneg, y); + svbool_t yisodd_xisneg = svisodd (xisneg, y); + /* ix set to abs(ix) if y is integer. */ + vix = svand_m (yisint_xisneg, vix0, 0x7fffffff); + /* Set to SignBias if x is negative and y is odd. */ + sign_bias = svsel (yisodd_xisneg, sv_u32 (d->sign_bias), sv_u32 (0)); + } + + /* Special cases of x or y: zero, inf and nan. */ + svbool_t xspecial = sv_zeroinfnan (pg, vix0); + svbool_t yspecial = sv_zeroinfnan (pg, viy0); + svbool_t cmp = svorr_z (pg, xspecial, yspecial); + + /* Small cases of x: |x| < 0x1p-126. */ + svbool_t xsmall = svaclt (pg, x, d->small_bound); + if (__glibc_unlikely (svptest_any (pg, xsmall))) + { + /* Normalize subnormal x so exponent becomes negative. */ + svuint32_t vix_norm = svreinterpret_u32 (svmul_x (xsmall, x, Norm)); + vix_norm = svand_x (xsmall, vix_norm, 0x7fffffff); + vix_norm = svsub_x (xsmall, vix_norm, d->subnormal_bias); + vix = svsel (xsmall, vix_norm, vix); + } + /* Part of core computation carried in working precision. */ + svuint32_t tmp = svsub_x (pg, vix, d->off); + svuint32_t i = svand_x (pg, svlsr_x (pg, tmp, (23 - V_POWF_LOG2_TABLE_BITS)), + V_POWF_LOG2_N - 1); + svuint32_t top = svand_x (pg, tmp, 0xff800000); + svuint32_t iz = svsub_x (pg, vix, top); + svint32_t k + = svasr_x (pg, svreinterpret_s32 (top), (23 - V_POWF_EXP2_TABLE_BITS)); + + /* Compute core in extended precision and return intermediate ylogx results to + handle cases of underflow and underflow in exp. 
*/ + svfloat32_t ylogx; + svfloat32_t ret = sv_powf_core (pg, i, iz, k, y, sign_bias, &ylogx, d); + + /* Handle exp special cases of underflow and overflow. */ + svuint32_t sign = svlsl_x (pg, sign_bias, 20 - V_POWF_EXP2_TABLE_BITS); + svfloat32_t ret_oflow + = svreinterpret_f32 (svorr_x (pg, sign, asuint (INFINITY))); + svfloat32_t ret_uflow = svreinterpret_f32 (sign); + ret = svsel (svcmple (pg, ylogx, d->uflow_bound), ret_uflow, ret); + ret = svsel (svcmpgt (pg, ylogx, d->oflow_bound), ret_oflow, ret); + + /* Cases of finite y and finite negative x. */ + ret = svsel (yisnotint_xisneg, sv_f32 (__builtin_nanf ("")), ret); + + if (__glibc_unlikely (svptest_any (pg, cmp))) + return sv_call_powf_sc (x, y, ret, cmp); + + return ret; +} diff --git a/sysdeps/aarch64/fpu/sinh_advsimd.c b/sysdeps/aarch64/fpu/sinh_advsimd.c index fa3723b10c..3e3b76c502 100644 --- a/sysdeps/aarch64/fpu/sinh_advsimd.c +++ b/sysdeps/aarch64/fpu/sinh_advsimd.c @@ -22,8 +22,9 @@ static const struct data { - float64x2_t poly[11]; - float64x2_t inv_ln2, m_ln2, shift; + float64x2_t poly[11], inv_ln2; + double m_ln2[2]; + float64x2_t shift; uint64x2_t halff; int64x2_t onef; #if WANT_SIMD_EXCEPT @@ -40,7 +41,7 @@ static const struct data V2 (0x1.af5eedae67435p-26), V2 (0x1.1f143d060a28ap-29), }, .inv_ln2 = V2 (0x1.71547652b82fep0), - .m_ln2 = (float64x2_t) {-0x1.62e42fefa39efp-1, -0x1.abc9e3b39803fp-56}, + .m_ln2 = {-0x1.62e42fefa39efp-1, -0x1.abc9e3b39803fp-56}, .shift = V2 (0x1.8p52), .halff = V2 (0x3fe0000000000000), @@ -67,8 +68,10 @@ expm1_inline (float64x2_t x) and f = x - i * ln2 (f in [-ln2/2, ln2/2]). */ float64x2_t j = vsubq_f64 (vfmaq_f64 (d->shift, d->inv_ln2, x), d->shift); int64x2_t i = vcvtq_s64_f64 (j); - float64x2_t f = vfmaq_laneq_f64 (x, j, d->m_ln2, 0); - f = vfmaq_laneq_f64 (f, j, d->m_ln2, 1); + + float64x2_t m_ln2 = vld1q_f64 (d->m_ln2); + float64x2_t f = vfmaq_laneq_f64 (x, j, m_ln2, 0); + f = vfmaq_laneq_f64 (f, j, m_ln2, 1); /* Approximate expm1(f) using polynomial. 
*/ float64x2_t f2 = vmulq_f64 (f, f); float64x2_t f4 = vmulq_f64 (f2, f2); diff --git a/sysdeps/aarch64/fpu/tan_advsimd.c b/sysdeps/aarch64/fpu/tan_advsimd.c index 0459821ab2..d56a102dd1 100644 --- a/sysdeps/aarch64/fpu/tan_advsimd.c +++ b/sysdeps/aarch64/fpu/tan_advsimd.c @@ -23,7 +23,8 @@ static const struct data { float64x2_t poly[9]; - float64x2_t half_pi, two_over_pi, shift; + double half_pi[2]; + float64x2_t two_over_pi, shift; #if !WANT_SIMD_EXCEPT float64x2_t range_val; #endif @@ -81,8 +82,9 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x) /* Use q to reduce x to r in [-pi/4, pi/4], by: r = x - q * pi/2, in extended precision. */ float64x2_t r = x; - r = vfmsq_laneq_f64 (r, q, dat->half_pi, 0); - r = vfmsq_laneq_f64 (r, q, dat->half_pi, 1); + float64x2_t half_pi = vld1q_f64 (dat->half_pi); + r = vfmsq_laneq_f64 (r, q, half_pi, 0); + r = vfmsq_laneq_f64 (r, q, half_pi, 1); /* Further reduce r to [-pi/8, pi/8], to be reconstructed using double angle formula. */ r = vmulq_n_f64 (r, 0.5); diff --git a/sysdeps/aarch64/fpu/tanf_advsimd.c b/sysdeps/aarch64/fpu/tanf_advsimd.c index 5a7489390a..705586f0c0 100644 --- a/sysdeps/aarch64/fpu/tanf_advsimd.c +++ b/sysdeps/aarch64/fpu/tanf_advsimd.c @@ -23,7 +23,7 @@ static const struct data { float32x4_t poly[6]; - float32x4_t pi_consts; + float pi_consts[4]; float32x4_t shift; #if !WANT_SIMD_EXCEPT float32x4_t range_val; @@ -95,16 +95,17 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (tan) (float32x4_t x) #endif /* n = rint(x/(pi/2)). */ - float32x4_t q = vfmaq_laneq_f32 (d->shift, x, d->pi_consts, 3); + float32x4_t pi_consts = vld1q_f32 (d->pi_consts); + float32x4_t q = vfmaq_laneq_f32 (d->shift, x, pi_consts, 3); float32x4_t n = vsubq_f32 (q, d->shift); /* Determine if x lives in an interval, where |tan(x)| grows to infinity. */ uint32x4_t pred_alt = vtstq_u32 (vreinterpretq_u32_f32 (q), v_u32 (1)); /* r = x - n * (pi/2) (range reduction into -pi./4 .. pi/4). 
*/ float32x4_t r; - r = vfmaq_laneq_f32 (x, n, d->pi_consts, 0); - r = vfmaq_laneq_f32 (r, n, d->pi_consts, 1); - r = vfmaq_laneq_f32 (r, n, d->pi_consts, 2); + r = vfmaq_laneq_f32 (x, n, pi_consts, 0); + r = vfmaq_laneq_f32 (r, n, pi_consts, 1); + r = vfmaq_laneq_f32 (r, n, pi_consts, 2); /* If x lives in an interval, where |tan(x)| - is finite, then use a polynomial approximation of the form diff --git a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c index f2d8714075..8c98161662 100644 --- a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c +++ b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c @@ -30,6 +30,7 @@ VPCS_VECTOR_WRAPPER (asinh_advsimd, _ZGVnN2v_asinh) VPCS_VECTOR_WRAPPER (atan_advsimd, _ZGVnN2v_atan) VPCS_VECTOR_WRAPPER (atanh_advsimd, _ZGVnN2v_atanh) VPCS_VECTOR_WRAPPER_ff (atan2_advsimd, _ZGVnN2vv_atan2) +VPCS_VECTOR_WRAPPER (cbrt_advsimd, _ZGVnN2v_cbrt) VPCS_VECTOR_WRAPPER (cos_advsimd, _ZGVnN2v_cos) VPCS_VECTOR_WRAPPER (cosh_advsimd, _ZGVnN2v_cosh) VPCS_VECTOR_WRAPPER (erf_advsimd, _ZGVnN2v_erf) @@ -38,10 +39,12 @@ VPCS_VECTOR_WRAPPER (exp_advsimd, _ZGVnN2v_exp) VPCS_VECTOR_WRAPPER (exp10_advsimd, _ZGVnN2v_exp10) VPCS_VECTOR_WRAPPER (exp2_advsimd, _ZGVnN2v_exp2) VPCS_VECTOR_WRAPPER (expm1_advsimd, _ZGVnN2v_expm1) +VPCS_VECTOR_WRAPPER_ff (hypot_advsimd, _ZGVnN2vv_hypot) VPCS_VECTOR_WRAPPER (log_advsimd, _ZGVnN2v_log) VPCS_VECTOR_WRAPPER (log10_advsimd, _ZGVnN2v_log10) VPCS_VECTOR_WRAPPER (log1p_advsimd, _ZGVnN2v_log1p) VPCS_VECTOR_WRAPPER (log2_advsimd, _ZGVnN2v_log2) +VPCS_VECTOR_WRAPPER_ff (pow_advsimd, _ZGVnN2vv_pow) VPCS_VECTOR_WRAPPER (sin_advsimd, _ZGVnN2v_sin) VPCS_VECTOR_WRAPPER (sinh_advsimd, _ZGVnN2v_sinh) VPCS_VECTOR_WRAPPER (tan_advsimd, _ZGVnN2v_tan) diff --git a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c index 37873d5e43..2583428af5 100644 --- a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c +++ 
b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c @@ -49,6 +49,7 @@ SVE_VECTOR_WRAPPER (asinh_sve, _ZGVsMxv_asinh) SVE_VECTOR_WRAPPER (atan_sve, _ZGVsMxv_atan) SVE_VECTOR_WRAPPER (atanh_sve, _ZGVsMxv_atanh) SVE_VECTOR_WRAPPER_ff (atan2_sve, _ZGVsMxvv_atan2) +SVE_VECTOR_WRAPPER (cbrt_sve, _ZGVsMxv_cbrt) SVE_VECTOR_WRAPPER (cos_sve, _ZGVsMxv_cos) SVE_VECTOR_WRAPPER (cosh_sve, _ZGVsMxv_cosh) SVE_VECTOR_WRAPPER (erf_sve, _ZGVsMxv_erf) @@ -57,10 +58,12 @@ SVE_VECTOR_WRAPPER (exp_sve, _ZGVsMxv_exp) SVE_VECTOR_WRAPPER (exp10_sve, _ZGVsMxv_exp10) SVE_VECTOR_WRAPPER (exp2_sve, _ZGVsMxv_exp2) SVE_VECTOR_WRAPPER (expm1_sve, _ZGVsMxv_expm1) +SVE_VECTOR_WRAPPER_ff (hypot_sve, _ZGVsMxvv_hypot) SVE_VECTOR_WRAPPER (log_sve, _ZGVsMxv_log) SVE_VECTOR_WRAPPER (log10_sve, _ZGVsMxv_log10) SVE_VECTOR_WRAPPER (log1p_sve, _ZGVsMxv_log1p) SVE_VECTOR_WRAPPER (log2_sve, _ZGVsMxv_log2) +SVE_VECTOR_WRAPPER_ff (pow_sve, _ZGVsMxvv_pow) SVE_VECTOR_WRAPPER (sin_sve, _ZGVsMxv_sin) SVE_VECTOR_WRAPPER (sinh_sve, _ZGVsMxv_sinh) SVE_VECTOR_WRAPPER (tan_sve, _ZGVsMxv_tan) diff --git a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c index 08e33115b9..26679018d6 100644 --- a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c +++ b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c @@ -30,6 +30,7 @@ VPCS_VECTOR_WRAPPER (asinhf_advsimd, _ZGVnN4v_asinhf) VPCS_VECTOR_WRAPPER (atanf_advsimd, _ZGVnN4v_atanf) VPCS_VECTOR_WRAPPER (atanhf_advsimd, _ZGVnN4v_atanhf) VPCS_VECTOR_WRAPPER_ff (atan2f_advsimd, _ZGVnN4vv_atan2f) +VPCS_VECTOR_WRAPPER (cbrtf_advsimd, _ZGVnN4v_cbrtf) VPCS_VECTOR_WRAPPER (cosf_advsimd, _ZGVnN4v_cosf) VPCS_VECTOR_WRAPPER (coshf_advsimd, _ZGVnN4v_coshf) VPCS_VECTOR_WRAPPER (erff_advsimd, _ZGVnN4v_erff) @@ -38,10 +39,12 @@ VPCS_VECTOR_WRAPPER (expf_advsimd, _ZGVnN4v_expf) VPCS_VECTOR_WRAPPER (exp10f_advsimd, _ZGVnN4v_exp10f) VPCS_VECTOR_WRAPPER (exp2f_advsimd, _ZGVnN4v_exp2f) VPCS_VECTOR_WRAPPER (expm1f_advsimd, _ZGVnN4v_expm1f) 
+VPCS_VECTOR_WRAPPER_ff (hypotf_advsimd, _ZGVnN4vv_hypotf) VPCS_VECTOR_WRAPPER (logf_advsimd, _ZGVnN4v_logf) VPCS_VECTOR_WRAPPER (log10f_advsimd, _ZGVnN4v_log10f) VPCS_VECTOR_WRAPPER (log1pf_advsimd, _ZGVnN4v_log1pf) VPCS_VECTOR_WRAPPER (log2f_advsimd, _ZGVnN4v_log2f) +VPCS_VECTOR_WRAPPER_ff (powf_advsimd, _ZGVnN4vv_powf) VPCS_VECTOR_WRAPPER (sinf_advsimd, _ZGVnN4v_sinf) VPCS_VECTOR_WRAPPER (sinhf_advsimd, _ZGVnN4v_sinhf) VPCS_VECTOR_WRAPPER (tanf_advsimd, _ZGVnN4v_tanf) diff --git a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c index 025daa662e..0f972b7886 100644 --- a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c +++ b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c @@ -49,6 +49,7 @@ SVE_VECTOR_WRAPPER (asinhf_sve, _ZGVsMxv_asinhf) SVE_VECTOR_WRAPPER (atanf_sve, _ZGVsMxv_atanf) SVE_VECTOR_WRAPPER (atanhf_sve, _ZGVsMxv_atanhf) SVE_VECTOR_WRAPPER_ff (atan2f_sve, _ZGVsMxvv_atan2f) +SVE_VECTOR_WRAPPER (cbrtf_sve, _ZGVsMxv_cbrtf) SVE_VECTOR_WRAPPER (cosf_sve, _ZGVsMxv_cosf) SVE_VECTOR_WRAPPER (coshf_sve, _ZGVsMxv_coshf) SVE_VECTOR_WRAPPER (erff_sve, _ZGVsMxv_erff) @@ -57,10 +58,12 @@ SVE_VECTOR_WRAPPER (expf_sve, _ZGVsMxv_expf) SVE_VECTOR_WRAPPER (exp10f_sve, _ZGVsMxv_exp10f) SVE_VECTOR_WRAPPER (exp2f_sve, _ZGVsMxv_exp2f) SVE_VECTOR_WRAPPER (expm1f_sve, _ZGVsMxv_expm1f) +SVE_VECTOR_WRAPPER_ff (hypotf_sve, _ZGVsMxvv_hypotf) SVE_VECTOR_WRAPPER (logf_sve, _ZGVsMxv_logf) SVE_VECTOR_WRAPPER (log10f_sve, _ZGVsMxv_log10f) SVE_VECTOR_WRAPPER (log1pf_sve, _ZGVsMxv_log1pf) SVE_VECTOR_WRAPPER (log2f_sve, _ZGVsMxv_log2f) +SVE_VECTOR_WRAPPER_ff (powf_sve, _ZGVsMxvv_powf) SVE_VECTOR_WRAPPER (sinf_sve, _ZGVsMxv_sinf) SVE_VECTOR_WRAPPER (sinhf_sve, _ZGVsMxv_sinhf) SVE_VECTOR_WRAPPER (tanf_sve, _ZGVsMxv_tanf) diff --git a/sysdeps/aarch64/fpu/v_expf_inline.h b/sysdeps/aarch64/fpu/v_expf_inline.h index a3b0e32f9e..08b06e0a6b 100644 --- a/sysdeps/aarch64/fpu/v_expf_inline.h +++ b/sysdeps/aarch64/fpu/v_expf_inline.h @@ -25,7 +25,8 @@ struct 
v_expf_data { float32x4_t poly[5]; - float32x4_t shift, invln2_and_ln2; + float32x4_t shift; + float invln2_and_ln2[4]; }; /* maxerr: 1.45358 +0.5 ulp. */ @@ -50,10 +51,11 @@ v_expf_inline (float32x4_t x, const struct v_expf_data *d) /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] x = ln2*n + r, with r in [-ln2/2, ln2/2]. */ float32x4_t n, r, z; - z = vfmaq_laneq_f32 (d->shift, x, d->invln2_and_ln2, 0); + float32x4_t invln2_and_ln2 = vld1q_f32 (d->invln2_and_ln2); + z = vfmaq_laneq_f32 (d->shift, x, invln2_and_ln2, 0); n = vsubq_f32 (z, d->shift); - r = vfmsq_laneq_f32 (x, n, d->invln2_and_ln2, 1); - r = vfmsq_laneq_f32 (r, n, d->invln2_and_ln2, 2); + r = vfmsq_laneq_f32 (x, n, invln2_and_ln2, 1); + r = vfmsq_laneq_f32 (r, n, invln2_and_ln2, 2); uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23); float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, ExponentBias)); diff --git a/sysdeps/aarch64/fpu/v_expm1f_inline.h b/sysdeps/aarch64/fpu/v_expm1f_inline.h index 337ccfbfab..59b552da6b 100644 --- a/sysdeps/aarch64/fpu/v_expm1f_inline.h +++ b/sysdeps/aarch64/fpu/v_expm1f_inline.h @@ -26,7 +26,8 @@ struct v_expm1f_data { float32x4_t poly[5]; - float32x4_t invln2_and_ln2, shift; + float invln2_and_ln2[4]; + float32x4_t shift; int32x4_t exponent_bias; }; @@ -49,11 +50,12 @@ expm1f_inline (float32x4_t x, const struct v_expm1f_data *d) calling routine should handle special values if required. */ /* Reduce argument: f in [-ln2/2, ln2/2], i is exact. 
*/ - float32x4_t j = vsubq_f32 ( - vfmaq_laneq_f32 (d->shift, x, d->invln2_and_ln2, 0), d->shift); + float32x4_t invln2_and_ln2 = vld1q_f32 (d->invln2_and_ln2); + float32x4_t j + = vsubq_f32 (vfmaq_laneq_f32 (d->shift, x, invln2_and_ln2, 0), d->shift); int32x4_t i = vcvtq_s32_f32 (j); - float32x4_t f = vfmsq_laneq_f32 (x, j, d->invln2_and_ln2, 1); - f = vfmsq_laneq_f32 (f, j, d->invln2_and_ln2, 2); + float32x4_t f = vfmsq_laneq_f32 (x, j, invln2_and_ln2, 1); + f = vfmsq_laneq_f32 (f, j, invln2_and_ln2, 2); /* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f). Uses Estrin scheme, where the main _ZGVnN4v_expm1f routine uses diff --git a/sysdeps/aarch64/fpu/v_pow_exp_data.c b/sysdeps/aarch64/fpu/v_pow_exp_data.c new file mode 100644 index 0000000000..8b7fb83668 --- /dev/null +++ b/sysdeps/aarch64/fpu/v_pow_exp_data.c @@ -0,0 +1,301 @@ +/* Shared data between exp, exp2 and pow. + + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include "vecmath_config.h" + +#define N (1 << V_POW_EXP_TABLE_BITS) + +const struct v_pow_exp_data __v_pow_exp_data = { +// exp polynomial coefficients. 
+.poly = { +// abs error: 1.43*2^-58 +// ulp error: 0.549 (0.550 without fma) +// if |x| < ln2/512 +0x1.fffffffffffd4p-2, +0x1.5555571d6ef9p-3, +0x1.5555576a5adcep-5, +}, +// N/ln2 +.n_over_ln2 = 0x1.71547652b82fep0 * N, +// ln2/N +.ln2_over_n_hi = 0x1.62e42fefc0000p-9, +.ln2_over_n_lo = -0x1.c610ca86c3899p-45, +// Used for rounding to nearest integer without using intrinsics. +.shift = 0x1.8p52, +// 2^(k/N) ~= H[k]*(1 + T[k]) for int k in [0,N) +// sbits[k] = asuint64(H[k]) - (k << 52)/N +.sbits = { +0x3ff0000000000000, +0x3feffb1afa5abcbf, +0x3feff63da9fb3335, +0x3feff168143b0281, +0x3fefec9a3e778061, +0x3fefe7d42e11bbcc, +0x3fefe315e86e7f85, +0x3fefde5f72f654b1, +0x3fefd9b0d3158574, +0x3fefd50a0e3c1f89, +0x3fefd06b29ddf6de, +0x3fefcbd42b72a836, +0x3fefc74518759bc8, +0x3fefc2bdf66607e0, +0x3fefbe3ecac6f383, +0x3fefb9c79b1f3919, +0x3fefb5586cf9890f, +0x3fefb0f145e46c85, +0x3fefac922b7247f7, +0x3fefa83b23395dec, +0x3fefa3ec32d3d1a2, +0x3fef9fa55fdfa9c5, +0x3fef9b66affed31b, +0x3fef973028d7233e, +0x3fef9301d0125b51, +0x3fef8edbab5e2ab6, +0x3fef8abdc06c31cc, +0x3fef86a814f204ab, +0x3fef829aaea92de0, +0x3fef7e95934f312e, +0x3fef7a98c8a58e51, +0x3fef76a45471c3c2, +0x3fef72b83c7d517b, +0x3fef6ed48695bbc0, +0x3fef6af9388c8dea, +0x3fef672658375d2f, +0x3fef635beb6fcb75, +0x3fef5f99f8138a1c, +0x3fef5be084045cd4, +0x3fef582f95281c6b, +0x3fef54873168b9aa, +0x3fef50e75eb44027, +0x3fef4d5022fcd91d, +0x3fef49c18438ce4d, +0x3fef463b88628cd6, +0x3fef42be3578a819, +0x3fef3f49917ddc96, +0x3fef3bdda27912d1, +0x3fef387a6e756238, +0x3fef351ffb82140a, +0x3fef31ce4fb2a63f, +0x3fef2e85711ece75, +0x3fef2b4565e27cdd, +0x3fef280e341ddf29, +0x3fef24dfe1f56381, +0x3fef21ba7591bb70, +0x3fef1e9df51fdee1, +0x3fef1b8a66d10f13, +0x3fef187fd0dad990, +0x3fef157e39771b2f, +0x3fef1285a6e4030b, +0x3fef0f961f641589, +0x3fef0cafa93e2f56, +0x3fef09d24abd886b, +0x3fef06fe0a31b715, +0x3fef0432edeeb2fd, +0x3fef0170fc4cd831, +0x3feefeb83ba8ea32, +0x3feefc08b26416ff, +0x3feef96266e3fa2d, +0x3feef6c55f929ff1, 
+0x3feef431a2de883b, +0x3feef1a7373aa9cb, +0x3feeef26231e754a, +0x3feeecae6d05d866, +0x3feeea401b7140ef, +0x3feee7db34e59ff7, +0x3feee57fbfec6cf4, +0x3feee32dc313a8e5, +0x3feee0e544ede173, +0x3feedea64c123422, +0x3feedc70df1c5175, +0x3feeda4504ac801c, +0x3feed822c367a024, +0x3feed60a21f72e2a, +0x3feed3fb2709468a, +0x3feed1f5d950a897, +0x3feecffa3f84b9d4, +0x3feece086061892d, +0x3feecc2042a7d232, +0x3feeca41ed1d0057, +0x3feec86d668b3237, +0x3feec6a2b5c13cd0, +0x3feec4e1e192aed2, +0x3feec32af0d7d3de, +0x3feec17dea6db7d7, +0x3feebfdad5362a27, +0x3feebe41b817c114, +0x3feebcb299fddd0d, +0x3feebb2d81d8abff, +0x3feeb9b2769d2ca7, +0x3feeb8417f4531ee, +0x3feeb6daa2cf6642, +0x3feeb57de83f4eef, +0x3feeb42b569d4f82, +0x3feeb2e2f4f6ad27, +0x3feeb1a4ca5d920f, +0x3feeb070dde910d2, +0x3feeaf4736b527da, +0x3feeae27dbe2c4cf, +0x3feead12d497c7fd, +0x3feeac0827ff07cc, +0x3feeab07dd485429, +0x3feeaa11fba87a03, +0x3feea9268a5946b7, +0x3feea84590998b93, +0x3feea76f15ad2148, +0x3feea6a320dceb71, +0x3feea5e1b976dc09, +0x3feea52ae6cdf6f4, +0x3feea47eb03a5585, +0x3feea3dd1d1929fd, +0x3feea34634ccc320, +0x3feea2b9febc8fb7, +0x3feea23882552225, +0x3feea1c1c70833f6, +0x3feea155d44ca973, +0x3feea0f4b19e9538, +0x3feea09e667f3bcd, +0x3feea052fa75173e, +0x3feea012750bdabf, +0x3fee9fdcddd47645, +0x3fee9fb23c651a2f, +0x3fee9f9298593ae5, +0x3fee9f7df9519484, +0x3fee9f7466f42e87, +0x3fee9f75e8ec5f74, +0x3fee9f8286ead08a, +0x3fee9f9a48a58174, +0x3fee9fbd35d7cbfd, +0x3fee9feb564267c9, +0x3feea024b1ab6e09, +0x3feea0694fde5d3f, +0x3feea0b938ac1cf6, +0x3feea11473eb0187, +0x3feea17b0976cfdb, +0x3feea1ed0130c132, +0x3feea26a62ff86f0, +0x3feea2f336cf4e62, +0x3feea3878491c491, +0x3feea427543e1a12, +0x3feea4d2add106d9, +0x3feea589994cce13, +0x3feea64c1eb941f7, +0x3feea71a4623c7ad, +0x3feea7f4179f5b21, +0x3feea8d99b4492ed, +0x3feea9cad931a436, +0x3feeaac7d98a6699, +0x3feeabd0a478580f, +0x3feeace5422aa0db, +0x3feeae05bad61778, +0x3feeaf3216b5448c, +0x3feeb06a5e0866d9, +0x3feeb1ae99157736, +0x3feeb2fed0282c8a, 
+0x3feeb45b0b91ffc6, +0x3feeb5c353aa2fe2, +0x3feeb737b0cdc5e5, +0x3feeb8b82b5f98e5, +0x3feeba44cbc8520f, +0x3feebbdd9a7670b3, +0x3feebd829fde4e50, +0x3feebf33e47a22a2, +0x3feec0f170ca07ba, +0x3feec2bb4d53fe0d, +0x3feec49182a3f090, +0x3feec674194bb8d5, +0x3feec86319e32323, +0x3feeca5e8d07f29e, +0x3feecc667b5de565, +0x3feece7aed8eb8bb, +0x3feed09bec4a2d33, +0x3feed2c980460ad8, +0x3feed503b23e255d, +0x3feed74a8af46052, +0x3feed99e1330b358, +0x3feedbfe53c12e59, +0x3feede6b5579fdbf, +0x3feee0e521356eba, +0x3feee36bbfd3f37a, +0x3feee5ff3a3c2774, +0x3feee89f995ad3ad, +0x3feeeb4ce622f2ff, +0x3feeee07298db666, +0x3feef0ce6c9a8952, +0x3feef3a2b84f15fb, +0x3feef68415b749b1, +0x3feef9728de5593a, +0x3feefc6e29f1c52a, +0x3feeff76f2fb5e47, +0x3fef028cf22749e4, +0x3fef05b030a1064a, +0x3fef08e0b79a6f1f, +0x3fef0c1e904bc1d2, +0x3fef0f69c3f3a207, +0x3fef12c25bd71e09, +0x3fef16286141b33d, +0x3fef199bdd85529c, +0x3fef1d1cd9fa652c, +0x3fef20ab5fffd07a, +0x3fef244778fafb22, +0x3fef27f12e57d14b, +0x3fef2ba88988c933, +0x3fef2f6d9406e7b5, +0x3fef33405751c4db, +0x3fef3720dcef9069, +0x3fef3b0f2e6d1675, +0x3fef3f0b555dc3fa, +0x3fef43155b5bab74, +0x3fef472d4a07897c, +0x3fef4b532b08c968, +0x3fef4f87080d89f2, +0x3fef53c8eacaa1d6, +0x3fef5818dcfba487, +0x3fef5c76e862e6d3, +0x3fef60e316c98398, +0x3fef655d71ff6075, +0x3fef69e603db3285, +0x3fef6e7cd63a8315, +0x3fef7321f301b460, +0x3fef77d5641c0658, +0x3fef7c97337b9b5f, +0x3fef81676b197d17, +0x3fef864614f5a129, +0x3fef8b333b16ee12, +0x3fef902ee78b3ff6, +0x3fef953924676d76, +0x3fef9a51fbc74c83, +0x3fef9f7977cdb740, +0x3fefa4afa2a490da, +0x3fefa9f4867cca6e, +0x3fefaf482d8e67f1, +0x3fefb4aaa2188510, +0x3fefba1bee615a27, +0x3fefbf9c1cb6412a, +0x3fefc52b376bba97, +0x3fefcac948dd7274, +0x3fefd0765b6e4540, +0x3fefd632798844f8, +0x3fefdbfdad9cbe14, +0x3fefe1d802243c89, +0x3fefe7c1819e90d8, +0x3fefedba3692d514, +0x3feff3c22b8f71f1, +0x3feff9d96b2a23d9, +}, +}; diff --git a/sysdeps/aarch64/fpu/v_pow_log_data.c b/sysdeps/aarch64/fpu/v_pow_log_data.c new file 
mode 100644 index 0000000000..0242fff477 --- /dev/null +++ b/sysdeps/aarch64/fpu/v_pow_log_data.c @@ -0,0 +1,186 @@ +/* Data for the log part of pow. + + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include "vecmath_config.h" + +#define N (1 << V_POW_LOG_TABLE_BITS) + +/* Algorithm: + + x = 2^k z + log(x) = k ln2 + log(c) + log(z/c) + log(z/c) = poly(z/c - 1) + + where z is in [0x1.69555p-1; 0x1.69555p0] which is split into N subintervals + and z falls into the ith one, then table entries are computed as + + tab[i].invc = 1/c + tab[i].logc = round(0x1p43*log(c))/0x1p43 + tab[i].logctail = (double)(log(c) - logc) + + where c is chosen near the center of the subinterval such that 1/c has only + a few precision bits so z/c - 1 is exactly representible as double: + + 1/c = center < 1 ? round(N/center)/N : round(2*N/center)/N/2 + + Note: |z/c - 1| < 1/N for the chosen c, |log(c) - logc - logctail| < + 0x1p-97, the last few bits of logc are rounded away so k*ln2hi + logc has no + rounding error and the interval for z is selected such that near x == 1, + where log(x) + is tiny, large cancellation error is avoided in logc + poly(z/c - 1). 
*/ +const struct v_pow_log_data __v_pow_log_data = { + /* relative error: 0x1.11922ap-70 in [-0x1.6bp-8, 0x1.6bp-8] + Coefficients are scaled to match the scaling during evaluation. */ + .poly = { -0x1p-1, -0x1.555555555556p-1, 0x1.0000000000006p-1, + 0x1.999999959554ep-1, -0x1.555555529a47ap-1, -0x1.2495b9b4845e9p0, + 0x1.0002b8b263fc3p0, }, + .ln2_hi = 0x1.62e42fefa3800p-1, + .ln2_lo = 0x1.ef35793c76730p-45, + .invc = { 0x1.6a00000000000p+0, 0x1.6800000000000p+0, 0x1.6600000000000p+0, + 0x1.6400000000000p+0, 0x1.6200000000000p+0, 0x1.6000000000000p+0, + 0x1.5e00000000000p+0, 0x1.5c00000000000p+0, 0x1.5a00000000000p+0, + 0x1.5800000000000p+0, 0x1.5600000000000p+0, 0x1.5600000000000p+0, + 0x1.5400000000000p+0, 0x1.5200000000000p+0, 0x1.5000000000000p+0, + 0x1.4e00000000000p+0, 0x1.4c00000000000p+0, 0x1.4a00000000000p+0, + 0x1.4a00000000000p+0, 0x1.4800000000000p+0, 0x1.4600000000000p+0, + 0x1.4400000000000p+0, 0x1.4200000000000p+0, 0x1.4000000000000p+0, + 0x1.4000000000000p+0, 0x1.3e00000000000p+0, 0x1.3c00000000000p+0, + 0x1.3a00000000000p+0, 0x1.3a00000000000p+0, 0x1.3800000000000p+0, + 0x1.3600000000000p+0, 0x1.3400000000000p+0, 0x1.3400000000000p+0, + 0x1.3200000000000p+0, 0x1.3000000000000p+0, 0x1.3000000000000p+0, + 0x1.2e00000000000p+0, 0x1.2c00000000000p+0, 0x1.2c00000000000p+0, + 0x1.2a00000000000p+0, 0x1.2800000000000p+0, 0x1.2600000000000p+0, + 0x1.2600000000000p+0, 0x1.2400000000000p+0, 0x1.2400000000000p+0, + 0x1.2200000000000p+0, 0x1.2000000000000p+0, 0x1.2000000000000p+0, + 0x1.1e00000000000p+0, 0x1.1c00000000000p+0, 0x1.1c00000000000p+0, + 0x1.1a00000000000p+0, 0x1.1a00000000000p+0, 0x1.1800000000000p+0, + 0x1.1600000000000p+0, 0x1.1600000000000p+0, 0x1.1400000000000p+0, + 0x1.1400000000000p+0, 0x1.1200000000000p+0, 0x1.1000000000000p+0, + 0x1.1000000000000p+0, 0x1.0e00000000000p+0, 0x1.0e00000000000p+0, + 0x1.0c00000000000p+0, 0x1.0c00000000000p+0, 0x1.0a00000000000p+0, + 0x1.0a00000000000p+0, 0x1.0800000000000p+0, 0x1.0800000000000p+0, + 
0x1.0600000000000p+0, 0x1.0400000000000p+0, 0x1.0400000000000p+0, + 0x1.0200000000000p+0, 0x1.0200000000000p+0, 0x1.0000000000000p+0, + 0x1.0000000000000p+0, 0x1.fc00000000000p-1, 0x1.f800000000000p-1, + 0x1.f400000000000p-1, 0x1.f000000000000p-1, 0x1.ec00000000000p-1, + 0x1.e800000000000p-1, 0x1.e400000000000p-1, 0x1.e200000000000p-1, + 0x1.de00000000000p-1, 0x1.da00000000000p-1, 0x1.d600000000000p-1, + 0x1.d400000000000p-1, 0x1.d000000000000p-1, 0x1.cc00000000000p-1, + 0x1.ca00000000000p-1, 0x1.c600000000000p-1, 0x1.c400000000000p-1, + 0x1.c000000000000p-1, 0x1.be00000000000p-1, 0x1.ba00000000000p-1, + 0x1.b800000000000p-1, 0x1.b400000000000p-1, 0x1.b200000000000p-1, + 0x1.ae00000000000p-1, 0x1.ac00000000000p-1, 0x1.aa00000000000p-1, + 0x1.a600000000000p-1, 0x1.a400000000000p-1, 0x1.a000000000000p-1, + 0x1.9e00000000000p-1, 0x1.9c00000000000p-1, 0x1.9a00000000000p-1, + 0x1.9600000000000p-1, 0x1.9400000000000p-1, 0x1.9200000000000p-1, + 0x1.9000000000000p-1, 0x1.8c00000000000p-1, 0x1.8a00000000000p-1, + 0x1.8800000000000p-1, 0x1.8600000000000p-1, 0x1.8400000000000p-1, + 0x1.8200000000000p-1, 0x1.7e00000000000p-1, 0x1.7c00000000000p-1, + 0x1.7a00000000000p-1, 0x1.7800000000000p-1, 0x1.7600000000000p-1, + 0x1.7400000000000p-1, 0x1.7200000000000p-1, 0x1.7000000000000p-1, + 0x1.6e00000000000p-1, 0x1.6c00000000000p-1, }, + .logc + = { -0x1.62c82f2b9c800p-2, -0x1.5d1bdbf580800p-2, -0x1.5767717455800p-2, + -0x1.51aad872df800p-2, -0x1.4be5f95777800p-2, -0x1.4618bc21c6000p-2, + -0x1.404308686a800p-2, -0x1.3a64c55694800p-2, -0x1.347dd9a988000p-2, + -0x1.2e8e2bae12000p-2, -0x1.2895a13de8800p-2, -0x1.2895a13de8800p-2, + -0x1.22941fbcf7800p-2, -0x1.1c898c1699800p-2, -0x1.1675cababa800p-2, + -0x1.1058bf9ae4800p-2, -0x1.0a324e2739000p-2, -0x1.0402594b4d000p-2, + -0x1.0402594b4d000p-2, -0x1.fb9186d5e4000p-3, -0x1.ef0adcbdc6000p-3, + -0x1.e27076e2af000p-3, -0x1.d5c216b4fc000p-3, -0x1.c8ff7c79aa000p-3, + -0x1.c8ff7c79aa000p-3, -0x1.bc286742d9000p-3, -0x1.af3c94e80c000p-3, + 
-0x1.a23bc1fe2b000p-3, -0x1.a23bc1fe2b000p-3, -0x1.9525a9cf45000p-3, + -0x1.87fa06520d000p-3, -0x1.7ab890210e000p-3, -0x1.7ab890210e000p-3, + -0x1.6d60fe719d000p-3, -0x1.5ff3070a79000p-3, -0x1.5ff3070a79000p-3, + -0x1.526e5e3a1b000p-3, -0x1.44d2b6ccb8000p-3, -0x1.44d2b6ccb8000p-3, + -0x1.371fc201e9000p-3, -0x1.29552f81ff000p-3, -0x1.1b72ad52f6000p-3, + -0x1.1b72ad52f6000p-3, -0x1.0d77e7cd09000p-3, -0x1.0d77e7cd09000p-3, + -0x1.fec9131dbe000p-4, -0x1.e27076e2b0000p-4, -0x1.e27076e2b0000p-4, + -0x1.c5e548f5bc000p-4, -0x1.a926d3a4ae000p-4, -0x1.a926d3a4ae000p-4, + -0x1.8c345d631a000p-4, -0x1.8c345d631a000p-4, -0x1.6f0d28ae56000p-4, + -0x1.51b073f062000p-4, -0x1.51b073f062000p-4, -0x1.341d7961be000p-4, + -0x1.341d7961be000p-4, -0x1.16536eea38000p-4, -0x1.f0a30c0118000p-5, + -0x1.f0a30c0118000p-5, -0x1.b42dd71198000p-5, -0x1.b42dd71198000p-5, + -0x1.77458f632c000p-5, -0x1.77458f632c000p-5, -0x1.39e87b9fec000p-5, + -0x1.39e87b9fec000p-5, -0x1.f829b0e780000p-6, -0x1.f829b0e780000p-6, + -0x1.7b91b07d58000p-6, -0x1.fc0a8b0fc0000p-7, -0x1.fc0a8b0fc0000p-7, + -0x1.fe02a6b100000p-8, -0x1.fe02a6b100000p-8, 0x0.0000000000000p+0, + 0x0.0000000000000p+0, 0x1.0101575890000p-7, 0x1.0205658938000p-6, + 0x1.8492528c90000p-6, 0x1.0415d89e74000p-5, 0x1.466aed42e0000p-5, + 0x1.894aa149fc000p-5, 0x1.ccb73cdddc000p-5, 0x1.eea31c006c000p-5, + 0x1.1973bd1466000p-4, 0x1.3bdf5a7d1e000p-4, 0x1.5e95a4d97a000p-4, + 0x1.700d30aeac000p-4, 0x1.9335e5d594000p-4, 0x1.b6ac88dad6000p-4, + 0x1.c885801bc4000p-4, 0x1.ec739830a2000p-4, 0x1.fe89139dbe000p-4, + 0x1.1178e8227e000p-3, 0x1.1aa2b7e23f000p-3, 0x1.2d1610c868000p-3, + 0x1.365fcb0159000p-3, 0x1.4913d8333b000p-3, 0x1.527e5e4a1b000p-3, + 0x1.6574ebe8c1000p-3, 0x1.6f0128b757000p-3, 0x1.7898d85445000p-3, + 0x1.8beafeb390000p-3, 0x1.95a5adcf70000p-3, 0x1.a93ed3c8ae000p-3, + 0x1.b31d8575bd000p-3, 0x1.bd087383be000p-3, 0x1.c6ffbc6f01000p-3, + 0x1.db13db0d49000p-3, 0x1.e530effe71000p-3, 0x1.ef5ade4dd0000p-3, + 0x1.f991c6cb3b000p-3, 0x1.07138604d5800p-2, 
0x1.0c42d67616000p-2, + 0x1.1178e8227e800p-2, 0x1.16b5ccbacf800p-2, 0x1.1bf99635a6800p-2, + 0x1.214456d0eb800p-2, 0x1.2bef07cdc9000p-2, 0x1.314f1e1d36000p-2, + 0x1.36b6776be1000p-2, 0x1.3c25277333000p-2, 0x1.419b423d5e800p-2, + 0x1.4718dc271c800p-2, 0x1.4c9e09e173000p-2, 0x1.522ae0738a000p-2, + 0x1.57bf753c8d000p-2, 0x1.5d5bddf596000p-2, }, + .logctail + = { 0x1.ab42428375680p-48, -0x1.ca508d8e0f720p-46, -0x1.362a4d5b6506dp-45, + -0x1.684e49eb067d5p-49, -0x1.41b6993293ee0p-47, 0x1.3d82f484c84ccp-46, + 0x1.c42f3ed820b3ap-50, 0x1.0b1c686519460p-45, 0x1.5594dd4c58092p-45, + 0x1.67b1e99b72bd8p-45, 0x1.5ca14b6cfb03fp-46, 0x1.5ca14b6cfb03fp-46, + -0x1.65a242853da76p-46, -0x1.fafbc68e75404p-46, 0x1.f1fc63382a8f0p-46, + -0x1.6a8c4fd055a66p-45, -0x1.c6bee7ef4030ep-47, -0x1.036b89ef42d7fp-48, + -0x1.036b89ef42d7fp-48, 0x1.d572aab993c87p-47, 0x1.b26b79c86af24p-45, + -0x1.72f4f543fff10p-46, 0x1.1ba91bbca681bp-45, 0x1.7794f689f8434p-45, + 0x1.7794f689f8434p-45, 0x1.94eb0318bb78fp-46, 0x1.a4e633fcd9066p-52, + -0x1.58c64dc46c1eap-45, -0x1.58c64dc46c1eap-45, -0x1.ad1d904c1d4e3p-45, + 0x1.bbdbf7fdbfa09p-45, 0x1.bdb9072534a58p-45, 0x1.bdb9072534a58p-45, + -0x1.0e46aa3b2e266p-46, -0x1.e9e439f105039p-46, -0x1.e9e439f105039p-46, + -0x1.0de8b90075b8fp-45, 0x1.70cc16135783cp-46, 0x1.70cc16135783cp-46, + 0x1.178864d27543ap-48, -0x1.48d301771c408p-45, -0x1.e80a41811a396p-45, + -0x1.e80a41811a396p-45, 0x1.a699688e85bf4p-47, 0x1.a699688e85bf4p-47, + -0x1.575545ca333f2p-45, 0x1.a342c2af0003cp-45, 0x1.a342c2af0003cp-45, + -0x1.d0c57585fbe06p-46, 0x1.53935e85baac8p-45, 0x1.53935e85baac8p-45, + 0x1.37c294d2f5668p-46, 0x1.37c294d2f5668p-46, -0x1.69737c93373dap-45, + 0x1.f025b61c65e57p-46, 0x1.f025b61c65e57p-46, 0x1.c5edaccf913dfp-45, + 0x1.c5edaccf913dfp-45, 0x1.47c5e768fa309p-46, 0x1.d599e83368e91p-45, + 0x1.d599e83368e91p-45, 0x1.c827ae5d6704cp-46, 0x1.c827ae5d6704cp-46, + -0x1.cfc4634f2a1eep-45, -0x1.cfc4634f2a1eep-45, 0x1.502b7f526feaap-48, + 0x1.502b7f526feaap-48, -0x1.980267c7e09e4p-45, 
-0x1.980267c7e09e4p-45, + -0x1.88d5493faa639p-45, -0x1.f1e7cf6d3a69cp-50, -0x1.f1e7cf6d3a69cp-50, + -0x1.9e23f0dda40e4p-46, -0x1.9e23f0dda40e4p-46, 0x0.0000000000000p+0, + 0x0.0000000000000p+0, -0x1.0c76b999d2be8p-46, -0x1.3dc5b06e2f7d2p-45, + -0x1.aa0ba325a0c34p-45, 0x1.111c05cf1d753p-47, -0x1.c167375bdfd28p-45, + -0x1.97995d05a267dp-46, -0x1.a68f247d82807p-46, -0x1.e113e4fc93b7bp-47, + -0x1.5325d560d9e9bp-45, 0x1.cc85ea5db4ed7p-45, -0x1.c69063c5d1d1ep-45, + 0x1.c1e8da99ded32p-49, 0x1.3115c3abd47dap-45, -0x1.390802bf768e5p-46, + 0x1.646d1c65aacd3p-45, -0x1.dc068afe645e0p-45, -0x1.534d64fa10afdp-45, + 0x1.1ef78ce2d07f2p-45, 0x1.ca78e44389934p-45, 0x1.39d6ccb81b4a1p-47, + 0x1.62fa8234b7289p-51, 0x1.5837954fdb678p-45, 0x1.633e8e5697dc7p-45, + 0x1.9cf8b2c3c2e78p-46, -0x1.5118de59c21e1p-45, -0x1.c661070914305p-46, + -0x1.73d54aae92cd1p-47, 0x1.7f22858a0ff6fp-47, -0x1.8724350562169p-45, + -0x1.c358d4eace1aap-47, -0x1.d4bc4595412b6p-45, -0x1.1ec72c5962bd2p-48, + -0x1.aff2af715b035p-45, 0x1.212276041f430p-51, -0x1.a211565bb8e11p-51, + 0x1.bcbecca0cdf30p-46, 0x1.89cdb16ed4e91p-48, 0x1.7188b163ceae9p-45, + -0x1.c210e63a5f01cp-45, 0x1.b9acdf7a51681p-45, 0x1.ca6ed5147bdb7p-45, + 0x1.a87deba46baeap-47, 0x1.a9cfa4a5004f4p-45, -0x1.8e27ad3213cb8p-45, + 0x1.16ecdb0f177c8p-46, 0x1.83b54b606bd5cp-46, 0x1.8e436ec90e09dp-47, + -0x1.f27ce0967d675p-45, -0x1.e20891b0ad8a4p-45, 0x1.ebe708164c759p-45, + 0x1.fadedee5d40efp-46, -0x1.a0b2a08a465dcp-47, }, +}; diff --git a/sysdeps/aarch64/fpu/v_powf_data.c b/sysdeps/aarch64/fpu/v_powf_data.c new file mode 100644 index 0000000000..f789b84850 --- /dev/null +++ b/sysdeps/aarch64/fpu/v_powf_data.c @@ -0,0 +1,102 @@ +/* Coefficients for single-precision SVE pow(x) function. + + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + + +#include "vecmath_config.h" + +const struct v_powf_data __v_powf_data = { + .invc = { 0x1.6489890582816p+0, + 0x1.5cf19b35e3472p+0, + 0x1.55aac0e956d65p+0, + 0x1.4eb0022977e01p+0, + 0x1.47fcccda1dd1fp+0, + 0x1.418ceabab68c1p+0, + 0x1.3b5c788f1edb3p+0, + 0x1.3567de48e9c9ap+0, + 0x1.2fabc80fd19bap+0, + 0x1.2a25200ce536bp+0, + 0x1.24d108e0152e3p+0, + 0x1.1facd8ab2fbe1p+0, + 0x1.1ab614a03efdfp+0, + 0x1.15ea6d03af9ffp+0, + 0x1.1147b994bb776p+0, + 0x1.0ccbf650593aap+0, + 0x1.0875408477302p+0, + 0x1.0441d42a93328p+0, + 0x1p+0, + 0x1.f1d006c855e86p-1, + 0x1.e28c3341aa301p-1, + 0x1.d4bdf9aa64747p-1, + 0x1.c7b45a24e5803p-1, + 0x1.bb5f5eb2ed60ap-1, + 0x1.afb0bff8fe6b4p-1, + 0x1.a49badf7ab1f5p-1, + 0x1.9a14a111fc4c9p-1, + 0x1.901131f5b2fdcp-1, + 0x1.8687f73f6d865p-1, + 0x1.7d7067eb77986p-1, + 0x1.74c2c1cf97b65p-1, + 0x1.6c77f37cff2a1p-1 + }, + .logc = { -0x1.e960f97b22702p+3, + -0x1.c993406cd4db6p+3, + -0x1.aa711d9a7d0f3p+3, + -0x1.8bf37bacdce9bp+3, + -0x1.6e13b3519946ep+3, + -0x1.50cb8281e4089p+3, + -0x1.341504a237e2bp+3, + -0x1.17eaab624ffbbp+3, + -0x1.f88e708f8c853p+2, + -0x1.c24b6da113914p+2, + -0x1.8d02ee397cb1dp+2, + -0x1.58ac1223408b3p+2, + -0x1.253e6fd190e89p+2, + -0x1.e5641882c12ffp+1, + -0x1.81fea712926f7p+1, + -0x1.203e240de64a3p+1, + -0x1.8029b86a78281p0, + -0x1.85d713190fb9p-1, + 0x0p+0, + 
0x1.4c1cc07312997p0, + 0x1.5e1848ccec948p+1, + 0x1.04cfcb7f1196fp+2, + 0x1.582813d463c21p+2, + 0x1.a936fa68760ccp+2, + 0x1.f81bc31d6cc4ep+2, + 0x1.2279a09fae6b1p+3, + 0x1.47ec0b6df5526p+3, + 0x1.6c71762280f1p+3, + 0x1.90155070798dap+3, + 0x1.b2e23b1d3068cp+3, + 0x1.d4e21b0daa86ap+3, + 0x1.f61e2a2f67f3fp+3 + }, + .scale = { 0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, + 0x3fef9301d0125b51, 0x3fef72b83c7d517b, 0x3fef54873168b9aa, + 0x3fef387a6e756238, 0x3fef1e9df51fdee1, 0x3fef06fe0a31b715, + 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d, + 0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, + 0x3feea47eb03a5585, 0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, + 0x3feea11473eb0187, 0x3feea589994cce13, 0x3feeace5422aa0db, + 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d, + 0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, + 0x3fef3720dcef9069, 0x3fef5818dcfba487, 0x3fef7c97337b9b5f, + 0x3fefa4afa2a490da, 0x3fefd0765b6e4540, + }, +}; diff --git a/sysdeps/aarch64/fpu/vecmath_config.h b/sysdeps/aarch64/fpu/vecmath_config.h index c8cfc03bc0..7f0a8aa5f2 100644 --- a/sysdeps/aarch64/fpu/vecmath_config.h +++ b/sysdeps/aarch64/fpu/vecmath_config.h @@ -35,17 +35,6 @@ __ptr; \ }) -static inline uint64_t -asuint64 (double f) -{ - union - { - double f; - uint64_t i; - } u = { f }; - return u.i; -} - #define V_LOG_POLY_ORDER 6 #define V_LOG_TABLE_BITS 7 extern const struct v_log_data @@ -130,4 +119,35 @@ extern const struct erfcf_data } tab[645]; } __erfcf_data attribute_hidden; +/* Some data for AdvSIMD and SVE pow's internal exp and log. */ +#define V_POW_EXP_TABLE_BITS 8 +extern const struct v_pow_exp_data +{ + double poly[3]; + double n_over_ln2, ln2_over_n_hi, ln2_over_n_lo, shift; + uint64_t sbits[1 << V_POW_EXP_TABLE_BITS]; +} __v_pow_exp_data attribute_hidden; + +#define V_POW_LOG_TABLE_BITS 7 +extern const struct v_pow_log_data +{ + double poly[7]; /* First coefficient is 1. 
*/ + double ln2_hi, ln2_lo; + double invc[1 << V_POW_LOG_TABLE_BITS]; + double logc[1 << V_POW_LOG_TABLE_BITS]; + double logctail[1 << V_POW_LOG_TABLE_BITS]; +} __v_pow_log_data attribute_hidden; + +/* Some data for SVE powf's internal exp and log. */ +#define V_POWF_EXP2_TABLE_BITS 5 +#define V_POWF_EXP2_N (1 << V_POWF_EXP2_TABLE_BITS) +#define V_POWF_LOG2_TABLE_BITS 5 +#define V_POWF_LOG2_N (1 << V_POWF_LOG2_TABLE_BITS) +extern const struct v_powf_data +{ + double invc[V_POWF_LOG2_N]; + double logc[V_POWF_LOG2_N]; + uint64_t scale[V_POWF_EXP2_N]; +} __v_powf_data attribute_hidden; + #endif diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps index 116a5404f5..656d4b0169 100644 --- a/sysdeps/aarch64/libm-test-ulps +++ b/sysdeps/aarch64/libm-test-ulps @@ -477,11 +477,19 @@ double: 4 float: 1 ldouble: 1 +Function: "cbrt_advsimd": +double: 1 +float: 1 + Function: "cbrt_downward": double: 4 float: 1 ldouble: 1 +Function: "cbrt_sve": +double: 1 +float: 1 + Function: "cbrt_towardzero": double: 3 float: 1 @@ -1174,10 +1182,18 @@ double: 1 float: 1 ldouble: 1 +Function: "hypot_advsimd": +double: 1 +float: 1 + Function: "hypot_downward": double: 1 ldouble: 1 +Function: "hypot_sve": +double: 1 +float: 1 + Function: "hypot_towardzero": double: 1 ldouble: 1 @@ -1355,6 +1371,26 @@ double: 3 float: 3 ldouble: 1 +Function: "log2p1": +double: 1 +float: 1 +ldouble: 3 + +Function: "log2p1_downward": +double: 2 +float: 2 +ldouble: 3 + +Function: "log2p1_towardzero": +double: 2 +float: 2 +ldouble: 2 + +Function: "log2p1_upward": +double: 1 +float: 2 +ldouble: 2 + Function: "log_advsimd": double: 1 float: 3 @@ -1381,11 +1417,19 @@ double: 1 float: 1 ldouble: 2 +Function: "pow_advsimd": +double: 1 +float: 2 + Function: "pow_downward": double: 1 float: 1 ldouble: 2 +Function: "pow_sve": +double: 1 +float: 2 + Function: "pow_towardzero": double: 1 float: 1 diff --git a/sysdeps/aarch64/multiarch/memchr_generic.S b/sysdeps/aarch64/multiarch/memchr_generic.S index 
0ed5811745..8d554275ba 100644 --- a/sysdeps/aarch64/multiarch/memchr_generic.S +++ b/sysdeps/aarch64/multiarch/memchr_generic.S @@ -24,6 +24,9 @@ # undef libc_hidden_builtin_def # define libc_hidden_builtin_def(name) +# undef weak_alias +# define weak_alias(a, b) + /* Add a hidden definition for use within libc.so. */ # ifdef SHARED .globl __GI_memchr; __GI_memchr = __memchr_generic diff --git a/sysdeps/aarch64/multiarch/memset_generic.S b/sysdeps/aarch64/multiarch/memset_generic.S index 81748bdbce..e125a5ed85 100644 --- a/sysdeps/aarch64/multiarch/memset_generic.S +++ b/sysdeps/aarch64/multiarch/memset_generic.S @@ -33,3 +33,7 @@ #endif #include <../memset.S> + +#if IS_IN (rtld) +strong_alias (memset, __memset_generic) +#endif diff --git a/sysdeps/aarch64/multiarch/strlen_generic.S b/sysdeps/aarch64/multiarch/strlen_generic.S index f980a9a68f..ceeafe920c 100644 --- a/sysdeps/aarch64/multiarch/strlen_generic.S +++ b/sysdeps/aarch64/multiarch/strlen_generic.S @@ -30,6 +30,9 @@ # undef libc_hidden_builtin_def # define libc_hidden_builtin_def(name) +# undef weak_alias +# define weak_alias(a, b) + # ifdef SHARED /* It doesn't make sense to send libc-internal strlen calls through a PLT. */ .globl __GI_strlen; __GI_strlen = __strlen_generic diff --git a/sysdeps/arm/libm-test-ulps b/sysdeps/arm/libm-test-ulps index 5720e73288..fc7ba1439a 100644 --- a/sysdeps/arm/libm-test-ulps +++ b/sysdeps/arm/libm-test-ulps @@ -965,6 +965,22 @@ Function: "log2_upward": double: 3 float: 3 +Function: "log2p1": +double: 1 +float: 1 + +Function: "log2p1_downward": +double: 2 +float: 2 + +Function: "log2p1_towardzero": +double: 2 +float: 2 + +Function: "log2p1_upward": +double: 1 +float: 2 + Function: "log_downward": float: 2 diff --git a/sysdeps/i386/fpu/w_exp10_compat.c b/sysdeps/i386/fpu/w_exp10_compat.c index b53455386e..49a0e03385 100644 --- a/sysdeps/i386/fpu/w_exp10_compat.c +++ b/sysdeps/i386/fpu/w_exp10_compat.c @@ -1,3 +1,8 @@ /* i386 provides an optimized __ieee754_exp10. 
*/ -#define NO_COMPAT_NEEDED 1 -#include <math/w_exp10_compat.c> +#ifdef SHARED +# define NO_COMPAT_NEEDED 1 +# include <math/w_exp10_compat.c> +#else +# include <math-type-macros-double.h> +# include <w_exp10_template.c> +#endif diff --git a/sysdeps/i386/fpu/w_fmod_compat.c b/sysdeps/i386/fpu/w_fmod_compat.c index 5ac9995ffd..528bfc2a13 100644 --- a/sysdeps/i386/fpu/w_fmod_compat.c +++ b/sysdeps/i386/fpu/w_fmod_compat.c @@ -7,8 +7,9 @@ # define LIBM_SVID_COMPAT 1 # undef compat_symbol # define compat_symbol(a, b, c, d) -#endif -#include <math/w_fmod_compat.c> -#ifdef SHARED +# include <math/w_fmod_compat.c> libm_alias_double (__fmod_compat, fmod) +#else +#include <math-type-macros-double.h> +#include <w_fmod_template.c> #endif diff --git a/sysdeps/i386/fpu/w_fmodf_compat.c b/sysdeps/i386/fpu/w_fmodf_compat.c index cc417e07d3..5a61693e51 100644 --- a/sysdeps/i386/fpu/w_fmodf_compat.c +++ b/sysdeps/i386/fpu/w_fmodf_compat.c @@ -7,8 +7,9 @@ # define LIBM_SVID_COMPAT 1 # undef compat_symbol # define compat_symbol(a, b, c, d) -#endif -#include <math/w_fmodf_compat.c> -#ifdef SHARED +# include <math/w_fmodf_compat.c> libm_alias_float (__fmod_compat, fmod) +#else +#include <math-type-macros-float.h> +#include <w_fmod_template.c> #endif diff --git a/sysdeps/i386/i586/memcpy.S b/sysdeps/i386/i586/memcpy.S index 3e26f112d6..79856d498a 100644 --- a/sysdeps/i386/i586/memcpy.S +++ b/sysdeps/i386/i586/memcpy.S @@ -26,7 +26,7 @@ #define LEN SRC+4 .text -#if defined PIC && IS_IN (libc) +#if defined SHARED && IS_IN (libc) ENTRY (__memcpy_chk) movl 12(%esp), %eax cmpl %eax, 16(%esp) diff --git a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps index da6602f909..9eb3088d7a 100644 --- a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps +++ b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps @@ -1424,6 +1424,30 @@ float: 1 float128: 1 ldouble: 1 +Function: "log2p1": +double: 1 +float: 1 +float128: 3 +ldouble: 2 + +Function: "log2p1_downward": 
+double: 1 +float: 1 +float128: 3 +ldouble: 4 + +Function: "log2p1_towardzero": +double: 1 +float: 1 +float128: 2 +ldouble: 4 + +Function: "log2p1_upward": +double: 1 +float: 1 +float128: 2 +ldouble: 5 + Function: "log_downward": double: 1 float128: 1 diff --git a/sysdeps/i386/i686/memmove.S b/sysdeps/i386/i686/memmove.S index f230359ad6..effd958120 100644 --- a/sysdeps/i386/i686/memmove.S +++ b/sysdeps/i386/i686/memmove.S @@ -29,7 +29,7 @@ #define SRC DEST+4 #define LEN SRC+4 -#if defined PIC && IS_IN (libc) +#if defined SHARED && IS_IN (libc) ENTRY_CHK (__memmove_chk) movl 12(%esp), %eax cmpl %eax, 16(%esp) diff --git a/sysdeps/i386/i686/memset.S b/sysdeps/i386/i686/memset.S index f02f5a6df7..ab06771ea0 100644 --- a/sysdeps/i386/i686/memset.S +++ b/sysdeps/i386/i686/memset.S @@ -27,7 +27,7 @@ #define LEN CHR+4 .text -#if defined PIC && IS_IN (libc) +#if defined SHARED && IS_IN (libc) ENTRY_CHK (__memset_chk) movl 12(%esp), %eax cmpl %eax, 16(%esp) diff --git a/sysdeps/i386/stpncpy.S b/sysdeps/i386/stpncpy.S index 895fee8a1e..74130bc71d 100644 --- a/sysdeps/i386/stpncpy.S +++ b/sysdeps/i386/stpncpy.S @@ -139,4 +139,6 @@ L(9): popl %esi /* restore saved register content */ END (__stpncpy) libc_hidden_def (__stpncpy) +#ifndef __stpncpy weak_alias (__stpncpy, stpncpy) +#endif diff --git a/sysdeps/ieee754/dbl-64/e_exp10.c b/sysdeps/ieee754/dbl-64/e_exp10.c index 225fc74c4c..7ea8270063 100644 --- a/sysdeps/ieee754/dbl-64/e_exp10.c +++ b/sysdeps/ieee754/dbl-64/e_exp10.c @@ -38,7 +38,7 @@ special_case (uint64_t sbits, double_t tmp, uint64_t ki) { double_t scale, y; - if (ki - (1ull << 16) < 0x80000000) + if ((ki & 0x80000000) == 0) { /* The exponent of scale might have overflowed by 1. */ sbits -= 1ull << 52; @@ -100,14 +100,14 @@ __exp10 (double x) /* Reduce x: z = x * N / log10(2), k = round(z). 
*/ double_t z = __exp_data.invlog10_2N * x; double_t kd; - int64_t ki; + uint64_t ki; #if TOINT_INTRINSICS kd = roundtoint (z); ki = converttoint (z); #else kd = math_narrow_eval (z + Shift); + ki = asuint64 (kd); kd -= Shift; - ki = kd; #endif /* r = x - k * log10(2), r in [-0.5, 0.5]. */ diff --git a/sysdeps/ieee754/dbl-64/s_nearbyint.c b/sysdeps/ieee754/dbl-64/s_nearbyint.c index 08fe517b83..606002d680 100644 --- a/sysdeps/ieee754/dbl-64/s_nearbyint.c +++ b/sysdeps/ieee754/dbl-64/s_nearbyint.c @@ -72,4 +72,6 @@ __nearbyint (double x) return t; #endif /* ! USE_NEARBYINT_BUILTIN */ } +#ifndef __nearbyint libm_alias_double (__nearbyint, nearbyint) +#endif diff --git a/sysdeps/ieee754/float128/s_isnanf128.c b/sysdeps/ieee754/float128/s_isnanf128.c index 59f71533ce..291d4427f5 100644 --- a/sysdeps/ieee754/float128/s_isnanf128.c +++ b/sysdeps/ieee754/float128/s_isnanf128.c @@ -11,8 +11,14 @@ #include "../ldbl-128/s_isnanl.c" #if !IS_IN (libm) #include <float128-abi.h> +#ifdef SHARED hidden_ver (__isnanf128_impl, __isnanf128) +#else +strong_alias (__isnanf128_impl, __isnanf128) +#endif +#ifndef isnanl _weak_alias (__isnanf128_impl, isnanl) +#endif versioned_symbol (libc, __isnanf128_impl, __isnanf128, GLIBC_2_34); #if (SHLIB_COMPAT (libc, FLOAT128_VERSION_M, GLIBC_2_34)) strong_alias (__isnanf128_impl, __isnanf128_alias) diff --git a/sysdeps/ieee754/flt-32/s_nearbyintf.c b/sysdeps/ieee754/flt-32/s_nearbyintf.c index b270749f4c..5dd0e8f8af 100644 --- a/sysdeps/ieee754/flt-32/s_nearbyintf.c +++ b/sysdeps/ieee754/flt-32/s_nearbyintf.c @@ -68,4 +68,6 @@ __nearbyintf (float x) return t; #endif /* ! 
USE_NEARBYINT_BUILTIN */ } +#ifndef __nearbyintf libm_alias_float (__nearbyint, nearbyint) +#endif diff --git a/sysdeps/ieee754/ldbl-128/s_finitel.c b/sysdeps/ieee754/ldbl-128/s_finitel.c index bfdd88fbbb..f1fff0ced2 100644 --- a/sysdeps/ieee754/ldbl-128/s_finitel.c +++ b/sysdeps/ieee754/ldbl-128/s_finitel.c @@ -32,4 +32,6 @@ int __finitel(_Float128 x) -0x7fff000000000000LL)>>63); } mathx_hidden_def (__finitel) +#ifndef finitel weak_alias (__finitel, finitel) +#endif diff --git a/sysdeps/ieee754/ldbl-128/s_isinfl.c b/sysdeps/ieee754/ldbl-128/s_isinfl.c index baa9a98e80..a0fa0ba659 100644 --- a/sysdeps/ieee754/ldbl-128/s_isinfl.c +++ b/sysdeps/ieee754/ldbl-128/s_isinfl.c @@ -24,4 +24,6 @@ __isinfl (_Float128 x) return ~(lx >> 63) & (hx >> 62); } mathx_hidden_def (__isinfl) +#ifndef isinfl weak_alias (__isinfl, isinfl) +#endif diff --git a/sysdeps/ieee754/ldbl-128/s_nextafterl.c b/sysdeps/ieee754/ldbl-128/s_nextafterl.c index 90e71e3907..f3f3b138dd 100644 --- a/sysdeps/ieee754/ldbl-128/s_nextafterl.c +++ b/sysdeps/ieee754/ldbl-128/s_nextafterl.c @@ -83,5 +83,9 @@ _Float128 __nextafterl(_Float128 x, _Float128 y) return x; } libm_alias_ldouble (__nextafter, nextafter) +#ifndef __nexttowardl strong_alias (__nextafterl, __nexttowardl) +#endif +#ifndef nexttowardl weak_alias (__nextafterl, nexttowardl) +#endif diff --git a/sysdeps/ieee754/ldbl-128ibm-compat/Versions b/sysdeps/ieee754/ldbl-128ibm-compat/Versions index c0679a1153..c9a74366fd 100644 --- a/sysdeps/ieee754/ldbl-128ibm-compat/Versions +++ b/sysdeps/ieee754/ldbl-128ibm-compat/Versions @@ -137,6 +137,9 @@ libm { __fminimum_magieee128; __fminimum_mag_numieee128; } + GLIBC_2.40 { + __log2p1ieee128; + } } libc { LDBL_IBM128_VERSION { diff --git a/sysdeps/ieee754/ldbl-64-128/s_copysignl.c b/sysdeps/ieee754/ldbl-64-128/s_copysignl.c index 11b42d04ba..80137847d3 100644 --- a/sysdeps/ieee754/ldbl-64-128/s_copysignl.c +++ b/sysdeps/ieee754/ldbl-64-128/s_copysignl.c @@ -1,10 +1,10 @@ #include <math_ldbl_opt.h> #include 
<libm-alias-ldouble.h> -#if IS_IN (libc) +#if IS_IN (libc) && defined SHARED # undef libm_alias_ldouble # define libm_alias_ldouble(from, to) #endif #include <sysdeps/ieee754/ldbl-128/s_copysignl.c> -#if IS_IN (libc) +#if IS_IN (libc) && defined SHARED long_double_symbol (libc, __copysignl, copysignl); #endif diff --git a/sysdeps/ieee754/ldbl-64-128/s_frexpl.c b/sysdeps/ieee754/ldbl-64-128/s_frexpl.c index 73ac41e40c..f5f7d349f7 100644 --- a/sysdeps/ieee754/ldbl-64-128/s_frexpl.c +++ b/sysdeps/ieee754/ldbl-64-128/s_frexpl.c @@ -1,10 +1,10 @@ #include <math_ldbl_opt.h> #include <libm-alias-ldouble.h> -#if IS_IN (libc) +#if IS_IN (libc) && defined SHARED # undef libm_alias_ldouble # define libm_alias_ldouble(from, to) #endif #include <sysdeps/ieee754/ldbl-128/s_frexpl.c> -#if IS_IN (libc) +#if IS_IN (libc) && defined SHARED long_double_symbol (libc, __frexpl, frexpl); #endif diff --git a/sysdeps/ieee754/ldbl-64-128/s_modfl.c b/sysdeps/ieee754/ldbl-64-128/s_modfl.c index 7d7aeae111..ba3d31334a 100644 --- a/sysdeps/ieee754/ldbl-64-128/s_modfl.c +++ b/sysdeps/ieee754/ldbl-64-128/s_modfl.c @@ -1,10 +1,10 @@ #include <math_ldbl_opt.h> #include <libm-alias-ldouble.h> -#if IS_IN (libc) +#if IS_IN (libc) && defined SHARED # undef libm_alias_ldouble # define libm_alias_ldouble(from, to) #endif #include <sysdeps/ieee754/ldbl-128/s_modfl.c> -#if IS_IN (libc) +#if IS_IN (libc) && defined SHARED long_double_symbol (libc, __modfl, modfl); #endif diff --git a/sysdeps/ieee754/ldbl-opt/Makefile b/sysdeps/ieee754/ldbl-opt/Makefile index 74788ac999..d1eda5d022 100644 --- a/sysdeps/ieee754/ldbl-opt/Makefile +++ b/sysdeps/ieee754/ldbl-opt/Makefile @@ -11,47 +11,239 @@ libm-routines += s_nexttowardfd routines += nldbl-compat extra-libs += libnldbl -libnldbl-calls = asprintf dprintf fprintf fscanf fwprintf fwscanf iovfscanf \ - obstack_printf obstack_vprintf printf scanf snprintf \ - sprintf sscanf swprintf swscanf vasprintf vdprintf vfprintf \ - vfscanf vfwprintf vfwscanf vprintf vscanf 
vsnprintf \ - vsprintf vsscanf vswprintf vswscanf vwprintf vwscanf \ - wprintf wscanf printf_fp printf_size \ - fprintf_chk fwprintf_chk printf_chk snprintf_chk sprintf_chk \ - swprintf_chk vfprintf_chk vfwprintf_chk vprintf_chk \ - vsnprintf_chk vsprintf_chk vswprintf_chk vwprintf_chk \ - wprintf_chk asprintf_chk vasprintf_chk dprintf_chk \ - vdprintf_chk obstack_printf_chk obstack_vprintf_chk \ - syslog syslog_chk vsyslog vsyslog_chk \ - strfmon strfmon_l \ - strfroml \ - strtold strtold_l strtoldint wcstold wcstold_l wcstoldint \ - qecvt qfcvt qgcvt qecvt_r qfcvt_r \ - isinf isnan finite signbit scalb log2 lgamma_r ceil \ - significand acos asin atan atan2 cos sin tan cosh sinh \ - tanh acosh asinh atanh exp log log10 exp10 expm1 \ - log1p logb exp2 sqrt cbrt fabs floor j0 j1 y0 y1 erf erfc \ - lgamma tgamma gamma rint nearbyint round trunc \ - copysign fdim fmax fmin nextafter pow hypot fmod \ - remainder ldexp scalbn frexp modf scalbln fma nan sincos \ - jn yn ilogb remquo lrint lround llrint llround nexttowardf \ - nexttoward conj cacos cacosh casin catan catanh ccos ccosh \ - casinh cexp clog cproj csin csinh csqrt ctan ctanh cpow \ - cabs carg cimag creal clog10 \ - isoc99_scanf isoc99_fscanf isoc99_sscanf \ - isoc99_vscanf isoc99_vfscanf isoc99_vsscanf \ - isoc99_wscanf isoc99_fwscanf isoc99_swscanf \ - isoc99_vwscanf isoc99_vfwscanf isoc99_vswscanf \ - isoc23_scanf isoc23_fscanf isoc23_sscanf \ - isoc23_vscanf isoc23_vfscanf isoc23_vsscanf \ - isoc23_wscanf isoc23_fwscanf isoc23_swscanf \ - isoc23_vwscanf isoc23_vfwscanf isoc23_vswscanf \ - nextup nextdown totalorder totalordermag getpayload \ - canonicalize setpayload setpayloadsig llogb fmaxmag fminmag \ - roundeven fromfp ufromfp fromfpx ufromfpx fadd dadd \ - fdiv ddiv ffma dfma fmul dmul fsqrt dsqrt fsub dsub \ - fmaximum fmaximum_mag fmaximum_num fmaximum_mag_num \ - fminimum fminimum_mag fminimum_num fminimum_mag_num +libnldbl-calls = \ + acos \ + acosh \ + asin \ + asinh \ + asprintf \ + 
asprintf_chk \ + atan \ + atan2 \ + atanh \ + cabs \ + cacos \ + cacosh \ + canonicalize \ + carg \ + casin \ + casinh \ + catan \ + catanh \ + cbrt \ + ccos \ + ccosh \ + ceil \ + cexp \ + cimag \ + clog \ + clog10 \ + conj \ + copysign \ + cos \ + cosh \ + cpow \ + cproj \ + creal \ + csin \ + csinh \ + csqrt \ + ctan \ + ctanh \ + dadd \ + ddiv \ + dfma \ + dmul \ + dprintf \ + dprintf_chk \ + dsqrt \ + dsub \ + erf \ + erfc \ + exp \ + exp2 \ + exp10 \ + expm1 \ + fabs \ + fadd \ + fdim \ + fdiv \ + ffma \ + finite \ + floor \ + fma \ + fmax \ + fmaximum \ + fmaximum_mag \ + fmaximum_mag_num \ + fmaximum_num \ + fmaxmag \ + fmin \ + fminimum \ + fminimum_mag \ + fminimum_mag_num \ + fminimum_num \ + fminmag \ + fmod \ + fmul \ + fprintf \ + fprintf_chk \ + frexp \ + fromfp \ + fromfpx \ + fscanf \ + fsqrt \ + fsub \ + fwprintf \ + fwprintf_chk \ + fwscanf \ + gamma \ + getpayload \ + hypot \ + ilogb \ + iovfscanf \ + isinf \ + isnan \ + isoc23_fscanf \ + isoc23_fwscanf \ + isoc23_scanf \ + isoc23_sscanf \ + isoc23_swscanf \ + isoc23_vfscanf \ + isoc23_vfwscanf \ + isoc23_vscanf \ + isoc23_vsscanf \ + isoc23_vswscanf \ + isoc23_vwscanf \ + isoc23_wscanf \ + isoc99_fscanf \ + isoc99_fwscanf \ + isoc99_scanf \ + isoc99_sscanf \ + isoc99_swscanf \ + isoc99_vfscanf \ + isoc99_vfwscanf \ + isoc99_vscanf \ + isoc99_vsscanf \ + isoc99_vswscanf \ + isoc99_vwscanf \ + isoc99_wscanf \ + j0 \ + j1 \ + jn \ + ldexp \ + lgamma \ + lgamma_r \ + llogb \ + llrint \ + llround \ + log \ + log2 \ + log10 \ + log1p \ + log2p1 \ + logb \ + lrint \ + lround \ + modf \ + nan \ + nearbyint \ + nextafter \ + nextdown \ + nexttoward \ + nexttowardf \ + nextup \ + obstack_printf \ + obstack_printf_chk \ + obstack_vprintf \ + obstack_vprintf_chk \ + pow \ + printf \ + printf_chk \ + printf_fp \ + printf_size \ + qecvt \ + qecvt_r \ + qfcvt \ + qfcvt_r \ + qgcvt \ + remainder \ + remquo \ + rint \ + round \ + roundeven \ + scalb \ + scalbln \ + scalbn \ + scanf \ + setpayload \ + 
setpayloadsig \ + signbit \ + significand \ + sin \ + sincos \ + sinh \ + snprintf \ + snprintf_chk \ + sprintf \ + sprintf_chk \ + sqrt \ + sscanf \ + strfmon \ + strfmon_l \ + strfroml \ + strtold \ + strtold_l \ + strtoldint \ + swprintf \ + swprintf_chk \ + swscanf \ + syslog \ + syslog_chk \ + tan \ + tanh \ + tgamma \ + totalorder \ + totalordermag \ + trunc \ + ufromfp \ + ufromfpx \ + vasprintf \ + vasprintf_chk \ + vdprintf \ + vdprintf_chk \ + vfprintf \ + vfprintf_chk \ + vfscanf \ + vfwprintf \ + vfwprintf_chk \ + vfwscanf \ + vprintf \ + vprintf_chk \ + vscanf \ + vsnprintf \ + vsnprintf_chk \ + vsprintf \ + vsprintf_chk \ + vsscanf \ + vswprintf \ + vswprintf_chk \ + vswscanf \ + vsyslog \ + vsyslog_chk \ + vwprintf \ + vwprintf_chk \ + vwscanf \ + wcstold \ + wcstold_l \ + wcstoldint \ + wprintf \ + wprintf_chk \ + wscanf \ + y0 \ + y1 \ + yn \ +# libnldbl-calls libnldbl-routines = $(libnldbl-calls:%=nldbl-%) libnldbl-inhibit-o = $(object-suffixes) libnldbl-static-only-routines = $(libnldbl-routines) @@ -151,6 +343,7 @@ CFLAGS-nldbl-log.c = -fno-builtin-logl CFLAGS-nldbl-log10.c = -fno-builtin-log10l CFLAGS-nldbl-log1p.c = -fno-builtin-log1pl CFLAGS-nldbl-log2.c = -fno-builtin-log2l +CFLAGS-nldbl-log2p1.c = -fno-builtin-log2p1l CFLAGS-nldbl-logb.c = -fno-builtin-logbl CFLAGS-nldbl-lrint.c = -fno-builtin-lrintl CFLAGS-nldbl-lround.c = -fno-builtin-lroundl @@ -190,8 +383,11 @@ CFLAGS-nldbl-y0.c = -fno-builtin-y0l CFLAGS-nldbl-y1.c = -fno-builtin-y1l CFLAGS-nldbl-yn.c = -fno-builtin-ynl -tests += test-narrow-macros-ldbl-64 test-nldbl-redirect \ - test-redirection-ldbl-64 +tests += \ + test-narrow-macros-ldbl-64 \ + test-nldbl-redirect \ + test-redirection-ldbl-64 \ +# tests CFLAGS-test-narrow-macros-ldbl-64.c += -mlong-double-64 CFLAGS-test-nldbl-redirect.c += -mlong-double-64 CFLAGS-test-redirection-ldbl-64.c += -mlong-double-64 @@ -231,10 +427,12 @@ CFLAGS-tst-nldbl-errorfptr.c += -mlong-double-64 endif ifeq ($(subdir), stdio-common) -tests += 
tst-nldbl-scanf-binary-c11 \ - tst-nldbl-scanf-binary-c23 \ - tst-nldbl-scanf-binary-gnu11 \ - tst-nldbl-scanf-binary-gnu89 +tests += \ + tst-nldbl-scanf-binary-c11 \ + tst-nldbl-scanf-binary-c23 \ + tst-nldbl-scanf-binary-gnu11 \ + tst-nldbl-scanf-binary-gnu89 \ +# tests # Some versions of GCC supported for building glibc do not support -std=c23 # (added in GCC 14), or the older name -std=c2x (added in GCC 9), so @@ -252,10 +450,12 @@ CFLAGS-tst-nldbl-scanf-binary-gnu89.c += -mlong-double-64 -std=gnu89 \ endif ifeq ($(subdir), wcsmbs) -tests += tst-nldbl-wscanf-binary-c11 \ - tst-nldbl-wscanf-binary-c23 \ - tst-nldbl-wscanf-binary-gnu11 \ - tst-nldbl-wscanf-binary-gnu89 +tests += \ + tst-nldbl-wscanf-binary-c11 \ + tst-nldbl-wscanf-binary-c23 \ + tst-nldbl-wscanf-binary-gnu11 \ + tst-nldbl-wscanf-binary-gnu89 \ +# tests # Some versions of GCC supported for building glibc do not support -std=c23 # (added in GCC 14), or the older name -std=c2x (added in GCC 9), so diff --git a/sysdeps/ieee754/ldbl-opt/nldbl-log2p1.c b/sysdeps/ieee754/ldbl-opt/nldbl-log2p1.c new file mode 100644 index 0000000000..989c69e3d7 --- /dev/null +++ b/sysdeps/ieee754/ldbl-opt/nldbl-log2p1.c @@ -0,0 +1,8 @@ +#include "nldbl-compat.h" + +double +attribute_hidden +log2p1l (double x) +{ + return log2p1 (x); +} diff --git a/sysdeps/ieee754/ldbl-opt/s_ldexpl.c b/sysdeps/ieee754/ldbl-opt/s_ldexpl.c index 1afbe7d8ad..932cc4341c 100644 --- a/sysdeps/ieee754/ldbl-opt/s_ldexpl.c +++ b/sysdeps/ieee754/ldbl-opt/s_ldexpl.c @@ -17,13 +17,13 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. 
*/ -#if IS_IN (libc) +#if IS_IN (libc) && defined SHARED # define declare_mgen_alias(f,t) #endif #include <math-type-macros-ldouble.h> #include <s_ldexp_template.c> -#if IS_IN (libc) +#if IS_IN (libc) && defined SHARED long_double_symbol (libc, __ldexpl, ldexpl); long_double_symbol (libc, __wrap_scalbnl, scalbnl); #endif diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile index 446bda6563..b00c090faa 100644 --- a/sysdeps/loongarch/Makefile +++ b/sysdeps/loongarch/Makefile @@ -1,5 +1,7 @@ ifeq ($(subdir),misc) -sysdep_headers += sys/asm.h +sysdep_headers += \ + sys/asm.h \ + # sysdep_headers tests += \ tst-hwcap-tunables \ @@ -9,21 +11,47 @@ tst-hwcap-tunables-ARGS = -- $(host-test-program-cmd) endif ifeq ($(subdir),elf) -gen-as-const-headers += dl-link.sym +sysdep-dl-routines += \ + dl-tlsdesc \ + tlsdesc \ + # sysdep-dl-routines + +gen-as-const-headers += \ + dl-link.sym \ + # gen-as-const-headers +endif + +ifeq ($(subdir),csu) +gen-as-const-headers += \ + tlsdesc.sym \ + # gen-as-const-headers endif ifeq ($(subdir),elf) sysdep-dl-routines += \ dl-get-cpu-features \ # sysdep-dl-routines + +# Disable the compiler from using LSX for TLS descriptor tests, or storing into +# 16B TLS variable may clobber FP/vector registers and prevent us from checking +# their contents. +ifeq (yes,$(loongarch-vec-com)) +CFLAGS-tst-gnu2-tls2mod0.c += -mno-lsx +CFLAGS-tst-gnu2-tls2mod1.c += -mno-lsx +CFLAGS-tst-gnu2-tls2mod2.c += -mno-lsx +endif endif # LoongArch's assembler also needs to know about PIC as it changes the # definition of some assembler macros. -ASFLAGS-.os += $(pic-ccflag) +ASFLAGS-.os += \ + $(pic-ccflag) \ + # ASFLAGS-.os # All the objects in lib*_nonshared.a need to be compiled with medium code # model or large applications may fail to link. 
ifeq (yes,$(have-cmodel-medium)) -CFLAGS-.oS += -mcmodel=medium +CFLAGS-.oS += \ + -mcmodel=medium \ + # CFLAGS-.oS endif diff --git a/sysdeps/loongarch/configure b/sysdeps/loongarch/configure index 30b60d1983..afcef6cd22 100644 --- a/sysdeps/loongarch/configure +++ b/sysdeps/loongarch/configure @@ -110,3 +110,39 @@ if test $libc_cv_loongarch_vec_asm = no; then as_fn_error $? "binutils version is too old, use 2.41 or newer version" "$LINENO" 5 fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for vector support in compiler" >&5 +printf %s "checking for vector support in compiler... " >&6; } +if test ${libc_cv_loongarch_vec_com+y} +then : + printf %s "(cached) " >&6 +else $as_nop + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +void foo (void) +{ + asm volatile ("vldi \$vr0, 1" ::: "\$vr0"); + asm volatile ("xvldi \$xr0, 1" ::: "\$xr0"); +} + +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + libc_cv_loongarch_vec_com=yes +else $as_nop + libc_cv_loongarch_vec_com=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_loongarch_vec_com" >&5 +printf "%s\n" "$libc_cv_loongarch_vec_com" >&6; } +if test "$libc_cv_loongarch_vec_com" = yes ; +then + printf "%s\n" "#define HAVE_LOONGARCH_VEC_COM 1" >>confdefs.h + +fi +config_vars="$config_vars +loongarch-vec-com = $libc_cv_loongarch_vec_com" + diff --git a/sysdeps/loongarch/configure.ac b/sysdeps/loongarch/configure.ac index 28a8ae5486..c56a203574 100644 --- a/sysdeps/loongarch/configure.ac +++ b/sysdeps/loongarch/configure.ac @@ -65,3 +65,20 @@ rm -f conftest*]) if test $libc_cv_loongarch_vec_asm = no; then AC_MSG_ERROR([binutils version is too old, use 2.41 or newer version]) fi + +AC_CACHE_CHECK([for vector support in compiler], + libc_cv_loongarch_vec_com, [ +AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ +void foo (void) +{ + asm volatile ("vldi \$vr0, 1" ::: "\$vr0"); + asm volatile ("xvldi \$xr0, 
1" ::: "\$xr0"); +} +]])], + [libc_cv_loongarch_vec_com=yes], + [libc_cv_loongarch_vec_com=no])]) +if test "$libc_cv_loongarch_vec_com" = yes ; +then + AC_DEFINE(HAVE_LOONGARCH_VEC_COM) +fi +LIBC_CONFIG_VAR([loongarch-vec-com], [$libc_cv_loongarch_vec_com]) diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h index 6baf0e600a..ab6f1da7c0 100644 --- a/sysdeps/loongarch/dl-machine.h +++ b/sysdeps/loongarch/dl-machine.h @@ -25,7 +25,7 @@ #include <entry.h> #include <elf/elf.h> #include <sys/asm.h> -#include <dl-tls.h> +#include <dl-tlsdesc.h> #include <dl-static-tls.h> #include <dl-machine-rel.h> @@ -206,6 +206,36 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], *addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend; break; + case __WORDSIZE == 64 ? R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32: + { + struct tlsdesc volatile *td = (struct tlsdesc volatile *)addr_field; + if (sym == NULL) + { + td->arg = (void*)reloc->r_addend; + td->entry = _dl_tlsdesc_undefweak; + } + else + { +# ifndef SHARED + CHECK_STATIC_TLS (map, sym_map); +# else + if (!TRY_STATIC_TLS (map, sym_map)) + { + td->arg = _dl_make_tlsdesc_dynamic (sym_map, + sym->st_value + reloc->r_addend); + td->entry = _dl_tlsdesc_dynamic; + } + else +# endif + { + td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym) + + reloc->r_addend); + td->entry = _dl_tlsdesc_return; + } + } + break; + } + case R_LARCH_COPY: { if (sym == NULL) @@ -274,6 +304,26 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], else *reloc_addr = map->l_mach.plt; } + else if (__glibc_likely (r_type == R_LARCH_TLS_DESC64) + || __glibc_likely (r_type == R_LARCH_TLS_DESC32)) + { + const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info); + const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]); + const ElfW (Sym) *sym = &symtab[symndx]; + const struct r_found_version *version = NULL; + + if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL) + { + const ElfW (Half) 
*vernum = (const void *)D_PTR (map, + l_info[VERSYMIDX (DT_VERSYM)]); + version = &map->l_versions[vernum[symndx] & 0x7fff]; + } + + /* Always initialize TLS descriptors completely, because lazy + initialization requires synchronization at every TLS access. */ + elf_machine_rela (map, scope, reloc, sym, version, reloc_addr, + skip_ifunc); + } else _dl_reloc_bad_type (map, r_type, 1); } diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h index 29924b866d..de593c002d 100644 --- a/sysdeps/loongarch/dl-tls.h +++ b/sysdeps/loongarch/dl-tls.h @@ -16,6 +16,9 @@ License along with the GNU C Library. If not, see <https://www.gnu.org/licenses/>. */ +#ifndef _DL_TLS_H +#define _DL_TLS_H + /* Type used for the representation of TLS information in the GOT. */ typedef struct { @@ -23,6 +26,8 @@ typedef struct unsigned long int ti_offset; } tls_index; +extern void *__tls_get_addr (tls_index *ti); + /* The thread pointer points to the first static TLS block. */ #define TLS_TP_OFFSET 0 @@ -37,10 +42,10 @@ typedef struct /* Compute the value for a DTPREL reloc. */ #define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET) -extern void *__tls_get_addr (tls_index *ti); - #define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET) #define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET) /* Value used for dtv entries for which the allocation is delayed. */ #define TLS_DTV_UNALLOCATED ((void *) -1l) + +#endif diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S new file mode 100644 index 0000000000..15d5fa1c42 --- /dev/null +++ b/sysdeps/loongarch/dl-tlsdesc.S @@ -0,0 +1,436 @@ +/* Thread-local storage handling in the ELF dynamic linker. + LoongArch version. + Copyright (C) 2024 Free Software Foundation, Inc. + + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <tls.h> +#include "tlsdesc.h" + + .text + + /* Compute the thread pointer offset for symbols in the static + TLS block. The offset is the same for all threads. + Prototype: + _dl_tlsdesc_return (tlsdesc *); */ + .hidden _dl_tlsdesc_return + .global _dl_tlsdesc_return + .type _dl_tlsdesc_return,%function + cfi_startproc + .align 2 +_dl_tlsdesc_return: + REG_L a0, a0, 8 + RET + cfi_endproc + .size _dl_tlsdesc_return, .-_dl_tlsdesc_return + + /* Handler for undefined weak TLS symbols. + Prototype: + _dl_tlsdesc_undefweak (tlsdesc *); + + The second word of the descriptor contains the addend. + Return the addend minus the thread pointer. This ensures + that when the caller adds on the thread pointer it gets back + the addend. */ + .hidden _dl_tlsdesc_undefweak + .global _dl_tlsdesc_undefweak + .type _dl_tlsdesc_undefweak,%function + cfi_startproc + .align 2 +_dl_tlsdesc_undefweak: + REG_L a0, a0, 8 + sub.d a0, a0, tp + RET + cfi_endproc + .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak + + +#ifdef SHARED + +#define FRAME_SIZE (-((-14 * SZREG) & ALMASK)) +#define FRAME_SIZE_LSX (-((-32 * SZVREG) & ALMASK)) +#define FRAME_SIZE_LASX (-((-32 * SZXREG) & ALMASK)) +#define FRAME_SIZE_FLOAT (-((-24 * SZFREG) & ALMASK)) + + /* Handler for dynamic TLS symbols. 
+ Prototype: + _dl_tlsdesc_dynamic (tlsdesc *) ; + + The second word of the descriptor points to a + tlsdesc_dynamic_arg structure. + + Returns the offset between the thread pointer and the + object referenced by the argument. + + ptrdiff_t + _dl_tlsdesc_dynamic (struct tlsdesc *tdp) + { + struct tlsdesc_dynamic_arg *td = tdp->arg; + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - TCBHEAD_DTV); + if (__glibc_likely (td->gen_count <= dtv[0].counter + && (dtv[td->tlsinfo.ti_module].pointer.val + != TLS_DTV_UNALLOCATED), + 1)) + return dtv[td->tlsinfo.ti_module].pointer.val + + td->tlsinfo.ti_offset + - __thread_pointer; + + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; + } */ + .hidden _dl_tlsdesc_dynamic + .global _dl_tlsdesc_dynamic + .type _dl_tlsdesc_dynamic,%function + cfi_startproc + .align 2 +_dl_tlsdesc_dynamic: + /* Save just enough registers to support fast path, if we fall + into slow path we will save additional registers. */ + ADDI sp, sp, -24 + REG_S t0, sp, 0 + REG_S t1, sp, 8 + REG_S t2, sp, 16 + +/* Runtime Storage Layout of Thread-Local Storage + TP point to the start of TLS block. + + dtv +Low address TCB ----------------> dtv0(counter) + TP --> static_block0 <----- dtv1 + static_block1 <----- dtv2 + static_block2 <----- dtv3 + dynamic_block0 <----- dtv4 +Hign address dynamic_block1 <----- dtv5 */ + + REG_L t0, tp, -SIZE_OF_TCB /* t0 = dtv */ + REG_L a0, a0, TLSDESC_ARG /* a0(td) = tdp->arg */ + REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */ + REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */ + /* If dtv[0].counter < td->gen_count, goto slow path. 
*/ + bltu t2, t1, .Lslow + + REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */ + /* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */ + slli.d t1, t1, 4 + add.d t1, t1, t0 /* t1 = dtv[td->tlsinfo.ti_module] */ + REG_L t1, t1, 0 /* t1 = dtv[td->tlsinfo.ti_module].pointer.val */ + li.d t2, TLS_DTV_UNALLOCATED + /* If dtv[td->tlsinfo.ti_module].pointer.val is TLS_DTV_UNALLOCATED, + goto slow path. */ + beq t1, t2, .Lslow + + REG_L t2, a0, TLSDESC_MODOFF /* t2 = td->tlsinfo.ti_offset */ + /* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */ + add.d a0, t1, t2 +.Lret: + sub.d a0, a0, tp + REG_L t0, sp, 0 + REG_L t1, sp, 8 + REG_L t2, sp, 16 + ADDI sp, sp, 24 + RET + +.Lslow: + /* This is the slow path. We need to call __tls_get_addr() which + means we need to save and restore all the register that the + callee will trash. */ + + /* Save the remaining registers that we must treat as caller save. */ + ADDI sp, sp, -FRAME_SIZE + REG_S ra, sp, 0 * SZREG + REG_S a1, sp, 1 * SZREG + REG_S a2, sp, 2 * SZREG + REG_S a3, sp, 3 * SZREG + REG_S a4, sp, 4 * SZREG + REG_S a5, sp, 5 * SZREG + REG_S a6, sp, 6 * SZREG + REG_S a7, sp, 7 * SZREG + REG_S t3, sp, 8 * SZREG + REG_S t4, sp, 9 * SZREG + REG_S t5, sp, 10 * SZREG + REG_S t6, sp, 11 * SZREG + REG_S t7, sp, 12 * SZREG + REG_S t8, sp, 13 * SZREG + +#ifndef __loongarch_soft_float + + /* Save fcsr0 register. + Only one physical fcsr0 register, fcsr1-fcsr3 are aliases + of some fields in fcsr0. */ + ADDI sp, sp, -SZFCSREG + movfcsr2gr t0, fcsr0 + st.w t0, sp, 0 + + /* Whether support LASX. */ + la.global t0, _rtld_global_ro + REG_L t0, t0, GLRO_DL_HWCAP_OFFSET + andi t0, t0, HWCAP_LOONGARCH_LASX + beqz t0, .Llsx + + /* Save 256-bit vector registers. + FIXME: Without vector ABI, save all vector registers. 
*/ + ADDI sp, sp, -FRAME_SIZE_LASX + xvst xr0, sp, 0*SZXREG + xvst xr1, sp, 1*SZXREG + xvst xr2, sp, 2*SZXREG + xvst xr3, sp, 3*SZXREG + xvst xr4, sp, 4*SZXREG + xvst xr5, sp, 5*SZXREG + xvst xr6, sp, 6*SZXREG + xvst xr7, sp, 7*SZXREG + xvst xr8, sp, 8*SZXREG + xvst xr9, sp, 9*SZXREG + xvst xr10, sp, 10*SZXREG + xvst xr11, sp, 11*SZXREG + xvst xr12, sp, 12*SZXREG + xvst xr13, sp, 13*SZXREG + xvst xr14, sp, 14*SZXREG + xvst xr15, sp, 15*SZXREG + xvst xr16, sp, 16*SZXREG + xvst xr17, sp, 17*SZXREG + xvst xr18, sp, 18*SZXREG + xvst xr19, sp, 19*SZXREG + xvst xr20, sp, 20*SZXREG + xvst xr21, sp, 21*SZXREG + xvst xr22, sp, 22*SZXREG + xvst xr23, sp, 23*SZXREG + xvst xr24, sp, 24*SZXREG + xvst xr25, sp, 25*SZXREG + xvst xr26, sp, 26*SZXREG + xvst xr27, sp, 27*SZXREG + xvst xr28, sp, 28*SZXREG + xvst xr29, sp, 29*SZXREG + xvst xr30, sp, 30*SZXREG + xvst xr31, sp, 31*SZXREG + b .Ltga + +.Llsx: + /* Whether support LSX. */ + andi t0, t0, HWCAP_LOONGARCH_LSX + beqz t0, .Lfloat + + /* Save 128-bit vector registers. */ + ADDI sp, sp, -FRAME_SIZE_LSX + vst vr0, sp, 0*SZVREG + vst vr1, sp, 1*SZVREG + vst vr2, sp, 2*SZVREG + vst vr3, sp, 3*SZVREG + vst vr4, sp, 4*SZVREG + vst vr5, sp, 5*SZVREG + vst vr6, sp, 6*SZVREG + vst vr7, sp, 7*SZVREG + vst vr8, sp, 8*SZVREG + vst vr9, sp, 9*SZVREG + vst vr10, sp, 10*SZVREG + vst vr11, sp, 11*SZVREG + vst vr12, sp, 12*SZVREG + vst vr13, sp, 13*SZVREG + vst vr14, sp, 14*SZVREG + vst vr15, sp, 15*SZVREG + vst vr16, sp, 16*SZVREG + vst vr17, sp, 17*SZVREG + vst vr18, sp, 18*SZVREG + vst vr19, sp, 19*SZVREG + vst vr20, sp, 20*SZVREG + vst vr21, sp, 21*SZVREG + vst vr22, sp, 22*SZVREG + vst vr23, sp, 23*SZVREG + vst vr24, sp, 24*SZVREG + vst vr25, sp, 25*SZVREG + vst vr26, sp, 26*SZVREG + vst vr27, sp, 27*SZVREG + vst vr28, sp, 28*SZVREG + vst vr29, sp, 29*SZVREG + vst vr30, sp, 30*SZVREG + vst vr31, sp, 31*SZVREG + b .Ltga + +.Lfloat: + /* Save float registers. 
*/ + ADDI sp, sp, -FRAME_SIZE_FLOAT + FREG_S fa0, sp, 0*SZFREG + FREG_S fa1, sp, 1*SZFREG + FREG_S fa2, sp, 2*SZFREG + FREG_S fa3, sp, 3*SZFREG + FREG_S fa4, sp, 4*SZFREG + FREG_S fa5, sp, 5*SZFREG + FREG_S fa6, sp, 6*SZFREG + FREG_S fa7, sp, 7*SZFREG + FREG_S ft0, sp, 8*SZFREG + FREG_S ft1, sp, 9*SZFREG + FREG_S ft2, sp, 10*SZFREG + FREG_S ft3, sp, 11*SZFREG + FREG_S ft4, sp, 12*SZFREG + FREG_S ft5, sp, 13*SZFREG + FREG_S ft6, sp, 14*SZFREG + FREG_S ft7, sp, 15*SZFREG + FREG_S ft8, sp, 16*SZFREG + FREG_S ft9, sp, 17*SZFREG + FREG_S ft10, sp, 18*SZFREG + FREG_S ft11, sp, 19*SZFREG + FREG_S ft12, sp, 20*SZFREG + FREG_S ft13, sp, 21*SZFREG + FREG_S ft14, sp, 22*SZFREG + FREG_S ft15, sp, 23*SZFREG + +#endif /* #ifndef __loongarch_soft_float */ + +.Ltga: + bl HIDDEN_JUMPTARGET(__tls_get_addr) + ADDI a0, a0, -TLS_DTV_OFFSET + +#ifndef __loongarch_soft_float + + la.global t0, _rtld_global_ro + REG_L t0, t0, GLRO_DL_HWCAP_OFFSET + andi t0, t0, HWCAP_LOONGARCH_LASX + beqz t0, .Llsx1 + + /* Restore 256-bit vector registers. 
*/ + xvld xr0, sp, 0*SZXREG + xvld xr1, sp, 1*SZXREG + xvld xr2, sp, 2*SZXREG + xvld xr3, sp, 3*SZXREG + xvld xr4, sp, 4*SZXREG + xvld xr5, sp, 5*SZXREG + xvld xr6, sp, 6*SZXREG + xvld xr7, sp, 7*SZXREG + xvld xr8, sp, 8*SZXREG + xvld xr9, sp, 9*SZXREG + xvld xr10, sp, 10*SZXREG + xvld xr11, sp, 11*SZXREG + xvld xr12, sp, 12*SZXREG + xvld xr13, sp, 13*SZXREG + xvld xr14, sp, 14*SZXREG + xvld xr15, sp, 15*SZXREG + xvld xr16, sp, 16*SZXREG + xvld xr17, sp, 17*SZXREG + xvld xr18, sp, 18*SZXREG + xvld xr19, sp, 19*SZXREG + xvld xr20, sp, 20*SZXREG + xvld xr21, sp, 21*SZXREG + xvld xr22, sp, 22*SZXREG + xvld xr23, sp, 23*SZXREG + xvld xr24, sp, 24*SZXREG + xvld xr25, sp, 25*SZXREG + xvld xr26, sp, 26*SZXREG + xvld xr27, sp, 27*SZXREG + xvld xr28, sp, 28*SZXREG + xvld xr29, sp, 29*SZXREG + xvld xr30, sp, 30*SZXREG + xvld xr31, sp, 31*SZXREG + ADDI sp, sp, FRAME_SIZE_LASX + b .Lfcsr + +.Llsx1: + andi t0, s0, HWCAP_LOONGARCH_LSX + beqz t0, .Lfloat1 + + /* Restore 128-bit vector registers. */ + vld vr0, sp, 0*SZVREG + vld vr1, sp, 1*SZVREG + vld vr2, sp, 2*SZVREG + vld vr3, sp, 3*SZVREG + vld vr4, sp, 4*SZVREG + vld vr5, sp, 5*SZVREG + vld vr6, sp, 6*SZVREG + vld vr7, sp, 7*SZVREG + vld vr8, sp, 8*SZVREG + vld vr9, sp, 9*SZVREG + vld vr10, sp, 10*SZVREG + vld vr11, sp, 11*SZVREG + vld vr12, sp, 12*SZVREG + vld vr13, sp, 13*SZVREG + vld vr14, sp, 14*SZVREG + vld vr15, sp, 15*SZVREG + vld vr16, sp, 16*SZVREG + vld vr17, sp, 17*SZVREG + vld vr18, sp, 18*SZVREG + vld vr19, sp, 19*SZVREG + vld vr20, sp, 20*SZVREG + vld vr21, sp, 21*SZVREG + vld vr22, sp, 22*SZVREG + vld vr23, sp, 23*SZVREG + vld vr24, sp, 24*SZVREG + vld vr25, sp, 25*SZVREG + vld vr26, sp, 26*SZVREG + vld vr27, sp, 27*SZVREG + vld vr28, sp, 28*SZVREG + vld vr29, sp, 29*SZVREG + vld vr30, sp, 30*SZVREG + vld vr31, sp, 31*SZVREG + ADDI sp, sp, FRAME_SIZE_LSX + b .Lfcsr + +.Lfloat1: + /* Restore float registers. 
*/ + FREG_L fa0, sp, 0*SZFREG + FREG_L fa1, sp, 1*SZFREG + FREG_L fa2, sp, 2*SZFREG + FREG_L fa3, sp, 3*SZFREG + FREG_L fa4, sp, 4*SZFREG + FREG_L fa5, sp, 5*SZFREG + FREG_L fa6, sp, 6*SZFREG + FREG_L fa7, sp, 7*SZFREG + FREG_L ft0, sp, 8*SZFREG + FREG_L ft1, sp, 9*SZFREG + FREG_L ft2, sp, 10*SZFREG + FREG_L ft3, sp, 11*SZFREG + FREG_L ft4, sp, 12*SZFREG + FREG_L ft5, sp, 13*SZFREG + FREG_L ft6, sp, 14*SZFREG + FREG_L ft7, sp, 15*SZFREG + FREG_L ft8, sp, 16*SZFREG + FREG_L ft9, sp, 17*SZFREG + FREG_L ft10, sp, 18*SZFREG + FREG_L ft11, sp, 19*SZFREG + FREG_L ft12, sp, 20*SZFREG + FREG_L ft13, sp, 21*SZFREG + FREG_L ft14, sp, 22*SZFREG + FREG_L ft15, sp, 23*SZFREG + ADDI sp, sp, FRAME_SIZE_FLOAT + +.Lfcsr: + /* Restore fcsr0 register. */ + ld.w t0, sp, 0 + movgr2fcsr fcsr0, t0 + ADDI sp, sp, SZFCSREG + +#endif /* #ifndef __loongarch_soft_float */ + + REG_L ra, sp, 0 * SZREG + REG_L a1, sp, 1 * SZREG + REG_L a2, sp, 2 * SZREG + REG_L a3, sp, 3 * SZREG + REG_L a4, sp, 4 * SZREG + REG_L a5, sp, 5 * SZREG + REG_L a6, sp, 6 * SZREG + REG_L a7, sp, 7 * SZREG + REG_L t3, sp, 8 * SZREG + REG_L t4, sp, 9 * SZREG + REG_L t5, sp, 10 * SZREG + REG_L t6, sp, 11 * SZREG + REG_L t7, sp, 12 * SZREG + REG_L t8, sp, 13 * SZREG + ADDI sp, sp, FRAME_SIZE + + b .Lret + cfi_endproc + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic + .hidden HIDDEN_JUMPTARGET(__tls_get_addr) + +#endif /* #ifdef SHARED */ diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h new file mode 100644 index 0000000000..ff8c69cb93 --- /dev/null +++ b/sysdeps/loongarch/dl-tlsdesc.h @@ -0,0 +1,49 @@ +/* Thread-local storage descriptor handling in the ELF dynamic linker. + LoongArch version. + Copyright (C) 2024 Free Software Foundation, Inc. + + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _DL_TLSDESC_H +#define _DL_TLSDESC_H + +#include <dl-tls.h> + +/* Type used to represent a TLS descriptor in the GOT. */ +struct tlsdesc +{ + ptrdiff_t (*entry) (struct tlsdesc *); + void *arg; +}; + +/* Type used as the argument in a TLS descriptor for a symbol that + needs dynamic TLS offsets. */ +struct tlsdesc_dynamic_arg +{ + tls_index tlsinfo; + size_t gen_count; +}; + +extern ptrdiff_t attribute_hidden _dl_tlsdesc_return (struct tlsdesc *); +extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *); + +#ifdef SHARED +extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t); +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *); +#endif + +#endif diff --git a/sysdeps/loongarch/fpu_control.h b/sysdeps/loongarch/fpu_control.h index 54add4e01c..3cdf2417d9 100644 --- a/sysdeps/loongarch/fpu_control.h +++ b/sysdeps/loongarch/fpu_control.h @@ -91,8 +91,8 @@ typedef unsigned int fpu_control_t __attribute__ ((__mode__ (__SI__))); /* Macros for accessing the hardware control word. 
*/ extern fpu_control_t __loongarch_fpu_getcw (void) __THROW; extern void __loongarch_fpu_setcw (fpu_control_t) __THROW; -#define _FPU_GETCW(cw) __asm__ volatile ("movfcsr2gr %0,$r0" : "=r"(cw)) -#define _FPU_SETCW(cw) __asm__ volatile ("movgr2fcsr $r0,%0" : : "r"(cw)) +#define _FPU_GETCW(cw) __asm__ volatile ("movfcsr2gr %0,$fcsr0" : "=r"(cw)) +#define _FPU_SETCW(cw) __asm__ volatile ("movgr2fcsr $fcsr0,%0" : : "r"(cw)) /* Default control word set at startup. */ extern fpu_control_t __fpu_control; diff --git a/sysdeps/loongarch/linkmap.h b/sysdeps/loongarch/linkmap.h index 4d8737ee7f..833dc9eb82 100644 --- a/sysdeps/loongarch/linkmap.h +++ b/sysdeps/loongarch/linkmap.h @@ -18,5 +18,6 @@ struct link_map_machine { - ElfW (Addr) plt; /* Address of .plt. */ + ElfW (Addr) plt; /* Address of .plt. */ + void *tlsdesc_table; /* Address of TLS descriptor hash table. */ }; diff --git a/sysdeps/loongarch/lp64/libm-test-ulps b/sysdeps/loongarch/lp64/libm-test-ulps index 770bf36b11..185c5b4e92 100644 --- a/sysdeps/loongarch/lp64/libm-test-ulps +++ b/sysdeps/loongarch/lp64/libm-test-ulps @@ -1202,6 +1202,26 @@ Function: "log2_upward": double: 3 ldouble: 1 +Function: "log2p1": +double: 1 +float: 1 +ldouble: 3 + +Function: "log2p1_downward": +double: 2 +float: 2 +ldouble: 3 + +Function: "log2p1_towardzero": +double: 2 +float: 2 +ldouble: 2 + +Function: "log2p1_upward": +double: 1 +float: 2 +ldouble: 2 + Function: "log_downward": ldouble: 1 diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S index 9826d21c2e..3606225ca2 100644 --- a/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S +++ b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S @@ -98,5 +98,7 @@ L(out): jr ra END(STRNLEN) +#if !IS_IN (libc) weak_alias (STRNLEN, strnlen) libc_hidden_builtin_def (STRNLEN) +#endif diff --git a/sysdeps/loongarch/preconfigure b/sysdeps/loongarch/preconfigure index dfc7ecfd9e..0d1e9ed8df 100644 --- 
a/sysdeps/loongarch/preconfigure +++ b/sysdeps/loongarch/preconfigure @@ -43,6 +43,7 @@ loongarch*) base_machine=loongarch + mtls_descriptor=desc ;; esac diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h index 51521a7eb4..23c1d12914 100644 --- a/sysdeps/loongarch/sys/asm.h +++ b/sysdeps/loongarch/sys/asm.h @@ -25,6 +25,7 @@ /* Macros to handle different pointer/register sizes for 32/64-bit code. */ #define SZREG 8 #define SZFREG 8 +#define SZFCSREG 4 #define SZVREG 16 #define SZXREG 32 #define REG_L ld.d diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h index f61ee25b25..80ce3e9c00 100644 --- a/sysdeps/loongarch/sys/regdef.h +++ b/sysdeps/loongarch/sys/regdef.h @@ -97,6 +97,7 @@ #define fcc5 $fcc5 #define fcc6 $fcc6 #define fcc7 $fcc7 +#define fcsr0 $fcsr0 #define vr0 $vr0 #define vr1 $vr1 diff --git a/sysdeps/loongarch/tlsdesc.c b/sysdeps/loongarch/tlsdesc.c new file mode 100644 index 0000000000..76708f7e1e --- /dev/null +++ b/sysdeps/loongarch/tlsdesc.c @@ -0,0 +1,39 @@ +/* Manage TLS descriptors. LoongArch64 version. + + Copyright (C) 2024 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <ldsodefs.h> +#include <tls.h> +#include <dl-tlsdesc.h> +#include <dl-unmap-segments.h> +#include <tlsdeschtab.h> + +/* Unmap the dynamic object, but also release its TLS descriptor table + if there is one. */ + +void +_dl_unmap (struct link_map *map) +{ + _dl_unmap_segments (map); + +#ifdef SHARED + if (map->l_mach.tlsdesc_table) + htab_delete (map->l_mach.tlsdesc_table); +#endif +} diff --git a/sysdeps/loongarch/tlsdesc.sym b/sysdeps/loongarch/tlsdesc.sym new file mode 100644 index 0000000000..213d0b3074 --- /dev/null +++ b/sysdeps/loongarch/tlsdesc.sym @@ -0,0 +1,28 @@ +#include <stddef.h> +#include <sysdep.h> +#include <tls.h> +#include <link.h> +#include <dl-tlsdesc.h> + +#define SHARED 1 + +#include <ldsodefs.h> + +#define GLRO_offsetof(name) offsetof (struct rtld_global_ro, _##name) + +-- + +-- Abuse tls.h macros to derive offsets relative to the thread register. + +TLSDESC_ARG offsetof(struct tlsdesc, arg) +TLSDESC_GEN_COUNT offsetof(struct tlsdesc_dynamic_arg, gen_count) +TLSDESC_MODID offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module) +TLSDESC_MODOFF offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset) +TCBHEAD_DTV offsetof(tcbhead_t, dtv) +DTV_COUNTER offsetof(dtv_t, counter) +TLS_DTV_UNALLOCATED TLS_DTV_UNALLOCATED +TLS_DTV_OFFSET TLS_DTV_OFFSET +SIZE_OF_TCB sizeof(tcbhead_t) +GLRO_DL_HWCAP_OFFSET GLRO_offsetof (dl_hwcap) +HWCAP_LOONGARCH_LSX HWCAP_LOONGARCH_LSX +HWCAP_LOONGARCH_LASX HWCAP_LOONGARCH_LASX diff --git a/sysdeps/loongarch/tst-gnu2-tls2.h b/sysdeps/loongarch/tst-gnu2-tls2.h new file mode 100644 index 0000000000..8e4216785d --- /dev/null +++ b/sysdeps/loongarch/tst-gnu2-tls2.h @@ -0,0 +1,377 @@ +/* Test TLSDESC relocation. LoongArch64 version. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <config.h> +#include <string.h> +#include <stdlib.h> +#include <sys/auxv.h> + +/* The instruction between BEFORE_TLSDESC_CALL and _dl_tlsdesc_dynamic, + and the instruction between _dl_tlsdesc_dynamic and AFTER_TLSDESC_CALL, + may modified most of the general-purpose register. */ +#define SAVE_REGISTER(src) \ + asm volatile ("st.d $r3, %0" :"=m"(src) :); + +#ifdef __loongarch_soft_float + +#define BEFORE_TLSDESC_CALL() \ + uint64_t src; \ + SAVE_REGISTER (src); + +#define AFTER_TLSDESC_CALL() \ + uint64_t restore; \ + SAVE_REGISTER (restore); \ + if (src != restore) \ + abort (); + +#else /* hard float */ + +#define SAVE_REGISTER_FCC(src) \ + asm volatile ("movcf2gr $t0, $fcc0" ::: "$t0"); \ + asm volatile ("st.d $t0, %0" :"=m"(src[0]) :); \ + asm volatile ("movcf2gr $t0, $fcc1" ::: "$t0"); \ + asm volatile ("st.d $t0, %0" :"=m"(src[1]) :); \ + asm volatile ("movcf2gr $t0, $fcc2" ::: "$t0"); \ + asm volatile ("st.d $t0, %0" :"=m"(src[2]) :); \ + asm volatile ("movcf2gr $t0, $fcc3" ::: "$t0"); \ + asm volatile ("st.d $t0, %0" :"=m"(src[3]) :); \ + asm volatile ("movcf2gr $t0, $fcc4" ::: "$t0"); \ + asm volatile ("st.d $t0, %0" :"=m"(src[4]) :); \ + asm volatile ("movcf2gr $t0, $fcc5" ::: "$t0"); \ + asm volatile ("st.d $t0, %0" :"=m"(src[5]) :); \ + asm volatile ("movcf2gr $t0, $fcc6" ::: 
"$t0"); \ + asm volatile ("st.d $t0, %0" :"=m"(src[6]) :); \ + asm volatile ("movcf2gr $t0, $fcc7" ::: "$t0"); \ + asm volatile ("st.d $t0, %0" :"=m"(src[7]) :); + +#define LOAD_REGISTER_FCSR() \ + asm volatile ("li.d $t0, 0x01010101" ::: "$t0"); \ + asm volatile ("movgr2fcsr $fcsr0, $t0" :::); + +#define SAVE_REGISTER_FCSR() \ + asm volatile ("movfcsr2gr $t0, $fcsr0" ::: "$t0"); \ + asm volatile ("st.d $t0, %0" :"=m"(restore_fcsr) :); + +# define INIT_TLSDESC_CALL() \ + unsigned long hwcap = getauxval (AT_HWCAP); + +#define LOAD_REGISTER_FLOAT() \ + asm volatile ("fld.d $f0, %0" ::"m"(src_float[0]) :"$f0"); \ + asm volatile ("fld.d $f1, %0" ::"m"(src_float[1]) :"$f1"); \ + asm volatile ("fld.d $f2, %0" ::"m"(src_float[2]) :"$f2"); \ + asm volatile ("fld.d $f3, %0" ::"m"(src_float[3]) :"$f3"); \ + asm volatile ("fld.d $f4, %0" ::"m"(src_float[4]) :"$f4"); \ + asm volatile ("fld.d $f5, %0" ::"m"(src_float[5]) :"$f5"); \ + asm volatile ("fld.d $f6, %0" ::"m"(src_float[6]) :"$f6"); \ + asm volatile ("fld.d $f7, %0" ::"m"(src_float[7]) :"$f7"); \ + asm volatile ("fld.d $f8, %0" ::"m"(src_float[8]) :"$f8"); \ + asm volatile ("fld.d $f9, %0" ::"m"(src_float[9]) :"$f9"); \ + asm volatile ("fld.d $f10, %0" ::"m"(src_float[10]) :"$f10"); \ + asm volatile ("fld.d $f11, %0" ::"m"(src_float[11]) :"$f11"); \ + asm volatile ("fld.d $f12, %0" ::"m"(src_float[12]) :"$f12"); \ + asm volatile ("fld.d $f13, %0" ::"m"(src_float[13]) :"$f13"); \ + asm volatile ("fld.d $f14, %0" ::"m"(src_float[14]) :"$f14"); \ + asm volatile ("fld.d $f15, %0" ::"m"(src_float[15]) :"$f15"); \ + asm volatile ("fld.d $f16, %0" ::"m"(src_float[16]) :"$f16"); \ + asm volatile ("fld.d $f17, %0" ::"m"(src_float[17]) :"$f17"); \ + asm volatile ("fld.d $f18, %0" ::"m"(src_float[18]) :"$f18"); \ + asm volatile ("fld.d $f19, %0" ::"m"(src_float[19]) :"$f19"); \ + asm volatile ("fld.d $f20, %0" ::"m"(src_float[20]) :"$f20"); \ + asm volatile ("fld.d $f21, %0" ::"m"(src_float[21]) :"$f21"); \ + asm volatile ("fld.d 
$f22, %0" ::"m"(src_float[22]) :"$f22"); \ + asm volatile ("fld.d $f23, %0" ::"m"(src_float[23]) :"$f23"); \ + asm volatile ("fld.d $f24, %0" ::"m"(src_float[24]) :"$f24"); \ + asm volatile ("fld.d $f25, %0" ::"m"(src_float[25]) :"$f25"); \ + asm volatile ("fld.d $f26, %0" ::"m"(src_float[26]) :"$f26"); \ + asm volatile ("fld.d $f27, %0" ::"m"(src_float[27]) :"$f27"); \ + asm volatile ("fld.d $f28, %0" ::"m"(src_float[28]) :"$f28"); \ + asm volatile ("fld.d $f29, %0" ::"m"(src_float[29]) :"$f29"); \ + asm volatile ("fld.d $f30, %0" ::"m"(src_float[30]) :"$f30"); \ + asm volatile ("fld.d $f31, %0" ::"m"(src_float[31]) :"$f31"); + +#define SAVE_REGISTER_FLOAT() \ + asm volatile ("fst.d $f0, %0" :"=m"(restore_float[0]) :); \ + asm volatile ("fst.d $f1, %0" :"=m"(restore_float[1]) :); \ + asm volatile ("fst.d $f2, %0" :"=m"(restore_float[2]) :); \ + asm volatile ("fst.d $f3, %0" :"=m"(restore_float[3]) :); \ + asm volatile ("fst.d $f4, %0" :"=m"(restore_float[4]) :); \ + asm volatile ("fst.d $f5, %0" :"=m"(restore_float[5]) :); \ + asm volatile ("fst.d $f6, %0" :"=m"(restore_float[6]) :); \ + asm volatile ("fst.d $f7, %0" :"=m"(restore_float[7]) :); \ + asm volatile ("fst.d $f8, %0" :"=m"(restore_float[8]) :); \ + asm volatile ("fst.d $f9, %0" :"=m"(restore_float[9]) :); \ + asm volatile ("fst.d $f10, %0" :"=m"(restore_float[10]) :); \ + asm volatile ("fst.d $f11, %0" :"=m"(restore_float[11]) :); \ + asm volatile ("fst.d $f12, %0" :"=m"(restore_float[12]) :); \ + asm volatile ("fst.d $f13, %0" :"=m"(restore_float[13]) :); \ + asm volatile ("fst.d $f14, %0" :"=m"(restore_float[14]) :); \ + asm volatile ("fst.d $f15, %0" :"=m"(restore_float[15]) :); \ + asm volatile ("fst.d $f16, %0" :"=m"(restore_float[16]) :); \ + asm volatile ("fst.d $f17, %0" :"=m"(restore_float[17]) :); \ + asm volatile ("fst.d $f18, %0" :"=m"(restore_float[18]) :); \ + asm volatile ("fst.d $f19, %0" :"=m"(restore_float[19]) :); \ + asm volatile ("fst.d $f20, %0" :"=m"(restore_float[20]) :); \ + asm 
volatile ("fst.d $f21, %0" :"=m"(restore_float[21]) :); \ + asm volatile ("fst.d $f22, %0" :"=m"(restore_float[22]) :); \ + asm volatile ("fst.d $f23, %0" :"=m"(restore_float[23]) :); \ + asm volatile ("fst.d $f24, %0" :"=m"(restore_float[24]) :); \ + asm volatile ("fst.d $f25, %0" :"=m"(restore_float[25]) :); \ + asm volatile ("fst.d $f26, %0" :"=m"(restore_float[26]) :); \ + asm volatile ("fst.d $f27, %0" :"=m"(restore_float[27]) :); \ + asm volatile ("fst.d $f28, %0" :"=m"(restore_float[28]) :); \ + asm volatile ("fst.d $f29, %0" :"=m"(restore_float[29]) :); \ + asm volatile ("fst.d $f30, %0" :"=m"(restore_float[30]) :); \ + asm volatile ("fst.d $f31, %0" :"=m"(restore_float[31]) :); + +#ifdef HAVE_LOONGARCH_VEC_COM + #define LOAD_REGISTER_LSX() \ + /* Every byte in $vr0 is 1. */ \ + asm volatile ("vldi $vr0, 1" ::: "$vr0"); \ + asm volatile ("vldi $vr1, 2" ::: "$vr1"); \ + asm volatile ("vldi $vr2, 3" ::: "$vr2"); \ + asm volatile ("vldi $vr3, 4" ::: "$vr3"); \ + asm volatile ("vldi $vr4, 5" ::: "$vr4"); \ + asm volatile ("vldi $vr5, 6" ::: "$vr5"); \ + asm volatile ("vldi $vr6, 7" ::: "$vr6"); \ + asm volatile ("vldi $vr7, 8" ::: "$vr7"); \ + asm volatile ("vldi $vr8, 9" ::: "$vr8"); \ + asm volatile ("vldi $vr9, 10" ::: "$vr9"); \ + asm volatile ("vldi $vr10, 11" ::: "$vr10"); \ + asm volatile ("vldi $vr11, 12" ::: "$vr11"); \ + asm volatile ("vldi $vr12, 13" ::: "$vr12"); \ + asm volatile ("vldi $vr13, 14" ::: "$vr13"); \ + asm volatile ("vldi $vr14, 15" ::: "$vr14"); \ + asm volatile ("vldi $vr15, 16" ::: "$vr15"); \ + asm volatile ("vldi $vr16, 17" ::: "$vr16"); \ + asm volatile ("vldi $vr17, 18" ::: "$vr17"); \ + asm volatile ("vldi $vr18, 19" ::: "$vr18"); \ + asm volatile ("vldi $vr19, 20" ::: "$vr19"); \ + asm volatile ("vldi $vr20, 21" ::: "$vr20"); \ + asm volatile ("vldi $vr21, 22" ::: "$vr21"); \ + asm volatile ("vldi $vr22, 23" ::: "$vr22"); \ + asm volatile ("vldi $vr23, 24" ::: "$vr23"); \ + asm volatile ("vldi $vr24, 25" ::: "$vr24"); \ + asm 
volatile ("vldi $vr25, 26" ::: "$vr25"); \ + asm volatile ("vldi $vr26, 27" ::: "$vr26"); \ + asm volatile ("vldi $vr27, 28" ::: "$vr27"); \ + asm volatile ("vldi $vr28, 29" ::: "$vr28"); \ + asm volatile ("vldi $vr29, 30" ::: "$vr29"); \ + asm volatile ("vldi $vr30, 31" ::: "$vr30"); \ + asm volatile ("vldi $vr31, 32" ::: "$vr31"); +#else + #define LOAD_REGISTER_LSX() +#endif + +#ifdef HAVE_LOONGARCH_VEC_COM + #define SAVE_REGISTER_LSX() \ + int src_lsx[32][4]; \ + int restore_lsx[32][4]; \ + asm volatile ("vst $vr0, %0" :"=m"(restore_lsx[0]) :); \ + asm volatile ("vst $vr1, %0" :"=m"(restore_lsx[1]) :); \ + asm volatile ("vst $vr2, %0" :"=m"(restore_lsx[2]) :); \ + asm volatile ("vst $vr3, %0" :"=m"(restore_lsx[3]) :); \ + asm volatile ("vst $vr4, %0" :"=m"(restore_lsx[4]) :); \ + asm volatile ("vst $vr5, %0" :"=m"(restore_lsx[5]) :); \ + asm volatile ("vst $vr6, %0" :"=m"(restore_lsx[6]) :); \ + asm volatile ("vst $vr7, %0" :"=m"(restore_lsx[7]) :); \ + asm volatile ("vst $vr8, %0" :"=m"(restore_lsx[8]) :); \ + asm volatile ("vst $vr9, %0" :"=m"(restore_lsx[9]) :); \ + asm volatile ("vst $vr10, %0" :"=m"(restore_lsx[10]) :); \ + asm volatile ("vst $vr11, %0" :"=m"(restore_lsx[11]) :); \ + asm volatile ("vst $vr12, %0" :"=m"(restore_lsx[12]) :); \ + asm volatile ("vst $vr13, %0" :"=m"(restore_lsx[13]) :); \ + asm volatile ("vst $vr14, %0" :"=m"(restore_lsx[14]) :); \ + asm volatile ("vst $vr15, %0" :"=m"(restore_lsx[15]) :); \ + asm volatile ("vst $vr16, %0" :"=m"(restore_lsx[16]) :); \ + asm volatile ("vst $vr17, %0" :"=m"(restore_lsx[17]) :); \ + asm volatile ("vst $vr18, %0" :"=m"(restore_lsx[18]) :); \ + asm volatile ("vst $vr19, %0" :"=m"(restore_lsx[19]) :); \ + asm volatile ("vst $vr20, %0" :"=m"(restore_lsx[20]) :); \ + asm volatile ("vst $vr21, %0" :"=m"(restore_lsx[21]) :); \ + asm volatile ("vst $vr22, %0" :"=m"(restore_lsx[22]) :); \ + asm volatile ("vst $vr23, %0" :"=m"(restore_lsx[23]) :); \ + asm volatile ("vst $vr24, %0" :"=m"(restore_lsx[24]) :); 
\ + asm volatile ("vst $vr25, %0" :"=m"(restore_lsx[25]) :); \ + asm volatile ("vst $vr26, %0" :"=m"(restore_lsx[26]) :); \ + asm volatile ("vst $vr27, %0" :"=m"(restore_lsx[27]) :); \ + asm volatile ("vst $vr28, %0" :"=m"(restore_lsx[28]) :); \ + asm volatile ("vst $vr29, %0" :"=m"(restore_lsx[29]) :); \ + asm volatile ("vst $vr30, %0" :"=m"(restore_lsx[30]) :); \ + asm volatile ("vst $vr31, %0" :"=m"(restore_lsx[31]) :); \ + for (int i = 0; i < 32; i++) \ + for (int j = 0; j < 4; j++) \ + { \ + src_lsx[i][j] = 0x01010101 * (i + 1); \ + if (src_lsx[i][j] != restore_lsx[i][j]) \ + abort (); \ + } +#else + #define SAVE_REGISTER_LSX() +#endif + +#ifdef HAVE_LOONGARCH_VEC_COM + #define LOAD_REGISTER_LASX() \ + /* Every byte in $xr0 is 1. */ \ + asm volatile ("xvldi $xr0, 1" ::: "$xr0"); \ + asm volatile ("xvldi $xr1, 2" ::: "$xr1"); \ + asm volatile ("xvldi $xr2, 3" ::: "$xr2"); \ + asm volatile ("xvldi $xr3, 4" ::: "$xr3"); \ + asm volatile ("xvldi $xr4, 5" ::: "$xr4"); \ + asm volatile ("xvldi $xr5, 6" ::: "$xr5"); \ + asm volatile ("xvldi $xr6, 7" ::: "$xr6"); \ + asm volatile ("xvldi $xr7, 8" ::: "$xr7"); \ + asm volatile ("xvldi $xr8, 9" ::: "$xr8"); \ + asm volatile ("xvldi $xr9, 10" ::: "$xr9"); \ + asm volatile ("xvldi $xr10, 11" ::: "$xr10"); \ + asm volatile ("xvldi $xr11, 12" ::: "$xr11"); \ + asm volatile ("xvldi $xr12, 13" ::: "$xr12"); \ + asm volatile ("xvldi $xr13, 14" ::: "$xr13"); \ + asm volatile ("xvldi $xr14, 15" ::: "$xr14"); \ + asm volatile ("xvldi $xr15, 16" ::: "$xr15"); \ + asm volatile ("xvldi $xr16, 17" ::: "$xr16"); \ + asm volatile ("xvldi $xr17, 18" ::: "$xr17"); \ + asm volatile ("xvldi $xr18, 19" ::: "$xr18"); \ + asm volatile ("xvldi $xr19, 20" ::: "$xr19"); \ + asm volatile ("xvldi $xr20, 21" ::: "$xr20"); \ + asm volatile ("xvldi $xr21, 22" ::: "$xr21"); \ + asm volatile ("xvldi $xr22, 23" ::: "$xr22"); \ + asm volatile ("xvldi $xr23, 24" ::: "$xr23"); \ + asm volatile ("xvldi $xr24, 25" ::: "$xr24"); \ + asm volatile ("xvldi 
$xr25, 26" ::: "$xr25"); \ + asm volatile ("xvldi $xr26, 27" ::: "$xr26"); \ + asm volatile ("xvldi $xr27, 28" ::: "$xr27"); \ + asm volatile ("xvldi $xr28, 29" ::: "$xr28"); \ + asm volatile ("xvldi $xr29, 30" ::: "$xr29"); \ + asm volatile ("xvldi $xr30, 31" ::: "$xr30"); \ + asm volatile ("xvldi $xr31, 32" ::: "$xr31"); +#else + #define LOAD_REGISTER_LASX() +#endif + +#ifdef HAVE_LOONGARCH_VEC_COM + #define SAVE_REGISTER_LASX() \ + int src_lasx[32][8]; \ + int restore_lasx[32][8]; \ + asm volatile ("xvst $xr0, %0" :"=m"(restore_lasx[0]) :); \ + asm volatile ("xvst $xr1, %0" :"=m"(restore_lasx[1]) :); \ + asm volatile ("xvst $xr2, %0" :"=m"(restore_lasx[2]) :); \ + asm volatile ("xvst $xr3, %0" :"=m"(restore_lasx[3]) :); \ + asm volatile ("xvst $xr4, %0" :"=m"(restore_lasx[4]) :); \ + asm volatile ("xvst $xr5, %0" :"=m"(restore_lasx[5]) :); \ + asm volatile ("xvst $xr6, %0" :"=m"(restore_lasx[6]) :); \ + asm volatile ("xvst $xr7, %0" :"=m"(restore_lasx[7]) :); \ + asm volatile ("xvst $xr8, %0" :"=m"(restore_lasx[8]) :); \ + asm volatile ("xvst $xr9, %0" :"=m"(restore_lasx[9]) :); \ + asm volatile ("xvst $xr10, %0" :"=m"(restore_lasx[10]) :); \ + asm volatile ("xvst $xr11, %0" :"=m"(restore_lasx[11]) :); \ + asm volatile ("xvst $xr12, %0" :"=m"(restore_lasx[12]) :); \ + asm volatile ("xvst $xr13, %0" :"=m"(restore_lasx[13]) :); \ + asm volatile ("xvst $xr14, %0" :"=m"(restore_lasx[14]) :); \ + asm volatile ("xvst $xr15, %0" :"=m"(restore_lasx[15]) :); \ + asm volatile ("xvst $xr16, %0" :"=m"(restore_lasx[16]) :); \ + asm volatile ("xvst $xr17, %0" :"=m"(restore_lasx[17]) :); \ + asm volatile ("xvst $xr18, %0" :"=m"(restore_lasx[18]) :); \ + asm volatile ("xvst $xr19, %0" :"=m"(restore_lasx[19]) :); \ + asm volatile ("xvst $xr20, %0" :"=m"(restore_lasx[20]) :); \ + asm volatile ("xvst $xr21, %0" :"=m"(restore_lasx[21]) :); \ + asm volatile ("xvst $xr22, %0" :"=m"(restore_lasx[22]) :); \ + asm volatile ("xvst $xr23, %0" :"=m"(restore_lasx[23]) :); \ + asm volatile 
("xvst $xr24, %0" :"=m"(restore_lasx[24]) :); \ + asm volatile ("xvst $xr25, %0" :"=m"(restore_lasx[25]) :); \ + asm volatile ("xvst $xr26, %0" :"=m"(restore_lasx[26]) :); \ + asm volatile ("xvst $xr27, %0" :"=m"(restore_lasx[27]) :); \ + asm volatile ("xvst $xr28, %0" :"=m"(restore_lasx[28]) :); \ + asm volatile ("xvst $xr29, %0" :"=m"(restore_lasx[29]) :); \ + asm volatile ("xvst $xr30, %0" :"=m"(restore_lasx[30]) :); \ + asm volatile ("xvst $xr31, %0" :"=m"(restore_lasx[31]) :); \ + for (int i = 0; i < 32; i++) \ + for (int j = 0; j < 8; j++) \ + { \ + src_lasx[i][j] = 0x01010101 * (i + 1); \ + if (src_lasx[i][j] != restore_lasx[i][j]) \ + abort (); \ + } +#else + #define SAVE_REGISTER_LASX() +#endif + +#define BEFORE_TLSDESC_CALL() \ + uint64_t src; \ + double src_float[32]; \ + uint64_t src_fcc[8]; \ + for (int i = 0; i < 32; i++) \ + src_float[i] = i + 1; \ + \ + SAVE_REGISTER (src); \ + LOAD_REGISTER_FCSR (); \ + SAVE_REGISTER_FCC(src_fcc) \ + \ + if (hwcap & HWCAP_LOONGARCH_LASX) \ + { \ + LOAD_REGISTER_LASX (); \ + } \ + else if (hwcap & HWCAP_LOONGARCH_LSX) \ + { \ + LOAD_REGISTER_LSX (); \ + } \ + else \ + { \ + LOAD_REGISTER_FLOAT (); \ + } + +#define AFTER_TLSDESC_CALL() \ + uint64_t restore; \ + uint64_t src_fcsr = 0x01010101; \ + uint64_t restore_fcsr; \ + uint64_t restore_fcc[8]; \ + SAVE_REGISTER (restore); \ + SAVE_REGISTER_FCSR (); \ + SAVE_REGISTER_FCC(restore_fcc) \ + \ + /* memcmp_lasx/strlen_lasx corrupts LSX/LASX registers, */ \ + /* compare LSX/LASX registers first. 
*/ \ + if (hwcap & HWCAP_LOONGARCH_LASX) \ + { \ + SAVE_REGISTER_LASX (); \ + } \ + else if (hwcap & HWCAP_LOONGARCH_LSX) \ + { \ + SAVE_REGISTER_LSX (); \ + } \ + else \ + { \ + double restore_float[32]; \ + SAVE_REGISTER_FLOAT (); \ + \ + for (int i = 0; i < 32; i++) \ + if (src_float[i] != restore_float[i]) \ + abort (); \ + } \ + \ + if (src_fcsr != restore_fcsr) \ + abort (); \ + \ + if (memcmp (src_fcc, restore_fcc, sizeof (src_fcc)) != 0) \ + abort (); \ + \ + if (src != restore) \ + abort (); + +#endif /* #ifdef __loongarch_soft_float */ + +#include_next <tst-gnu2-tls2.h> diff --git a/sysdeps/m68k/m680x0/fpu/w_exp10_compat.c b/sysdeps/m68k/m680x0/fpu/w_exp10_compat.c index 0d3e718626..350f2e4b4d 100644 --- a/sysdeps/m68k/m680x0/fpu/w_exp10_compat.c +++ b/sysdeps/m68k/m680x0/fpu/w_exp10_compat.c @@ -1,3 +1,8 @@ /* m68k provides an optimized __ieee754_exp10. */ -#define NO_COMPAT_NEEDED 1 -#include <math/w_exp10_compat.c> +#ifdef SHARED +# define NO_COMPAT_NEEDED 1 +# include <math/w_exp10_compat.c> +#else +# include <math-type-macros-double.h> +# include <w_exp10_template.c> +#endif diff --git a/sysdeps/m68k/m680x0/fpu/w_fmod_compat.c b/sysdeps/m68k/m680x0/fpu/w_fmod_compat.c index 527d4fbed2..57f38091e6 100644 --- a/sysdeps/m68k/m680x0/fpu/w_fmod_compat.c +++ b/sysdeps/m68k/m680x0/fpu/w_fmod_compat.c @@ -7,8 +7,9 @@ # define LIBM_SVID_COMPAT 1 # undef compat_symbol # define compat_symbol(a, b, c, d) -#endif #include <math/w_fmod_compat.c> -#ifdef SHARED libm_alias_double (__fmod_compat, fmod) +#else +#include <math-type-macros-double.h> +#include <w_fmod_template.c> #endif diff --git a/sysdeps/m68k/m680x0/fpu/w_fmodf_compat.c b/sysdeps/m68k/m680x0/fpu/w_fmodf_compat.c index 5043586b91..88db07f443 100644 --- a/sysdeps/m68k/m680x0/fpu/w_fmodf_compat.c +++ b/sysdeps/m68k/m680x0/fpu/w_fmodf_compat.c @@ -7,8 +7,9 @@ # define LIBM_SVID_COMPAT 1 # undef compat_symbol # define compat_symbol(a, b, c, d) -#endif -#include <math/w_fmodf_compat.c> -#ifdef SHARED +# 
include <math/w_fmodf_compat.c> libm_alias_float (__fmod_compat, fmod) +#else +#include <math-type-macros-float.h> +#include <w_fmod_template.c> #endif diff --git a/sysdeps/mach/hurd/bits/socket.h b/sysdeps/mach/hurd/bits/socket.h index 3e72f9fa93..b5eeac3731 100644 --- a/sysdeps/mach/hurd/bits/socket.h +++ b/sysdeps/mach/hurd/bits/socket.h @@ -153,7 +153,7 @@ enum __socket_type #include <bits/sockaddr.h> /* Structure describing a generic socket address. */ -struct sockaddr +struct __attribute_struct_may_alias__ sockaddr { __SOCKADDR_COMMON (sa_); /* Common data: address family and length. */ char sa_data[14]; /* Address data. */ @@ -170,7 +170,7 @@ struct sockaddr #define _SS_PADSIZE \ (_SS_SIZE - __SOCKADDR_COMMON_SIZE - sizeof (__ss_aligntype)) -struct sockaddr_storage +struct __attribute_struct_may_alias__ sockaddr_storage { __SOCKADDR_COMMON (ss_); /* Address family, etc. */ char __ss_padding[_SS_PADSIZE]; diff --git a/sysdeps/mach/hurd/i386/libm.abilist b/sysdeps/mach/hurd/i386/libm.abilist index 8f40ddb150..113daa4b44 100644 --- a/sysdeps/mach/hurd/i386/libm.abilist +++ b/sysdeps/mach/hurd/i386/libm.abilist @@ -1181,3 +1181,11 @@ GLIBC_2.35 fsqrt F GLIBC_2.35 fsqrtl F GLIBC_2.35 hypot F GLIBC_2.35 hypotf F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f128 F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1f64x F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/mach/hurd/lsetxattr.c b/sysdeps/mach/hurd/lsetxattr.c index 3f5d335a1a..49487f78fb 100644 --- a/sysdeps/mach/hurd/lsetxattr.c +++ b/sysdeps/mach/hurd/lsetxattr.c @@ -32,5 +32,5 @@ lsetxattr (const char *path, const char *name, const void *value, size_t size, return -1; err = _hurd_xattr_set (port, name, value, size, flags); __mach_port_deallocate (__mach_task_self (), port); - return err ? 
__hurd_fail (err) : size; + return __hurd_fail (err); } diff --git a/sysdeps/mach/hurd/x86_64/libm.abilist b/sysdeps/mach/hurd/x86_64/libm.abilist index 5c762b937b..0c7ffd4e3f 100644 --- a/sysdeps/mach/hurd/x86_64/libm.abilist +++ b/sysdeps/mach/hurd/x86_64/libm.abilist @@ -1038,3 +1038,11 @@ GLIBC_2.38 ynf32x F GLIBC_2.38 ynf64 F GLIBC_2.38 ynf64x F GLIBC_2.38 ynl F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f128 F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1f64x F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/or1k/fpu/fclrexcpt.c b/sysdeps/or1k/fpu/fclrexcpt.c new file mode 100644 index 0000000000..44224f9c24 --- /dev/null +++ b/sysdeps/or1k/fpu/fclrexcpt.c @@ -0,0 +1,44 @@ +/* Clear given exceptions in current floating-point environment. + OpenRISC version. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <https://www.gnu.org/licenses/>. */ + +#include <fenv.h> +#include <fpu_control.h> + +int +feclearexcept (int excepts) +{ + fpu_control_t cw; + fpu_control_t cw_new; + + /* Mask out unsupported bits/exceptions. */ + excepts &= FE_ALL_EXCEPT; + + /* Read the complete control word. */ + _FPU_GETCW (cw); + + cw_new = cw & ~excepts; + + /* Put the new data in effect. */ + if (cw != cw_new) + _FPU_SETCW (cw_new); + + /* Success. 
*/ + return 0; +} +libm_hidden_def (feclearexcept) diff --git a/sysdeps/or1k/fpu/fegetenv.c b/sysdeps/or1k/fpu/fegetenv.c new file mode 100644 index 0000000000..70c75aa0bf --- /dev/null +++ b/sysdeps/or1k/fpu/fegetenv.c @@ -0,0 +1,32 @@ +/* Store current floating-point environment. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <https://www.gnu.org/licenses/>. */ + +#include <fenv.h> +#include <fpu_control.h> + +int +__fegetenv (fenv_t *envp) +{ + _FPU_GETCW (*envp); + + /* Success. */ + return 0; +} +libm_hidden_def (__fegetenv) +weak_alias (__fegetenv, fegetenv) +libm_hidden_weak (fegetenv) diff --git a/sysdeps/or1k/fpu/fegetmode.c b/sysdeps/or1k/fpu/fegetmode.c new file mode 100644 index 0000000000..7fffd2e0b5 --- /dev/null +++ b/sysdeps/or1k/fpu/fegetmode.c @@ -0,0 +1,29 @@ +/* Store current floating-point control modes. OpenRISC version. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <fenv.h> +#include <fpu_control.h> + +int +fegetmode (femode_t *modep) +{ + _FPU_GETCW (*modep); + + /* Success. */ + return 0; +} diff --git a/sysdeps/or1k/fpu/fegetround.c b/sysdeps/or1k/fpu/fegetround.c new file mode 100644 index 0000000000..7e993b980a --- /dev/null +++ b/sysdeps/or1k/fpu/fegetround.c @@ -0,0 +1,29 @@ +/* Return current rounding direction. OpenRISC version. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <fenv.h> +#include <get-rounding-mode.h> + +int +__fegetround (void) +{ + return get_rounding_mode (); +} +libm_hidden_def (__fegetround) +weak_alias (__fegetround, fegetround) +libm_hidden_weak (fegetround) diff --git a/sysdeps/or1k/fpu/feholdexcpt.c b/sysdeps/or1k/fpu/feholdexcpt.c new file mode 100644 index 0000000000..0036e41ba2 --- /dev/null +++ b/sysdeps/or1k/fpu/feholdexcpt.c @@ -0,0 +1,33 @@ +/* Store current floating-point environment and clear exceptions. + OpenRISC version. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <https://www.gnu.org/licenses/>. */ + +#include <fenv.h> +#include <fenv_private.h> + +int +__feholdexcept (fenv_t *envp) +{ + libc_feholdexcept_or1k (envp); + + /* Success. */ + return 0; +} +libm_hidden_def (__feholdexcept) +weak_alias (__feholdexcept, feholdexcept) +libm_hidden_weak (feholdexcept) diff --git a/sysdeps/or1k/fpu/fenv_private.h b/sysdeps/or1k/fpu/fenv_private.h new file mode 100644 index 0000000000..4f401e7a5a --- /dev/null +++ b/sysdeps/or1k/fpu/fenv_private.h @@ -0,0 +1,199 @@ +/* Private floating point rounding and exceptions handling. OpenRISC version. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef OR1K_FENV_PRIVATE_H +#define OR1K_FENV_PRIVATE_H 1 + +#include <fenv.h> +#include <fpu_control.h> + +static __always_inline void +libc_feholdexcept_or1k (fenv_t *envp) +{ + fpu_control_t cw; + fpu_control_t cw_new; + + /* Get and store the environment. */ + _FPU_GETCW (cw); + *envp = cw; + + /* Clear the exception status flags. */ + cw_new = cw & ~FE_ALL_EXCEPT; + + if (cw != cw_new) + _FPU_SETCW (cw_new); +} + +#define libc_feholdexcept libc_feholdexcept_or1k +#define libc_feholdexceptf libc_feholdexcept_or1k +#define libc_feholdexceptl libc_feholdexcept_or1k + +static __always_inline void +libc_fesetround_or1k (int round) +{ + fpu_control_t cw; + fpu_control_t cw_new; + + _FPU_GETCW (cw); + cw_new = cw & ~_FPU_FPCSR_RM_MASK; + cw_new |= round; + if (cw != cw_new) + _FPU_SETCW (cw_new); +} + +#define libc_fesetround libc_fesetround_or1k +#define libc_fesetroundf libc_fesetround_or1k +#define libc_fesetroundl libc_fesetround_or1k + +static __always_inline void +libc_feholdexcept_setround_or1k (fenv_t *envp, int round) +{ + fpu_control_t cw; + fpu_control_t cw_new; + + /* Get and store the environment. */ + _FPU_GETCW (cw); + *envp = cw; + + /* Clear the status flags and rounding mode. */ + cw_new = cw & ~(FE_ALL_EXCEPT | _FPU_FPCSR_RM_MASK); + + /* Set rounding mode. 
*/ + cw_new |= round; + + if (cw != cw_new) + _FPU_SETCW (cw_new); +} + +#define libc_feholdexcept_setround libc_feholdexcept_setround_or1k +#define libc_feholdexcept_setroundf libc_feholdexcept_setround_or1k +#define libc_feholdexcept_setroundl libc_feholdexcept_setround_or1k + +static __always_inline int +libc_fetestexcept_or1k (int ex) +{ + fpu_control_t cw; + + /* Get current control word. */ + _FPU_GETCW (cw); + + /* Check if any of the queried exception flags are set. */ + return cw & ex & FE_ALL_EXCEPT; +} + +#define libc_fetestexcept libc_fetestexcept_or1k +#define libc_fetestexceptf libc_fetestexcept_or1k +#define libc_fetestexceptl libc_fetestexcept_or1k + +static __always_inline void +libc_fesetenv_or1k (const fenv_t *envp) +{ + if (envp == FE_DFL_ENV) + _FPU_SETCW (_FPU_DEFAULT); + else + _FPU_SETCW (*envp); +} + +#define libc_fesetenv libc_fesetenv_or1k +#define libc_fesetenvf libc_fesetenv_or1k +#define libc_fesetenvl libc_fesetenv_or1k +#define libc_feresetround_noex libc_fesetenv_or1k +#define libc_feresetround_noexf libc_fesetenv_or1k +#define libc_feresetround_noexl libc_fesetenv_or1k + +static __always_inline int +libc_feupdateenv_test_or1k (const fenv_t *envp, int ex) +{ + fpu_control_t cw; + fpu_control_t cw_new; + int excepts; + + /* Get current control word. */ + _FPU_GETCW (cw); + + /* Merge current exception flags with the passed fenv. */ + excepts = cw & FE_ALL_EXCEPT; + cw_new = (envp == FE_DFL_ENV ? _FPU_DEFAULT : *envp) | excepts; + + if (__glibc_unlikely (cw != cw_new)) + _FPU_SETCW (cw_new); + + /* Raise the exceptions if enabled in the new FP state. 
*/ + if (__glibc_unlikely (excepts)) + __feraiseexcept (excepts); + + return excepts & ex; +} + +#define libc_feupdateenv_test libc_feupdateenv_test_or1k +#define libc_feupdateenv_testf libc_feupdateenv_test_or1k +#define libc_feupdateenv_testl libc_feupdateenv_test_or1k + +static __always_inline void +libc_feupdateenv_or1k (const fenv_t *envp) +{ + libc_feupdateenv_test_or1k (envp, 0); +} + +#define libc_feupdateenv libc_feupdateenv_or1k +#define libc_feupdateenvf libc_feupdateenv_or1k +#define libc_feupdateenvl libc_feupdateenv_or1k + +static __always_inline void +libc_feholdsetround_or1k (fenv_t *envp, int round) +{ + fpu_control_t cw; + + _FPU_GETCW (cw); + *envp = cw; + + /* Check whether rounding modes are different. */ + round = (cw ^ round) & _FPU_FPCSR_RM_MASK; + + /* Set new rounding mode if different. */ + if (__glibc_unlikely (round != 0)) + _FPU_SETCW (cw ^ round); +} + +#define libc_feholdsetround libc_feholdsetround_or1k +#define libc_feholdsetroundf libc_feholdsetround_or1k +#define libc_feholdsetroundl libc_feholdsetround_or1k + +static __always_inline void +libc_feresetround_or1k (fenv_t *envp) +{ + fpu_control_t cw; + int round; + + _FPU_GETCW (cw); + + /* Check whether rounding modes are different. */ + round = (*envp ^ cw) & _FPU_FPCSR_RM_MASK; + + /* Restore the rounding mode if it was changed. */ + if (__glibc_unlikely (round != 0)) + _FPU_SETCW (cw ^ round); +} + +#define libc_feresetround libc_feresetround_or1k +#define libc_feresetroundf libc_feresetround_or1k +#define libc_feresetroundl libc_feresetround_or1k + +#include_next <fenv_private.h> + +#endif diff --git a/sysdeps/or1k/fpu/fesetenv.c b/sysdeps/or1k/fpu/fesetenv.c new file mode 100644 index 0000000000..742ca719e0 --- /dev/null +++ b/sysdeps/or1k/fpu/fesetenv.c @@ -0,0 +1,32 @@ +/* Install given floating-point environment. OpenRISC version. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <https://www.gnu.org/licenses/>. */ + +#include <fenv.h> +#include <fenv_private.h> + +int +__fesetenv (const fenv_t *envp) +{ + libc_fesetenv_or1k (envp); + + /* Success. */ + return 0; +} +libm_hidden_def (__fesetenv) +weak_alias (__fesetenv, fesetenv) +libm_hidden_weak (fesetenv) diff --git a/sysdeps/or1k/fpu/fesetexcept.c b/sysdeps/or1k/fpu/fesetexcept.c new file mode 100644 index 0000000000..43734eac18 --- /dev/null +++ b/sysdeps/or1k/fpu/fesetexcept.c @@ -0,0 +1,35 @@ +/* Set given exception flags. OpenRISC version. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <fenv.h> +#include <fpu_control.h> + +int +fesetexcept (int excepts) +{ + fpu_control_t cw; + fpu_control_t cw_new; + + _FPU_GETCW (cw); + cw_new = cw | (excepts & FE_ALL_EXCEPT); + if (cw != cw_new) + _FPU_SETCW (cw_new); + + /* Success. */ + return 0; +} diff --git a/sysdeps/or1k/fpu/fesetmode.c b/sysdeps/or1k/fpu/fesetmode.c new file mode 100644 index 0000000000..d4556927ce --- /dev/null +++ b/sysdeps/or1k/fpu/fesetmode.c @@ -0,0 +1,39 @@ +/* Install given floating-point control modes. OpenRISC version. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <fenv.h> +#include <fpu_control.h> + +int +fesetmode (const femode_t *modep) +{ + fpu_control_t cw; + fpu_control_t cw_new; + + _FPU_GETCW (cw); + cw_new = cw & ~_FPU_FPCSR_RM_MASK; + if (modep == FE_DFL_MODE) + cw_new |= (_FPU_DEFAULT & _FPU_FPCSR_RM_MASK); + else + cw_new |= (*modep & _FPU_FPCSR_RM_MASK); + if (cw != cw_new) + _FPU_SETCW (cw_new); + + /* Success. */ + return 0; +} diff --git a/sysdeps/or1k/fpu/fesetround.c b/sysdeps/or1k/fpu/fesetround.c new file mode 100644 index 0000000000..c2ada98f1b --- /dev/null +++ b/sysdeps/or1k/fpu/fesetround.c @@ -0,0 +1,39 @@ +/* Set current rounding direction. OpenRISC version. + Copyright (C) 2024 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <https://www.gnu.org/licenses/>. */ + +#include <fenv.h> +#include <fenv_private.h> + +int +__fesetround (int round) +{ + switch (round) + { + case FE_TONEAREST: + case FE_TOWARDZERO: + case FE_DOWNWARD: + case FE_UPWARD: + libc_fesetround_or1k (round); + return 0; + default: + return round; /* A nonzero value. */ + } +} +libm_hidden_def (__fesetround) +weak_alias (__fesetround, fesetround) +libm_hidden_weak (fesetround) diff --git a/sysdeps/or1k/fpu/feupdateenv.c b/sysdeps/or1k/fpu/feupdateenv.c new file mode 100644 index 0000000000..3355bf6596 --- /dev/null +++ b/sysdeps/or1k/fpu/feupdateenv.c @@ -0,0 +1,33 @@ +/* Install given floating-point environment and raise exceptions. + OpenRISC version. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <https://www.gnu.org/licenses/>. */ + +#include <fenv.h> +#include <fenv_private.h> + +int +__feupdateenv (const fenv_t *envp) +{ + libc_feupdateenv_or1k (envp); + + /* Success. */ + return 0; +} +libm_hidden_def (__feupdateenv) +weak_alias (__feupdateenv, feupdateenv) +libm_hidden_weak (feupdateenv) diff --git a/sysdeps/or1k/fpu/fgetexcptflg.c b/sysdeps/or1k/fpu/fgetexcptflg.c new file mode 100644 index 0000000000..a954f6a2f1 --- /dev/null +++ b/sysdeps/or1k/fpu/fgetexcptflg.c @@ -0,0 +1,29 @@ +/* Store current state of exceptions. OpenRISC version. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <https://www.gnu.org/licenses/>. */ + +#include <fenv.h> +#include <fenv_private.h> + +int +fegetexceptflag (fexcept_t *flagp, int excepts) +{ + *flagp = libc_fetestexcept_or1k (excepts); + + /* Success. 
*/ + return 0; +} diff --git a/sysdeps/or1k/fpu/fix-fp-int-convert-overflow.h b/sysdeps/or1k/fpu/fix-fp-int-convert-overflow.h new file mode 100644 index 0000000000..78104289c0 --- /dev/null +++ b/sysdeps/or1k/fpu/fix-fp-int-convert-overflow.h @@ -0,0 +1,38 @@ +/* Fix for conversion of floating point to integer overflow. OpenRISC version. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef FIX_FP_INT_CONVERT_OVERFLOW_H +#define FIX_FP_INT_CONVERT_OVERFLOW_H 1 + +/* The generic libgcc2.c conversions from floating point to long long + may not raise the correct exceptions on overflow (and may raise + spurious "inexact" exceptions even in non-overflow cases, see + <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59412>). 
*/ +#define FIX_FLT_LONG_CONVERT_OVERFLOW 0 +#define FIX_FLT_LLONG_CONVERT_OVERFLOW 1 + +#define FIX_DBL_LONG_CONVERT_OVERFLOW 0 +#define FIX_DBL_LLONG_CONVERT_OVERFLOW 1 + +#define FIX_LDBL_LONG_CONVERT_OVERFLOW 0 +#define FIX_LDBL_LLONG_CONVERT_OVERFLOW 0 + +#define FIX_FLT128_LONG_CONVERT_OVERFLOW 0 +#define FIX_FLT128_LLONG_CONVERT_OVERFLOW 0 + +#endif /* fix-fp-int-convert-overflow.h */ diff --git a/sysdeps/or1k/fpu/fraiseexcpt.c b/sysdeps/or1k/fpu/fraiseexcpt.c new file mode 100644 index 0000000000..bbacfd50bc --- /dev/null +++ b/sysdeps/or1k/fpu/fraiseexcpt.c @@ -0,0 +1,67 @@ +/* Raise given exceptions. OpenRISC version. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <https://www.gnu.org/licenses/>. */ + +#include <fenv.h> +#include <fpu_control.h> +#include <float.h> +#include <math.h> + +int +__feraiseexcept (int excepts) +{ + if (excepts == 0) + return 0; + + /* Raise exceptions represented by EXPECTS. 
*/ + + if (excepts & FE_INEXACT) + { + float d = 1.0, x = 3.0; + __asm__ volatile ("lf.div.s %0, %0, %1" : "+r" (d) : "r" (x)); + } + + if (excepts & FE_UNDERFLOW) + { + float d = FLT_MIN; + __asm__ volatile ("lf.mul.s %0, %0, %0" : "+r" (d)); + } + + if (excepts & FE_OVERFLOW) + { + float d = FLT_MAX; + __asm__ volatile ("lf.mul.s %0, %0, %0" : "+r" (d) : "r" (d)); + } + + if (excepts & FE_DIVBYZERO) + { + float d = 1.0, x = 0.0; + __asm__ volatile ("lf.div.s %0, %0, %1" : "+r" (d) : "r" (x)); + } + + if (excepts & FE_INVALID) + { + float d = HUGE_VAL, x = 0.0; + __asm__ volatile ("lf.mul.s %0, %1, %0" : "+r" (d) : "r" (x)); + } + + /* Success. */ + return 0; +} +libm_hidden_def (__feraiseexcept) +weak_alias (__feraiseexcept, feraiseexcept) +libm_hidden_weak (feraiseexcept) diff --git a/sysdeps/or1k/fpu/fsetexcptflg.c b/sysdeps/or1k/fpu/fsetexcptflg.c new file mode 100644 index 0000000000..c327e4c5d1 --- /dev/null +++ b/sysdeps/or1k/fpu/fsetexcptflg.c @@ -0,0 +1,43 @@ +/* Set floating-point environment exception handling. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <fenv.h> +#include <fpu_control.h> + +int +fesetexceptflag (const fexcept_t *flagp, int excepts) +{ + fpu_control_t cw; + fpu_control_t cw_new; + + /* Get the current exceptions. */ + _FPU_GETCW (cw); + + /* Make sure the flags we want restored are legal. */ + excepts &= FE_ALL_EXCEPT; + + /* Now set selected bits from flagp. Note that we ignore all non-flag + bits from *flagp, so they don't matter. */ + cw_new = (cw & ~excepts) | (*flagp & excepts); + + if (cw != cw_new) + _FPU_SETCW (cw_new); + + /* Success. */ + return 0; +} diff --git a/sysdeps/or1k/fpu/ftestexcept.c b/sysdeps/or1k/fpu/ftestexcept.c new file mode 100644 index 0000000000..59f06afa22 --- /dev/null +++ b/sysdeps/or1k/fpu/ftestexcept.c @@ -0,0 +1,27 @@ +/* Test exception in current environment. OpenRISC version. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <fenv.h> +#include <fenv_private.h> + +int +fetestexcept (int excepts) +{ + return libc_fetestexcept_or1k (excepts); +} +libm_hidden_def (fetestexcept) diff --git a/sysdeps/or1k/fpu/get-rounding-mode.h b/sysdeps/or1k/fpu/get-rounding-mode.h new file mode 100644 index 0000000000..a66d553be8 --- /dev/null +++ b/sysdeps/or1k/fpu/get-rounding-mode.h @@ -0,0 +1,38 @@ +/* Determine floating-point rounding mode within libc. OpenRISC version. + + Copyright (C) 2024 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _OR1K_GET_ROUNDING_MODE_H +#define _OR1K_GET_ROUNDING_MODE_H 1 + +#include <fenv.h> +#include <fpu_control.h> + +/* Return the floating-point rounding mode. 
*/ + +static inline int +get_rounding_mode (void) +{ + fpu_control_t cw; + + _FPU_GETCW (cw); + return cw & _FPU_FPCSR_RM_MASK; +} + +#endif /* get-rounding-mode.h */ diff --git a/sysdeps/or1k/fpu/libm-test-ulps b/sysdeps/or1k/fpu/libm-test-ulps new file mode 100644 index 0000000000..948ec01cdc --- /dev/null +++ b/sysdeps/or1k/fpu/libm-test-ulps @@ -0,0 +1,1115 @@ +# Begin of automatic generation + +# Maximal error of functions: +Function: "acos": +double: 1 +float: 1 + +Function: "acos_downward": +double: 1 +float: 1 + +Function: "acos_towardzero": +double: 1 +float: 1 + +Function: "acos_upward": +double: 1 +float: 1 + +Function: "acosh": +double: 2 +float: 2 + +Function: "acosh_downward": +double: 2 +float: 2 + +Function: "acosh_towardzero": +double: 2 +float: 2 + +Function: "acosh_upward": +double: 2 +float: 2 + +Function: "asin": +double: 1 +float: 1 + +Function: "asin_downward": +double: 1 +float: 1 + +Function: "asin_towardzero": +double: 1 +float: 1 + +Function: "asin_upward": +double: 2 +float: 1 + +Function: "asinh": +double: 2 +float: 2 + +Function: "asinh_downward": +double: 3 +float: 3 + +Function: "asinh_towardzero": +double: 2 +float: 2 + +Function: "asinh_upward": +double: 3 +float: 3 + +Function: "atan": +double: 1 +float: 1 + +Function: "atan2": +float: 2 + +Function: "atan2_downward": +double: 5 +float: 2 + +Function: "atan2_towardzero": +double: 5 +float: 2 + +Function: "atan2_upward": +double: 8 +float: 2 + +Function: "atan_downward": +double: 1 +float: 2 + +Function: "atan_towardzero": +double: 1 +float: 1 + +Function: "atan_upward": +double: 1 +float: 2 + +Function: "atanh": +double: 2 +float: 2 + +Function: "atanh_downward": +double: 3 +float: 3 + +Function: "atanh_towardzero": +double: 2 +float: 2 + +Function: "atanh_upward": +double: 3 +float: 3 + +Function: "cabs": +double: 1 + +Function: "cabs_downward": +double: 1 + +Function: "cabs_towardzero": +double: 1 + +Function: "cabs_upward": +double: 1 + +Function: Real part of "cacos": +double: 
1 +float: 2 + +Function: Imaginary part of "cacos": +double: 2 +float: 2 + +Function: Real part of "cacos_downward": +double: 3 +float: 2 + +Function: Imaginary part of "cacos_downward": +double: 5 +float: 3 + +Function: Real part of "cacos_towardzero": +double: 3 +float: 2 + +Function: Imaginary part of "cacos_towardzero": +double: 4 +float: 2 + +Function: Real part of "cacos_upward": +double: 2 +float: 2 + +Function: Imaginary part of "cacos_upward": +double: 5 +float: 7 + +Function: Real part of "cacosh": +double: 2 +float: 2 + +Function: Imaginary part of "cacosh": +double: 1 +float: 2 + +Function: Real part of "cacosh_downward": +double: 4 +float: 2 + +Function: Imaginary part of "cacosh_downward": +double: 3 +float: 3 + +Function: Real part of "cacosh_towardzero": +double: 4 +float: 2 + +Function: Imaginary part of "cacosh_towardzero": +double: 3 +float: 2 + +Function: Real part of "cacosh_upward": +double: 4 +float: 3 + +Function: Imaginary part of "cacosh_upward": +double: 3 +float: 2 + +Function: "carg": +float: 1 + +Function: "carg_downward": +double: 5 +float: 2 + +Function: "carg_towardzero": +double: 5 +float: 2 + +Function: "carg_upward": +double: 8 +float: 2 + +Function: Real part of "casin": +double: 1 +float: 1 + +Function: Imaginary part of "casin": +double: 2 +float: 2 + +Function: Real part of "casin_downward": +double: 3 +float: 2 + +Function: Imaginary part of "casin_downward": +double: 5 +float: 3 + +Function: Real part of "casin_towardzero": +double: 3 +float: 1 + +Function: Imaginary part of "casin_towardzero": +double: 4 +float: 2 + +Function: Real part of "casin_upward": +double: 3 +float: 2 + +Function: Imaginary part of "casin_upward": +double: 5 +float: 7 + +Function: Real part of "casinh": +double: 2 +float: 2 + +Function: Imaginary part of "casinh": +double: 1 +float: 1 + +Function: Real part of "casinh_downward": +double: 5 +float: 3 + +Function: Imaginary part of "casinh_downward": +double: 3 +float: 2 + +Function: Real part of 
"casinh_towardzero": +double: 4 +float: 2 + +Function: Imaginary part of "casinh_towardzero": +double: 3 +float: 1 + +Function: Real part of "casinh_upward": +double: 5 +float: 7 + +Function: Imaginary part of "casinh_upward": +double: 3 +float: 2 + +Function: Real part of "catan": +double: 1 +float: 1 + +Function: Imaginary part of "catan": +double: 1 +float: 1 + +Function: Real part of "catan_downward": +double: 1 +float: 2 + +Function: Imaginary part of "catan_downward": +double: 2 +float: 2 + +Function: Real part of "catan_towardzero": +double: 1 +float: 2 + +Function: Imaginary part of "catan_towardzero": +double: 2 +float: 2 + +Function: Real part of "catan_upward": +double: 2 +float: 1 + +Function: Imaginary part of "catan_upward": +double: 2 +float: 2 + +Function: Real part of "catanh": +double: 1 +float: 1 + +Function: Imaginary part of "catanh": +double: 1 +float: 1 + +Function: Real part of "catanh_downward": +double: 2 +float: 2 + +Function: Imaginary part of "catanh_downward": +double: 1 +float: 2 + +Function: Real part of "catanh_towardzero": +double: 2 +float: 2 + +Function: Imaginary part of "catanh_towardzero": +double: 1 +float: 2 + +Function: Real part of "catanh_upward": +double: 4 +float: 4 + +Function: Imaginary part of "catanh_upward": +double: 2 +float: 1 + +Function: "cbrt": +double: 4 +float: 1 + +Function: "cbrt_downward": +double: 4 +float: 1 + +Function: "cbrt_towardzero": +double: 3 +float: 1 + +Function: "cbrt_upward": +double: 5 +float: 1 + +Function: Real part of "ccos": +double: 1 +float: 1 + +Function: Imaginary part of "ccos": +double: 1 +float: 1 + +Function: Real part of "ccos_downward": +double: 3 +float: 1 + +Function: Imaginary part of "ccos_downward": +double: 3 +float: 3 + +Function: Real part of "ccos_towardzero": +double: 3 +float: 2 + +Function: Imaginary part of "ccos_towardzero": +double: 3 +float: 3 + +Function: Real part of "ccos_upward": +double: 1 +float: 2 + +Function: Imaginary part of "ccos_upward": +double: 2 
+float: 2 + +Function: Real part of "ccosh": +double: 2 +float: 1 + +Function: Imaginary part of "ccosh": +double: 1 +float: 1 + +Function: Real part of "ccosh_downward": +double: 3 +float: 2 + +Function: Imaginary part of "ccosh_downward": +double: 3 +float: 3 + +Function: Real part of "ccosh_towardzero": +double: 3 +float: 3 + +Function: Imaginary part of "ccosh_towardzero": +double: 3 +float: 3 + +Function: Real part of "ccosh_upward": +double: 1 +float: 2 + +Function: Imaginary part of "ccosh_upward": +double: 2 +float: 2 + +Function: Real part of "cexp": +double: 2 +float: 1 + +Function: Imaginary part of "cexp": +double: 1 +float: 2 + +Function: Real part of "cexp_downward": +double: 4 +float: 2 + +Function: Imaginary part of "cexp_downward": +double: 3 +float: 3 + +Function: Real part of "cexp_towardzero": +double: 4 +float: 2 + +Function: Imaginary part of "cexp_towardzero": +double: 3 +float: 3 + +Function: Real part of "cexp_upward": +double: 2 +float: 2 + +Function: Imaginary part of "cexp_upward": +double: 3 +float: 2 + +Function: Real part of "clog": +double: 3 +float: 3 + +Function: Imaginary part of "clog": +double: 1 +float: 1 + +Function: Real part of "clog10": +double: 3 +float: 4 + +Function: Imaginary part of "clog10": +double: 2 +float: 2 + +Function: Real part of "clog10_downward": +double: 7 +float: 5 + +Function: Imaginary part of "clog10_downward": +double: 8 +float: 4 + +Function: Real part of "clog10_towardzero": +double: 5 +float: 5 + +Function: Imaginary part of "clog10_towardzero": +double: 8 +float: 4 + +Function: Real part of "clog10_upward": +double: 6 +float: 5 + +Function: Imaginary part of "clog10_upward": +double: 5 +float: 4 + +Function: Real part of "clog_downward": +double: 4 +float: 3 + +Function: Imaginary part of "clog_downward": +double: 5 +float: 2 + +Function: Real part of "clog_towardzero": +double: 8 +float: 4 + +Function: Imaginary part of "clog_towardzero": +double: 5 +float: 3 + +Function: Real part of 
"clog_upward": +double: 8 +float: 3 + +Function: Imaginary part of "clog_upward": +double: 8 +float: 2 + +Function: "cos": +double: 1 +float: 1 + +Function: "cos_downward": +double: 1 +float: 1 + +Function: "cos_towardzero": +double: 4 +float: 1 + +Function: "cos_upward": +double: 4 +float: 1 + +Function: "cosh": +double: 2 +float: 2 + +Function: "cosh_downward": +double: 3 +float: 1 + +Function: "cosh_towardzero": +double: 3 +float: 1 + +Function: "cosh_upward": +double: 2 +float: 2 + +Function: Real part of "cpow": +double: 2 +float: 5 + +Function: Imaginary part of "cpow": +float: 2 + +Function: Real part of "cpow_downward": +double: 5 +float: 8 + +Function: Imaginary part of "cpow_downward": +double: 2 +float: 2 + +Function: Real part of "cpow_towardzero": +double: 5 +float: 8 + +Function: Imaginary part of "cpow_towardzero": +double: 2 +float: 2 + +Function: Real part of "cpow_upward": +double: 4 +float: 1 + +Function: Imaginary part of "cpow_upward": +double: 2 +float: 2 + +Function: Real part of "csin": +double: 1 +float: 1 + +Function: Real part of "csin_downward": +double: 3 +float: 3 + +Function: Imaginary part of "csin_downward": +double: 3 +float: 1 + +Function: Real part of "csin_towardzero": +double: 3 +float: 3 + +Function: Imaginary part of "csin_towardzero": +double: 3 +float: 1 + +Function: Real part of "csin_upward": +double: 2 +float: 2 + +Function: Imaginary part of "csin_upward": +double: 1 +float: 2 + +Function: Real part of "csinh": +float: 1 + +Function: Imaginary part of "csinh": +double: 1 +float: 1 + +Function: Real part of "csinh_downward": +double: 3 +float: 1 + +Function: Imaginary part of "csinh_downward": +double: 3 +float: 3 + +Function: Real part of "csinh_towardzero": +double: 3 +float: 2 + +Function: Imaginary part of "csinh_towardzero": +double: 3 +float: 3 + +Function: Real part of "csinh_upward": +double: 1 +float: 2 + +Function: Imaginary part of "csinh_upward": +double: 2 +float: 2 + +Function: Real part of "csqrt": 
+double: 2 +float: 2 + +Function: Imaginary part of "csqrt": +double: 2 +float: 2 + +Function: Real part of "csqrt_downward": +double: 5 +float: 4 + +Function: Imaginary part of "csqrt_downward": +double: 4 +float: 3 + +Function: Real part of "csqrt_towardzero": +double: 4 +float: 3 + +Function: Imaginary part of "csqrt_towardzero": +double: 4 +float: 3 + +Function: Real part of "csqrt_upward": +double: 5 +float: 4 + +Function: Imaginary part of "csqrt_upward": +double: 3 +float: 3 + +Function: Real part of "ctan": +double: 1 +float: 1 + +Function: Imaginary part of "ctan": +double: 2 +float: 2 + +Function: Real part of "ctan_downward": +double: 6 +float: 5 + +Function: Imaginary part of "ctan_downward": +double: 3 +float: 2 + +Function: Real part of "ctan_towardzero": +double: 5 +float: 3 + +Function: Imaginary part of "ctan_towardzero": +double: 3 +float: 2 + +Function: Real part of "ctan_upward": +double: 2 +float: 4 + +Function: Imaginary part of "ctan_upward": +double: 2 +float: 1 + +Function: Real part of "ctanh": +double: 2 +float: 2 + +Function: Imaginary part of "ctanh": +double: 2 +float: 2 + +Function: Real part of "ctanh_downward": +double: 3 +float: 2 + +Function: Imaginary part of "ctanh_downward": +double: 6 +float: 5 + +Function: Real part of "ctanh_towardzero": +double: 3 +float: 2 + +Function: Imaginary part of "ctanh_towardzero": +double: 5 +float: 3 + +Function: Real part of "ctanh_upward": +double: 2 +float: 2 + +Function: Imaginary part of "ctanh_upward": +double: 2 +float: 3 + +Function: "erf": +double: 1 +float: 1 + +Function: "erf_downward": +double: 1 +float: 1 + +Function: "erf_towardzero": +double: 1 +float: 1 + +Function: "erf_upward": +double: 1 +float: 1 + +Function: "erfc": +double: 5 +float: 3 + +Function: "erfc_downward": +double: 5 +float: 6 + +Function: "erfc_towardzero": +double: 3 +float: 4 + +Function: "erfc_upward": +double: 5 +float: 6 + +Function: "exp": +double: 1 +float: 1 + +Function: "exp10": +double: 2 +float: 1 + 
+Function: "exp10_downward": +double: 3 +float: 1 + +Function: "exp10_towardzero": +double: 3 +float: 1 + +Function: "exp10_upward": +double: 2 +float: 1 + +Function: "exp2": +double: 1 + +Function: "exp2_downward": +double: 1 + +Function: "exp2_towardzero": +double: 1 + +Function: "exp2_upward": +double: 1 +float: 1 + +Function: "exp_downward": +double: 1 +float: 1 + +Function: "exp_towardzero": +double: 1 +float: 1 + +Function: "exp_upward": +double: 1 +float: 1 + +Function: "expm1": +double: 1 +float: 1 + +Function: "expm1_downward": +double: 1 +float: 1 + +Function: "expm1_towardzero": +double: 1 +float: 2 + +Function: "expm1_upward": +double: 1 +float: 1 + +Function: "gamma": +double: 4 +float: 7 + +Function: "gamma_downward": +double: 7 +float: 7 + +Function: "gamma_towardzero": +double: 7 +float: 7 + +Function: "gamma_upward": +double: 5 +float: 6 + +Function: "hypot": +double: 1 +float: 1 + +Function: "hypot_downward": +double: 1 + +Function: "hypot_towardzero": +double: 1 + +Function: "hypot_upward": +double: 1 + +Function: "j0": +double: 2 +float: 9 + +Function: "j0_downward": +double: 5 +float: 9 + +Function: "j0_towardzero": +double: 6 +float: 9 + +Function: "j0_upward": +double: 9 +float: 9 + +Function: "j1": +double: 4 +float: 9 + +Function: "j1_downward": +double: 5 +float: 8 + +Function: "j1_towardzero": +double: 4 +float: 8 + +Function: "j1_upward": +double: 9 +float: 9 + +Function: "jn": +double: 4 +float: 4 + +Function: "jn_downward": +double: 7 +float: 9 + +Function: "jn_towardzero": +double: 7 +float: 9 + +Function: "jn_upward": +double: 7 +float: 9 + +Function: "lgamma": +double: 4 +float: 7 + +Function: "lgamma_downward": +double: 7 +float: 7 + +Function: "lgamma_towardzero": +double: 7 +float: 7 + +Function: "lgamma_upward": +double: 5 +float: 6 + +Function: "log10": +double: 2 +float: 2 + +Function: "log10_downward": +double: 2 +float: 3 + +Function: "log10_towardzero": +double: 2 +float: 1 + +Function: "log10_upward": +double: 2 +float: 2 
+ +Function: "log1p": +double: 1 +float: 1 + +Function: "log1p_downward": +double: 2 +float: 2 + +Function: "log1p_towardzero": +double: 2 +float: 2 + +Function: "log1p_upward": +double: 2 +float: 2 + +Function: "log2": +float: 1 + +Function: "log2_downward": +double: 1 + +Function: "log2_towardzero": +double: 1 + +Function: "log2_upward": +double: 1 + +Function: "pow": +double: 1 + +Function: "pow_downward": +double: 1 +float: 1 + +Function: "pow_towardzero": +double: 1 +float: 1 + +Function: "pow_upward": +double: 1 +float: 1 + +Function: "sin": +double: 1 +float: 1 + +Function: "sin_downward": +double: 4 +float: 1 + +Function: "sin_towardzero": +double: 3 +float: 1 + +Function: "sin_upward": +double: 7 +float: 1 + +Function: "sincos": +double: 1 + +Function: "sincos_downward": +double: 1 +float: 1 + +Function: "sincos_towardzero": +double: 4 +float: 1 + +Function: "sincos_upward": +double: 1 +float: 1 + +Function: "sinh": +double: 2 +float: 2 + +Function: "sinh_downward": +double: 3 +float: 3 + +Function: "sinh_towardzero": +double: 3 +float: 2 + +Function: "sinh_upward": +double: 3 +float: 3 + +Function: "tan": +float: 1 + +Function: "tan_downward": +double: 1 +float: 2 + +Function: "tan_towardzero": +double: 1 +float: 1 + +Function: "tan_upward": +double: 1 +float: 1 + +Function: "tanh": +double: 2 +float: 2 + +Function: "tanh_downward": +double: 3 +float: 3 + +Function: "tanh_towardzero": +double: 2 +float: 2 + +Function: "tanh_upward": +double: 3 +float: 3 + +Function: "tgamma": +double: 9 +float: 8 + +Function: "tgamma_downward": +double: 9 +float: 9 + +Function: "tgamma_towardzero": +double: 9 +float: 8 + +Function: "tgamma_upward": +double: 9 +float: 8 + +Function: "y0": +double: 3 +float: 9 + +Function: "y0_downward": +double: 3 +float: 9 + +Function: "y0_towardzero": +double: 4 +float: 9 + +Function: "y0_upward": +double: 3 +float: 9 + +Function: "y1": +double: 3 +float: 9 + +Function: "y1_downward": +double: 6 +float: 9 + +Function: "y1_towardzero": 
+double: 3 +float: 9 + +Function: "y1_upward": +double: 7 +float: 9 + +Function: "yn": +double: 3 +float: 3 + +Function: "yn_downward": +double: 6 +float: 8 + +Function: "yn_towardzero": +double: 8 +float: 8 + +Function: "yn_upward": +double: 8 +float: 8 + +# end of automatic generation diff --git a/sysdeps/or1k/fpu/libm-test-ulps-name b/sysdeps/or1k/fpu/libm-test-ulps-name new file mode 100644 index 0000000000..7f72f7a873 --- /dev/null +++ b/sysdeps/or1k/fpu/libm-test-ulps-name @@ -0,0 +1 @@ +OpenRISC hard-float diff --git a/sysdeps/or1k/fpu_control.h b/sysdeps/or1k/fpu_control.h new file mode 100644 index 0000000000..f89364a31e --- /dev/null +++ b/sysdeps/or1k/fpu_control.h @@ -0,0 +1,89 @@ +/* FPU control word bits. OpenRISC version. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _FPU_CONTROL_H +#define _FPU_CONTROL_H + +#ifndef __or1k_hard_float__ + +# define _FPU_RESERVED 0xffffffff +# define _FPU_DEFAULT 0x00000000 +# define _FPU_GETCW(cw) (cw) = 0 +# define _FPU_SETCW(cw) (void) (cw) + +#else /* __or1k_hard_float__ */ + +/* Layout of FPCSR: + + The bits of the FPCSR are defined as follows, this should help + explain how the masks below have come to be. 
+ + +-----------+----------------------------+-----+----+ + | 32 - 12 | 11 10 9 8 7 6 5 4 3 | 2-1 | 0 | + +-----------+----------------------------+-----+----+ + | Reserved | DZ IN IV IX Z QN SN UN OV | RM | EE | + +-----------+----------------------------+-----+----+ + + Exception flags: + + DZ - divide by zero flag. + IN - infinite flag. + IV - invalid flag. + IX - inexact flag. + Z - zero flag. + QN - qnan flag. + SN - snan flag. + UN - underflow flag. + OV - overflow flag. + + Rounding modes: + + The FPCSR bits 2-1 labeled above as RM specify the rounding mode. + + 00 - round to nearest + 01 - round to zero + 10 - round to positive infinity + 11 - round to negative infinity + + Enabling exceptions: + + EE - set to enable FPU exceptions. + + */ + +# define _FPU_RESERVED 0xfffff000 +/* Default: rounding to nearest with exceptions disabled. */ +# define _FPU_DEFAULT 0 +/* IEEE: Same as above with exceptions enabled. */ +# define _FPU_IEEE (_FPU_DEFAULT | 1) + +# define _FPU_FPCSR_RM_MASK (0x3 << 1) + +/* Macros for accessing the hardware control word. */ +# define _FPU_GETCW(cw) __asm__ volatile ("l.mfspr %0,r0,20" : "=r" (cw)) +# define _FPU_SETCW(cw) __asm__ volatile ("l.mtspr r0,%0,20" : : "r" (cw)) + +#endif /* __or1k_hard_float__ */ + +/* Type of the control word. */ +typedef unsigned int fpu_control_t; + +/* Default control word set at startup. */ +extern fpu_control_t __fpu_control; + +#endif /* fpu_control.h */ diff --git a/sysdeps/or1k/libm-test-ulps-name b/sysdeps/or1k/libm-test-ulps-name deleted file mode 100644 index 0af6591fd9..0000000000 --- a/sysdeps/or1k/libm-test-ulps-name +++ /dev/null @@ -1 +0,0 @@ -OpenRISC diff --git a/sysdeps/or1k/math-tests-snan-payload.h b/sysdeps/or1k/math-tests-snan-payload.h new file mode 100644 index 0000000000..62467a371c --- /dev/null +++ b/sysdeps/or1k/math-tests-snan-payload.h @@ -0,0 +1,26 @@ +/* Configuration for math tests: sNaN payloads. OpenRISC version. + Copyright (C) 2024 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef OR1K_MATH_TESTS_SNAN_PAYLOAD_H +#define OR1K_MATH_TESTS_SNAN_PAYLOAD_H 1 + +/* OpenRISC floating-point instructions do not preserve NaN + payloads. */ +#define SNAN_TESTS_PRESERVE_PAYLOAD 0 + +#endif /* math-tests-snan-payload.h */ diff --git a/sysdeps/or1k/math-tests-trap.h b/sysdeps/or1k/math-tests-trap.h new file mode 100644 index 0000000000..a95b42d66d --- /dev/null +++ b/sysdeps/or1k/math-tests-trap.h @@ -0,0 +1,27 @@ +/* Configuration for math tests: support for enabling exception traps. + OpenRISC version. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef OR1K_MATH_TESTS_TRAP_H +#define OR1K_MATH_TESTS_TRAP_H 1 + +#include <fenv.h> + +#define EXCEPTION_ENABLE_SUPPORTED(EXCEPT) ((EXCEPT) == 0) + +#endif /* math-tests-trap.h */ diff --git a/sysdeps/or1k/libm-test-ulps b/sysdeps/or1k/nofpu/libm-test-ulps index 785bae70d0..785bae70d0 100644 --- a/sysdeps/or1k/libm-test-ulps +++ b/sysdeps/or1k/nofpu/libm-test-ulps diff --git a/sysdeps/or1k/nofpu/libm-test-ulps-name b/sysdeps/or1k/nofpu/libm-test-ulps-name new file mode 100644 index 0000000000..76c66a0e53 --- /dev/null +++ b/sysdeps/or1k/nofpu/libm-test-ulps-name @@ -0,0 +1 @@ +OpenRISC soft-float diff --git a/sysdeps/or1k/sfp-machine.h b/sysdeps/or1k/sfp-machine.h index d17fd37730..70aa421ef0 100644 --- a/sysdeps/or1k/sfp-machine.h +++ b/sysdeps/or1k/sfp-machine.h @@ -90,4 +90,21 @@ #define FP_ROUNDMODE (_fpcsr & FP_RND_MASK) +#ifdef __or1k_hard_float__ +#define FP_INIT_ROUNDMODE \ +do { \ + __asm__ volatile ("l.mfspr %0,r0,20" : "=r" (_fpcsr)); \ +} while (0) + +#define FP_HANDLE_EXCEPTIONS \ +do { \ + if (__builtin_expect (_fex, 0)) \ + { \ + _fpcsr &= ~FP_EX_ALL; \ + _fpcsr |= _fex; \ + __asm__ volatile ("l.mtspr r0,%0,20" : : "r" (_fpcsr)); \ + } \ +} while (0) +#endif /* __or1k_hard_float__ */ + +#define _FP_TININESS_AFTER_ROUNDING 0 diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h index f9167056a8..0a06e4486b 100644 --- a/sysdeps/powerpc/fpu/fenv_libc.h +++ b/sysdeps/powerpc/fpu/fenv_libc.h @@ -182,19 +182,13 @@ __fesetround_inline (int round) return 0; } -/* Same as __fesetround_inline, however without runtime check to use DFP - mtfsfi syntax (as relax_fenv_state) or if round value is valid. */ +/* Same as __fesetround_inline, and it also disables the floating-point + inexact exception (bit 60 - XE, assuming NI is 0). It does not check + if ROUND is a valid value. 
*/ static inline void -__fesetround_inline_nocheck (const int round) +__fesetround_inline_disable_inexact (const int round) { -#ifdef _ARCH_PWR9 - __fe_mffscrn (round); -#else - if (__glibc_likely (GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00)) - __fe_mffscrn (round); - else - asm volatile ("mtfsfi 7,%0" : : "n" (round)); -#endif + asm volatile ("mtfsfi 7,%0" : : "n" (round)); } #define FPSCR_MASK(bit) (1 << (31 - (bit))) diff --git a/sysdeps/powerpc/fpu/libm-test-ulps b/sysdeps/powerpc/fpu/libm-test-ulps index dbf71d8194..78e9a9ad6e 100644 --- a/sysdeps/powerpc/fpu/libm-test-ulps +++ b/sysdeps/powerpc/fpu/libm-test-ulps @@ -1506,6 +1506,30 @@ float: 3 float128: 1 ldouble: 4 +Function: "log2p1": +double: 1 +float: 1 +float128: 2 +ldouble: 2 + +Function: "log2p1_downward": +double: 2 +float: 2 +float128: 3 +ldouble: 4 + +Function: "log2p1_towardzero": +double: 2 +float: 2 +float128: 2 +ldouble: 4 + +Function: "log2p1_upward": +double: 1 +float: 2 +float128: 2 +ldouble: 4 + Function: "log_downward": float: 2 float128: 1 diff --git a/sysdeps/powerpc/fpu/round_to_integer.h b/sysdeps/powerpc/fpu/round_to_integer.h index b68833640f..6996519c61 100644 --- a/sysdeps/powerpc/fpu/round_to_integer.h +++ b/sysdeps/powerpc/fpu/round_to_integer.h @@ -42,14 +42,14 @@ set_fenv_mode (enum round_mode mode) switch (mode) { case CEIL: - __fesetround_inline_nocheck (FE_UPWARD); + __fesetround_inline_disable_inexact (FE_UPWARD); break; case FLOOR: - __fesetround_inline_nocheck (FE_DOWNWARD); + __fesetround_inline_disable_inexact (FE_DOWNWARD); break; case TRUNC: case ROUND: - __fesetround_inline_nocheck (FE_TOWARDZERO); + __fesetround_inline_disable_inexact (FE_TOWARDZERO); break; case NEARBYINT: /* Disable FE_INEXACT exception */ diff --git a/sysdeps/powerpc/nofpu/Makefile b/sysdeps/powerpc/nofpu/Makefile index fa462339ba..96657c9676 100644 --- a/sysdeps/powerpc/nofpu/Makefile +++ b/sysdeps/powerpc/nofpu/Makefile @@ -54,4 +54,5 @@ CFLAGS-s_fmaximum_magl.c += -fno-builtin-fabsl 
CFLAGS-s_fmaximum_mag_numl.c += -fno-builtin-fabsl CFLAGS-s_fminimum_magl.c += -fno-builtin-fabsl CFLAGS-s_fminimum_mag_numl.c += -fno-builtin-fabsl +CFLAGS-s_log2p1l.c += -fno-builtin-fabsl endif diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrintf-power6.c b/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrintf-power6.c index 5b1a5a8cf0..3c279ddd1a 100644 --- a/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrintf-power6.c +++ b/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrintf-power6.c @@ -1,2 +1,6 @@ +#include <libm-alias-float.h> + #define __llrintf __llrintf_power6 +#undef libm_alias_float +#define libm_alias_float(a, b) #include <sysdeps/powerpc/powerpc32/fpu/s_llrintf.c> diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrintf-ppc32.c b/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrintf-ppc32.c index 061962b84d..bce8f4564f 100644 --- a/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrintf-ppc32.c +++ b/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrintf-ppc32.c @@ -1,2 +1,6 @@ +#include <libm-alias-float.h> + #define __llrintf __llrintf_ppc32 +#undef libm_alias_float +#define libm_alias_float(a, b) #include <sysdeps/powerpc/powerpc32/fpu/s_llrintf.c> diff --git a/sysdeps/powerpc/powerpc64/configure b/sysdeps/powerpc/powerpc64/configure index 6d8153ed50..9df2ceaa1f 100644 --- a/sysdeps/powerpc/powerpc64/configure +++ b/sysdeps/powerpc/powerpc64/configure @@ -12,7 +12,7 @@ then : else $as_nop libc_cv_overlapping_opd=no echo 'void foo (void) {}' > conftest.c -if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -S conftest.c -o conftest.s 1>&5' +if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $libc_cv_cc_submachine -S conftest.c -o conftest.s 1>&5' { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 (eval $ac_try) 2>&5 ac_status=$? 
@@ -51,7 +51,7 @@ int bar (void); int foo (void) { return bar () + 1; } EOF libc_cv_ppc64_notoc=no - if { ac_try='${CC-cc} $libc_cv_cc_submachine $CFLAGS $CPPFLAGS -S -o conftest.s conftest.c' + if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $libc_cv_cc_submachine -S -o conftest.s conftest.c' { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 (eval $ac_try) 2>&5 ac_status=$? diff --git a/sysdeps/powerpc/powerpc64/configure.ac b/sysdeps/powerpc/powerpc64/configure.ac index 575745af3e..225d66ef1a 100644 --- a/sysdeps/powerpc/powerpc64/configure.ac +++ b/sysdeps/powerpc/powerpc64/configure.ac @@ -7,7 +7,7 @@ AC_CACHE_CHECK(for support for overlapping .opd entries, libc_cv_overlapping_opd, [dnl libc_cv_overlapping_opd=no echo 'void foo (void) {}' > conftest.c -if AC_TRY_COMMAND(${CC-cc} $CFLAGS $CPPFLAGS -S conftest.c -o conftest.s 1>&AS_MESSAGE_LOG_FD); then +if AC_TRY_COMMAND(${CC-cc} $CFLAGS $CPPFLAGS $libc_cv_cc_submachine -S conftest.c -o conftest.s 1>&AS_MESSAGE_LOG_FD); then changequote(,)dnl if grep '\.TOC\.@tocbase' conftest.s > /dev/null; then if grep '\.TOC\.@tocbase[ ]*,[ ]*0' conftest.s > /dev/null; then @@ -35,7 +35,7 @@ int bar (void); int foo (void) { return bar () + 1; } EOF libc_cv_ppc64_notoc=no - if AC_TRY_COMMAND([${CC-cc} $libc_cv_cc_submachine $CFLAGS $CPPFLAGS -S -o conftest.s conftest.c]) \ + if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS $libc_cv_cc_submachine -S -o conftest.s conftest.c]) \ && AC_TRY_COMMAND([grep -q -E 'bar@notoc' conftest.s]) then libc_cv_ppc64_notoc=yes diff --git a/sysdeps/powerpc/powerpc64/le/power10/strncmp.S b/sysdeps/powerpc/powerpc64/le/power10/strncmp.S new file mode 100644 index 0000000000..d4ba76acae --- /dev/null +++ b/sysdeps/powerpc/powerpc64/le/power10/strncmp.S @@ -0,0 +1,271 @@ +/* Optimized strncmp implementation for PowerPC64/POWER10. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* Implements the function + + int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t [r5] n) + + The implementation uses unaligned doubleword access to avoid specialized + code paths depending on data alignment for first 32 bytes and uses + vectorised loops after that. */ + +#ifndef STRNCMP +# define STRNCMP strncmp +#endif + +/* TODO: Change this to actual instructions when minimum binutils is upgraded + to 2.27. Macros are defined below for these newer instructions in order + to maintain compatibility. */ + +#define LXVP(xtp,dq,ra) \ + .long(((6)<<(32-6)) \ + | ((((xtp)-32)>>1)<<(32-10)) \ + | ((1)<<(32-11)) \ + | ((ra)<<(32-16)) \ + | dq) + +#define COMPARE_16(vreg1,vreg2,offset) \ + lxv vreg1+32,offset(r3); \ + lxv vreg2+32,offset(r4); \ + vcmpnezb. v7,vreg1,vreg2; \ + bne cr6,L(different); \ + cmpldi cr7,r5,16; \ + ble cr7,L(ret0); \ + addi r5,r5,-16; + +#define COMPARE_32(vreg1,vreg2,offset,label1,label2) \ + LXVP(vreg1+32,offset,r3); \ + LXVP(vreg2+32,offset,r4); \ + vcmpnezb. v7,vreg1+1,vreg2+1; \ + bne cr6,L(label1); \ + vcmpnezb. 
v7,vreg1,vreg2; \ + bne cr6,L(label2); \ + cmpldi cr7,r5,32; \ + ble cr7,L(ret0); \ + addi r5,r5,-32; + +#define TAIL_FIRST_16B(vreg1,vreg2) \ + vctzlsbb r6,v7; \ + cmpld cr7,r5,r6; \ + ble cr7,L(ret0); \ + vextubrx r5,r6,vreg1; \ + vextubrx r4,r6,vreg2; \ + subf r3,r4,r5; \ + blr; + +#define TAIL_SECOND_16B(vreg1,vreg2) \ + vctzlsbb r6,v7; \ + addi r0,r6,16; \ + cmpld cr7,r5,r0; \ + ble cr7,L(ret0); \ + vextubrx r5,r6,vreg1; \ + vextubrx r4,r6,vreg2; \ + subf r3,r4,r5; \ + blr; + +#define CHECK_N_BYTES(reg1,reg2,len_reg) \ + sldi r6,len_reg,56; \ + lxvl 32+v4,reg1,r6; \ + lxvl 32+v5,reg2,r6; \ + add reg1,reg1,len_reg; \ + add reg2,reg2,len_reg; \ + vcmpnezb v7,v4,v5; \ + vctzlsbb r6,v7; \ + cmpld cr7,r6,len_reg; \ + blt cr7,L(different); \ + cmpld cr7,r5,len_reg; \ + ble cr7,L(ret0); \ + sub r5,r5,len_reg; \ + + /* TODO: change this to .machine power10 when the minimum required + binutils allows it. */ + .machine power9 +ENTRY_TOCLESS (STRNCMP, 4) + /* Check if size is 0. */ + cmpdi cr0,r5,0 + beq cr0,L(ret0) + andi. r7,r3,4095 + andi. r8,r4,4095 + cmpldi cr0,r7,4096-16 + cmpldi cr1,r8,4096-16 + bgt cr0,L(crosses) + bgt cr1,L(crosses) + COMPARE_16(v4,v5,0) + addi r3,r3,16 + addi r4,r4,16 + +L(crosses): + andi. r7,r3,15 + subfic r7,r7,16 /* r7(nalign1) = 16 - (str1 & 15). */ + andi. r9,r4,15 + subfic r8,r9,16 /* r8(nalign2) = 16 - (str2 & 15). */ + cmpld cr7,r7,r8 + beq cr7,L(same_aligned) + blt cr7,L(nalign1_min) + + /* nalign2 is minimum and s2 pointer is aligned. */ + CHECK_N_BYTES(r3,r4,r8) + /* Are we on the 64B hunk which crosses a page? */ + andi. r10,r3,63 /* Determine offset into 64B hunk. */ + andi. r8,r3,15 /* The offset into the 16B hunk. */ + neg r7,r3 + andi. r9,r7,15 /* Number of bytes after a 16B cross. */ + rlwinm. r7,r7,26,0x3F /* ((r4-4096))>>6&63. */ + beq L(compare_64_pagecross) + mtctr r7 + b L(compare_64B_unaligned) + + /* nalign1 is minimum and s1 pointer is aligned. 
*/ +L(nalign1_min): + CHECK_N_BYTES(r3,r4,r7) + /* Are we on the 64B hunk which crosses a page? */ + andi. r10,r4,63 /* Determine offset into 64B hunk. */ + andi. r8,r4,15 /* The offset into the 16B hunk. */ + neg r7,r4 + andi. r9,r7,15 /* Number of bytes after a 16B cross. */ + rlwinm. r7,r7,26,0x3F /* ((r4-4096))>>6&63. */ + beq L(compare_64_pagecross) + mtctr r7 + + .p2align 5 +L(compare_64B_unaligned): + COMPARE_16(v4,v5,0) + COMPARE_16(v4,v5,16) + COMPARE_16(v4,v5,32) + COMPARE_16(v4,v5,48) + addi r3,r3,64 + addi r4,r4,64 + bdnz L(compare_64B_unaligned) + + /* Cross the page boundary of s2, carefully. Only for first + iteration we have to get the count of 64B blocks to be checked. + From second iteration and beyond, loop counter is always 63. */ +L(compare_64_pagecross): + li r11, 63 + mtctr r11 + cmpldi r10,16 + ble L(cross_4) + cmpldi r10,32 + ble L(cross_3) + cmpldi r10,48 + ble L(cross_2) +L(cross_1): + CHECK_N_BYTES(r3,r4,r9) + CHECK_N_BYTES(r3,r4,r8) + COMPARE_16(v4,v5,0) + COMPARE_16(v4,v5,16) + COMPARE_16(v4,v5,32) + addi r3,r3,48 + addi r4,r4,48 + b L(compare_64B_unaligned) +L(cross_2): + COMPARE_16(v4,v5,0) + addi r3,r3,16 + addi r4,r4,16 + CHECK_N_BYTES(r3,r4,r9) + CHECK_N_BYTES(r3,r4,r8) + COMPARE_16(v4,v5,0) + COMPARE_16(v4,v5,16) + addi r3,r3,32 + addi r4,r4,32 + b L(compare_64B_unaligned) +L(cross_3): + COMPARE_16(v4,v5,0) + COMPARE_16(v4,v5,16) + addi r3,r3,32 + addi r4,r4,32 + CHECK_N_BYTES(r3,r4,r9) + CHECK_N_BYTES(r3,r4,r8) + COMPARE_16(v4,v5,0) + addi r3,r3,16 + addi r4,r4,16 + b L(compare_64B_unaligned) +L(cross_4): + COMPARE_16(v4,v5,0) + COMPARE_16(v4,v5,16) + COMPARE_16(v4,v5,32) + addi r3,r3,48 + addi r4,r4,48 + CHECK_N_BYTES(r3,r4,r9) + CHECK_N_BYTES(r3,r4,r8) + b L(compare_64B_unaligned) + +L(same_aligned): + CHECK_N_BYTES(r3,r4,r7) + /* Align s1 to 32B and adjust s2 address. + Use lxvp only if both s1 and s2 are 32B aligned. 
*/ + COMPARE_16(v4,v5,0) + COMPARE_16(v4,v5,16) + COMPARE_16(v4,v5,32) + COMPARE_16(v4,v5,48) + addi r3,r3,64 + addi r4,r4,64 + COMPARE_16(v4,v5,0) + COMPARE_16(v4,v5,16) + addi r5,r5,32 + + clrldi r6,r3,59 + subfic r7,r6,32 + add r3,r3,r7 + add r4,r4,r7 + subf r5,r7,r5 + andi. r7,r4,0x1F + beq cr0,L(32B_aligned_loop) + + .p2align 5 +L(16B_aligned_loop): + COMPARE_16(v4,v5,0) + COMPARE_16(v4,v5,16) + COMPARE_16(v4,v5,32) + COMPARE_16(v4,v5,48) + addi r3,r3,64 + addi r4,r4,64 + b L(16B_aligned_loop) + + /* Calculate and return the difference. */ +L(different): + TAIL_FIRST_16B(v4,v5) + + .p2align 5 +L(32B_aligned_loop): + COMPARE_32(v14,v16,0,tail1,tail2) + COMPARE_32(v18,v20,32,tail3,tail4) + COMPARE_32(v22,v24,64,tail5,tail6) + COMPARE_32(v26,v28,96,tail7,tail8) + addi r3,r3,128 + addi r4,r4,128 + b L(32B_aligned_loop) + +L(tail1): TAIL_FIRST_16B(v15,v17) +L(tail2): TAIL_SECOND_16B(v14,v16) +L(tail3): TAIL_FIRST_16B(v19,v21) +L(tail4): TAIL_SECOND_16B(v18,v20) +L(tail5): TAIL_FIRST_16B(v23,v25) +L(tail6): TAIL_SECOND_16B(v22,v24) +L(tail7): TAIL_FIRST_16B(v27,v29) +L(tail8): TAIL_SECOND_16B(v26,v28) + + .p2align 5 +L(ret0): + li r3,0 + blr + +END(STRNCMP) +libc_hidden_builtin_def(strncmp) diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile index a38ff46448..b847c19049 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/Makefile +++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile @@ -34,7 +34,7 @@ ifneq (,$(filter %le,$(config-machine))) sysdep_routines += memchr-power10 memcmp-power10 memcpy-power10 \ memmove-power10 memset-power10 rawmemchr-power9 \ rawmemchr-power10 strcmp-power9 strcmp-power10 \ - strncmp-power9 strcpy-power9 stpcpy-power9 \ + strncmp-power9 strncmp-power10 strcpy-power9 stpcpy-power9 \ strlen-power9 strncpy-power9 stpncpy-power9 strlen-power10 endif CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c 
b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c index 30fd89e109..2bb47d3527 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c @@ -164,6 +164,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/powerpc/powerpc64/multiarch/strncmp.c. */ IFUNC_IMPL (i, name, strncmp, #ifdef __LITTLE_ENDIAN__ + IFUNC_IMPL_ADD (array, i, strncmp, hwcap2 & PPC_FEATURE2_ARCH_3_1 + && hwcap & PPC_FEATURE_HAS_VSX, + __strncmp_power10) IFUNC_IMPL_ADD (array, i, strncmp, hwcap2 & PPC_FEATURE2_ARCH_3_00 && hwcap & PPC_FEATURE_HAS_ALTIVEC, __strncmp_power9) diff --git a/sysdeps/powerpc/powerpc64/multiarch/strchrnul-power7.S b/sysdeps/powerpc/powerpc64/multiarch/strchrnul-power7.S index 384edce8b9..fc784c2856 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strchrnul-power7.S +++ b/sysdeps/powerpc/powerpc64/multiarch/strchrnul-power7.S @@ -18,6 +18,9 @@ #define STRCHRNUL __strchrnul_power7 +#undef weak_alias +#define weak_alias(a,b) + #undef libc_hidden_builtin_def #define libc_hidden_builtin_def(name) diff --git a/sysdeps/powerpc/powerpc64/multiarch/strchrnul-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strchrnul-ppc64.c index 8e42504efe..7127972250 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strchrnul-ppc64.c +++ b/sysdeps/powerpc/powerpc64/multiarch/strchrnul-ppc64.c @@ -16,4 +16,16 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. 
*/ -#include <sysdeps/powerpc/powerpc32/power4/multiarch/strchrnul-ppc32.c> +#include <string.h> + +#if IS_IN (libc) +# define STRCHRNUL __strchrnul_ppc +extern __typeof (strchrnul) __strchrnul_ppc attribute_hidden; + +# include <string/strchrnul.c> +# undef __strchrnul +weak_alias (__strchrnul_ppc, __strchrnul) +# ifdef SHARED +__hidden_ver1 (__strchrnul_ppc, __GI___strchrnul, __strchrnul_ppc); +# endif +#endif diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase_l-power7.c b/sysdeps/powerpc/powerpc64/multiarch/strncase_l-power7.c index 1587079580..664ce44345 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strncase_l-power7.c +++ b/sysdeps/powerpc/powerpc64/multiarch/strncase_l-power7.c @@ -19,6 +19,9 @@ #define __strncasecmp_l __strncasecmp_l_power7 +#undef weak_alias +#define weak_alias(a,b) + #undef libc_hidden_def #define libc_hidden_def(name) diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power10.S b/sysdeps/powerpc/powerpc64/multiarch/strncmp-power10.S new file mode 100644 index 0000000000..d7026c12e2 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/strncmp-power10.S @@ -0,0 +1,25 @@ +/* Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#if defined __LITTLE_ENDIAN__ && IS_IN (libc) +#define STRNCMP __strncmp_power10 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/le/power10/strncmp.S> +#endif diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c index 6178f4a432..a5ed67f766 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c +++ b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c @@ -29,6 +29,7 @@ extern __typeof (strncmp) __strncmp_ppc attribute_hidden; extern __typeof (strncmp) __strncmp_power8 attribute_hidden; # ifdef __LITTLE_ENDIAN__ extern __typeof (strncmp) __strncmp_power9 attribute_hidden; +extern __typeof (strncmp) __strncmp_power10 attribute_hidden; # endif # undef strncmp @@ -36,6 +37,9 @@ extern __typeof (strncmp) __strncmp_power9 attribute_hidden; ifunc symbol properly. */ libc_ifunc_redirected (__redirect_strncmp, strncmp, # ifdef __LITTLE_ENDIAN__ + (hwcap2 & PPC_FEATURE2_ARCH_3_1 + && hwcap & PPC_FEATURE_HAS_VSX) + ? __strncmp_power10 : (hwcap2 & PPC_FEATURE2_ARCH_3_00 && hwcap & PPC_FEATURE_HAS_ALTIVEC) ? 
__strncmp_power9 : diff --git a/sysdeps/pthread/Makefile b/sysdeps/pthread/Makefile index f9efb50764..04ea56559e 100644 --- a/sysdeps/pthread/Makefile +++ b/sysdeps/pthread/Makefile @@ -282,6 +282,10 @@ tests += \ tst-vfork2x \ # tests +tests-2.0 += \ + tst-pthread_kill-exited + # tests-2.0 + tests-time64 += \ tst-abstime-time64 \ tst-cnd-timedwait-time64 \ diff --git a/sysdeps/s390/fpu/libm-test-ulps b/sysdeps/s390/fpu/libm-test-ulps index a898eebb20..e24adb94e8 100644 --- a/sysdeps/s390/fpu/libm-test-ulps +++ b/sysdeps/s390/fpu/libm-test-ulps @@ -1201,6 +1201,26 @@ Function: "log2_upward": double: 1 ldouble: 1 +Function: "log2p1": +double: 1 +float: 1 +ldouble: 3 + +Function: "log2p1_downward": +double: 2 +float: 2 +ldouble: 3 + +Function: "log2p1_towardzero": +double: 2 +float: 2 +ldouble: 2 + +Function: "log2p1_upward": +double: 1 +float: 2 +ldouble: 2 + Function: "log_downward": ldouble: 1 diff --git a/sysdeps/sparc/sparc64/rtld-memset.c b/sysdeps/sparc/sparc64/rtld-memset.c index 55f3835790..a19202a620 100644 --- a/sysdeps/sparc/sparc64/rtld-memset.c +++ b/sysdeps/sparc/sparc64/rtld-memset.c @@ -1 +1,4 @@ #include <string/memset.c> +#if IS_IN(rtld) +strong_alias (memset, __memset_ultra1) +#endif diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile index 415aa1f14d..ae66590e91 100644 --- a/sysdeps/unix/sysv/linux/Makefile +++ b/sysdeps/unix/sysv/linux/Makefile @@ -200,6 +200,7 @@ tests += \ tst-clone2 \ tst-clone3 \ tst-epoll \ + tst-epoll-ioctls \ tst-fanotify \ tst-fdopendir-o_path \ tst-getauxval \ diff --git a/sysdeps/unix/sysv/linux/aarch64/bits/hwcap.h b/sysdeps/unix/sysv/linux/aarch64/bits/hwcap.h index f82bde5d30..8dceaa1a52 100644 --- a/sysdeps/unix/sysv/linux/aarch64/bits/hwcap.h +++ b/sysdeps/unix/sysv/linux/aarch64/bits/hwcap.h @@ -103,3 +103,18 @@ #define HWCAP2_SVE_B16B16 (1UL << 45) #define HWCAP2_LRCPC3 (1UL << 46) #define HWCAP2_LSE128 (1UL << 47) +#define HWCAP2_FPMR (1UL << 48) +#define HWCAP2_LUT (1UL << 49) 
+#define HWCAP2_FAMINMAX (1UL << 50) +#define HWCAP2_F8CVT (1UL << 51) +#define HWCAP2_F8FMA (1UL << 52) +#define HWCAP2_F8DP4 (1UL << 53) +#define HWCAP2_F8DP2 (1UL << 54) +#define HWCAP2_F8E4M3 (1UL << 55) +#define HWCAP2_F8E5M2 (1UL << 56) +#define HWCAP2_SME_LUTV2 (1UL << 57) +#define HWCAP2_SME_F8F16 (1UL << 58) +#define HWCAP2_SME_F8F32 (1UL << 59) +#define HWCAP2_SME_SF8FMA (1UL << 60) +#define HWCAP2_SME_SF8DP4 (1UL << 61) +#define HWCAP2_SME_SF8DP2 (1UL << 62) diff --git a/sysdeps/unix/sysv/linux/aarch64/clone3.S b/sysdeps/unix/sysv/linux/aarch64/clone3.S index 92d69a5430..e28aaa5083 100644 --- a/sysdeps/unix/sysv/linux/aarch64/clone3.S +++ b/sysdeps/unix/sysv/linux/aarch64/clone3.S @@ -82,4 +82,3 @@ thread_start: .size thread_start, .-thread_start libc_hidden_def (__clone3) -weak_alias (__clone3, clone3) diff --git a/sysdeps/unix/sysv/linux/aarch64/libm.abilist b/sysdeps/unix/sysv/linux/aarch64/libm.abilist index f0da228fbb..a4bb539670 100644 --- a/sysdeps/unix/sysv/linux/aarch64/libm.abilist +++ b/sysdeps/unix/sysv/linux/aarch64/libm.abilist @@ -1149,3 +1149,11 @@ GLIBC_2.35 hypotf F GLIBC_2.38 fmod F GLIBC_2.38 fmodf F GLIBC_2.39 exp10 F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f128 F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1f64x F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist index 26c3fbf18b..b685106954 100644 --- a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist +++ b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist @@ -79,6 +79,8 @@ GLIBC_2.40 _ZGVnN2v_asinh F GLIBC_2.40 _ZGVnN2v_asinhf F GLIBC_2.40 _ZGVnN2v_atanh F GLIBC_2.40 _ZGVnN2v_atanhf F +GLIBC_2.40 _ZGVnN2v_cbrt F +GLIBC_2.40 _ZGVnN2v_cbrtf F GLIBC_2.40 _ZGVnN2v_cosh F GLIBC_2.40 _ZGVnN2v_coshf F GLIBC_2.40 _ZGVnN2v_erf F @@ -89,20 +91,29 @@ GLIBC_2.40 _ZGVnN2v_sinh F GLIBC_2.40 _ZGVnN2v_sinhf F GLIBC_2.40 _ZGVnN2v_tanh F GLIBC_2.40 
_ZGVnN2v_tanhf F +GLIBC_2.40 _ZGVnN2vv_hypot F +GLIBC_2.40 _ZGVnN2vv_hypotf F +GLIBC_2.40 _ZGVnN2vv_pow F +GLIBC_2.40 _ZGVnN2vv_powf F GLIBC_2.40 _ZGVnN4v_acoshf F GLIBC_2.40 _ZGVnN4v_asinhf F GLIBC_2.40 _ZGVnN4v_atanhf F +GLIBC_2.40 _ZGVnN4v_cbrtf F GLIBC_2.40 _ZGVnN4v_coshf F GLIBC_2.40 _ZGVnN4v_erfcf F GLIBC_2.40 _ZGVnN4v_erff F GLIBC_2.40 _ZGVnN4v_sinhf F GLIBC_2.40 _ZGVnN4v_tanhf F +GLIBC_2.40 _ZGVnN4vv_hypotf F +GLIBC_2.40 _ZGVnN4vv_powf F GLIBC_2.40 _ZGVsMxv_acosh F GLIBC_2.40 _ZGVsMxv_acoshf F GLIBC_2.40 _ZGVsMxv_asinh F GLIBC_2.40 _ZGVsMxv_asinhf F GLIBC_2.40 _ZGVsMxv_atanh F GLIBC_2.40 _ZGVsMxv_atanhf F +GLIBC_2.40 _ZGVsMxv_cbrt F +GLIBC_2.40 _ZGVsMxv_cbrtf F GLIBC_2.40 _ZGVsMxv_cosh F GLIBC_2.40 _ZGVsMxv_coshf F GLIBC_2.40 _ZGVsMxv_erf F @@ -113,3 +124,7 @@ GLIBC_2.40 _ZGVsMxv_sinh F GLIBC_2.40 _ZGVsMxv_sinhf F GLIBC_2.40 _ZGVsMxv_tanh F GLIBC_2.40 _ZGVsMxv_tanhf F +GLIBC_2.40 _ZGVsMxvv_hypot F +GLIBC_2.40 _ZGVsMxvv_hypotf F +GLIBC_2.40 _ZGVsMxvv_pow F +GLIBC_2.40 _ZGVsMxvv_powf F diff --git a/sysdeps/unix/sysv/linux/alpha/libm.abilist b/sysdeps/unix/sysv/linux/alpha/libm.abilist index f5d8023d62..8ff65e3f7e 100644 --- a/sysdeps/unix/sysv/linux/alpha/libm.abilist +++ b/sysdeps/unix/sysv/linux/alpha/libm.abilist @@ -1308,3 +1308,11 @@ GLIBC_2.4 truncl F GLIBC_2.4 y0l F GLIBC_2.4 y1l F GLIBC_2.4 ynl F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f128 F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1f64x F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/unix/sysv/linux/arc/clone3.S b/sysdeps/unix/sysv/linux/arc/clone3.S index d9a2dfff3e..38a27063fb 100644 --- a/sysdeps/unix/sysv/linux/arc/clone3.S +++ b/sysdeps/unix/sysv/linux/arc/clone3.S @@ -87,4 +87,3 @@ thread_start_clone3: .size thread_start_clone3, .-thread_start_clone3 libc_hidden_def (__clone3) -weak_alias (__clone3, clone3) diff --git a/sysdeps/unix/sysv/linux/arc/libm.abilist b/sysdeps/unix/sysv/linux/arc/libm.abilist index 
f2169f8f57..af9df2673c 100644 --- a/sysdeps/unix/sysv/linux/arc/libm.abilist +++ b/sysdeps/unix/sysv/linux/arc/libm.abilist @@ -757,3 +757,9 @@ GLIBC_2.35 fminimumf64 F GLIBC_2.35 fminimuml F GLIBC_2.35 fsqrt F GLIBC_2.35 fsqrtl F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/unix/sysv/linux/arm/be/libm.abilist b/sysdeps/unix/sysv/linux/arm/be/libm.abilist index f020a8a904..0e460259f3 100644 --- a/sysdeps/unix/sysv/linux/arm/be/libm.abilist +++ b/sysdeps/unix/sysv/linux/arm/be/libm.abilist @@ -848,3 +848,9 @@ GLIBC_2.4 y1l F GLIBC_2.4 yn F GLIBC_2.4 ynf F GLIBC_2.4 ynl F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/unix/sysv/linux/arm/clone3.S b/sysdeps/unix/sysv/linux/arm/clone3.S index e759de9730..094efc2f40 100644 --- a/sysdeps/unix/sysv/linux/arm/clone3.S +++ b/sysdeps/unix/sysv/linux/arm/clone3.S @@ -77,4 +77,3 @@ PSEUDO_END (__clone3) .fnend libc_hidden_def (__clone3) -weak_alias (__clone3, clone3) diff --git a/sysdeps/unix/sysv/linux/arm/le/libm.abilist b/sysdeps/unix/sysv/linux/arm/le/libm.abilist index f020a8a904..0e460259f3 100644 --- a/sysdeps/unix/sysv/linux/arm/le/libm.abilist +++ b/sysdeps/unix/sysv/linux/arm/le/libm.abilist @@ -848,3 +848,9 @@ GLIBC_2.4 y1l F GLIBC_2.4 yn F GLIBC_2.4 ynf F GLIBC_2.4 ynl F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/unix/sysv/linux/bits/socket.h b/sysdeps/unix/sysv/linux/bits/socket.h index 86444f0018..5ab19a8c08 100644 --- a/sysdeps/unix/sysv/linux/bits/socket.h +++ b/sysdeps/unix/sysv/linux/bits/socket.h @@ -181,7 +181,7 @@ typedef __socklen_t socklen_t; #include <bits/sockaddr.h> /* Structure describing a generic socket address. 
*/ -struct sockaddr +struct __attribute_struct_may_alias__ sockaddr { __SOCKADDR_COMMON (sa_); /* Common data: address family and length. */ char sa_data[14]; /* Address data. */ @@ -194,7 +194,7 @@ struct sockaddr #define _SS_PADSIZE \ (_SS_SIZE - __SOCKADDR_COMMON_SIZE - sizeof (__ss_aligntype)) -struct sockaddr_storage +struct __attribute_struct_may_alias__ sockaddr_storage { __SOCKADDR_COMMON (ss_); /* Address family, etc. */ char __ss_padding[_SS_PADSIZE]; diff --git a/sysdeps/unix/sysv/linux/csky/libm.abilist b/sysdeps/unix/sysv/linux/csky/libm.abilist index 225a5e9b81..4c1216044d 100644 --- a/sysdeps/unix/sysv/linux/csky/libm.abilist +++ b/sysdeps/unix/sysv/linux/csky/libm.abilist @@ -823,3 +823,9 @@ GLIBC_2.35 fminimumf64 F GLIBC_2.35 fminimuml F GLIBC_2.35 fsqrt F GLIBC_2.35 fsqrtl F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/unix/sysv/linux/dl-execstack.c b/sysdeps/unix/sysv/linux/dl-execstack.c index 3d8f3938da..b986898598 100644 --- a/sysdeps/unix/sysv/linux/dl-execstack.c +++ b/sysdeps/unix/sysv/linux/dl-execstack.c @@ -27,35 +27,30 @@ #include <sysdep.h> #include <unistd.h> -extern int __stack_prot attribute_relro attribute_hidden; - static int make_main_stack_executable (void **stack_endp) { /* This gives us the highest/lowest page that needs to be changed. */ uintptr_t page = ((uintptr_t) *stack_endp & -(intptr_t) GLRO(dl_pagesize)); - int result = 0; - if (__builtin_expect (__mprotect ((void *) page, GLRO(dl_pagesize), - __stack_prot) == 0, 1)) - goto return_success; - result = errno; - goto out; + if (__mprotect ((void *) page, GLRO(dl_pagesize), + PROT_READ | PROT_WRITE | PROT_EXEC +#if _STACK_GROWS_DOWN + | PROT_GROWSDOWN +#elif _STACK_GROWS_UP + | PROT_GROWSUP +#endif + ) != 0) + return errno; - return_success: /* Clear the address. */ *stack_endp = NULL; /* Remember that we changed the permission. 
*/ GL(dl_stack_flags) |= PF_X; - out: -#ifdef check_consistency - check_consistency (); -#endif - - return result; + return 0; } int diff --git a/sysdeps/unix/sysv/linux/hppa/libm.abilist b/sysdeps/unix/sysv/linux/hppa/libm.abilist index 450ac03223..3ea5b611eb 100644 --- a/sysdeps/unix/sysv/linux/hppa/libm.abilist +++ b/sysdeps/unix/sysv/linux/hppa/libm.abilist @@ -848,3 +848,9 @@ GLIBC_2.38 fmod F GLIBC_2.38 fmodf F GLIBC_2.39 exp10 F GLIBC_2.4 exp2l F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/unix/sysv/linux/i386/clone3.S b/sysdeps/unix/sysv/linux/i386/clone3.S index abb7356b1f..6803e59644 100644 --- a/sysdeps/unix/sysv/linux/i386/clone3.S +++ b/sysdeps/unix/sysv/linux/i386/clone3.S @@ -120,4 +120,3 @@ L(thread_start): PSEUDO_END (__clone3) libc_hidden_def (__clone3) -weak_alias (__clone3, clone3) diff --git a/sysdeps/unix/sysv/linux/i386/libm.abilist b/sysdeps/unix/sysv/linux/i386/libm.abilist index 5d89aaa08e..696e52e65a 100644 --- a/sysdeps/unix/sysv/linux/i386/libm.abilist +++ b/sysdeps/unix/sysv/linux/i386/libm.abilist @@ -1188,3 +1188,11 @@ GLIBC_2.35 fsqrt F GLIBC_2.35 fsqrtl F GLIBC_2.35 hypot F GLIBC_2.35 hypotf F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f128 F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1f64x F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/unix/sysv/linux/loongarch/clone3.S b/sysdeps/unix/sysv/linux/loongarch/clone3.S index 20eaf4adc2..dfffb1f3d6 100644 --- a/sysdeps/unix/sysv/linux/loongarch/clone3.S +++ b/sysdeps/unix/sysv/linux/loongarch/clone3.S @@ -80,4 +80,3 @@ L (thread_start3): END (__thread_start3) libc_hidden_def (__clone3) -weak_alias (__clone3, clone3) diff --git a/sysdeps/unix/sysv/linux/loongarch/lp64/libm.abilist b/sysdeps/unix/sysv/linux/loongarch/lp64/libm.abilist index b3dbd00001..1c6f02ac5a 100644 --- 
a/sysdeps/unix/sysv/linux/loongarch/lp64/libm.abilist +++ b/sysdeps/unix/sysv/linux/loongarch/lp64/libm.abilist @@ -1028,3 +1028,11 @@ GLIBC_2.36 ynf32x F GLIBC_2.36 ynf64 F GLIBC_2.36 ynf64x F GLIBC_2.36 ynl F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f128 F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1f64x F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/unix/sysv/linux/m68k/coldfire/libm.abilist b/sysdeps/unix/sysv/linux/m68k/coldfire/libm.abilist index f020a8a904..0e460259f3 100644 --- a/sysdeps/unix/sysv/linux/m68k/coldfire/libm.abilist +++ b/sysdeps/unix/sysv/linux/m68k/coldfire/libm.abilist @@ -848,3 +848,9 @@ GLIBC_2.4 y1l F GLIBC_2.4 yn F GLIBC_2.4 ynf F GLIBC_2.4 ynl F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/unix/sysv/linux/m68k/m680x0/libm.abilist b/sysdeps/unix/sysv/linux/m68k/m680x0/libm.abilist index e7cd739a54..be8262bb48 100644 --- a/sysdeps/unix/sysv/linux/m68k/m680x0/libm.abilist +++ b/sysdeps/unix/sysv/linux/m68k/m680x0/libm.abilist @@ -884,3 +884,9 @@ GLIBC_2.35 fsqrt F GLIBC_2.35 fsqrtl F GLIBC_2.35 hypot F GLIBC_2.35 hypotf F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/unix/sysv/linux/microblaze/be/libm.abilist b/sysdeps/unix/sysv/linux/microblaze/be/libm.abilist index 1f7f63f60c..cc091b2d4e 100644 --- a/sysdeps/unix/sysv/linux/microblaze/be/libm.abilist +++ b/sysdeps/unix/sysv/linux/microblaze/be/libm.abilist @@ -848,3 +848,9 @@ GLIBC_2.35 hypotf F GLIBC_2.38 fmod F GLIBC_2.38 fmodf F GLIBC_2.39 exp10 F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/unix/sysv/linux/microblaze/le/libm.abilist b/sysdeps/unix/sysv/linux/microblaze/le/libm.abilist 
index 1f7f63f60c..cc091b2d4e 100644 --- a/sysdeps/unix/sysv/linux/microblaze/le/libm.abilist +++ b/sysdeps/unix/sysv/linux/microblaze/le/libm.abilist @@ -848,3 +848,9 @@ GLIBC_2.35 hypotf F GLIBC_2.38 fmod F GLIBC_2.38 fmodf F GLIBC_2.39 exp10 F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/unix/sysv/linux/microblaze/syscalls.list b/sysdeps/unix/sysv/linux/microblaze/syscalls.list index 878ce7e6a2..5760c657a6 100644 --- a/sysdeps/unix/sysv/linux/microblaze/syscalls.list +++ b/sysdeps/unix/sysv/linux/microblaze/syscalls.list @@ -1,5 +1,3 @@ # File name Caller Syscall name Args Strong name Weak names -cacheflush EXTRA cacheflush i:iiii __cacheflush cacheflush - personality EXTRA personality Ei:i __personality personality diff --git a/sysdeps/unix/sysv/linux/mips/clone3.S b/sysdeps/unix/sysv/linux/mips/clone3.S index 481b8ae963..58925d6b41 100644 --- a/sysdeps/unix/sysv/linux/mips/clone3.S +++ b/sysdeps/unix/sysv/linux/mips/clone3.S @@ -158,4 +158,3 @@ L(thread_start_clone3): END(__thread_start_clone3) libc_hidden_def (__clone3) -weak_alias (__clone3, clone3) diff --git a/sysdeps/unix/sysv/linux/mips/mips32/libm.abilist b/sysdeps/unix/sysv/linux/mips/mips32/libm.abilist index 797071aee8..4d442a989d 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/libm.abilist +++ b/sysdeps/unix/sysv/linux/mips/mips32/libm.abilist @@ -848,3 +848,9 @@ GLIBC_2.38 fmod F GLIBC_2.38 fmodf F GLIBC_2.39 exp10 F GLIBC_2.4 exp2l F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/unix/sysv/linux/mips/mips64/libm.abilist b/sysdeps/unix/sysv/linux/mips/mips64/libm.abilist index 14758703cf..ae7b4a7558 100644 --- a/sysdeps/unix/sysv/linux/mips/mips64/libm.abilist +++ b/sysdeps/unix/sysv/linux/mips/mips64/libm.abilist @@ -1149,3 +1149,11 @@ GLIBC_2.35 hypotf F GLIBC_2.38 fmod F 
GLIBC_2.38 fmodf F GLIBC_2.39 exp10 F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f128 F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1f64x F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/unix/sysv/linux/net/if_packet.h b/sysdeps/unix/sysv/linux/net/if_packet.h index 9ffb69b508..c17e1c23c5 100644 --- a/sysdeps/unix/sysv/linux/net/if_packet.h +++ b/sysdeps/unix/sysv/linux/net/if_packet.h @@ -26,7 +26,7 @@ From Linux 2.1 the AF_PACKET interface is preferred and you should consider using it in place of this one. */ -struct sockaddr_pkt +struct __attribute_struct_may_alias__ sockaddr_pkt { __SOCKADDR_COMMON (spkt_); unsigned char spkt_device[14]; diff --git a/sysdeps/unix/sysv/linux/netash/ash.h b/sysdeps/unix/sysv/linux/netash/ash.h index 7d885d17cc..7a6ff50b17 100644 --- a/sysdeps/unix/sysv/linux/netash/ash.h +++ b/sysdeps/unix/sysv/linux/netash/ash.h @@ -22,7 +22,7 @@ #include <features.h> #include <bits/sockaddr.h> -struct sockaddr_ash +struct __attribute_struct_may_alias__ sockaddr_ash { __SOCKADDR_COMMON (sash_); /* Common data: address family etc. */ int sash_ifindex; /* Interface to use. */ diff --git a/sysdeps/unix/sysv/linux/neteconet/ec.h b/sysdeps/unix/sysv/linux/neteconet/ec.h index b07a107961..f3132f06ff 100644 --- a/sysdeps/unix/sysv/linux/neteconet/ec.h +++ b/sysdeps/unix/sysv/linux/neteconet/ec.h @@ -28,7 +28,7 @@ struct ec_addr unsigned char net; /* Network number. */ }; -struct sockaddr_ec +struct __attribute_struct_may_alias__ sockaddr_ec { __SOCKADDR_COMMON (sec_); unsigned char port; /* Port number. 
*/ diff --git a/sysdeps/unix/sysv/linux/netiucv/iucv.h b/sysdeps/unix/sysv/linux/netiucv/iucv.h index f5fad81751..27151e8bbe 100644 --- a/sysdeps/unix/sysv/linux/netiucv/iucv.h +++ b/sysdeps/unix/sysv/linux/netiucv/iucv.h @@ -23,7 +23,7 @@ __BEGIN_DECLS -struct sockaddr_iucv +struct __attribute_struct_may_alias__ sockaddr_iucv { __SOCKADDR_COMMON (siucv_); unsigned short siucv_port; /* Reserved */ diff --git a/sysdeps/unix/sysv/linux/nios2/libm.abilist b/sysdeps/unix/sysv/linux/nios2/libm.abilist index c0ebe119dc..9ed3fbdee4 100644 --- a/sysdeps/unix/sysv/linux/nios2/libm.abilist +++ b/sysdeps/unix/sysv/linux/nios2/libm.abilist @@ -848,3 +848,9 @@ GLIBC_2.35 hypotf F GLIBC_2.38 fmod F GLIBC_2.38 fmodf F GLIBC_2.39 exp10 F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/unix/sysv/linux/or1k/Versions b/sysdeps/unix/sysv/linux/or1k/Versions new file mode 100644 index 0000000000..c1299de116 --- /dev/null +++ b/sysdeps/unix/sysv/linux/or1k/Versions @@ -0,0 +1,14 @@ +libc { + GLIBC_2.35 { + getcontext; + setcontext; + swapcontext; + makecontext; + } + GLIBC_2.40 { + getcontext; + setcontext; + swapcontext; + makecontext; + } +} diff --git a/sysdeps/unix/sysv/linux/or1k/getcontext-common.S b/sysdeps/unix/sysv/linux/or1k/getcontext-common.S new file mode 100644 index 0000000000..9187749615 --- /dev/null +++ b/sysdeps/unix/sysv/linux/or1k/getcontext-common.S @@ -0,0 +1,88 @@ +/* Save current context. OpenRISC common version. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +/* This common getcontext template helps define different + implementations using control macros. Before including + this file in another file define the following: + + __CONTEXT_FUNC_NAME + __CONTEXT_ENABLE_FPCSR + __CONTEXT_SIGMASK_OFFSET + */ + +/* int getcontext (ucontext_t *ucp) + + Returns 0 on success -1 and errno on failure. + */ + .text +ENTRY(__CONTEXT_FUNC_NAME) + /* Store r1, the stack pointer. */ + l.sw (UCONTEXT_MCONTEXT + 1*4)(r3), r1 + /* Store r2, the frame pointer. */ + l.sw (UCONTEXT_MCONTEXT + 2*4)(r3), r2 + /* Store r9, the link register. */ + l.sw (UCONTEXT_MCONTEXT + 9*4)(r3), r9 + /* Store r9 to reg[11] too, as we need two links for makecontext. */ + l.sw (UCONTEXT_MCONTEXT + 11*4)(r3), r9 + /* Store r10, the TLS register. */ + l.sw (UCONTEXT_MCONTEXT + 10*4)(r3), r10 + /* Store r14-r30 even, callee saved registers. */ + l.sw (UCONTEXT_MCONTEXT + 14*4)(r3), r14 + l.sw (UCONTEXT_MCONTEXT + 16*4)(r3), r16 + l.sw (UCONTEXT_MCONTEXT + 18*4)(r3), r18 + l.sw (UCONTEXT_MCONTEXT + 20*4)(r3), r20 + l.sw (UCONTEXT_MCONTEXT + 22*4)(r3), r22 + l.sw (UCONTEXT_MCONTEXT + 24*4)(r3), r24 + l.sw (UCONTEXT_MCONTEXT + 26*4)(r3), r26 + l.sw (UCONTEXT_MCONTEXT + 28*4)(r3), r28 + l.sw (UCONTEXT_MCONTEXT + 30*4)(r3), r30 + +#ifdef __CONTEXT_ENABLE_FPCSR +# ifdef __or1k_hard_float__ + /* Store the floating point state. */ + l.mfspr r4, r0, 20 + l.sw (MCONTEXT_FPCSR)(r3), r4 +# else + /* Store zero to indicate default rounding as per softfloat. 
*/ + l.sw (MCONTEXT_FPCSR)(r3), r0 +# endif /* __or1k_hard_float__ */ +#endif /* __CONTEXT_ENABLE_FPCSR */ + + /* Get signal mask. */ + /* rt_sigprocmask (SIG_BLOCK, NULL, &ucp->uc_sigmask, _NSIG8) */ + l.ori r6, r0, _NSIG8 + l.addi r5, r3, __CONTEXT_SIGMASK_OFFSET + l.ori r4, r0, 0 + l.ori r3, r0, SIG_BLOCK + l.ori r11, r0, SYS_ify (rt_sigprocmask) + /* Do the syscall. */ + l.sys 1 + l.nop + + /* if -4096 < ret < 0 holds, it's an error */ + l.sfgeui r11, 0xf001 + l.bf 1f + l.nop + + l.jr r9 + l.ori r11, r0, 0 + +1: l.j __syscall_error + l.ori r3, r11, 0 + +END(__CONTEXT_FUNC_NAME) diff --git a/sysdeps/unix/sysv/linux/or1k/getcontext.S b/sysdeps/unix/sysv/linux/or1k/getcontext.S index a25b377bda..da69e6999c 100644 --- a/sysdeps/unix/sysv/linux/or1k/getcontext.S +++ b/sysdeps/unix/sysv/linux/or1k/getcontext.S @@ -17,56 +17,35 @@ <https://www.gnu.org/licenses/>. */ #include <sysdep.h> +#include <shlib-compat.h> #include "ucontext_i.h" -/* int getcontext (ucontext_t *ucp) +#define __CONTEXT_FUNC_NAME __getcontext +#define __CONTEXT_ENABLE_FPCSR 1 +#define __CONTEXT_SIGMASK_OFFSET UCONTEXT_SIGMASK - Returns 0 on success -1 and errno on failure. - */ - .text -ENTRY(__getcontext) - /* Store r1, the stack pointer. */ - l.sw (UCONTEXT_MCONTEXT + 1*4)(r3), r1 - /* Store r2, the frame pointer. */ - l.sw (UCONTEXT_MCONTEXT + 2*4)(r3), r2 - /* Store r9, the link register. */ - l.sw (UCONTEXT_MCONTEXT + 9*4)(r3), r9 - /* Store r9 to reg[11] too, as we need two links for makecontext. */ - l.sw (UCONTEXT_MCONTEXT + 11*4)(r3), r9 - /* Store r10, the TLS register. */ - l.sw (UCONTEXT_MCONTEXT + 10*4)(r3), r10 - /* Store r14-r30 even, callee saved registers. 
*/ - l.sw (UCONTEXT_MCONTEXT + 14*4)(r3), r14 - l.sw (UCONTEXT_MCONTEXT + 16*4)(r3), r16 - l.sw (UCONTEXT_MCONTEXT + 18*4)(r3), r18 - l.sw (UCONTEXT_MCONTEXT + 20*4)(r3), r20 - l.sw (UCONTEXT_MCONTEXT + 22*4)(r3), r22 - l.sw (UCONTEXT_MCONTEXT + 24*4)(r3), r24 - l.sw (UCONTEXT_MCONTEXT + 26*4)(r3), r26 - l.sw (UCONTEXT_MCONTEXT + 28*4)(r3), r28 - l.sw (UCONTEXT_MCONTEXT + 30*4)(r3), r30 +#include "getcontext-common.S" - /* Get signal mask. */ - /* rt_sigprocmask (SIG_BLOCK, NULL, &ucp->uc_sigmask, _NSIG8) */ - l.ori r6, r0, _NSIG8 - l.addi r5, r3, UCONTEXT_SIGMASK - l.ori r4, r0, 0 - l.ori r3, r0, SIG_BLOCK - l.ori r11, r0, SYS_ify (rt_sigprocmask) - /* Do the syscall. */ - l.sys 1 - l.nop +versioned_symbol (libc, __getcontext, getcontext, GLIBC_2_40) - /* if -4096 < ret < 0 holds, it's an error */ - l.sfgeui r11, 0xf001 - l.bf 1f - l.nop +#if SHLIB_COMPAT (libc, GLIBC_2_35, GLIBC_2_40) - l.jr r9 - l.ori r11, r0, 0 +/* Define a compat version of getcontext for glibc's before the fpcsr + field was added to mcontext_t. The offset sigmask changed with this + introduction, the change was done because glibc's definition of + ucontext_t was initially defined incompatible with the Linux + definition of ucontext_t. We keep the compatability definition to + allow getcontext, setcontext and swapcontext to work in older + binaries. 
*/ -1: l.j __syscall_error - l.ori r3, r11, 0 +# undef __CONTEXT_FUNC_NAME +# undef __CONTEXT_ENABLE_FPCSR +# undef __CONTEXT_SIGMASK_OFFSET +# define __CONTEXT_FUNC_NAME __getcontext_nofpcsr +# define __CONTEXT_SIGMASK_OFFSET (UCONTEXT_SIGMASK - 4) -END(__getcontext) -weak_alias(__getcontext, getcontext) +# include "getcontext-common.S" + +compat_symbol (libc, __getcontext_nofpcsr, getcontext, GLIBC_2_35) + +#endif diff --git a/sysdeps/unix/sysv/linux/or1k/libc.abilist b/sysdeps/unix/sysv/linux/or1k/libc.abilist index c40c843aaf..959e59e7e7 100644 --- a/sysdeps/unix/sysv/linux/or1k/libc.abilist +++ b/sysdeps/unix/sysv/linux/or1k/libc.abilist @@ -2255,3 +2255,7 @@ GLIBC_2.39 stdc_trailing_zeros_ui F GLIBC_2.39 stdc_trailing_zeros_ul F GLIBC_2.39 stdc_trailing_zeros_ull F GLIBC_2.39 stdc_trailing_zeros_us F +GLIBC_2.40 getcontext F +GLIBC_2.40 makecontext F +GLIBC_2.40 setcontext F +GLIBC_2.40 swapcontext F diff --git a/sysdeps/unix/sysv/linux/or1k/libm.abilist b/sysdeps/unix/sysv/linux/or1k/libm.abilist index 9d26508ff5..69655f7d23 100644 --- a/sysdeps/unix/sysv/linux/or1k/libm.abilist +++ b/sysdeps/unix/sysv/linux/or1k/libm.abilist @@ -757,3 +757,9 @@ GLIBC_2.35 ynf32 F GLIBC_2.35 ynf32x F GLIBC_2.35 ynf64 F GLIBC_2.35 ynl F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/unix/sysv/linux/or1k/makecontext.c b/sysdeps/unix/sysv/linux/or1k/makecontext.c index fa6626e7de..7e131bae41 100644 --- a/sysdeps/unix/sysv/linux/or1k/makecontext.c +++ b/sysdeps/unix/sysv/linux/or1k/makecontext.c @@ -16,6 +16,7 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ +#include <shlib-compat.h> #include <sysdep.h> #include <stdarg.h> #include <stdint.h> @@ -36,12 +37,11 @@ r1 : stack pointer r2 : frame pointer, set to NULL */ -void -__makecontext (ucontext_t *ucp, void (*func) (void), int argc, ...) 
+static void +do_makecontext (ucontext_t *ucp, void (*startcontext) (void), + void (*func) (void), int argc, va_list ap) { - extern void __startcontext (void); unsigned long int *sp; - va_list ap; int i; sp = (unsigned long int *) @@ -55,8 +55,8 @@ __makecontext (ucontext_t *ucp, void (*func) (void), int argc, ...) /* Keep uc_link in r14. */ ucp->uc_mcontext.__gprs[14] = (uintptr_t) ucp->uc_link; - /* Return address points to function __startcontext. */ - ucp->uc_mcontext.__gprs[9] = (uintptr_t) &__startcontext; + /* Return address points to function startcontext. */ + ucp->uc_mcontext.__gprs[9] = (uintptr_t) startcontext; /* Frame pointer is null. */ ucp->uc_mcontext.__gprs[2] = (uintptr_t) 0; /* Restart in user-space starting at 'func'. */ @@ -64,14 +64,47 @@ __makecontext (ucontext_t *ucp, void (*func) (void), int argc, ...) /* Set stack pointer. */ ucp->uc_mcontext.__gprs[1] = (uintptr_t) sp; - va_start (ap, argc); for (i = 0; i < argc; ++i) if (i < 6) ucp->uc_mcontext.__gprs[i + 3] = va_arg (ap, unsigned long int); else sp[i - 6] = va_arg (ap, unsigned long int); +} +void +__makecontext (ucontext_t *ucp, void (*func) (void), int argc, ...) +{ + extern void __startcontext (void); + va_list ap; + + va_start (ap, argc); + do_makecontext (ucp, &__startcontext, func, argc, ap); va_end (ap); } -weak_alias (__makecontext, makecontext) +versioned_symbol (libc, __makecontext, makecontext, GLIBC_2_40); + +#if SHLIB_COMPAT (libc, GLIBC_2_35, GLIBC_2_40) + +/* Define a compat version of makecontext for glibc's before the fpcsr + field was added to mcontext_t. The offset sigmask changed with this + introduction, the change was done because glibc's definition of + ucontext_t was initially defined incompatible with the Linux + definition of ucontext_t. We keep the compatability definition to + allow getcontext, setcontext and swapcontext to work in older + binaries. */ + +void +__makecontext_nofpcsr (ucontext_t *ucp, void (*func) (void), int argc, ...) 
+{ + extern void __startcontext_nofpcsr (void); + va_list ap; + + va_start (ap, argc); + do_makecontext (ucp, &__startcontext_nofpcsr, func, argc, ap); + va_end (ap); +} + +compat_symbol (libc, __makecontext_nofpcsr, makecontext, GLIBC_2_35); + +#endif diff --git a/sysdeps/unix/sysv/linux/or1k/setcontext-common.S b/sysdeps/unix/sysv/linux/or1k/setcontext-common.S new file mode 100644 index 0000000000..8a4f147513 --- /dev/null +++ b/sysdeps/unix/sysv/linux/or1k/setcontext-common.S @@ -0,0 +1,120 @@ +/* Set current context. OpenRISC common version. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +/* This common setcontext and startcontext template helps define + different implementations using control macros. Before including + this file in another file define the following: + + __CONTEXT_FUNC_NAME + __CONTEXT_ENABLE_FPCSR + __CONTEXT_SIGMASK_OFFSET + __STARTCONTEXT_FUNC_NAME + */ + +/* int setcontext (const ucontext_t *ucp) */ + .text +ENTRY(__CONTEXT_FUNC_NAME) + l.ori r30, r3, 0 + + /* Restore signal mask. */ + /* rt_sigprocmask (SIG_SETMASK, &ucp->uc_sigmask, NULL, _NSIG8) */ + l.ori r6, r0, _NSIG8 + l.ori r5, r0, 0 + l.addi r4, r3, __CONTEXT_SIGMASK_OFFSET + l.ori r3, r0, SIG_SETMASK + l.ori r11, r0, SYS_ify (rt_sigprocmask) + /* Do the syscall. 
*/ + l.sys 1 + l.nop + + /* if -4096 < ret < 0 holds, it's an error */ + l.sfgeui r11, 0xf001 + l.bf 1f + l.nop +#ifdef __CONTEXT_ENABLE_FPCSR +# ifdef __or1k_hard_float__ + /* Restore the floating point state. */ + l.lwz r28, (MCONTEXT_FPCSR)(r30) + l.mtspr r0, r28, 20 +# endif /* __or1k_hard_float__ */ +#endif /* __CONTEXT_ENABLE_FPCSR */ + /* Restore argument registers, for the makecontext case. */ + l.lwz r3, (UCONTEXT_MCONTEXT + 3*4)(r30) + l.lwz r4, (UCONTEXT_MCONTEXT + 4*4)(r30) + l.lwz r5, (UCONTEXT_MCONTEXT + 5*4)(r30) + l.lwz r6, (UCONTEXT_MCONTEXT + 6*4)(r30) + l.lwz r7, (UCONTEXT_MCONTEXT + 7*4)(r30) + l.lwz r8, (UCONTEXT_MCONTEXT + 8*4)(r30) + + /* Restore registers stored in getcontext. */ + l.lwz r1, (UCONTEXT_MCONTEXT + 1*4)(r30) + l.lwz r2, (UCONTEXT_MCONTEXT + 2*4)(r30) + l.lwz r9, (UCONTEXT_MCONTEXT + 9*4)(r30) + l.lwz r10, (UCONTEXT_MCONTEXT + 10*4)(r30) + l.lwz r11, (UCONTEXT_MCONTEXT + 11*4)(r30) + /* Restore r14-r30 even, callee saved registers. */ + l.lwz r14, (UCONTEXT_MCONTEXT + 14*4)(r30) + l.lwz r16, (UCONTEXT_MCONTEXT + 16*4)(r30) + l.lwz r18, (UCONTEXT_MCONTEXT + 18*4)(r30) + l.lwz r20, (UCONTEXT_MCONTEXT + 20*4)(r30) + l.lwz r22, (UCONTEXT_MCONTEXT + 22*4)(r30) + l.lwz r24, (UCONTEXT_MCONTEXT + 24*4)(r30) + l.lwz r26, (UCONTEXT_MCONTEXT + 26*4)(r30) + l.lwz r28, (UCONTEXT_MCONTEXT + 28*4)(r30) + l.lwz r30, (UCONTEXT_MCONTEXT + 30*4)(r30) + + l.jr r11 + l.ori r11, r0, 0 + +1: l.j __syscall_error + l.ori r3, r11, 0 + +END (__CONTEXT_FUNC_NAME) + + /* We add a NOP here because when the unwinder is looking for the + enclosing function of the link register (r9) address FDE lookup will + use '$r9 - 1' finding setcontext which is wrong. This is because in + makecontext we have set r9 to the start of &__startcontext. + + If this NOP did not exist the unwinder would repeatedly find + __setcontext's FDE in an infinite loop. Modifying/deleting the below + __startcontext's FDE has no help on this. 
*/ + l.nop + +ENTRY(__STARTCONTEXT_FUNC_NAME) + + l.ori r3, r14, 0 + l.sfeq r3, r0 + /* If uc_link is not 0 resume there, otherwise exit. */ + l.bnf __CONTEXT_FUNC_NAME + l.nop + +#ifdef SHARED + /* Obtain a pointer to .got in r16 */ + l.jal 0x8 + l.movhi r16, gotpchi(_GLOBAL_OFFSET_TABLE_-4) + l.ori r16, r16, gotpclo(_GLOBAL_OFFSET_TABLE_+0) + l.add r16, r16, r9 + l.lwz r16, got(exit)(r16) + l.jr r16 +#else + l.j exit +#endif + l.nop + +END(__STARTCONTEXT_FUNC_NAME) diff --git a/sysdeps/unix/sysv/linux/or1k/setcontext.S b/sysdeps/unix/sysv/linux/or1k/setcontext.S index d28a0ac0aa..a49a5c51c3 100644 --- a/sysdeps/unix/sysv/linux/or1k/setcontext.S +++ b/sysdeps/unix/sysv/linux/or1k/setcontext.S @@ -16,93 +16,39 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ +#include <shlib-compat.h> #include <sysdep.h> #include "ucontext_i.h" -/* int setcontext (const ucontext_t *ucp) */ - .text -ENTRY(__setcontext) - l.ori r30, r3, 0 +#define __CONTEXT_FUNC_NAME __setcontext +#define __CONTEXT_ENABLE_FPCSR 1 +#define __CONTEXT_SIGMASK_OFFSET UCONTEXT_SIGMASK +#define __STARTCONTEXT_FUNC_NAME __startcontext - /* Restore signal mask. */ - /* rt_sigprocmask (SIG_SETMASK, &ucp->uc_sigmask, NULL, _NSIG8) */ - l.ori r6, r0, _NSIG8 - l.ori r5, r0, 0 - l.addi r4, r3, UCONTEXT_SIGMASK - l.ori r3, r0, SIG_SETMASK - l.ori r11, r0, SYS_ify (rt_sigprocmask) - /* Do the syscall. */ - l.sys 1 - l.nop +#include "setcontext-common.S" - /* if -4096 < ret < 0 holds, it's an error */ - l.sfgeui r11, 0xf001 - l.bf 1f - l.nop +versioned_symbol (libc, __setcontext, setcontext, GLIBC_2_40) - /* Restore argument registers, for the makecontext case. 
*/ - l.lwz r3, (UCONTEXT_MCONTEXT + 3*4)(r30) - l.lwz r4, (UCONTEXT_MCONTEXT + 4*4)(r30) - l.lwz r5, (UCONTEXT_MCONTEXT + 5*4)(r30) - l.lwz r6, (UCONTEXT_MCONTEXT + 6*4)(r30) - l.lwz r7, (UCONTEXT_MCONTEXT + 7*4)(r30) - l.lwz r8, (UCONTEXT_MCONTEXT + 8*4)(r30) +#if SHLIB_COMPAT (libc, GLIBC_2_35, GLIBC_2_40) - /* Restore registers stored in getcontext. */ - l.lwz r1, (UCONTEXT_MCONTEXT + 1*4)(r30) - l.lwz r2, (UCONTEXT_MCONTEXT + 2*4)(r30) - l.lwz r9, (UCONTEXT_MCONTEXT + 9*4)(r30) - l.lwz r10, (UCONTEXT_MCONTEXT + 10*4)(r30) - l.lwz r11, (UCONTEXT_MCONTEXT + 11*4)(r30) - /* Restore r14-r30 even, callee saved registers. */ - l.lwz r14, (UCONTEXT_MCONTEXT + 14*4)(r30) - l.lwz r16, (UCONTEXT_MCONTEXT + 16*4)(r30) - l.lwz r18, (UCONTEXT_MCONTEXT + 18*4)(r30) - l.lwz r20, (UCONTEXT_MCONTEXT + 20*4)(r30) - l.lwz r22, (UCONTEXT_MCONTEXT + 22*4)(r30) - l.lwz r24, (UCONTEXT_MCONTEXT + 24*4)(r30) - l.lwz r26, (UCONTEXT_MCONTEXT + 26*4)(r30) - l.lwz r28, (UCONTEXT_MCONTEXT + 28*4)(r30) - l.lwz r30, (UCONTEXT_MCONTEXT + 30*4)(r30) +/* Define a compat version of setcontext for glibc's before the fpcsr + field was added to mcontext_t. The offset sigmask changed with this + introduction, the change was done because glibc's definition of + ucontext_t was initially defined incompatible with the Linux + definition of ucontext_t. We keep the compatability definition to + allow getcontext, setcontext and swapcontext to work in older + binaries. 
*/ - l.jr r11 - l.ori r11, r0, 0 +# undef __CONTEXT_FUNC_NAME +# undef __CONTEXT_ENABLE_FPCSR +# undef __CONTEXT_SIGMASK_OFFSET +# undef __STARTCONTEXT_FUNC_NAME +# define __CONTEXT_FUNC_NAME __setcontext_nofpcsr +# define __CONTEXT_SIGMASK_OFFSET (UCONTEXT_SIGMASK - 4) +# define __STARTCONTEXT_FUNC_NAME __startcontext_nofpcsr -1: l.j __syscall_error - l.ori r3, r11, 0 +# include "setcontext-common.S" -END (__setcontext) -weak_alias (__setcontext, setcontext) +compat_symbol (libc, __setcontext_nofpcsr, setcontext, GLIBC_2_35) - /* We add a NOP here because when the unwinder is looking for the - enclosing function of the link register (r9) address FDE lookup will - use '$r9 - 1' finding setcontext which is wrong. This is because in - makecontext we have set r9 to the start of &__startcontext. - - If this NOP did not exist the unwinder would repeatedly find - __setcontext's FDE in an infinite loop. Modifying/deleting the below - __startcontext's FDE has no help on this. */ - l.nop - -ENTRY(__startcontext) - - l.ori r3, r14, 0 - l.sfeq r3, r0 - /* If uc_link is not 0 resume there, otherwise exit. */ - l.bnf __setcontext - l.nop - -#ifdef SHARED - /* Obtain a pointer to .got in r16 */ - l.jal 0x8 - l.movhi r16, gotpchi(_GLOBAL_OFFSET_TABLE_-4) - l.ori r16, r16, gotpclo(_GLOBAL_OFFSET_TABLE_+0) - l.add r16, r16, r9 - l.lwz r16, got(exit)(r16) - l.jr r16 -#else - l.j exit #endif - l.nop - -END(__startcontext) diff --git a/sysdeps/unix/sysv/linux/or1k/swapcontext-common.S b/sysdeps/unix/sysv/linux/or1k/swapcontext-common.S new file mode 100644 index 0000000000..b7e2d4c820 --- /dev/null +++ b/sysdeps/unix/sysv/linux/or1k/swapcontext-common.S @@ -0,0 +1,139 @@ +/* Swap two contexts. OpenRISC version. + Copyright (C) 2022-2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +/* This common swapcontext template helps define different + implementations using control macros. Before including this file + in another file define the following: + + __CONTEXT_FUNC_NAME + __CONTEXT_ENABLE_FPCSR + __CONTEXT_SIGMASK_OFFSET + */ + +/* int swapcontext (ucontext_t *oucp, const ucontext_t *ucp) */ + .text +ENTRY(__CONTEXT_FUNC_NAME) + + /* Same as getcontext. */ + /* Store r1, the stack pointer. */ + l.sw (UCONTEXT_MCONTEXT + 1*4)(r3), r1 + /* Store r2, the frame pointer. */ + l.sw (UCONTEXT_MCONTEXT + 2*4)(r3), r2 + /* Store r9, the link register. */ + l.sw (UCONTEXT_MCONTEXT + 9*4)(r3), r9 + /* Store r9 to reg[11] too, as we need two links for makecontext. */ + l.sw (UCONTEXT_MCONTEXT + 11*4)(r3), r9 + /* Store r10, the TLS register. */ + l.sw (UCONTEXT_MCONTEXT + 10*4)(r3), r10 + /* Store r14-r30 even, callee saved registers. 
*/ + l.sw (UCONTEXT_MCONTEXT + 14*4)(r3), r14 + l.sw (UCONTEXT_MCONTEXT + 16*4)(r3), r16 + l.sw (UCONTEXT_MCONTEXT + 18*4)(r3), r18 + l.sw (UCONTEXT_MCONTEXT + 20*4)(r3), r20 + l.sw (UCONTEXT_MCONTEXT + 22*4)(r3), r22 + l.sw (UCONTEXT_MCONTEXT + 24*4)(r3), r24 + l.sw (UCONTEXT_MCONTEXT + 26*4)(r3), r26 + l.sw (UCONTEXT_MCONTEXT + 28*4)(r3), r28 + l.sw (UCONTEXT_MCONTEXT + 30*4)(r3), r30 + +#ifdef __CONTEXT_ENABLE_FPCSR +# ifdef __or1k_hard_float__ + /* Store the floating point state. */ + l.mfspr r6, r0, 20 + l.sw (MCONTEXT_FPCSR)(r3), r6 +# else + /* Store zero to indicate default rounding as per softfloat. */ + l.sw (MCONTEXT_FPCSR)(r3), r0 +# endif /* __or1k_hard_float__ */ +#endif /* __CONTEXT_ENABLE_FPCSR */ + /* Store ucp to non-argument syscall preserved register. */ + l.ori r30, r4, 0 + + /* Get signal mask. */ + /* rt_sigprocmask (SIG_BLOCK, NULL, &ucp->uc_sigmask, _NSIG8) */ + l.ori r6, r0, _NSIG8 + l.addi r5, r3, __CONTEXT_SIGMASK_OFFSET + l.ori r4, r0, 0 + l.ori r3, r0, SIG_BLOCK + l.ori r11, r0, SYS_ify (rt_sigprocmask) + /* Do the syscall. */ + l.sys 1 + l.nop + + /* if -4096 < ret < 0 holds, it's an error */ + l.sfgeui r11, 0xf001 + l.bf 1f + l.nop + + /* Same as setcontext. */ + + /* Restore signal mask. */ + /* rt_sigprocmask (SIG_SETMASK, &ucp->uc_sigmask, NULL, _NSIG8) */ + l.ori r6, r0, _NSIG8 + l.ori r5, r0, 0 + l.addi r4, r30, __CONTEXT_SIGMASK_OFFSET + l.ori r3, r0, SIG_SETMASK + l.ori r11, r0, SYS_ify (rt_sigprocmask) + /* Do the syscall. */ + l.sys 1 + l.nop + + /* if -4096 < ret < 0 holds, it's an error */ + l.sfgeui r11, 0xf001 + l.bf 1f + l.nop + +#ifdef __CONTEXT_ENABLE_FPCSR +# ifdef __or1k_hard_float__ + /* Restore the floating point state. */ + l.lwz r28, (MCONTEXT_FPCSR)(r30) + l.mtspr r0, r28, 20 +# endif /* __or1k_hard_float__ */ +#endif /* __CONTEXT_ENABLE_FPCSR */ + + /* Restore argument registers, for the makecontext case. 
*/ + l.lwz r3, (UCONTEXT_MCONTEXT + 3*4)(r30) + l.lwz r4, (UCONTEXT_MCONTEXT + 4*4)(r30) + l.lwz r5, (UCONTEXT_MCONTEXT + 5*4)(r30) + l.lwz r6, (UCONTEXT_MCONTEXT + 6*4)(r30) + l.lwz r7, (UCONTEXT_MCONTEXT + 7*4)(r30) + l.lwz r8, (UCONTEXT_MCONTEXT + 8*4)(r30) + + /* Restore registers stored in getcontext. */ + l.lwz r1, (UCONTEXT_MCONTEXT + 1*4)(r30) + l.lwz r2, (UCONTEXT_MCONTEXT + 2*4)(r30) + l.lwz r9, (UCONTEXT_MCONTEXT + 9*4)(r30) + l.lwz r10, (UCONTEXT_MCONTEXT + 10*4)(r30) + l.lwz r11, (UCONTEXT_MCONTEXT + 11*4)(r30) + l.lwz r14, (UCONTEXT_MCONTEXT + 14*4)(r30) + l.lwz r16, (UCONTEXT_MCONTEXT + 16*4)(r30) + l.lwz r18, (UCONTEXT_MCONTEXT + 18*4)(r30) + l.lwz r20, (UCONTEXT_MCONTEXT + 20*4)(r30) + l.lwz r22, (UCONTEXT_MCONTEXT + 22*4)(r30) + l.lwz r24, (UCONTEXT_MCONTEXT + 24*4)(r30) + l.lwz r26, (UCONTEXT_MCONTEXT + 26*4)(r30) + l.lwz r28, (UCONTEXT_MCONTEXT + 28*4)(r30) + l.lwz r30, (UCONTEXT_MCONTEXT + 30*4)(r30) + + l.jr r11 + l.ori r11, r0, 0 + +1: l.j __syscall_error + l.ori r3, r11, 0 + +END (__CONTEXT_FUNC_NAME) diff --git a/sysdeps/unix/sysv/linux/or1k/swapcontext.S b/sysdeps/unix/sysv/linux/or1k/swapcontext.S index d09651a5b2..861c1e26ba 100644 --- a/sysdeps/unix/sysv/linux/or1k/swapcontext.S +++ b/sysdeps/unix/sysv/linux/or1k/swapcontext.S @@ -16,101 +16,36 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ +#include <shlib-compat.h> #include <sysdep.h> #include "ucontext_i.h" -/* int swapcontext (ucontext_t *oucp, const ucontext_t *ucp) */ - .text -ENTRY(__swapcontext) +#define __CONTEXT_FUNC_NAME __swapcontext +#define __CONTEXT_ENABLE_FPCSR 1 +#define __CONTEXT_SIGMASK_OFFSET UCONTEXT_SIGMASK - /* Same as getcontext. */ - /* Store r1, the stack pointer. */ - l.sw (UCONTEXT_MCONTEXT + 1*4)(r3), r1 - /* Store r2, the frame pointer. */ - l.sw (UCONTEXT_MCONTEXT + 2*4)(r3), r2 - /* Store r9, the link register. 
*/ - l.sw (UCONTEXT_MCONTEXT + 9*4)(r3), r9 - /* Store r9 to reg[11] too, as we need two links for makecontext. */ - l.sw (UCONTEXT_MCONTEXT + 11*4)(r3), r9 - /* Store r10, the TLS register. */ - l.sw (UCONTEXT_MCONTEXT + 10*4)(r3), r10 - /* Store r14-r30 even, callee saved registers. */ - l.sw (UCONTEXT_MCONTEXT + 14*4)(r3), r14 - l.sw (UCONTEXT_MCONTEXT + 16*4)(r3), r16 - l.sw (UCONTEXT_MCONTEXT + 18*4)(r3), r18 - l.sw (UCONTEXT_MCONTEXT + 20*4)(r3), r20 - l.sw (UCONTEXT_MCONTEXT + 22*4)(r3), r22 - l.sw (UCONTEXT_MCONTEXT + 24*4)(r3), r24 - l.sw (UCONTEXT_MCONTEXT + 26*4)(r3), r26 - l.sw (UCONTEXT_MCONTEXT + 28*4)(r3), r28 - l.sw (UCONTEXT_MCONTEXT + 30*4)(r3), r30 +#include "swapcontext-common.S" - /* Store ucp to non-argument syscall preserved register. */ - l.ori r30, r4, 0 +versioned_symbol (libc, __swapcontext, swapcontext, GLIBC_2_40) - /* Get signal mask. */ - /* rt_sigprocmask (SIG_BLOCK, NULL, &ucp->uc_sigmask, _NSIG8) */ - l.ori r6, r0, _NSIG8 - l.addi r5, r3, UCONTEXT_SIGMASK - l.ori r4, r0, 0 - l.ori r3, r0, SIG_BLOCK - l.ori r11, r0, SYS_ify (rt_sigprocmask) - /* Do the syscall. */ - l.sys 1 - l.nop +#if SHLIB_COMPAT (libc, GLIBC_2_35, GLIBC_2_40) - /* if -4096 < ret < 0 holds, it's an error */ - l.sfgeui r11, 0xf001 - l.bf 1f - l.nop +/* Define a compat version of swapcontext for glibc's before the fpcsr + field was added to mcontext_t. The offset sigmask changed with this + introduction, the change was done because glibc's definition of + ucontext_t was initially defined incompatible with the Linux + definition of ucontext_t. We keep the compatability definition to + allow getcontext, setcontext and swapcontext to work in older + binaries. */ - /* Same as setcontext. */ +# undef __CONTEXT_FUNC_NAME +# undef __CONTEXT_ENABLE_FPCSR +# undef __CONTEXT_SIGMASK_OFFSET +# define __CONTEXT_FUNC_NAME __swapcontext_nofpcsr +# define __CONTEXT_SIGMASK_OFFSET (UCONTEXT_SIGMASK - 4) - /* Restore signal mask. 
*/ - /* rt_sigprocmask (SIG_SETMASK, &ucp->uc_sigmask, NULL, _NSIG8) */ - l.ori r6, r0, _NSIG8 - l.ori r5, r0, 0 - l.addi r4, r30, UCONTEXT_SIGMASK - l.ori r3, r0, SIG_SETMASK - l.ori r11, r0, SYS_ify (rt_sigprocmask) - /* Do the syscall. */ - l.sys 1 - l.nop +# include "swapcontext-common.S" - /* if -4096 < ret < 0 holds, it's an error */ - l.sfgeui r11, 0xf001 - l.bf 1f - l.nop +compat_symbol (libc, __swapcontext_nofpcsr, swapcontext, GLIBC_2_35) - /* Restore argument registers, for the makecontext case. */ - l.lwz r3, (UCONTEXT_MCONTEXT + 3*4)(r30) - l.lwz r4, (UCONTEXT_MCONTEXT + 4*4)(r30) - l.lwz r5, (UCONTEXT_MCONTEXT + 5*4)(r30) - l.lwz r6, (UCONTEXT_MCONTEXT + 6*4)(r30) - l.lwz r7, (UCONTEXT_MCONTEXT + 7*4)(r30) - l.lwz r8, (UCONTEXT_MCONTEXT + 8*4)(r30) - - /* Restore registers stored in getcontext. */ - l.lwz r1, (UCONTEXT_MCONTEXT + 1*4)(r30) - l.lwz r2, (UCONTEXT_MCONTEXT + 2*4)(r30) - l.lwz r9, (UCONTEXT_MCONTEXT + 9*4)(r30) - l.lwz r10, (UCONTEXT_MCONTEXT + 10*4)(r30) - l.lwz r11, (UCONTEXT_MCONTEXT + 11*4)(r30) - l.lwz r14, (UCONTEXT_MCONTEXT + 14*4)(r30) - l.lwz r16, (UCONTEXT_MCONTEXT + 16*4)(r30) - l.lwz r18, (UCONTEXT_MCONTEXT + 18*4)(r30) - l.lwz r20, (UCONTEXT_MCONTEXT + 20*4)(r30) - l.lwz r22, (UCONTEXT_MCONTEXT + 22*4)(r30) - l.lwz r24, (UCONTEXT_MCONTEXT + 24*4)(r30) - l.lwz r26, (UCONTEXT_MCONTEXT + 26*4)(r30) - l.lwz r28, (UCONTEXT_MCONTEXT + 28*4)(r30) - l.lwz r30, (UCONTEXT_MCONTEXT + 30*4)(r30) - - l.jr r11 - l.ori r11, r0, 0 - -1: l.j __syscall_error - l.ori r3, r11, 0 - -END (__swapcontext) -weak_alias (__swapcontext, swapcontext) +#endif diff --git a/sysdeps/unix/sysv/linux/or1k/sys/ucontext.h b/sysdeps/unix/sysv/linux/or1k/sys/ucontext.h index b17e919154..1b428592ee 100644 --- a/sysdeps/unix/sysv/linux/or1k/sys/ucontext.h +++ b/sysdeps/unix/sysv/linux/or1k/sys/ucontext.h @@ -38,6 +38,7 @@ typedef struct unsigned long int __gprs[__NGREG]; unsigned long int __pc; unsigned long int __sr; + unsigned long int __fpcsr; } mcontext_t; /* 
Userlevel context. */ diff --git a/sysdeps/unix/sysv/linux/or1k/ucontext_i.sym b/sysdeps/unix/sysv/linux/or1k/ucontext_i.sym index a8d4db080f..45cd72527d 100644 --- a/sysdeps/unix/sysv/linux/or1k/ucontext_i.sym +++ b/sysdeps/unix/sysv/linux/or1k/ucontext_i.sym @@ -13,6 +13,7 @@ _NSIG8 (_NSIG / 8) -- Offsets of the fields in the ucontext_t structure. #define ucontext(member) offsetof (ucontext_t, member) #define stack(member) ucontext (uc_stack.member) +#define mcontext(member) ucontext (uc_mcontext.member) UCONTEXT_LINK ucontext (uc_link) UCONTEXT_STACK ucontext (uc_stack) @@ -23,4 +24,6 @@ STACK_SP stack (ss_sp) STACK_SIZE stack (ss_size) STACK_FLAGS stack (ss_flags) +MCONTEXT_FPCSR mcontext (__fpcsr) + UCONTEXT_SIZE sizeof (ucontext_t) diff --git a/sysdeps/unix/sysv/linux/pidfd_getpid.c b/sysdeps/unix/sysv/linux/pidfd_getpid.c index 8567b413dd..6967477fab 100644 --- a/sysdeps/unix/sysv/linux/pidfd_getpid.c +++ b/sysdeps/unix/sysv/linux/pidfd_getpid.c @@ -74,8 +74,10 @@ parse_fdinfo (const char *l, void *arg) /* Ignore invalid large values. */ if (INT_MULTIPLY_WRAPV (10, n, &n) - || INT_ADD_WRAPV (n, *l++ - '0', &n)) + || INT_ADD_WRAPV (n, *l - '0', &n)) return -1; + + l++; } /* -1 indicates that the process is terminated. */ @@ -105,7 +107,7 @@ pidfd_getpid (int fd) *_fitoa_word (fd, p, 10, 0) = '\0'; struct parse_fdinfo_t fdinfo = { .found = false, .pid = -1 }; - if (!procutils_read_file (fdinfoname, parse_fdinfo, &fdinfo)) + if (!__libc_procutils_read_file (fdinfoname, parse_fdinfo, &fdinfo)) /* The fdinfo contains an invalid 'Pid:' value. 
*/ return INLINE_SYSCALL_ERROR_RETURN_VALUE (EBADF); diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libm.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libm.abilist index 4f88e0af9c..564eb87d4b 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libm.abilist +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libm.abilist @@ -995,3 +995,9 @@ GLIBC_2.4 truncl F GLIBC_2.4 y0l F GLIBC_2.4 y1l F GLIBC_2.4 ynl F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libm.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libm.abilist index edc26140dc..a57eedb779 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libm.abilist +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libm.abilist @@ -994,3 +994,9 @@ GLIBC_2.4 truncl F GLIBC_2.4 y0l F GLIBC_2.4 y1l F GLIBC_2.4 ynl F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libm.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libm.abilist index 0a8a1433d7..7564069a37 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libm.abilist +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libm.abilist @@ -988,3 +988,9 @@ GLIBC_2.4 truncl F GLIBC_2.4 y0l F GLIBC_2.4 y1l F GLIBC_2.4 ynl F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone3.S b/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone3.S index 6fdb4a6073..900c354c9c 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone3.S +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone3.S @@ -154,4 +154,3 @@ L(parent): PSEUDO_END (__clone3) libc_hidden_def (__clone3) -weak_alias (__clone3, 
clone3) diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libm.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libm.abilist index 5174d20032..b02a7115af 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libm.abilist +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libm.abilist @@ -1321,3 +1321,12 @@ GLIBC_2.35 hypotf F GLIBC_2.38 fmod F GLIBC_2.38 fmodf F GLIBC_2.39 exp10 F +GLIBC_2.40 __log2p1ieee128 F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f128 F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1f64x F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/unix/sysv/linux/procutils.c b/sysdeps/unix/sysv/linux/procutils.c index 25666ec0cb..86d3d37329 100644 --- a/sysdeps/unix/sysv/linux/procutils.c +++ b/sysdeps/unix/sysv/linux/procutils.c @@ -71,8 +71,9 @@ next_line (char **r, int fd, char *const buffer, char **cp, char **re, } bool -procutils_read_file (const char *filename, procutils_closure_t closure, - void *arg) +__libc_procutils_read_file (const char *filename, + procutils_closure_t closure, + void *arg) { enum { buffer_size = PROCUTILS_MAX_LINE_LEN }; char buffer[buffer_size]; diff --git a/sysdeps/unix/sysv/linux/procutils.h b/sysdeps/unix/sysv/linux/procutils.h index 73ef3e5f5b..acf1ec587a 100644 --- a/sysdeps/unix/sysv/linux/procutils.h +++ b/sysdeps/unix/sysv/linux/procutils.h @@ -37,7 +37,8 @@ typedef int (*procutils_closure_t) (const char *line, void *arg); It returns true in case the file is fully read or false if CLOSURE returns a value diferent than 0. 
*/ -bool procutils_read_file (const char *filename, procutils_closure_t closure, - void *arg) attribute_hidden; +bool __libc_procutils_read_file (const char *filename, + procutils_closure_t closure, + void *arg) attribute_hidden; #endif diff --git a/sysdeps/unix/sysv/linux/riscv/clone3.S b/sysdeps/unix/sysv/linux/riscv/clone3.S index 29264be054..c81ee2ab51 100644 --- a/sysdeps/unix/sysv/linux/riscv/clone3.S +++ b/sysdeps/unix/sysv/linux/riscv/clone3.S @@ -76,4 +76,3 @@ L(thread_start): END(__thread_start_clone3) libc_hidden_def (__clone3) -weak_alias (__clone3, clone3) diff --git a/sysdeps/unix/sysv/linux/riscv/rv32/libm.abilist b/sysdeps/unix/sysv/linux/riscv/rv32/libm.abilist index 45ca5c677a..7bf0bd7c7a 100644 --- a/sysdeps/unix/sysv/linux/riscv/rv32/libm.abilist +++ b/sysdeps/unix/sysv/linux/riscv/rv32/libm.abilist @@ -1028,3 +1028,11 @@ GLIBC_2.35 fminimumf64x F GLIBC_2.35 fminimuml F GLIBC_2.35 fsqrt F GLIBC_2.35 fsqrtl F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f128 F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1f64x F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/unix/sysv/linux/riscv/rv64/libm.abilist b/sysdeps/unix/sysv/linux/riscv/rv64/libm.abilist index 4838edf991..c22d9bb7c6 100644 --- a/sysdeps/unix/sysv/linux/riscv/rv64/libm.abilist +++ b/sysdeps/unix/sysv/linux/riscv/rv64/libm.abilist @@ -1125,3 +1125,11 @@ GLIBC_2.35 fminimumf64x F GLIBC_2.35 fminimuml F GLIBC_2.35 fsqrt F GLIBC_2.35 fsqrtl F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f128 F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1f64x F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/unix/sysv/linux/s390/s390-32/Makefile b/sysdeps/unix/sysv/linux/s390/s390-32/Makefile index 6b6d59b7dd..2a5b4fbb6f 100644 --- a/sysdeps/unix/sysv/linux/s390/s390-32/Makefile +++ b/sysdeps/unix/sysv/linux/s390/s390-32/Makefile @@ -3,6 +3,7 @@ default-abi := 32 ifeq ($(subdir),login) 
sysdep_routines += utmp32 utmpx32 login32 +shared-only-routines += utmp32 utmpx32 login32 endif ifeq ($(subdir),misc) diff --git a/sysdeps/unix/sysv/linux/s390/s390-32/clone3.S b/sysdeps/unix/sysv/linux/s390/s390-32/clone3.S index d344c2cf36..1ea633aabb 100644 --- a/sysdeps/unix/sysv/linux/s390/s390-32/clone3.S +++ b/sysdeps/unix/sysv/linux/s390/s390-32/clone3.S @@ -75,4 +75,3 @@ thread_start: ASM_SIZE_DIRECTIVE (thread_start) libc_hidden_def (__clone3) -weak_alias (__clone3, clone3) diff --git a/sysdeps/unix/sysv/linux/s390/s390-32/libm.abilist b/sysdeps/unix/sysv/linux/s390/s390-32/libm.abilist index 5ff11fb54f..2fb712ec71 100644 --- a/sysdeps/unix/sysv/linux/s390/s390-32/libm.abilist +++ b/sysdeps/unix/sysv/linux/s390/s390-32/libm.abilist @@ -1252,3 +1252,11 @@ GLIBC_2.4 truncl F GLIBC_2.4 y0l F GLIBC_2.4 y1l F GLIBC_2.4 ynl F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f128 F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1f64x F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/clone3.S b/sysdeps/unix/sysv/linux/s390/s390-64/clone3.S index ca382d903c..70ae7f1532 100644 --- a/sysdeps/unix/sysv/linux/s390/s390-64/clone3.S +++ b/sysdeps/unix/sysv/linux/s390/s390-64/clone3.S @@ -75,4 +75,3 @@ thread_start: ASM_SIZE_DIRECTIVE (thread_start) libc_hidden_def (__clone3) -weak_alias (__clone3, clone3) diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/libm.abilist b/sysdeps/unix/sysv/linux/s390/s390-64/libm.abilist index 0e246c2c54..40489589a7 100644 --- a/sysdeps/unix/sysv/linux/s390/s390-64/libm.abilist +++ b/sysdeps/unix/sysv/linux/s390/s390-64/libm.abilist @@ -1252,3 +1252,11 @@ GLIBC_2.4 truncl F GLIBC_2.4 y0l F GLIBC_2.4 y1l F GLIBC_2.4 ynl F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f128 F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1f64x F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/unix/sysv/linux/sh/be/libm.abilist 
b/sysdeps/unix/sysv/linux/sh/be/libm.abilist index 7b43a866e2..a5363e90d6 100644 --- a/sysdeps/unix/sysv/linux/sh/be/libm.abilist +++ b/sysdeps/unix/sysv/linux/sh/be/libm.abilist @@ -848,3 +848,9 @@ GLIBC_2.38 fmod F GLIBC_2.38 fmodf F GLIBC_2.39 exp10 F GLIBC_2.4 exp2l F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/unix/sysv/linux/sh/le/libm.abilist b/sysdeps/unix/sysv/linux/sh/le/libm.abilist index 7b43a866e2..a5363e90d6 100644 --- a/sysdeps/unix/sysv/linux/sh/le/libm.abilist +++ b/sysdeps/unix/sysv/linux/sh/le/libm.abilist @@ -848,3 +848,9 @@ GLIBC_2.38 fmod F GLIBC_2.38 fmodf F GLIBC_2.39 exp10 F GLIBC_2.4 exp2l F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/libm.abilist b/sysdeps/unix/sysv/linux/sparc/sparc32/libm.abilist index e3dcf3d4e7..9bda9bdeb5 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc32/libm.abilist +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/libm.abilist @@ -1259,3 +1259,11 @@ GLIBC_2.4 truncl F GLIBC_2.4 y0l F GLIBC_2.4 y1l F GLIBC_2.4 ynl F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f128 F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1f64x F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/libm.abilist b/sysdeps/unix/sysv/linux/sparc/sparc64/libm.abilist index 20fef20c8b..61d2aa05a9 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc64/libm.abilist +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/libm.abilist @@ -1149,3 +1149,11 @@ GLIBC_2.35 hypotf F GLIBC_2.38 fmod F GLIBC_2.38 fmodf F GLIBC_2.39 exp10 F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f128 F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1f64x F +GLIBC_2.40 log2p1l F diff --git 
a/sysdeps/unix/sysv/linux/sys/epoll.h b/sysdeps/unix/sysv/linux/sys/epoll.h index fc8dce45c8..45e546fa44 100644 --- a/sysdeps/unix/sysv/linux/sys/epoll.h +++ b/sysdeps/unix/sysv/linux/sys/epoll.h @@ -19,6 +19,7 @@ #define _SYS_EPOLL_H 1 #include <stdint.h> +#include <sys/ioctl.h> #include <sys/types.h> #include <bits/types/sigset_t.h> @@ -87,6 +88,19 @@ struct epoll_event epoll_data_t data; /* User data variable */ } __EPOLL_PACKED; +struct epoll_params +{ + uint32_t busy_poll_usecs; + uint16_t busy_poll_budget; + uint8_t prefer_busy_poll; + + /* pad the struct to a multiple of 64bits */ + uint8_t __pad; +}; + +#define EPOLL_IOC_TYPE 0x8A +#define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params) +#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params) __BEGIN_DECLS diff --git a/sysdeps/unix/sysv/linux/sys/pidfd.h b/sysdeps/unix/sysv/linux/sys/pidfd.h index 1078322062..9f88d297e8 100644 --- a/sysdeps/unix/sysv/linux/sys/pidfd.h +++ b/sysdeps/unix/sysv/linux/sys/pidfd.h @@ -22,12 +22,14 @@ #include <bits/types/siginfo_t.h> #define PIDFD_NONBLOCK O_NONBLOCK +#define PIDFD_THREAD O_EXCL -/* Returns a file descriptor that refers to the process PID. The - close-on-exec is set on the file descriptor. +#define PIDFD_SIGNAL_THREAD (1UL << 0) +#define PIDFD_SIGNAL_THREAD_GROUP (1UL << 1) +#define PIDFD_SIGNAL_PROCESS_GROUP (1UL << 2) - The FLAGS argument is reserved for future use, it must be specified - as 0. */ +/* Returns a file descriptor that refers to the process PID. The + close-on-exec is set on the file descriptor. */ extern int pidfd_open (__pid_t __pid, unsigned int __flags) __THROW; /* Duplicates an existing file descriptor TARGETFD in the process referred @@ -39,10 +41,7 @@ extern int pidfd_getfd (int __pidfd, int __targetfd, unsigned int __flags) __THROW; /* Sends the signal SIG to the target process referred by the PIDFD. If - INFO points to a siginfo_t buffer, it will be populated. 
- - The FLAGS argument is reserved for future use, it must be specified - as 0. */ + INFO points to a siginfo_t buffer, it will be populated. */ extern int pidfd_send_signal (int __pidfd, int __sig, siginfo_t *__info, unsigned int __flags) __THROW; diff --git a/sysdeps/unix/sysv/linux/syscall-names.list b/sysdeps/unix/sysv/linux/syscall-names.list index 6557bcfde4..672d39eaad 100644 --- a/sysdeps/unix/sysv/linux/syscall-names.list +++ b/sysdeps/unix/sysv/linux/syscall-names.list @@ -21,8 +21,8 @@ # This file can list all potential system calls. The names are only # used if the installed kernel headers also provide them. -# The list of system calls is current as of Linux 6.8. -kernel 6.8 +# The list of system calls is current as of Linux 6.9. +kernel 6.9 FAST_atomic_update FAST_cmpxchg diff --git a/sysdeps/unix/sysv/linux/timespec_get.c b/sysdeps/unix/sysv/linux/timespec_get.c index c6e5e66289..778d1e3354 100644 --- a/sysdeps/unix/sysv/linux/timespec_get.c +++ b/sysdeps/unix/sysv/linux/timespec_get.c @@ -5,7 +5,7 @@ The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either - version 2.1 of the License. + version 2.1 of the License, or (at your option) any later version. The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/sysdeps/unix/sysv/linux/timespec_getres.c b/sysdeps/unix/sysv/linux/timespec_getres.c index 5acebe2a2c..2eef9e512c 100644 --- a/sysdeps/unix/sysv/linux/timespec_getres.c +++ b/sysdeps/unix/sysv/linux/timespec_getres.c @@ -5,7 +5,7 @@ The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either - version 2.1 of the License. + version 2.1 of the License, or (at your option) any later version. 
The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/sysdeps/unix/sysv/linux/tst-epoll-ioctls.c b/sysdeps/unix/sysv/linux/tst-epoll-ioctls.c new file mode 100644 index 0000000000..618ecc4e86 --- /dev/null +++ b/sysdeps/unix/sysv/linux/tst-epoll-ioctls.c @@ -0,0 +1,92 @@ +/* Basic tests for Linux epoll ioctls. + Copyright (C) 2022-2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <intprops.h> +#include <stdlib.h> +#include <string.h> +#include <support/check.h> +#include <support/process_state.h> +#include <support/support.h> +#include <support/test-driver.h> +#include <support/xsignal.h> +#include <support/xunistd.h> +#include <sys/ioctl.h> +#include <sys/epoll.h> + +static void +test_epoll_ioctl (void) +{ + int efd = epoll_create1 (0); + TEST_VERIFY_EXIT (efd != -1); + + struct epoll_params params; + + TEST_COMPARE (ioctl (efd, EPIOCGPARAMS, ¶ms), 0); + + /* parameters are all 0 by default */ + TEST_COMPARE (params.busy_poll_usecs, 0); + TEST_COMPARE (params.busy_poll_budget, 0); + TEST_COMPARE (params.prefer_busy_poll, 0); + TEST_COMPARE (params.__pad, 0); + + /* set custom parameters */ + params.busy_poll_usecs = 40; + params.busy_poll_budget = 8; + params.prefer_busy_poll = 1; + params.__pad = 0; + + TEST_COMPARE (ioctl (efd, EPIOCSPARAMS, ¶ms), 0); + + memset (¶ms, 0, sizeof (params)); + + TEST_COMPARE (ioctl (efd, EPIOCGPARAMS, ¶ms), 0); + + /* check custom values were retrieved after being set */ + TEST_COMPARE (params.busy_poll_usecs, 40); + TEST_COMPARE (params.busy_poll_budget, 8); + TEST_COMPARE (params.prefer_busy_poll, 1); + TEST_COMPARE (params.__pad, 0); + + xclose (efd); +} + +static bool +ioctl_supported (void) +{ + int efd = epoll_create1 (0); + TEST_VERIFY_EXIT (efd != -1); + + struct epoll_params params; + int r = ioctl (efd, EPIOCGPARAMS, ¶ms); + xclose (efd); + + return (r == 0); +} + +static int +do_test (void) +{ + if (ioctl_supported ()) + test_epoll_ioctl (); + else + return EXIT_UNSUPPORTED; + + return 0; +} + +#include <support/test-driver.c> diff --git a/sysdeps/unix/sysv/linux/tst-mman-consts.py b/sysdeps/unix/sysv/linux/tst-mman-consts.py index 56c0cf3e22..4a8f4e8919 100644 --- a/sysdeps/unix/sysv/linux/tst-mman-consts.py +++ b/sysdeps/unix/sysv/linux/tst-mman-consts.py @@ -33,7 +33,7 @@ def main(): help='C compiler (including options) to use') args = parser.parse_args() linux_version_headers = 
glibcsyscalls.linux_kernel_version(args.cc) - linux_version_glibc = (6, 8) + linux_version_glibc = (6, 9) sys.exit(glibcextract.compare_macro_consts( '#define _GNU_SOURCE 1\n' '#include <sys/mman.h>\n', diff --git a/sysdeps/unix/sysv/linux/tst-mount-consts.py b/sysdeps/unix/sysv/linux/tst-mount-consts.py index 8613db96d7..c4a67221c1 100755 --- a/sysdeps/unix/sysv/linux/tst-mount-consts.py +++ b/sysdeps/unix/sysv/linux/tst-mount-consts.py @@ -39,10 +39,10 @@ def main(): sys.exit (77) linux_version_headers = glibcsyscalls.linux_kernel_version(args.cc) - # Constants in glibc were updated to match Linux v6.8. When glibc + # Constants in glibc were updated to match Linux v6.9. When glibc # constants are updated this value should be updated to match the # released kernel version from which the constants were taken. - linux_version_glibc = (6, 8) + linux_version_glibc = (6, 9) def check(cte, exclude=None): return glibcextract.compare_macro_consts( '#include <sys/mount.h>\n', diff --git a/sysdeps/unix/sysv/linux/tst-pidfd-consts.py b/sysdeps/unix/sysv/linux/tst-pidfd-consts.py index 96875ac266..6f05291949 100644 --- a/sysdeps/unix/sysv/linux/tst-pidfd-consts.py +++ b/sysdeps/unix/sysv/linux/tst-pidfd-consts.py @@ -39,7 +39,7 @@ def main(): sys.exit (77) linux_version_headers = glibcsyscalls.linux_kernel_version(args.cc) - linux_version_glibc = (6, 8) + linux_version_glibc = (6, 9) sys.exit(glibcextract.compare_macro_consts( '#include <sys/pidfd.h>\n', '#include <asm/fcntl.h>\n' diff --git a/sysdeps/unix/sysv/linux/x86_64/64/libm.abilist b/sysdeps/unix/sysv/linux/x86_64/64/libm.abilist index c1c5c76e26..bbf646fe7f 100644 --- a/sysdeps/unix/sysv/linux/x86_64/64/libm.abilist +++ b/sysdeps/unix/sysv/linux/x86_64/64/libm.abilist @@ -1182,3 +1182,11 @@ GLIBC_2.35 hypotf F GLIBC_2.38 fmod F GLIBC_2.38 fmodf F GLIBC_2.39 exp10 F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f128 F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 
log2p1f64x F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/unix/sysv/linux/x86_64/clone3.S b/sysdeps/unix/sysv/linux/x86_64/clone3.S index 5a2d42234e..4cc19e066c 100644 --- a/sysdeps/unix/sysv/linux/x86_64/clone3.S +++ b/sysdeps/unix/sysv/linux/x86_64/clone3.S @@ -86,4 +86,3 @@ L(thread_start): PSEUDO_END (__clone3) libc_hidden_def (__clone3) -weak_alias (__clone3, clone3) diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/libm.abilist b/sysdeps/unix/sysv/linux/x86_64/x32/libm.abilist index fac219d45a..7c7b884dde 100644 --- a/sysdeps/unix/sysv/linux/x86_64/x32/libm.abilist +++ b/sysdeps/unix/sysv/linux/x86_64/x32/libm.abilist @@ -1182,3 +1182,11 @@ GLIBC_2.35 hypotf F GLIBC_2.38 fmod F GLIBC_2.38 fmodf F GLIBC_2.39 exp10 F +GLIBC_2.40 log2p1 F +GLIBC_2.40 log2p1f F +GLIBC_2.40 log2p1f128 F +GLIBC_2.40 log2p1f32 F +GLIBC_2.40 log2p1f32x F +GLIBC_2.40 log2p1f64 F +GLIBC_2.40 log2p1f64x F +GLIBC_2.40 log2p1l F diff --git a/sysdeps/x86/cacheinfo.h b/sysdeps/x86/cacheinfo.h index ab73556772..83491607c7 100644 --- a/sysdeps/x86/cacheinfo.h +++ b/sysdeps/x86/cacheinfo.h @@ -35,9 +35,12 @@ long int __x86_data_cache_size attribute_hidden = 32 * 1024; long int __x86_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2; long int __x86_shared_cache_size attribute_hidden = 1024 * 1024; -/* Threshold to use non temporal store. */ +/* Threshold to use non temporal store in memmove. */ long int __x86_shared_non_temporal_threshold attribute_hidden; +/* Threshold to use non temporal store in memset. */ +long int __x86_memset_non_temporal_threshold attribute_hidden; + /* Threshold to use Enhanced REP MOVSB. 
*/ long int __x86_rep_movsb_threshold attribute_hidden = 2048; @@ -77,6 +80,9 @@ init_cacheinfo (void) __x86_shared_non_temporal_threshold = cpu_features->non_temporal_threshold; + __x86_memset_non_temporal_threshold + = cpu_features->memset_non_temporal_threshold; + __x86_rep_movsb_threshold = cpu_features->rep_movsb_threshold; __x86_rep_stosb_threshold = cpu_features->rep_stosb_threshold; __x86_rep_movsb_stop_threshold = cpu_features->rep_movsb_stop_threshold; diff --git a/sysdeps/x86/configure b/sysdeps/x86/configure index d28d9bcb29..04c6ba3e6c 100644 --- a/sysdeps/x86/configure +++ b/sysdeps/x86/configure @@ -139,8 +139,12 @@ libc_cv_have_x86_isa_level=4 libc_cv_have_x86_isa_level=3 #elif MINIMUM_X86_ISA_LEVEL == 2 libc_cv_have_x86_isa_level=2 -#else +#elif defined __x86_64__ libc_cv_have_x86_isa_level=baseline +#elif MINIMUM_X86_ISA_LEVEL == 1 +libc_cv_have_x86_isa_level=1 +#else +libc_cv_have_x86_isa_level=0 #endif EOF eval `${CC-cc} $CFLAGS $CPPFLAGS $ISAFLAG -I$srcdir -E conftest.c | grep libc_cv_have_x86_isa_level` @@ -148,8 +152,10 @@ EOF fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_have_x86_isa_level" >&5 printf "%s\n" "$libc_cv_have_x86_isa_level" >&6; } -else +elif test $base_machine = x86_64; then libc_cv_have_x86_isa_level=baseline +else + libc_cv_have_x86_isa_level=0 fi if test $libc_cv_have_x86_isa_level = baseline; then printf "%s\n" "#define MINIMUM_X86_ISA_LEVEL 1" >>confdefs.h diff --git a/sysdeps/x86/configure.ac b/sysdeps/x86/configure.ac index 5b0acd03d2..8a259d3971 100644 --- a/sysdeps/x86/configure.ac +++ b/sysdeps/x86/configure.ac @@ -96,14 +96,20 @@ libc_cv_have_x86_isa_level=4 libc_cv_have_x86_isa_level=3 #elif MINIMUM_X86_ISA_LEVEL == 2 libc_cv_have_x86_isa_level=2 -#else +#elif defined __x86_64__ libc_cv_have_x86_isa_level=baseline +#elif MINIMUM_X86_ISA_LEVEL == 1 +libc_cv_have_x86_isa_level=1 +#else +libc_cv_have_x86_isa_level=0 #endif EOF eval `${CC-cc} $CFLAGS $CPPFLAGS $ISAFLAG -I$srcdir -E conftest.c | grep 
libc_cv_have_x86_isa_level` rm -rf conftest*]) -else +elif test $base_machine = x86_64; then libc_cv_have_x86_isa_level=baseline +else + libc_cv_have_x86_isa_level=0 fi if test $libc_cv_have_x86_isa_level = baseline; then AC_DEFINE_UNQUOTED(MINIMUM_X86_ISA_LEVEL, 1) diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h index 5a98f70364..d2fe61b997 100644 --- a/sysdeps/x86/dl-cacheinfo.h +++ b/sysdeps/x86/dl-cacheinfo.h @@ -986,6 +986,13 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) if (CPU_FEATURE_USABLE_P (cpu_features, FSRM)) rep_movsb_threshold = 2112; + /* Non-temporal stores are more performant on Intel and AMD hardware above + non_temporal_threshold. Enable this for both Intel and AMD hardware. */ + unsigned long int memset_non_temporal_threshold = SIZE_MAX; + if (cpu_features->basic.kind == arch_kind_intel + || cpu_features->basic.kind == arch_kind_amd) + memset_non_temporal_threshold = non_temporal_threshold; + /* For AMD CPUs that support ERMS (Zen3+), REP MOVSB is in a lot of cases slower than the vectorized path (and for some alignments, it is really slow, check BZ #30994). 
*/ @@ -1012,6 +1019,11 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) && tunable_size <= maximum_non_temporal_threshold) non_temporal_threshold = tunable_size; + tunable_size = TUNABLE_GET (x86_memset_non_temporal_threshold, long int, NULL); + if (tunable_size > minimum_non_temporal_threshold + && tunable_size <= maximum_non_temporal_threshold) + memset_non_temporal_threshold = tunable_size; + tunable_size = TUNABLE_GET (x86_rep_movsb_threshold, long int, NULL); if (tunable_size > minimum_rep_movsb_threshold) rep_movsb_threshold = tunable_size; @@ -1032,6 +1044,9 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) TUNABLE_SET_WITH_BOUNDS (x86_non_temporal_threshold, non_temporal_threshold, minimum_non_temporal_threshold, maximum_non_temporal_threshold); + TUNABLE_SET_WITH_BOUNDS ( + x86_memset_non_temporal_threshold, memset_non_temporal_threshold, + minimum_non_temporal_threshold, maximum_non_temporal_threshold); TUNABLE_SET_WITH_BOUNDS (x86_rep_movsb_threshold, rep_movsb_threshold, minimum_rep_movsb_threshold, SIZE_MAX); TUNABLE_SET_WITH_BOUNDS (x86_rep_stosb_threshold, rep_stosb_threshold, 1, @@ -1045,6 +1060,7 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) cpu_features->data_cache_size = data; cpu_features->shared_cache_size = shared; cpu_features->non_temporal_threshold = non_temporal_threshold; + cpu_features->memset_non_temporal_threshold = memset_non_temporal_threshold; cpu_features->rep_movsb_threshold = rep_movsb_threshold; cpu_features->rep_stosb_threshold = rep_stosb_threshold; cpu_features->rep_movsb_stop_threshold = rep_movsb_stop_threshold; diff --git a/sysdeps/x86/dl-diagnostics-cpu.c b/sysdeps/x86/dl-diagnostics-cpu.c index ceafde9481..49eeb5f70a 100644 --- a/sysdeps/x86/dl-diagnostics-cpu.c +++ b/sysdeps/x86/dl-diagnostics-cpu.c @@ -94,6 +94,8 @@ _dl_diagnostics_cpu (void) cpu_features->shared_cache_size); print_cpu_features_value ("non_temporal_threshold", cpu_features->non_temporal_threshold); + print_cpu_features_value 
("memset_non_temporal_threshold", + cpu_features->memset_non_temporal_threshold); print_cpu_features_value ("rep_movsb_threshold", cpu_features->rep_movsb_threshold); print_cpu_features_value ("rep_movsb_stop_threshold", diff --git a/sysdeps/x86/dl-get-cpu-features.c b/sysdeps/x86/dl-get-cpu-features.c index 4d6c5c59a6..579d02d638 100644 --- a/sysdeps/x86/dl-get-cpu-features.c +++ b/sysdeps/x86/dl-get-cpu-features.c @@ -64,6 +64,11 @@ Fatal glibc error: CPU does not support x86-64-v%d\n", 4); # endif /* ISA level 4 */ # endif /* ISA level 3 */ # endif /* ISA level 2 */ +# ifdef GCCMACRO__APX_F__ + if (!CPU_FEATURE_USABLE_P (cpu_features, APX_F)) + _dl_fatal_printf ("\ +Fatal glibc error: CPU does not support APX\n"); +# endif # endif /* IS_IN (rtld) */ } } diff --git a/sysdeps/x86/dl-tunables.list b/sysdeps/x86/dl-tunables.list index 7d82da0dec..a0a1299592 100644 --- a/sysdeps/x86/dl-tunables.list +++ b/sysdeps/x86/dl-tunables.list @@ -30,6 +30,9 @@ glibc { x86_non_temporal_threshold { type: SIZE_T } + x86_memset_non_temporal_threshold { + type: SIZE_T + } x86_rep_movsb_threshold { type: SIZE_T # Since there is overhead to set up REP MOVSB operation, REP diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h index cd7bd27cf3..aaae44f0e1 100644 --- a/sysdeps/x86/include/cpu-features.h +++ b/sysdeps/x86/include/cpu-features.h @@ -944,8 +944,10 @@ struct cpu_features /* Shared cache size for use in memory and string routines, typically L2 or L3 size. */ unsigned long int shared_cache_size; - /* Threshold to use non temporal store. */ + /* Threshold to use non temporal store in memmove. */ unsigned long int non_temporal_threshold; + /* Threshold to use non temporal store in memset. */ + unsigned long int memset_non_temporal_threshold; /* Threshold to use "rep movsb". */ unsigned long int rep_movsb_threshold; /* Threshold to stop using "rep movsb". 
*/ diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h index 2c7f74212b..03c1fe2bf5 100644 --- a/sysdeps/x86/isa-level.h +++ b/sysdeps/x86/isa-level.h @@ -35,7 +35,17 @@ # define __X86_ISA_V1 0 #endif -#if __X86_ISA_V1 && defined __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16 \ +#ifdef __x86_64__ +# ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16 +# define __GCC_HAVE_SYNC_COMPARE_AND_SWAP +# endif +#else +# ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 +# define __GCC_HAVE_SYNC_COMPARE_AND_SWAP +# endif +#endif + +#if __X86_ISA_V1 && defined __GCC_HAVE_SYNC_COMPARE_AND_SWAP \ && defined HAVE_X86_LAHF_SAHF && defined __POPCNT__ && defined __SSE3__ \ && defined __SSSE3__ && defined __SSE4_1__ && defined __SSE4_2__ /* NB: ISAs in x86-64 ISA level v2 are used. */ diff --git a/sysdeps/x86/tst-cpu-features-supports.c b/sysdeps/x86/tst-cpu-features-supports.c index e270c29db7..a50afea2f9 100644 --- a/sysdeps/x86/tst-cpu-features-supports.c +++ b/sysdeps/x86/tst-cpu-features-supports.c @@ -65,7 +65,7 @@ do_test (int argc, char **argv) #endif fails += CHECK_FEATURE_ACTIVE (avx, AVX); fails += CHECK_FEATURE_ACTIVE (avx2, AVX2); -#if __GNUC_PREREQ (7, 0) +#if __GNUC_PREREQ (7, 0) && !__GNUC_PREREQ (15, 0) fails += CHECK_FEATURE_ACTIVE (avx5124fmaps, AVX512_4FMAPS); fails += CHECK_FEATURE_ACTIVE (avx5124vnniw, AVX512_4VNNIW); #endif @@ -92,14 +92,18 @@ do_test (int argc, char **argv) #if __GNUC_PREREQ (6, 0) fails += CHECK_FEATURE_ACTIVE (avx512bw, AVX512BW); fails += CHECK_FEATURE_ACTIVE (avx512cd, AVX512CD); +# if !__GNUC_PREREQ (15, 0) fails += CHECK_FEATURE_ACTIVE (avx512er, AVX512ER); +# endif fails += CHECK_FEATURE_ACTIVE (avx512dq, AVX512DQ); #endif #if __GNUC_PREREQ (5, 0) fails += CHECK_FEATURE_ACTIVE (avx512f, AVX512F); #endif #if __GNUC_PREREQ (6, 0) +# if !__GNUC_PREREQ (15, 0) fails += CHECK_FEATURE_ACTIVE (avx512pf, AVX512PF); +# endif fails += CHECK_FEATURE_ACTIVE (avx512vl, AVX512VL); #endif #if __GNUC_PREREQ (5, 0) @@ -148,7 +152,9 @@ do_test (int argc, char **argv) 
#endif fails += CHECK_FEATURE_ACTIVE (popcnt, POPCNT); #if __GNUC_PREREQ (11, 0) +# if !__GNUC_PREREQ (15, 0) fails += CHECK_FEATURE_ACTIVE (prefetchwt1, PREFETCHWT1); +# endif fails += CHECK_FEATURE_ACTIVE (ptwrite, PTWRITE); fails += CHECK_FEATURE_ACTIVE (rdpid, RDPID); fails += CHECK_FEATURE_ACTIVE (rdrnd, RDRAND); diff --git a/sysdeps/x86/tst-hwcap-tunables.c b/sysdeps/x86/tst-hwcap-tunables.c index f6a65b88de..8589a9fd66 100644 --- a/sysdeps/x86/tst-hwcap-tunables.c +++ b/sysdeps/x86/tst-hwcap-tunables.c @@ -133,7 +133,7 @@ do_test (int argc, char *argv[]) setenv ("GLIBC_TUNABLES", tunable, 1); struct support_capture_subprocess result - = support_capture_subprogram (spargv[0], spargv); + = support_capture_subprogram (spargv[0], spargv, NULL); support_capture_subprocess_check (&result, "tst-tunables", 0, sc_allow_stderr); support_capture_subprocess_free (&result); diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h index ff5d45f7cb..a6de3793e4 100644 --- a/sysdeps/x86_64/dl-machine.h +++ b/sysdeps/x86_64/dl-machine.h @@ -245,10 +245,11 @@ elf_machine_plt_value (struct link_map *map, const ElfW(Rela) *reloc, MAP is the object containing the reloc. 
*/ static inline void __attribute__((always_inline)) -elf_machine_rela(struct link_map *map, struct r_scope_elem *scope[], - const ElfW(Rela) *reloc, const ElfW(Sym) *sym, - const struct r_found_version *version, - void *const reloc_addr_arg, int skip_ifunc) { +elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], + const ElfW(Rela) *reloc, const ElfW(Sym) *sym, + const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) +{ ElfW(Addr) *const reloc_addr = reloc_addr_arg; const unsigned long int r_type = ELFW(R_TYPE) (reloc->r_info); diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps index e0015347d0..893c22b19b 100644 --- a/sysdeps/x86_64/fpu/libm-test-ulps +++ b/sysdeps/x86_64/fpu/libm-test-ulps @@ -1847,6 +1847,30 @@ float: 1 Function: "log2_vlen8_avx2": float: 1 +Function: "log2p1": +double: 1 +float: 1 +float128: 3 +ldouble: 2 + +Function: "log2p1_downward": +double: 2 +float: 2 +float128: 3 +ldouble: 4 + +Function: "log2p1_towardzero": +double: 2 +float: 2 +float128: 2 +ldouble: 4 + +Function: "log2p1_upward": +double: 1 +float: 2 +float128: 2 +ldouble: 5 + Function: "log_downward": float: 2 float128: 1 diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S index 97839a2248..88bf08e4f4 100644 --- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S +++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S @@ -21,10 +21,13 @@ 2. If size is less than VEC, use integer register stores. 3. If size is from VEC_SIZE to 2 * VEC_SIZE, use 2 VEC stores. 4. If size is from 2 * VEC_SIZE to 4 * VEC_SIZE, use 4 VEC stores. - 5. On machines ERMS feature, if size is greater or equal than - __x86_rep_stosb_threshold then REP STOSB will be used. - 6. If size is more to 4 * VEC_SIZE, align to 4 * VEC_SIZE with - 4 VEC stores and store 4 * VEC at a time until done. */ + 5. 
If size is more to 4 * VEC_SIZE, align to 1 * VEC_SIZE with + 4 VEC stores and store 4 * VEC at a time until done. + 6. On machines ERMS feature, if size is range + [__x86_rep_stosb_threshold, __x86_memset_non_temporal_threshold) + then REP STOSB will be used. + 7. If size >= __x86_memset_non_temporal_threshold, use a + non-temporal stores. */ #include <sysdep.h> @@ -147,6 +150,41 @@ L(entry_from_wmemset): VMOVU %VMM(0), -VEC_SIZE(%rdi,%rdx) VMOVU %VMM(0), (%rdi) VZEROUPPER_RETURN + + /* If have AVX512 mask instructions put L(less_vec) close to + entry as it doesn't take much space and is likely a hot target. */ +#ifdef USE_LESS_VEC_MASK_STORE + /* Align to ensure the L(less_vec) logic all fits in 1x cache lines. */ + .p2align 6,, 47 + .p2align 4 +L(less_vec): +L(less_vec_from_wmemset): + /* Less than 1 VEC. */ +# if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64 +# error Unsupported VEC_SIZE! +# endif + /* Clear high bits from edi. Only keeping bits relevant to page + cross check. Note that we are using rax which is set in + MEMSET_VDUP_TO_VEC0_AND_SET_RETURN as ptr from here on out. */ + andl $(PAGE_SIZE - 1), %edi + /* Check if VEC_SIZE store cross page. Mask stores suffer + serious performance degradation when it has to fault suppress. */ + cmpl $(PAGE_SIZE - VEC_SIZE), %edi + /* This is generally considered a cold target. */ + ja L(cross_page) +# if VEC_SIZE > 32 + movq $-1, %rcx + bzhiq %rdx, %rcx, %rcx + kmovq %rcx, %k1 +# else + movl $-1, %ecx + bzhil %edx, %ecx, %ecx + kmovd %ecx, %k1 +# endif + vmovdqu8 %VMM(0), (%rax){%k1} + VZEROUPPER_RETURN +#endif + #if defined USE_MULTIARCH && IS_IN (libc) END (MEMSET_SYMBOL (__memset, unaligned)) @@ -185,54 +223,6 @@ L(last_2x_vec): #endif VZEROUPPER_RETURN - /* If have AVX512 mask instructions put L(less_vec) close to - entry as it doesn't take much space and is likely a hot target. - */ -#ifdef USE_LESS_VEC_MASK_STORE - .p2align 4,, 10 -L(less_vec): -L(less_vec_from_wmemset): - /* Less than 1 VEC. 
*/ -# if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64 -# error Unsupported VEC_SIZE! -# endif - /* Clear high bits from edi. Only keeping bits relevant to page - cross check. Note that we are using rax which is set in - MEMSET_VDUP_TO_VEC0_AND_SET_RETURN as ptr from here on out. */ - andl $(PAGE_SIZE - 1), %edi - /* Check if VEC_SIZE store cross page. Mask stores suffer - serious performance degradation when it has to fault suppress. - */ - cmpl $(PAGE_SIZE - VEC_SIZE), %edi - /* This is generally considered a cold target. */ - ja L(cross_page) -# if VEC_SIZE > 32 - movq $-1, %rcx - bzhiq %rdx, %rcx, %rcx - kmovq %rcx, %k1 -# else - movl $-1, %ecx - bzhil %edx, %ecx, %ecx - kmovd %ecx, %k1 -# endif - vmovdqu8 %VMM(0), (%rax){%k1} - VZEROUPPER_RETURN - -# if defined USE_MULTIARCH && IS_IN (libc) - /* Include L(stosb_local) here if including L(less_vec) between - L(stosb_more_2x_vec) and ENTRY. This is to cache align the - L(stosb_more_2x_vec) target. */ - .p2align 4,, 10 -L(stosb_local): - movzbl %sil, %eax - mov %RDX_LP, %RCX_LP - mov %RDI_LP, %RDX_LP - rep stosb - mov %RDX_LP, %RAX_LP - VZEROUPPER_RETURN -# endif -#endif - #if defined USE_MULTIARCH && IS_IN (libc) .p2align 4 L(stosb_more_2x_vec): @@ -318,21 +308,33 @@ L(return_vzeroupper): ret #endif - .p2align 4,, 10 -#ifndef USE_LESS_VEC_MASK_STORE -# if defined USE_MULTIARCH && IS_IN (libc) +#ifdef USE_WITH_AVX2 + .p2align 4 +#else + .p2align 4,, 4 +#endif + +#if defined USE_MULTIARCH && IS_IN (libc) /* If no USE_LESS_VEC_MASK put L(stosb_local) here. Will be in range for 2-byte jump encoding. */ L(stosb_local): + cmp __x86_memset_non_temporal_threshold(%rip), %RDX_LP + jae L(nt_memset) movzbl %sil, %eax mov %RDX_LP, %RCX_LP mov %RDI_LP, %RDX_LP rep stosb +# if (defined USE_WITH_SSE2) || (defined USE_WITH_AVX512) + /* Use xchg to save 1-byte (this helps align targets below). 
*/ + xchg %RDX_LP, %RAX_LP +# else mov %RDX_LP, %RAX_LP - VZEROUPPER_RETURN # endif + VZEROUPPER_RETURN +#endif +#ifndef USE_LESS_VEC_MASK_STORE /* Define L(less_vec) only if not otherwise defined. */ - .p2align 4 + .p2align 4,, 12 L(less_vec): /* Broadcast esi to partial register (i.e VEC_SIZE == 32 broadcast to xmm). This is only does anything for AVX2. */ @@ -423,4 +425,35 @@ L(between_2_3): movb %SET_REG8, -1(%LESS_VEC_REG, %rdx) #endif ret -END (MEMSET_SYMBOL (__memset, unaligned_erms)) + +#if defined USE_MULTIARCH && IS_IN (libc) +# ifdef USE_WITH_AVX512 + /* Force align so the loop doesn't cross a cache-line. */ + .p2align 4 +# endif + .p2align 4,, 7 + /* Memset using non-temporal stores. */ +L(nt_memset): + VMOVU %VMM(0), (VEC_SIZE * 0)(%rdi) + leaq (VEC_SIZE * -4)(%rdi, %rdx), %rdx + /* Align DST. */ + orq $(VEC_SIZE * 1 - 1), %rdi + incq %rdi + .p2align 4,, 7 +L(nt_loop): + VMOVNT %VMM(0), (VEC_SIZE * 0)(%rdi) + VMOVNT %VMM(0), (VEC_SIZE * 1)(%rdi) + VMOVNT %VMM(0), (VEC_SIZE * 2)(%rdi) + VMOVNT %VMM(0), (VEC_SIZE * 3)(%rdi) + subq $(VEC_SIZE * -4), %rdi + cmpq %rdx, %rdi + jb L(nt_loop) + sfence + VMOVU %VMM(0), (VEC_SIZE * 0)(%rdx) + VMOVU %VMM(0), (VEC_SIZE * 1)(%rdx) + VMOVU %VMM(0), (VEC_SIZE * 2)(%rdx) + VMOVU %VMM(0), (VEC_SIZE * 3)(%rdx) + VZEROUPPER_RETURN +#endif + +END(MEMSET_SYMBOL(__memset, unaligned_erms)) diff --git a/sysdeps/x86_64/multiarch/wcsncat-evex.S b/sysdeps/x86_64/multiarch/wcsncat-evex.S index 392215950a..10bfb0a531 100644 --- a/sysdeps/x86_64/multiarch/wcsncat-evex.S +++ b/sysdeps/x86_64/multiarch/wcsncat-evex.S @@ -1,9 +1,9 @@ -#ifndef WCSCAT -# define WCSCAT __wcsncat_evex +#ifndef WCSNCAT +# define WCSNCAT __wcsncat_evex #endif #define USE_AS_WCSCPY #define USE_AS_STRCAT -#define STRNCAT WCSCAT +#define STRNCAT WCSNCAT #include "strncat-evex.S" |