10 files changed, 94 insertions, 50 deletions
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
index c2e36dcbdf..e3c811549c 100644
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -1062,6 +1062,7 @@ float128: 1
 ldouble: 1
 
 Function: Imaginary part of "csin":
+float: 1
 float128: 1
 
 Function: Real part of "csin_downward":
@@ -1431,25 +1432,21 @@ float: 1
 
 Function: "exp10m1":
 double: 4
-float: 2
 float128: 3
 ldouble: 4
 
 Function: "exp10m1_downward":
 double: 3
-float: 3
 float128: 6
 ldouble: 6
 
 Function: "exp10m1_towardzero":
 double: 2
-float: 3
 float128: 6
 ldouble: 5
 
 Function: "exp10m1_upward":
 double: 5
-float: 4
 float128: 6
 ldouble: 6
 
@@ -1499,25 +1496,21 @@ float: 1
 
 Function: "exp2m1":
 double: 2
-float: 2
 float128: 2
 ldouble: 3
 
 Function: "exp2m1_downward":
 double: 3
-float: 3
 float128: 3
 ldouble: 6
 
 Function: "exp2m1_towardzero":
 double: 3
-float: 2
 float128: 4
 ldouble: 5
 
 Function: "exp2m1_upward":
 double: 3
-float: 3
 float128: 5
 ldouble: 6
 
@@ -1558,25 +1551,21 @@ float: 1
 
 Function: "expm1":
 double: 1
-float: 1
 float128: 2
 ldouble: 3
 
 Function: "expm1_downward":
 double: 1
-float: 1
 float128: 2
 ldouble: 4
 
 Function: "expm1_towardzero":
 double: 1
-float: 2
 float128: 4
 ldouble: 4
 
 Function: "expm1_upward":
 double: 1
-float: 1
 float128: 3
 ldouble: 4
 
@@ -1809,49 +1798,41 @@ float: 1
 
 Function: "log10p1":
 double: 2
-float: 2
 float128: 3
 ldouble: 4
 
 Function: "log10p1_downward":
 double: 2
-float: 3
 float128: 4
 ldouble: 8
 
 Function: "log10p1_towardzero":
 double: 3
-float: 2
 float128: 3
 ldouble: 8
 
 Function: "log10p1_upward":
 double: 2
-float: 3
 float128: 4
 ldouble: 6
 
 Function: "log1p":
 double: 1
-float: 1
 float128: 3
 ldouble: 2
 
 Function: "log1p_downward":
 double: 2
-float: 2
 float128: 3
 ldouble: 4
 
 Function: "log1p_towardzero":
 double: 2
-float: 2
 float128: 3
 ldouble: 4
 
 Function: "log1p_upward":
 double: 2
-float: 2
 float128: 2
 ldouble: 3
 
@@ -1921,25 +1902,21 @@ float: 1
 
 Function: "log2p1":
 double: 2
-float: 2
 float128: 3
 ldouble: 4
 
 Function: "log2p1_downward":
 double: 2
-float: 2
 float128: 3
 ldouble: 4
 
 Function: "log2p1_towardzero":
 double: 2
-float: 2
 float128: 2
 ldouble: 4
 
 Function: "log2p1_upward":
 double: 2
-float: 2
 float128: 3
 ldouble: 5
 
@@ -1981,25 +1958,21 @@ float: 3
 
 Function: "logp1":
 double: 1
-float: 1
 float128: 3
 ldouble: 2
 
 Function: "logp1_downward":
 double: 2
-float: 2
 float128: 3
 ldouble: 4
 
 Function: "logp1_towardzero":
 double: 2
-float: 2
 float128: 3
 ldouble: 4
 
 Function: "logp1_upward":
 double: 2
-float: 2
 float128: 2
 ldouble: 3
 
@@ -2263,25 +2236,21 @@ double: 1
 
 Function: "tgamma":
 double: 9
-float: 8
 float128: 4
 ldouble: 5
 
 Function: "tgamma_downward":
 double: 9
-float: 7
 float128: 5
 ldouble: 6
 
 Function: "tgamma_towardzero":
 double: 9
-float: 7
 float128: 5
 ldouble: 6
 
 Function: "tgamma_upward":
 double: 9
-float: 8
 float128: 4
 ldouble: 5
 
diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
index cbe09d49f4..e823d2fcc6 100644
--- a/sysdeps/x86_64/fpu/multiarch/Makefile
+++ b/sysdeps/x86_64/fpu/multiarch/Makefile
@@ -11,6 +11,8 @@ CFLAGS-s_log1p-fma.c = -mfma -mavx2
 CFLAGS-s_sin-fma.c = -mfma -mavx2
 CFLAGS-s_tan-fma.c = -mfma -mavx2
 CFLAGS-s_sincos-fma.c = -mfma -mavx2
+CFLAGS-s_exp10m1f-fma.c = -mfma -mavx2
+CFLAGS-s_exp2m1f-fma.c = -mfma -mavx2
 
 CFLAGS-e_exp2f-fma.c = -mfma -mavx2
 CFLAGS-e_expf-fma.c = -mfma -mavx2
@@ -72,6 +74,8 @@ libm-sysdep_routines += \
   s_ceilf-sse4_1 \
   s_cosf-fma \
   s_cosf-sse2 \
+  s_exp10m1f-fma \
+  s_exp2m1f-fma \
   s_expm1-fma \
   s_floor-sse4_1 \
   s_floorf-sse4_1 \
diff --git a/sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c b/sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c
new file mode 100644
index 0000000000..3dda04e2dd
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c
@@ -0,0 +1,4 @@
+#define __exp10m1f __exp10m1f_fma
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/flt-32/s_exp10m1f.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c b/sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c
new file mode 100644
index 0000000000..8040b7ed79
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c
@@ -0,0 +1,33 @@
+/* Multiple versions of exp10m1f.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
+# include <libm-alias-float.h>
+
+extern float __redirect_exp10m1f (float);
+
+# define SYMBOL_NAME exp10m1f
+# include "ifunc-fma.h"
+
+libc_ifunc_redirected (__redirect_exp10m1f, __exp10m1f, IFUNC_SELECTOR ());
+libm_alias_float (__exp10m1, exp10m1)
+
+# define __exp10m1f __exp10m1f_sse2
+#endif
+#include <sysdeps/ieee754/flt-32/s_exp10m1f.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_exp2m1f-fma.c b/sysdeps/x86_64/fpu/multiarch/s_exp2m1f-fma.c
new file mode 100644
index 0000000000..bfa00eae4e
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_exp2m1f-fma.c
@@ -0,0 +1,4 @@
+#define __exp2m1f __exp2m1f_fma
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/flt-32/s_exp2m1f.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_exp2m1f.c b/sysdeps/x86_64/fpu/multiarch/s_exp2m1f.c
new file mode 100644
index 0000000000..8e2d7ec384
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_exp2m1f.c
@@ -0,0 +1,33 @@
+/* Multiple versions of exp2m1f.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
+# include <libm-alias-float.h>
+
+extern float __redirect_exp2m1f (float);
+
+# define SYMBOL_NAME exp2m1f
+# include "ifunc-fma.h"
+
+libc_ifunc_redirected (__redirect_exp2m1f, __exp2m1f, IFUNC_SELECTOR ());
+libm_alias_float (__exp2m1, exp2m1)
+
+# define __exp2m1f __exp2m1f_sse2
+#endif
+#include <sysdeps/ieee754/flt-32/s_exp2m1f.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_fma.c b/sysdeps/x86_64/fpu/multiarch/s_fma.c
index 10e6e625b3..92409a2779 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_fma.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_fma.c
@@ -9,7 +9,7 @@
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.
 
    You should have received a copy of the GNU Lesser General Public
diff --git a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c
index 85725e29c0..b5b63fd125 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c
@@ -9,7 +9,7 @@
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.
 
    You should have received a copy of the GNU Lesser General Public
diff --git a/sysdeps/x86_64/multiarch/strcmp-evex.S b/sysdeps/x86_64/multiarch/strcmp-evex.S
index 06730ab2a1..cea034f394 100644
--- a/sysdeps/x86_64/multiarch/strcmp-evex.S
+++ b/sysdeps/x86_64/multiarch/strcmp-evex.S
@@ -209,7 +209,9 @@
    returned.  */
 
 	.section SECTION(.text), "ax", @progbits
-	.align	16
+	/* Align to 64 bytes here.  This is to get the L(loop) block
+	   ideally aligned for the DSB.  */
+	.align	64
 	.type	STRCMP, @function
 	.globl	STRCMP
 # ifdef USE_AS_STRCASECMP_L
@@ -509,9 +511,7 @@ L(ret4):
 	ret
 # endif
 
-	/* 32 byte align here ensures the main loop is ideally aligned
-	   for DSB.  */
-	.p2align 5
+	.p2align 4,, 4
 L(more_3x_vec):
 	/* Safe to compare 4x vectors.  */
 	VMOVU	(VEC_SIZE)(%rdi), %VMM(0)
@@ -1426,10 +1426,9 @@ L(less_32_till_page):
 L(ret_zero_page_cross_slow_case0):
 	xorl	%eax, %eax
 	ret
-# endif
-
-
+# else
 	.p2align 4,, 10
+# endif
 L(less_16_till_page):
 	cmpl	$((VEC_SIZE - 8) / SIZE_OF_CHAR), %eax
 	ja	L(less_8_till_page)
@@ -1482,8 +1481,12 @@ L(less_16_till_page):
 # endif
 	jmp	L(prepare_loop_aligned)
 
-
-
+# ifndef USE_AS_STRNCMP
+	/* Fits in the alignment padding bytes.  */
+L(ret_zero_4_loop):
+	xorl	%eax, %eax
+	ret
+# endif
 
 	.p2align 4,, 10
 L(less_8_till_page):
@@ -1554,6 +1557,7 @@ L(ret_less_8_wcs):
 
 #  ifdef USE_AS_STRNCMP
 	.p2align 4,, 2
+L(ret_zero_4_loop):
 L(ret_zero_page_cross_slow_case1):
 	xorl	%eax, %eax
 	ret
@@ -1586,10 +1590,6 @@ L(less_4_loop):
 	subq	$-(CHAR_PER_VEC * 4), %rdx
 #  endif
 	jmp	L(prepare_loop_aligned)
-
-L(ret_zero_4_loop):
-	xorl	%eax, %eax
-	ret
 L(ret_less_4_loop):
 	xorl	%r8d, %eax
 	subl	%r8d, %eax
diff --git a/sysdeps/x86_64/nptl/tcb-offsets.sym b/sysdeps/x86_64/nptl/tcb-offsets.sym
index 2bbd563a6c..988a4b8593 100644
--- a/sysdeps/x86_64/nptl/tcb-offsets.sym
+++ b/sysdeps/x86_64/nptl/tcb-offsets.sym
@@ -13,6 +13,3 @@ MULTIPLE_THREADS_OFFSET	offsetof (tcbhead_t, multiple_threads)
 POINTER_GUARD		offsetof (tcbhead_t, pointer_guard)
 FEATURE_1_OFFSET	offsetof (tcbhead_t, feature_1)
 SSP_BASE_OFFSET		offsetof (tcbhead_t, ssp_base)
-
--- Not strictly offsets, but these values are also used in the TCB.
-TCB_CANCELED_BITMASK	 CANCELED_BITMASK