about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- sysdeps/x86_64/memset.S | 45
-rw-r--r-- sysdeps/x86_64/multiarch/ifunc-impl-list.c | 249
-rw-r--r-- sysdeps/x86_64/multiarch/ifunc-memset.h | 45
-rw-r--r-- sysdeps/x86_64/multiarch/ifunc-wmemset.h | 21
-rw-r--r-- sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S | 5
-rw-r--r-- sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S | 4
-rw-r--r-- sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S | 12
-rw-r--r-- sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S | 12
-rw-r--r-- sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S | 57
-rw-r--r-- sysdeps/x86_64/multiarch/rtld-memset.S | 18
10 files changed, 265 insertions, 203 deletions
diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
index a6eea61a4d..f4e1bab601 100644
--- a/sysdeps/x86_64/memset.S
+++ b/sysdeps/x86_64/memset.S
@@ -18,47 +18,18 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#define USE_WITH_SSE2	1
 
-#define VEC_SIZE	16
-#define MOV_SIZE	3
-#define RET_SIZE	1
+#define MEMSET_SYMBOL(p,s)	memset
+#define MEMSET_CHK_SYMBOL(p,s)	p
 
-#define VEC(i)		xmm##i
-#define VMOVU     movups
-#define VMOVA     movaps
+#define WMEMSET_SYMBOL(p,s)	__wmemset
+#define WMEMSET_CHK_SYMBOL(p,s) p
 
-# define MEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
-  movd d, %xmm0; \
-  movq r, %rax; \
-  punpcklbw %xmm0, %xmm0; \
-  punpcklwd %xmm0, %xmm0; \
-  pshufd $0, %xmm0, %xmm0
+#define DEFAULT_IMPL_V1	"multiarch/memset-sse2-unaligned-erms.S"
+#define DEFAULT_IMPL_V3	"multiarch/memset-avx2-unaligned-erms.S"
+#define DEFAULT_IMPL_V4	"multiarch/memset-evex-unaligned-erms.S"
 
-# define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
-  movd d, %xmm0; \
-  pshufd $0, %xmm0, %xmm0; \
-  movq r, %rax
-
-# define MEMSET_VDUP_TO_VEC0_HIGH()
-# define MEMSET_VDUP_TO_VEC0_LOW()
-
-# define WMEMSET_VDUP_TO_VEC0_HIGH()
-# define WMEMSET_VDUP_TO_VEC0_LOW()
-
-#define SECTION(p)		p
-
-#ifndef MEMSET_SYMBOL
-# define MEMSET_CHK_SYMBOL(p,s)	p
-# define MEMSET_SYMBOL(p,s)	memset
-#endif
-
-#ifndef WMEMSET_SYMBOL
-# define WMEMSET_CHK_SYMBOL(p,s) p
-# define WMEMSET_SYMBOL(p,s)	__wmemset
-#endif
-
-#include "multiarch/memset-vec-unaligned-erms.S"
+#include "isa-default-impl.h"
 
 libc_hidden_builtin_def (memset)
 
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 7858aa316f..21008c72b4 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -213,94 +213,99 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, __memset_chk,
 	      IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
 			      __memset_chk_erms)
-	      IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
-			      __memset_chk_sse2_unaligned)
-	      IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
-			      __memset_chk_sse2_unaligned_erms)
-	      IFUNC_IMPL_ADD (array, i, __memset_chk,
-			      CPU_FEATURE_USABLE (AVX2),
-			      __memset_chk_avx2_unaligned)
-	      IFUNC_IMPL_ADD (array, i, __memset_chk,
-			      CPU_FEATURE_USABLE (AVX2),
-			      __memset_chk_avx2_unaligned_erms)
-	      IFUNC_IMPL_ADD (array, i, __memset_chk,
-			      (CPU_FEATURE_USABLE (AVX2)
-			       && CPU_FEATURE_USABLE (RTM)),
-			      __memset_chk_avx2_unaligned_rtm)
-	      IFUNC_IMPL_ADD (array, i, __memset_chk,
-			      (CPU_FEATURE_USABLE (AVX2)
-			       && CPU_FEATURE_USABLE (RTM)),
-			      __memset_chk_avx2_unaligned_erms_rtm)
-	      IFUNC_IMPL_ADD (array, i, __memset_chk,
-			      (CPU_FEATURE_USABLE (AVX512VL)
-			       && CPU_FEATURE_USABLE (AVX512BW)
-			       && CPU_FEATURE_USABLE (BMI2)),
-			      __memset_chk_evex_unaligned)
-	      IFUNC_IMPL_ADD (array, i, __memset_chk,
-			      (CPU_FEATURE_USABLE (AVX512VL)
-			       && CPU_FEATURE_USABLE (AVX512BW)
-			       && CPU_FEATURE_USABLE (BMI2)),
-			      __memset_chk_evex_unaligned_erms)
-	      IFUNC_IMPL_ADD (array, i, __memset_chk,
-			      (CPU_FEATURE_USABLE (AVX512VL)
-			       && CPU_FEATURE_USABLE (AVX512BW)
-			       && CPU_FEATURE_USABLE (BMI2)),
-			      __memset_chk_avx512_unaligned_erms)
-	      IFUNC_IMPL_ADD (array, i, __memset_chk,
-			      (CPU_FEATURE_USABLE (AVX512VL)
-			       && CPU_FEATURE_USABLE (AVX512BW)
-			       && CPU_FEATURE_USABLE (BMI2)),
-			      __memset_chk_avx512_unaligned)
-	      IFUNC_IMPL_ADD (array, i, __memset_chk,
-			      CPU_FEATURE_USABLE (AVX512F),
-			      __memset_chk_avx512_no_vzeroupper)
+	      X86_IFUNC_IMPL_ADD_V4 (array, i, __memset_chk,
+				     (CPU_FEATURE_USABLE (AVX512VL)
+				      && CPU_FEATURE_USABLE (AVX512BW)
+				      && CPU_FEATURE_USABLE (BMI2)),
+				     __memset_chk_avx512_unaligned_erms)
+	      X86_IFUNC_IMPL_ADD_V4 (array, i, __memset_chk,
+				     (CPU_FEATURE_USABLE (AVX512VL)
+				      && CPU_FEATURE_USABLE (AVX512BW)
+				      && CPU_FEATURE_USABLE (BMI2)),
+				     __memset_chk_avx512_unaligned)
+	      X86_IFUNC_IMPL_ADD_V4 (array, i, __memset_chk,
+				     CPU_FEATURE_USABLE (AVX512F),
+				     __memset_chk_avx512_no_vzeroupper)
+	      X86_IFUNC_IMPL_ADD_V4 (array, i, __memset_chk,
+				     (CPU_FEATURE_USABLE (AVX512VL)
+				      && CPU_FEATURE_USABLE (AVX512BW)
+				      && CPU_FEATURE_USABLE (BMI2)),
+				     __memset_chk_evex_unaligned)
+	      X86_IFUNC_IMPL_ADD_V4 (array, i, __memset_chk,
+				     (CPU_FEATURE_USABLE (AVX512VL)
+				      && CPU_FEATURE_USABLE (AVX512BW)
+				      && CPU_FEATURE_USABLE (BMI2)),
+				     __memset_chk_evex_unaligned_erms)
+	      X86_IFUNC_IMPL_ADD_V3 (array, i, __memset_chk,
+				     CPU_FEATURE_USABLE (AVX2),
+				     __memset_chk_avx2_unaligned)
+	      X86_IFUNC_IMPL_ADD_V3 (array, i, __memset_chk,
+				     CPU_FEATURE_USABLE (AVX2),
+				     __memset_chk_avx2_unaligned_erms)
+	      X86_IFUNC_IMPL_ADD_V3 (array, i, __memset_chk,
+				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (RTM)),
+				     __memset_chk_avx2_unaligned_rtm)
+	      X86_IFUNC_IMPL_ADD_V3 (array, i, __memset_chk,
+				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (RTM)),
+				     __memset_chk_avx2_unaligned_erms_rtm)
+	      /* ISA V2 wrapper for SSE2 implementation because the SSE2
+	         implementation is also used at ISA level 2.  */
+	      X86_IFUNC_IMPL_ADD_V2 (array, i, __memset_chk, 1,
+				     __memset_chk_sse2_unaligned)
+	      X86_IFUNC_IMPL_ADD_V2 (array, i, __memset_chk, 1,
+				     __memset_chk_sse2_unaligned_erms)
 	      )
 #endif
 
   /* Support sysdeps/x86_64/multiarch/memset.c.  */
   IFUNC_IMPL (i, name, memset,
 	      IFUNC_IMPL_ADD (array, i, memset, 1,
-			      __memset_sse2_unaligned)
-	      IFUNC_IMPL_ADD (array, i, memset, 1,
-			      __memset_sse2_unaligned_erms)
-	      IFUNC_IMPL_ADD (array, i, memset, 1, __memset_erms)
-	      IFUNC_IMPL_ADD (array, i, memset,
-			      CPU_FEATURE_USABLE (AVX2),
-			      __memset_avx2_unaligned)
-	      IFUNC_IMPL_ADD (array, i, memset,
-			      CPU_FEATURE_USABLE (AVX2),
-			      __memset_avx2_unaligned_erms)
-	      IFUNC_IMPL_ADD (array, i, memset,
-			      (CPU_FEATURE_USABLE (AVX2)
-			       && CPU_FEATURE_USABLE (RTM)),
-			      __memset_avx2_unaligned_rtm)
-	      IFUNC_IMPL_ADD (array, i, memset,
-			      (CPU_FEATURE_USABLE (AVX2)
-			       && CPU_FEATURE_USABLE (RTM)),
-			      __memset_avx2_unaligned_erms_rtm)
-	      IFUNC_IMPL_ADD (array, i, memset,
-			      (CPU_FEATURE_USABLE (AVX512VL)
-			       && CPU_FEATURE_USABLE (AVX512BW)
-			       && CPU_FEATURE_USABLE (BMI2)),
-			      __memset_evex_unaligned)
-	      IFUNC_IMPL_ADD (array, i, memset,
-			      (CPU_FEATURE_USABLE (AVX512VL)
-			       && CPU_FEATURE_USABLE (AVX512BW)
-			       && CPU_FEATURE_USABLE (BMI2)),
-			      __memset_evex_unaligned_erms)
-	      IFUNC_IMPL_ADD (array, i, memset,
-			      (CPU_FEATURE_USABLE (AVX512VL)
-			       && CPU_FEATURE_USABLE (AVX512BW)
-			       && CPU_FEATURE_USABLE (BMI2)),
-			      __memset_avx512_unaligned_erms)
-	      IFUNC_IMPL_ADD (array, i, memset,
-			      (CPU_FEATURE_USABLE (AVX512VL)
-			       && CPU_FEATURE_USABLE (AVX512BW)
-			       && CPU_FEATURE_USABLE (BMI2)),
-			      __memset_avx512_unaligned)
-	      IFUNC_IMPL_ADD (array, i, memset,
-			      CPU_FEATURE_USABLE (AVX512F),
-			      __memset_avx512_no_vzeroupper)
+			      __memset_erms)
+	      X86_IFUNC_IMPL_ADD_V4 (array, i, memset,
+				     (CPU_FEATURE_USABLE (AVX512VL)
+				      && CPU_FEATURE_USABLE (AVX512BW)
+				      && CPU_FEATURE_USABLE (BMI2)),
+				     __memset_avx512_unaligned_erms)
+	      X86_IFUNC_IMPL_ADD_V4 (array, i, memset,
+				     (CPU_FEATURE_USABLE (AVX512VL)
+				      && CPU_FEATURE_USABLE (AVX512BW)
+				      && CPU_FEATURE_USABLE (BMI2)),
+				     __memset_avx512_unaligned)
+	      X86_IFUNC_IMPL_ADD_V4 (array, i, memset,
+				     CPU_FEATURE_USABLE (AVX512F),
+				     __memset_avx512_no_vzeroupper)
+	      X86_IFUNC_IMPL_ADD_V4 (array, i, memset,
+				     (CPU_FEATURE_USABLE (AVX512VL)
+				      && CPU_FEATURE_USABLE (AVX512BW)
+				      && CPU_FEATURE_USABLE (BMI2)),
+				     __memset_evex_unaligned)
+	      X86_IFUNC_IMPL_ADD_V4 (array, i, memset,
+				     (CPU_FEATURE_USABLE (AVX512VL)
+				      && CPU_FEATURE_USABLE (AVX512BW)
+				      && CPU_FEATURE_USABLE (BMI2)),
+				     __memset_evex_unaligned_erms)
+	      X86_IFUNC_IMPL_ADD_V3 (array, i, memset,
+				     CPU_FEATURE_USABLE (AVX2),
+				     __memset_avx2_unaligned)
+	      X86_IFUNC_IMPL_ADD_V3 (array, i, memset,
+				     CPU_FEATURE_USABLE (AVX2),
+				     __memset_avx2_unaligned_erms)
+	      X86_IFUNC_IMPL_ADD_V3 (array, i, memset,
+				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (RTM)),
+				     __memset_avx2_unaligned_rtm)
+	      X86_IFUNC_IMPL_ADD_V3 (array, i, memset,
+				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (RTM)),
+				     __memset_avx2_unaligned_erms_rtm)
+	      /* ISA V2 wrapper for SSE2 implementation because the SSE2
+	         implementation is also used at ISA level 2.  */
+	      X86_IFUNC_IMPL_ADD_V2 (array, i, memset, 1,
+				     __memset_sse2_unaligned)
+	      X86_IFUNC_IMPL_ADD_V2 (array, i, memset, 1,
+				     __memset_sse2_unaligned_erms)
 	     )
 
   /* Support sysdeps/x86_64/multiarch/rawmemchr.c.  */
@@ -821,25 +826,27 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 
   /* Support sysdeps/x86_64/multiarch/wmemset.c.  */
   IFUNC_IMPL (i, name, wmemset,
-	      IFUNC_IMPL_ADD (array, i, wmemset, 1,
-			      __wmemset_sse2_unaligned)
-	      IFUNC_IMPL_ADD (array, i, wmemset,
-			      CPU_FEATURE_USABLE (AVX2),
-			      __wmemset_avx2_unaligned)
-	      IFUNC_IMPL_ADD (array, i, wmemset,
-			      (CPU_FEATURE_USABLE (AVX2)
-			       && CPU_FEATURE_USABLE (RTM)),
-			      __wmemset_avx2_unaligned_rtm)
-	      IFUNC_IMPL_ADD (array, i, wmemset,
-			      (CPU_FEATURE_USABLE (AVX512VL)
-			       && CPU_FEATURE_USABLE (AVX512BW)
-			       && CPU_FEATURE_USABLE (BMI2)),
-			      __wmemset_evex_unaligned)
-	      IFUNC_IMPL_ADD (array, i, wmemset,
-			      (CPU_FEATURE_USABLE (AVX512VL)
-			       && CPU_FEATURE_USABLE (AVX512BW)
-			       && CPU_FEATURE_USABLE (BMI2)),
-			      __wmemset_avx512_unaligned))
+	      X86_IFUNC_IMPL_ADD_V4 (array, i, wmemset,
+				     (CPU_FEATURE_USABLE (AVX512VL)
+				      && CPU_FEATURE_USABLE (AVX512BW)
+				      && CPU_FEATURE_USABLE (BMI2)),
+				     __wmemset_evex_unaligned)
+	      X86_IFUNC_IMPL_ADD_V4 (array, i, wmemset,
+				     (CPU_FEATURE_USABLE (AVX512VL)
+				      && CPU_FEATURE_USABLE (AVX512BW)
+				      && CPU_FEATURE_USABLE (BMI2)),
+				     __wmemset_avx512_unaligned)
+	      X86_IFUNC_IMPL_ADD_V3 (array, i, wmemset,
+				     CPU_FEATURE_USABLE (AVX2),
+				     __wmemset_avx2_unaligned)
+	      X86_IFUNC_IMPL_ADD_V3 (array, i, wmemset,
+				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (RTM)),
+				     __wmemset_avx2_unaligned_rtm)
+	      /* ISA V2 wrapper for SSE2 implementation because the SSE2
+	         implementation is also used at ISA level 2.  */
+	      X86_IFUNC_IMPL_ADD_V2 (array, i, wmemset, 1,
+				     __wmemset_sse2_unaligned))
 
 #ifdef SHARED
   /* Support sysdeps/x86_64/multiarch/memcpy_chk.c.  */
@@ -1049,25 +1056,27 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 #ifdef SHARED
   /* Support sysdeps/x86_64/multiarch/wmemset_chk.c.  */
   IFUNC_IMPL (i, name, __wmemset_chk,
-	      IFUNC_IMPL_ADD (array, i, __wmemset_chk, 1,
-			      __wmemset_chk_sse2_unaligned)
-	      IFUNC_IMPL_ADD (array, i, __wmemset_chk,
-			      CPU_FEATURE_USABLE (AVX2),
-			      __wmemset_chk_avx2_unaligned)
-	      IFUNC_IMPL_ADD (array, i, __wmemset_chk,
-			      (CPU_FEATURE_USABLE (AVX2)
-			       && CPU_FEATURE_USABLE (RTM)),
-			      __wmemset_chk_avx2_unaligned_rtm)
-	      IFUNC_IMPL_ADD (array, i, __wmemset_chk,
-			      (CPU_FEATURE_USABLE (AVX512VL)
-			       && CPU_FEATURE_USABLE (AVX512BW)
-			       && CPU_FEATURE_USABLE (BMI2)),
-			      __wmemset_chk_evex_unaligned)
-	      IFUNC_IMPL_ADD (array, i, __wmemset_chk,
-			      (CPU_FEATURE_USABLE (AVX512VL)
-			       && CPU_FEATURE_USABLE (AVX512BW)
-			       && CPU_FEATURE_USABLE (BMI2)),
-			      __wmemset_chk_avx512_unaligned))
+	      X86_IFUNC_IMPL_ADD_V4 (array, i, __wmemset_chk,
+				     (CPU_FEATURE_USABLE (AVX512VL)
+				      && CPU_FEATURE_USABLE (AVX512BW)
+				      && CPU_FEATURE_USABLE (BMI2)),
+				     __wmemset_chk_evex_unaligned)
+	      X86_IFUNC_IMPL_ADD_V4 (array, i, __wmemset_chk,
+				     (CPU_FEATURE_USABLE (AVX512VL)
+				      && CPU_FEATURE_USABLE (AVX512BW)
+				      && CPU_FEATURE_USABLE (BMI2)),
+				     __wmemset_chk_avx512_unaligned)
+	      X86_IFUNC_IMPL_ADD_V3 (array, i, __wmemset_chk,
+				     CPU_FEATURE_USABLE (AVX2),
+				     __wmemset_chk_avx2_unaligned)
+	      X86_IFUNC_IMPL_ADD_V3 (array, i, __wmemset_chk,
+				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (RTM)),
+				     __wmemset_chk_avx2_unaligned_rtm)
+	      /* ISA V2 wrapper for SSE2 implementation because the SSE2
+	         implementation is also used at ISA level 2.  */
+	      X86_IFUNC_IMPL_ADD_V2 (array, i, __wmemset_chk, 1,
+				     __wmemset_chk_sse2_unaligned))
 #endif
 
   return 0;
diff --git a/sysdeps/x86_64/multiarch/ifunc-memset.h b/sysdeps/x86_64/multiarch/ifunc-memset.h
index 64d179913c..ed514976aa 100644
--- a/sysdeps/x86_64/multiarch/ifunc-memset.h
+++ b/sysdeps/x86_64/multiarch/ifunc-memset.h
@@ -20,10 +20,19 @@
 #include <init-arch.h>
 
 extern __typeof (REDIRECT_NAME) OPTIMIZE (erms) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned)
   attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned_erms)
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms)
+  attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_no_vzeroupper)
+  attribute_hidden;
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned)
+  attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms)
   attribute_hidden;
+
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms)
   attribute_hidden;
@@ -31,31 +40,26 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_rtm)
   attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms_rtm)
   attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned)
-  attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms)
-  attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned)
-  attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms)
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
   attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_no_vzeroupper)
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned_erms)
   attribute_hidden;
 
 static inline void *
 IFUNC_SELECTOR (void)
 {
-  const struct cpu_features* cpu_features = __get_cpu_features ();
+  const struct cpu_features *cpu_features = __get_cpu_features ();
 
   if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_ERMS))
     return OPTIMIZE (erms);
 
-  if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F)
+  if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512F)
       && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
     {
-      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
-          && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
-          && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
+      if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
+	  && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
+	  && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
 	{
 	  if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
 	    return OPTIMIZE (avx512_unaligned_erms);
@@ -66,11 +70,11 @@ IFUNC_SELECTOR (void)
       return OPTIMIZE (avx512_no_vzeroupper);
     }
 
-  if (CPU_FEATURE_USABLE_P (cpu_features, AVX2))
+  if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2))
     {
-      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
-          && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
-          && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
+      if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
+	  && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
+	  && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
 	{
 	  if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
 	    return OPTIMIZE (evex_unaligned_erms);
@@ -86,7 +90,8 @@ IFUNC_SELECTOR (void)
 	  return OPTIMIZE (avx2_unaligned_rtm);
 	}
 
-      if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+      if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+				       Prefer_No_VZEROUPPER, !))
 	{
 	  if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
 	    return OPTIMIZE (avx2_unaligned_erms);
diff --git a/sysdeps/x86_64/multiarch/ifunc-wmemset.h b/sysdeps/x86_64/multiarch/ifunc-wmemset.h
index 87c48e2387..3810c719c6 100644
--- a/sysdeps/x86_64/multiarch/ifunc-wmemset.h
+++ b/sysdeps/x86_64/multiarch/ifunc-wmemset.h
@@ -18,22 +18,26 @@
 
 #include <init-arch.h>
 
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned) attribute_hidden;
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned) attribute_hidden;
+
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_rtm)
   attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned) attribute_hidden;
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden;
 
 static inline void *
 IFUNC_SELECTOR (void)
 {
-  const struct cpu_features* cpu_features = __get_cpu_features ();
+  const struct cpu_features *cpu_features = __get_cpu_features ();
 
-  if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
-      && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+  if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+      && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+				      AVX_Fast_Unaligned_Load, ))
     {
-      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
+      if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
 	{
 	  if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
 	    return OPTIMIZE (avx512_unaligned);
@@ -44,7 +48,8 @@ IFUNC_SELECTOR (void)
       if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
 	return OPTIMIZE (avx2_unaligned_rtm);
 
-      if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+      if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+				       Prefer_No_VZEROUPPER, !))
 	return OPTIMIZE (avx2_unaligned);
     }
 
diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
index c0bf2875d0..a9054a9122 100644
--- a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
@@ -1,4 +1,7 @@
-#if IS_IN (libc)
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (3)
+
 # define USE_WITH_AVX2	1
 
 # define VEC_SIZE	32
diff --git a/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S
index c5be8f57ef..8cc9c16d73 100644
--- a/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S
+++ b/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S
@@ -17,8 +17,10 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (4)
 
-#if IS_IN (libc)
 
 #include "asm-syntax.h"
 #ifndef MEMSET
diff --git a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
index 5241216a77..47623b8ee8 100644
--- a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
@@ -1,4 +1,7 @@
-#if IS_IN (libc)
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (4)
+
 # define USE_WITH_AVX512	1
 
 # define VEC_SIZE	64
@@ -30,8 +33,15 @@
 # define WMEMSET_VDUP_TO_VEC0_LOW()
 
 # define SECTION(p)		p##.evex512
+
+#ifndef MEMSET_SYMBOL
 # define MEMSET_SYMBOL(p,s)	p##_avx512_##s
+#endif
+#ifndef WMEMSET_SYMBOL
 # define WMEMSET_SYMBOL(p,s)	p##_avx512_##s
+#endif
+
+
 # define USE_LESS_VEC_MASK_STORE	1
 # include "memset-vec-unaligned-erms.S"
 #endif
diff --git a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
index 6370021506..ac4b2d2d50 100644
--- a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
@@ -1,4 +1,7 @@
-#if IS_IN (libc)
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (4)
+
 # define USE_WITH_EVEX	1
 
 # define VEC_SIZE	32
@@ -30,8 +33,15 @@
 # define WMEMSET_VDUP_TO_VEC0_LOW()
 
 # define SECTION(p)		p##.evex
+
+#ifndef MEMSET_SYMBOL
 # define MEMSET_SYMBOL(p,s)	p##_evex_##s
+#endif
+#ifndef WMEMSET_SYMBOL
 # define WMEMSET_SYMBOL(p,s)	p##_evex_##s
+#endif
+
+
 # define USE_LESS_VEC_MASK_STORE	1
 # include "memset-vec-unaligned-erms.S"
 #endif
diff --git a/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S
index 3d92f6993a..44f9b8888b 100644
--- a/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S
@@ -17,22 +17,51 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <sysdep.h>
-#include <shlib-compat.h>
+#include <isa-level.h>
 
-#if IS_IN (libc)
-# define MEMSET_SYMBOL(p,s)	p##_sse2_##s
-# define WMEMSET_SYMBOL(p,s)	p##_sse2_##s
+/* Use MINIMUM_X86_ISA_LEVEL <= 2 here: there is no dedicated V2
+   implementation, so this SSE2 code must also be built for ISA V2 builds.  */
+#if ISA_SHOULD_BUILD (2)
 
-# ifdef SHARED
-#  undef libc_hidden_builtin_def
-#  define libc_hidden_builtin_def(name)
+# include <sysdep.h>
+# define USE_WITH_SSE2	1
+
+# define VEC_SIZE	16
+# define MOV_SIZE	3
+# define RET_SIZE	1
+
+# define VEC(i)		xmm##i
+# define VMOVU     movups
+# define VMOVA     movaps
+
+# define MEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
+  movd d, %xmm0; \
+  movq r, %rax; \
+  punpcklbw %xmm0, %xmm0; \
+  punpcklwd %xmm0, %xmm0; \
+  pshufd $0, %xmm0, %xmm0
+
+# define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
+  movd d, %xmm0; \
+  pshufd $0, %xmm0, %xmm0; \
+  movq r, %rax
+
+# define MEMSET_VDUP_TO_VEC0_HIGH()
+# define MEMSET_VDUP_TO_VEC0_LOW()
+
+# define WMEMSET_VDUP_TO_VEC0_HIGH()
+# define WMEMSET_VDUP_TO_VEC0_LOW()
+
+# define SECTION(p)		p
+
+# ifndef MEMSET_SYMBOL
+#  define MEMSET_SYMBOL(p,s)	p##_sse2_##s
 # endif
 
-# undef weak_alias
-# define weak_alias(original, alias)
-# undef strong_alias
-# define strong_alias(ignored1, ignored2)
-#endif
+# ifndef WMEMSET_SYMBOL
+#  define WMEMSET_SYMBOL(p,s)	p##_sse2_##s
+# endif
+
+# include "memset-vec-unaligned-erms.S"
 
-#include <sysdeps/x86_64/memset.S>
+#endif
diff --git a/sysdeps/x86_64/multiarch/rtld-memset.S b/sysdeps/x86_64/multiarch/rtld-memset.S
new file mode 100644
index 0000000000..d912bfa7cc
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/rtld-memset.S
@@ -0,0 +1,18 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "../memset.S"