author     Noah Goldstein <goldstein.w.n@gmail.com>   2022-07-13 16:32:59 -0700
committer  Noah Goldstein <goldstein.w.n@gmail.com>   2022-07-16 03:07:59 -0700
commit     ceabdcd130ca7043b0fcf2676183d79431d10493 (patch)
tree       f6ced435023b66ac1f3b519e5b26a60ef8f479dd /sysdeps/x86_64/multiarch/strcmp.c
parent     c353689e49e72f3aafa1a9e68d4f7a4f33a79cbe (diff)
x86: Add support to build strcmp/strlen/strchr with explicit ISA level
1. Add default ISA level selection in non-multiarch/rtld
   implementations.

2. Add ISA level build guards to different implementations.
    - E.g., strcmp-avx2.S, which is ISA level 3, will only build if
      the compiled ISA level is <= 3.  Otherwise there is no reason
      to include it, as we will always use one of the ISA level 4
      implementations (strcmp-evex.S).  (See the sketch after this
      list.)

3. Refactor the ifunc selector and ifunc implementation list to use
   the ISA level aware wrapper macros that allow functions below the
   compiled ISA level (with a guaranteed replacement) to be skipped;
   a sketch of these macros follows the test notes below.
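
For illustration, here is a rough sketch of how (1) and (2) fit
together.  It is modeled on the pattern this series uses: a
non-multiarch wrapper that picks a DEFAULT_IMPL_Vn body from the
compile-time ISA level, plus a preprocessor guard in each multiarch
implementation.  The names MINIMUM_X86_ISA_LEVEL and
isa-default-impl.h come from the isa-level utilities this series
builds on; the guard spelling below is a paraphrase, not the literal
tree contents:

    /* Sketch: the non-multiarch sysdeps/x86_64/strcmp.S selects its
       default body from the compile-time ISA level.  */
    #define STRCMP strcmp
    #define DEFAULT_IMPL_V1 "multiarch/strcmp-sse2.S"
    #define DEFAULT_IMPL_V3 "multiarch/strcmp-avx2.S"
    #define DEFAULT_IMPL_V4 "multiarch/strcmp-evex.S"
    #include "isa-default-impl.h"

    /* Sketch: build guard at the top of an ISA level 3 file such as
       strcmp-avx2.S.  If the whole build already targets ISA level 4,
       the evex version is always selected, so skip this body.  */
    #include <isa-level.h>
    #if MINIMUM_X86_ISA_LEVEL <= 3
    /* ... AVX2 implementation ... */
    #endif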

Tested with and without multiarch on x86_64 for ISA levels:
{generic, x86-64-v2, x86-64-v3, x86-64-v4}

Also tested -m32 with and without multiarch.
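
The ISA level aware wrapper macros in (3) behave roughly as follows.
This is a paraphrase of the X86_ISA_CPU_FEATURE_USABLE_P family from
the isa-level utilities, not the literal definition; the per-feature
AVX2_X86_ISA_LEVEL-style constants are an assumption about how those
helpers map features to levels:

    /* Paraphrased sketch: if the feature is already implied by the
       compile-time ISA level, the whole test folds to constant true
       and the fallback paths below it become dead code the compiler
       can drop; otherwise it degrades to the normal runtime check.  */
    #define X86_ISA_CPU_FEATURE_USABLE_P(ptr, name)        \
      ((name##_X86_ISA_LEVEL <= MINIMUM_X86_ISA_LEVEL)     \
       || CPU_FEATURE_USABLE_P (ptr, name))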
Diffstat (limited to 'sysdeps/x86_64/multiarch/strcmp.c')
 -rw-r--r--  sysdeps/x86_64/multiarch/strcmp.c | 37
1 file changed, 25 insertions(+), 12 deletions(-)
diff --git a/sysdeps/x86_64/multiarch/strcmp.c b/sysdeps/x86_64/multiarch/strcmp.c
index 9c1677724c..fdd5afe3af 100644
--- a/sysdeps/x86_64/multiarch/strcmp.c
+++ b/sysdeps/x86_64/multiarch/strcmp.c
@@ -26,37 +26,50 @@
 # define SYMBOL_NAME strcmp
 # include <init-arch.h>
 
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
+
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
+
+extern __typeof (REDIRECT_NAME)
+    OPTIMIZE (sse2_unaligned) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+
+
 
 static inline void *
 IFUNC_SELECTOR (void)
 {
-  const struct cpu_features* cpu_features = __get_cpu_features ();
+  const struct cpu_features *cpu_features = __get_cpu_features ();
 
-  if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
-      && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+  if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+      && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+				      AVX_Fast_Unaligned_Load, ))
     {
-      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
-	  && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
-	  && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
+      if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
+	  && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
+	  && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
 	return OPTIMIZE (evex);
 
       if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
 	return OPTIMIZE (avx2_rtm);
 
-      if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+      if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+				       Prefer_No_VZEROUPPER, !))
 	return OPTIMIZE (avx2);
     }
 
-  if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
+  if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
+      /* Keep this as runtime check.  Some ISA level >= 2 CPUs such as
+	 Tremont, Silvermont, and more check this.  */
       && !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2))
     return OPTIMIZE (sse42);
 
+  /* Keep this as runtime check.  The standard SSE2 version has
+     meaningful optimizations around keeping all loads aligned in the
+     main loop which can benefit some ISA level >= 2 CPUs.  */
   if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
     return OPTIMIZE (sse2_unaligned);
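
A note on the odd-looking trailing arguments in the new selector
above ("AVX_Fast_Unaligned_Load, " with nothing after the comma, and
"Prefer_No_VZEROUPPER, !"): X86_ISA_CPU_FEATURES_ARCH_P takes the
negation operator itself as a macro parameter, so one wrapper covers
both "bit must be set" and "bit must be clear" checks.  A paraphrased
sketch, on the same assumptions as above:

    /* Sketch: `not' is spliced in front of the runtime test, so
       passing `!' checks that the arch bit is clear and passing
       nothing checks that it is set; either way the test folds to
       constant true once the compile-time ISA level decides it.  */
    #define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name, not)    \
      ((name##_X86_ISA_LEVEL <= MINIMUM_X86_ISA_LEVEL)     \
       || (not CPU_FEATURES_ARCH_P (ptr, name)))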