about summary refs log tree commit diff
path: root/sysdeps/x86_64/multiarch/wcsnlen.c
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2021-03-05 06:24:52 -0800
committerH.J. Lu <hjl.tools@gmail.com>2021-03-29 07:40:17 -0700
commit1fd8c163a83d96ace1ff78fa6bac7aee084f6f77 (patch)
treef10a0556c0e7a8d1a97e4702568c4e88869ac168 /sysdeps/x86_64/multiarch/wcsnlen.c
parent1da50d4bda07f04135dca39f40e79fc9eabed1f8 (diff)
downloadglibc-1fd8c163a83d96ace1ff78fa6bac7aee084f6f77.tar.gz
glibc-1fd8c163a83d96ace1ff78fa6bac7aee084f6f77.tar.xz
glibc-1fd8c163a83d96ace1ff78fa6bac7aee084f6f77.zip
x86-64: Add ifunc-avx2.h functions with 256-bit EVEX
Update ifunc-avx2.h, strchr.c, strcmp.c, strncmp.c and wcsnlen.c to
select the function optimized with 256-bit EVEX instructions using
YMM16-YMM31 registers to avoid RTM abort with usable AVX512VL, AVX512BW
and BMI2 since VZEROUPPER isn't needed at function exit.

For strcmp/strncmp, prefer AVX2 strcmp/strncmp if Prefer_AVX2_STRCMP
is set.
Diffstat (limited to 'sysdeps/x86_64/multiarch/wcsnlen.c')
-rw-r--r--sysdeps/x86_64/multiarch/wcsnlen.c14
1 files changed, 11 insertions, 3 deletions
diff --git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c
index 81b1a221ff..19bc6fd938 100644
--- a/sysdeps/x86_64/multiarch/wcsnlen.c
+++ b/sysdeps/x86_64/multiarch/wcsnlen.c
@@ -29,16 +29,24 @@
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
 
 static inline void *
 IFUNC_SELECTOR (void)
 {
   const struct cpu_features* cpu_features = __get_cpu_features ();
 
-  if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
-      && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+  if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
       && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
-    return OPTIMIZE (avx2);
+    {
+      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
+	  && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
+	  && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
+	return OPTIMIZE (evex);
+
+      if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+	return OPTIMIZE (avx2);
+    }
 
   if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
     return OPTIMIZE (sse4_1);