about summary refs log tree commit diff
path: root/sysdeps/x86_64/multiarch/ifunc-memcmp.h
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2021-03-05 07:20:28 -0800
committerH.J. Lu <hjl.tools@gmail.com>2021-03-29 07:40:17 -0700
commit91264fe3577fe887b4860923fa6142b5274c8965 (patch)
tree172c0193a4e0a6b0e695fd8e0df51f0d746dd817 /sysdeps/x86_64/multiarch/ifunc-memcmp.h
parent1b968b6b9b3aac702ac2f133e0dd16cfdbb415ee (diff)
downloadglibc-91264fe3577fe887b4860923fa6142b5274c8965.tar.gz
glibc-91264fe3577fe887b4860923fa6142b5274c8965.tar.xz
glibc-91264fe3577fe887b4860923fa6142b5274c8965.zip
x86-64: Add memcmp family functions with 256-bit EVEX
Update ifunc-memcmp.h to select the function optimized with 256-bit EVEX
instructions using YMM16-YMM31 registers to avoid RTM abort with usable
AVX512VL, AVX512BW and MOVBE since VZEROUPPER isn't needed at function
exit.
Diffstat (limited to 'sysdeps/x86_64/multiarch/ifunc-memcmp.h')
-rw-r--r--sysdeps/x86_64/multiarch/ifunc-memcmp.h13
1 files changed, 10 insertions, 3 deletions
diff --git a/sysdeps/x86_64/multiarch/ifunc-memcmp.h b/sysdeps/x86_64/multiarch/ifunc-memcmp.h
index d5df541ec4..5ac41a19b8 100644
--- a/sysdeps/x86_64/multiarch/ifunc-memcmp.h
+++ b/sysdeps/x86_64/multiarch/ifunc-memcmp.h
@@ -23,17 +23,24 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_movbe) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_movbe) attribute_hidden;
 
 static inline void *
 IFUNC_SELECTOR (void)
 {
   const struct cpu_features* cpu_features = __get_cpu_features ();
 
-  if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
-      && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+  if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
       && CPU_FEATURE_USABLE_P (cpu_features, MOVBE)
       && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
-    return OPTIMIZE (avx2_movbe);
+    {
+      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
+	  && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
+	return OPTIMIZE (evex_movbe);
+
+      if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+	return OPTIMIZE (avx2_movbe);
+    }
 
   if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
     return OPTIMIZE (sse4_1);