about summary refs log tree commit diff
path: root/sysdeps/x86_64/multiarch/ifunc-strcpy.h
diff options
context:
space:
mode:
author: H.J. Lu <hjl.tools@gmail.com> 2021-03-05 06:36:50 -0800
committer: H.J. Lu <hjl.tools@gmail.com> 2021-03-29 07:40:17 -0700
commit: 525bc2a32c9710df40371f951217c6ae7a923aee (patch)
tree: efd1cd7acf189386bf9223678ccea60de70bc93b /sysdeps/x86_64/multiarch/ifunc-strcpy.h
parent: 1fd8c163a83d96ace1ff78fa6bac7aee084f6f77 (diff)
download: glibc-525bc2a32c9710df40371f951217c6ae7a923aee.tar.gz
glibc-525bc2a32c9710df40371f951217c6ae7a923aee.tar.xz
glibc-525bc2a32c9710df40371f951217c6ae7a923aee.zip
x86-64: Add strcpy family functions with 256-bit EVEX
Update ifunc-strcpy.h to select, when AVX512VL and AVX512BW are usable,
the function optimized with 256-bit EVEX instructions.  That function
uses the YMM16-YMM31 registers, so VZEROUPPER isn't needed at function
exit, which avoids an RTM abort.
Diffstat (limited to 'sysdeps/x86_64/multiarch/ifunc-strcpy.h')
-rw-r--r-- sysdeps/x86_64/multiarch/ifunc-strcpy.h | 13
1 files changed, 10 insertions, 3 deletions
diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
index 1100cd23c6..f31f436adf 100644
--- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h
+++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
@@ -25,16 +25,23 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
   attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
 
 static inline void *
 IFUNC_SELECTOR (void)
 {
   const struct cpu_features* cpu_features = __get_cpu_features ();
 
-  if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
-      && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+  if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
       && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
-    return OPTIMIZE (avx2);
+    {
+      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
+	  && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
+	return OPTIMIZE (evex);
+
+      if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+	return OPTIMIZE (avx2);
+    }
 
   if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
     return OPTIMIZE (sse2_unaligned);