summary refs log tree commit diff
path: root/sysdeps/x86_64/multiarch/ifunc-wmemset.h
diff options
context:
space:
mode:
author H.J. Lu <hjl.tools@gmail.com> 2021-03-05 07:15:03 -0800
committer H.J. Lu <hjl.tools@gmail.com> 2021-03-29 07:40:17 -0700
commit 1b968b6b9b3aac702ac2f133e0dd16cfdbb415ee (patch)
tree 03329adc8a73b1fbfafad45ff41e8706c799d5c0 /sysdeps/x86_64/multiarch/ifunc-wmemset.h
parent 63ad43566f7a25d140dc723598aeb441ad657eed (diff)
download glibc-1b968b6b9b3aac702ac2f133e0dd16cfdbb415ee.tar.gz
glibc-1b968b6b9b3aac702ac2f133e0dd16cfdbb415ee.tar.xz
glibc-1b968b6b9b3aac702ac2f133e0dd16cfdbb415ee.zip
x86-64: Add memset family functions with 256-bit EVEX
Update ifunc-memset.h/ifunc-wmemset.h to select, when AVX512VL and
AVX512BW are usable, the function optimized with 256-bit EVEX
instructions.  Because it uses only the YMM16-YMM31 registers,
VZEROUPPER isn't needed at function exit, which avoids RTM aborts.
Diffstat (limited to 'sysdeps/x86_64/multiarch/ifunc-wmemset.h')
-rw-r--r-- sysdeps/x86_64/multiarch/ifunc-wmemset.h 13
1 files changed, 9 insertions, 4 deletions
diff --git a/sysdeps/x86_64/multiarch/ifunc-wmemset.h b/sysdeps/x86_64/multiarch/ifunc-wmemset.h
index c1b0c2254b..7e947c56b4 100644
--- a/sysdeps/x86_64/multiarch/ifunc-wmemset.h
+++ b/sysdeps/x86_64/multiarch/ifunc-wmemset.h
@@ -20,6 +20,7 @@
 
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned) attribute_hidden;
 
 static inline void *
@@ -27,14 +28,18 @@ IFUNC_SELECTOR (void)
 {
   const struct cpu_features* cpu_features = __get_cpu_features ();
 
-  if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
-      && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+  if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
       && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
     {
       if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F)
-	  && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
+	  && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)
+	  && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
 	return OPTIMIZE (avx512_unaligned);
-      else
+
+      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
+	return OPTIMIZE (evex_unaligned);
+
+      if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
 	return OPTIMIZE (avx2_unaligned);
     }