about summary refs log tree commit diff
path: root/sysdeps/x86_64/multiarch/ifunc-memset.h
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2021-03-05 07:15:03 -0800
committerH.J. Lu <hjl.tools@gmail.com>2022-01-27 12:47:19 -0800
commitc7289e2bfdbff9730e24e81cc2924274e4f197bc (patch)
tree6a82597ae76de17f72253271ea429f0ee5df7594 /sysdeps/x86_64/multiarch/ifunc-memset.h
parent5141ddbe3aace5c713d6e2c4260cd1646e54489b (diff)
downloadglibc-c7289e2bfdbff9730e24e81cc2924274e4f197bc.tar.gz
glibc-c7289e2bfdbff9730e24e81cc2924274e4f197bc.tar.xz
glibc-c7289e2bfdbff9730e24e81cc2924274e4f197bc.zip
x86-64: Add memset family functions with 256-bit EVEX
Update ifunc-memset.h/ifunc-wmemset.h to select the function optimized
with 256-bit EVEX instructions using YMM16-YMM31 registers to avoid RTM
abort with usable AVX512VL and AVX512BW since VZEROUPPER isn't needed at
function exit.

(cherry picked from commit 1b968b6b9b3aac702ac2f133e0dd16cfdbb415ee)
Diffstat (limited to 'sysdeps/x86_64/multiarch/ifunc-memset.h')
-rw-r--r--sysdeps/x86_64/multiarch/ifunc-memset.h24
1 files changed, 20 insertions, 4 deletions
diff --git a/sysdeps/x86_64/multiarch/ifunc-memset.h b/sysdeps/x86_64/multiarch/ifunc-memset.h
index 19b5ae676c..fea6c832f4 100644
--- a/sysdeps/x86_64/multiarch/ifunc-memset.h
+++ b/sysdeps/x86_64/multiarch/ifunc-memset.h
@@ -27,6 +27,10 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned_erms)
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms)
   attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned)
+  attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms)
+  attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned)
   attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms)
@@ -56,10 +60,22 @@ IFUNC_SELECTOR (void)
 
   if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable))
     {
-      if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
-	return OPTIMIZE (avx2_unaligned_erms);
-      else
-	return OPTIMIZE (avx2_unaligned);
+      if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable)
+	  && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable))
+	{
+	  if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+	    return OPTIMIZE (evex_unaligned_erms);
+
+	  return OPTIMIZE (evex_unaligned);
+	}
+
+      if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+	{
+	  if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+	    return OPTIMIZE (avx2_unaligned_erms);
+
+	  return OPTIMIZE (avx2_unaligned);
+	}
     }
 
   if (CPU_FEATURES_CPU_P (cpu_features, ERMS))