about summary refs log tree commit diff
path: root/sysdeps/x86_64/multiarch/ifunc-memset.h
diff options
context:
space:
mode:
author: Noah Goldstein <goldstein.w.n@gmail.com> 2021-04-19 17:48:10 -0400
committer: H.J. Lu <hjl.tools@gmail.com> 2021-04-19 15:08:04 -0700
commit: f53790272ce7bdc5ecd14b45f65d0464d2a61a3a (patch)
tree: 080054bf654d57c524271f86b8195d602d881ae4 /sysdeps/x86_64/multiarch/ifunc-memset.h
parent: 83c5b368226c34a2f0a5287df40fc290b2b34359 (diff)
download: glibc-f53790272ce7bdc5ecd14b45f65d0464d2a61a3a.tar.gz
glibc-f53790272ce7bdc5ecd14b45f65d0464d2a61a3a.tar.xz
glibc-f53790272ce7bdc5ecd14b45f65d0464d2a61a3a.zip
x86: Optimize less_vec evex and avx512 memset-vec-unaligned-erms.S
No bug. This commit adds an optimized path for the less_vec memset case
that uses the avx512vl/avx512bw masked store, avoiding the excessive
branches. test-memset and test-wmemset are passing.

Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
Diffstat (limited to 'sysdeps/x86_64/multiarch/ifunc-memset.h')
-rw-r--r-- sysdeps/x86_64/multiarch/ifunc-memset.h 6
1 files changed, 4 insertions, 2 deletions
diff --git a/sysdeps/x86_64/multiarch/ifunc-memset.h b/sysdeps/x86_64/multiarch/ifunc-memset.h
index 502f946a84..eda5640541 100644
--- a/sysdeps/x86_64/multiarch/ifunc-memset.h
+++ b/sysdeps/x86_64/multiarch/ifunc-memset.h
@@ -54,7 +54,8 @@ IFUNC_SELECTOR (void)
       && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
     {
       if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
-	  && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
+          && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
+          && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
 	{
 	  if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
 	    return OPTIMIZE (avx512_unaligned_erms);
@@ -68,7 +69,8 @@ IFUNC_SELECTOR (void)
   if (CPU_FEATURE_USABLE_P (cpu_features, AVX2))
     {
       if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
-	  && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
+          && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
+          && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
 	{
 	  if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
 	    return OPTIMIZE (evex_unaligned_erms);