diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2021-03-05 07:15:03 -0800 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2022-01-27 12:47:19 -0800 |
commit | c7289e2bfdbff9730e24e81cc2924274e4f197bc (patch) | |
tree | 6a82597ae76de17f72253271ea429f0ee5df7594 /sysdeps/x86_64/multiarch/ifunc-memset.h | |
parent | 5141ddbe3aace5c713d6e2c4260cd1646e54489b (diff) | |
download | glibc-c7289e2bfdbff9730e24e81cc2924274e4f197bc.tar.gz glibc-c7289e2bfdbff9730e24e81cc2924274e4f197bc.tar.xz glibc-c7289e2bfdbff9730e24e81cc2924274e4f197bc.zip |
x86-64: Add memset family functions with 256-bit EVEX
Update ifunc-memset.h/ifunc-wmemset.h to select the functions optimized with 256-bit EVEX instructions, which use the YMM16-YMM31 registers, when AVX512VL and AVX512BW are usable. Because VZEROUPPER isn't needed at function exit in these functions, selecting them avoids RTM aborts. (cherry picked from commit 1b968b6b9b3aac702ac2f133e0dd16cfdbb415ee)
Diffstat (limited to 'sysdeps/x86_64/multiarch/ifunc-memset.h')
-rw-r--r-- | sysdeps/x86_64/multiarch/ifunc-memset.h | 24 |
1 files changed, 20 insertions, 4 deletions
diff --git a/sysdeps/x86_64/multiarch/ifunc-memset.h b/sysdeps/x86_64/multiarch/ifunc-memset.h index 19b5ae676c..fea6c832f4 100644 --- a/sysdeps/x86_64/multiarch/ifunc-memset.h +++ b/sysdeps/x86_64/multiarch/ifunc-memset.h @@ -27,6 +27,10 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned_erms) extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms) + attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms) @@ -56,10 +60,22 @@ IFUNC_SELECTOR (void) if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)) { - if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) - return OPTIMIZE (avx2_unaligned_erms); - else - return OPTIMIZE (avx2_unaligned); + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable)) + { + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + return OPTIMIZE (evex_unaligned_erms); + + return OPTIMIZE (evex_unaligned); + } + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + { + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + return OPTIMIZE (avx2_unaligned_erms); + + return OPTIMIZE (avx2_unaligned); + } } if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) |