diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2021-03-05 06:36:50 -0800 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2021-03-29 07:40:17 -0700 |
commit | 525bc2a32c9710df40371f951217c6ae7a923aee (patch) | |
tree | efd1cd7acf189386bf9223678ccea60de70bc93b /sysdeps/x86_64/multiarch/ifunc-strcpy.h | |
parent | 1fd8c163a83d96ace1ff78fa6bac7aee084f6f77 (diff) | |
download | glibc-525bc2a32c9710df40371f951217c6ae7a923aee.tar.gz glibc-525bc2a32c9710df40371f951217c6ae7a923aee.tar.xz glibc-525bc2a32c9710df40371f951217c6ae7a923aee.zip |
x86-64: Add strcpy family functions with 256-bit EVEX
Update ifunc-strcpy.h to select the function optimized with 256-bit EVEX instructions using YMM16-YMM31 registers to avoid RTM abort with usable AVX512VL and AVX512BW since VZEROUPPER isn't needed at function exit.
Diffstat (limited to 'sysdeps/x86_64/multiarch/ifunc-strcpy.h')
-rw-r--r-- | sysdeps/x86_64/multiarch/ifunc-strcpy.h | 13 |
1 files changed, 10 insertions, 3 deletions
diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h index 1100cd23c6..f31f436adf 100644 --- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h +++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h @@ -25,16 +25,23 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; static inline void * IFUNC_SELECTOR (void) { const struct cpu_features* cpu_features = __get_cpu_features (); - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) - && CPU_FEATURE_USABLE_P (cpu_features, AVX2) + if (CPU_FEATURE_USABLE_P (cpu_features, AVX2) && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) - return OPTIMIZE (avx2); + { + if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) + && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) + return OPTIMIZE (evex); + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + return OPTIMIZE (avx2); + } if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load)) return OPTIMIZE (sse2_unaligned); |