about summary refs log tree commit diff
path: root/sysdeps/x86_64/multiarch/ifunc-memmove.h
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2021-03-05 06:46:08 -0800
committerH.J. Lu <hjl.tools@gmail.com>2021-03-29 07:40:17 -0700
commit63ad43566f7a25d140dc723598aeb441ad657eed (patch)
treedc9bcd568f14661c8adc15e5f69a150d71e426ec /sysdeps/x86_64/multiarch/ifunc-memmove.h
parent525bc2a32c9710df40371f951217c6ae7a923aee (diff)
downloadglibc-63ad43566f7a25d140dc723598aeb441ad657eed.tar.gz
glibc-63ad43566f7a25d140dc723598aeb441ad657eed.tar.xz
glibc-63ad43566f7a25d140dc723598aeb441ad657eed.zip
x86-64: Add memmove family functions with 256-bit EVEX
Update ifunc-memmove.h to select the function optimized with 256-bit EVEX
instructions using YMM16-YMM31 registers to avoid RTM abort with usable
AVX512VL since VZEROUPPER isn't needed at function exit.
Diffstat (limited to 'sysdeps/x86_64/multiarch/ifunc-memmove.h')
-rw-r--r--sysdeps/x86_64/multiarch/ifunc-memmove.h21
1 files changed, 18 insertions, 3 deletions
diff --git a/sysdeps/x86_64/multiarch/ifunc-memmove.h b/sysdeps/x86_64/multiarch/ifunc-memmove.h
index bf42a555dc..517b332bfc 100644
--- a/sysdeps/x86_64/multiarch/ifunc-memmove.h
+++ b/sysdeps/x86_64/multiarch/ifunc-memmove.h
@@ -29,6 +29,10 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3_back) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_erms)
   attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned)
+  attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms)
+  attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned)
   attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms)
@@ -59,10 +63,21 @@ IFUNC_SELECTOR (void)
 
   if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
     {
-      if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
-	return OPTIMIZE (avx_unaligned_erms);
+      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
+	{
+	  if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
+	    return OPTIMIZE (evex_unaligned_erms);
+
+	  return OPTIMIZE (evex_unaligned);
+	}
+
+      if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+	{
+	  if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
+	    return OPTIMIZE (avx_unaligned_erms);
 
-      return OPTIMIZE (avx_unaligned);
+	  return OPTIMIZE (avx_unaligned);
+	}
     }
 
   if (!CPU_FEATURE_USABLE_P (cpu_features, SSSE3)