path: root/sysdeps/x86_64/multiarch/init-arch.c
author	H.J. Lu <hjl.tools@gmail.com>	2015-01-30 06:50:20 -0800
committer	H.J. Lu <hjl.tools@gmail.com>	2015-01-30 12:26:33 -0800
commit	328fc20e5e334a642f0152d9662474789381a897 (patch)
tree	6efc6b6b150d1373c36c8bdaf4a606989c152a9a /sysdeps/x86_64/multiarch/init-arch.c
parent	f80af76648ed97a76745fad6caa3315a79cb1c7c (diff)
Use AVX unaligned memcpy only if AVX2 is available
memcpy with unaligned 256-bit AVX register loads/stores is slow on older
processors like Sandy Bridge.  This patch adds bit_AVX_Fast_Unaligned_Load
and sets it only when AVX2 is available.

	[BZ #17801]
	* sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features):
	Set the bit_AVX_Fast_Unaligned_Load bit for AVX2.
	* sysdeps/x86_64/multiarch/init-arch.h (bit_AVX_Fast_Unaligned_Load):
	New.
	(index_AVX_Fast_Unaligned_Load): Likewise.
	(HAS_AVX_FAST_UNALIGNED_LOAD): Likewise.
	* sysdeps/x86_64/multiarch/memcpy.S (__new_memcpy): Check the
	bit_AVX_Fast_Unaligned_Load bit instead of the bit_AVX_Usable bit.
	* sysdeps/x86_64/multiarch/memcpy_chk.S (__memcpy_chk): Likewise.
	* sysdeps/x86_64/multiarch/mempcpy.S (__mempcpy): Likewise.
	* sysdeps/x86_64/multiarch/mempcpy_chk.S (__mempcpy_chk): Likewise.
	* sysdeps/x86_64/multiarch/memmove.c (__libc_memmove): Replace
	HAS_AVX with HAS_AVX_FAST_UNALIGNED_LOAD.
	* sysdeps/x86_64/multiarch/memmove_chk.c (__memmove_chk): Likewise.

[cherry picked from commit 56d25c11b64a97255a115901d136d753c86de24e]
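
To make the feature-bit scheme concrete, here is a small standalone sketch
of the mechanism this patch extends.  The macro names mirror init-arch.h,
but the bit positions, the single-word feature array, and the hard-coded
CPUID result are illustrative stand-ins, not the real glibc definitions.

#include <stdio.h>

/* Illustrative bit and index values; the real ones live in init-arch.h.  */
#define bit_AVX2_Usable              (1 << 6)
#define bit_AVX_Fast_Unaligned_Load  (1 << 7)
#define index_AVX2_Usable              0
#define index_AVX_Fast_Unaligned_Load  0

#define FEATURE_INDEX_MAX 1

static struct
{
  unsigned int feature[FEATURE_INDEX_MAX];
} cpu_features;

#define HAS_AVX_FAST_UNALIGNED_LOAD \
  ((cpu_features.feature[index_AVX_Fast_Unaligned_Load] \
    & bit_AVX_Fast_Unaligned_Load) != 0)

int
main (void)
{
  int cpuid_avx2 = 1;	/* Stand-in for the real CPUID check.  */

  /* As in the patch: both bits live in the same feature word, so they
     can be OR'ed in together when AVX2 is available.  */
  if (cpuid_avx2)
    cpu_features.feature[index_AVX2_Usable]
      |= bit_AVX2_Usable | bit_AVX_Fast_Unaligned_Load;

  printf ("AVX_Fast_Unaligned_Load usable: %d\n",
	  HAS_AVX_FAST_UNALIGNED_LOAD);
  return 0;
}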
Diffstat (limited to 'sysdeps/x86_64/multiarch/init-arch.c')
-rw-r--r--	sysdeps/x86_64/multiarch/init-arch.c	9
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index 2a6dcb78d8..f7c1bbe0db 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -167,9 +167,14 @@ __init_cpu_features (void)
 	  /* Determine if AVX is usable.  */
 	  if (CPUID_AVX)
 	    __cpu_features.feature[index_AVX_Usable] |= bit_AVX_Usable;
-	  /* Determine if AVX2 is usable.  */
+#if index_AVX2_Usable != index_AVX_Fast_Unaligned_Load
+# error index_AVX2_Usable != index_AVX_Fast_Unaligned_Load
+#endif
+	  /* Determine if AVX2 is usable.  Unaligned load with 256-bit
+	     AVX registers are faster on processors with AVX2.  */
 	  if (CPUID_AVX2)
-	    __cpu_features.feature[index_AVX2_Usable] |= bit_AVX2_Usable;
+	    __cpu_features.feature[index_AVX2_Usable]
+	      |= bit_AVX2_Usable | bit_AVX_Fast_Unaligned_Load;
 	  /* Determine if FMA is usable.  */
 	  if (CPUID_FMA)
 	    __cpu_features.feature[index_FMA_Usable] |= bit_FMA_Usable;
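
For context, the memmove.c change listed in the log above amounts to gating
the AVX copy path on the new bit rather than on HAS_AVX.  The following is
a simplified stand-in for glibc's ifunc selection, not the actual source:
the __memmove-style variant names and the selector scaffolding are
illustrative, and the feature flag here is a plain variable assumed to have
been set by __init_cpu_features as shown in the diff.

#include <stddef.h>
#include <string.h>

typedef void *(*memmove_fn) (void *, const void *, size_t);

/* Stand-ins for the real multiarch variants.  */
static void *
memmove_avx_unaligned (void *dst, const void *src, size_t n)
{
  return memmove (dst, src, n);
}

static void *
memmove_sse2 (void *dst, const void *src, size_t n)
{
  return memmove (dst, src, n);
}

/* Stand-in for the HAS_AVX_FAST_UNALIGNED_LOAD macro from init-arch.h.  */
static int has_avx_fast_unaligned_load;

static memmove_fn
select_memmove (void)
{
  /* Before this patch the test was HAS_AVX, which also picked the AVX
     path on Sandy Bridge, where unaligned 256-bit loads are slow.  */
  return (has_avx_fast_unaligned_load
	  ? memmove_avx_unaligned : memmove_sse2);
}

int
main (void)
{
  char buf[8] = "abcdefg";
  memmove_fn fn = select_memmove ();
  fn (buf + 1, buf, 4);
  return 0;
}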