author    H.J. Lu <hjl.tools@gmail.com>    2015-01-30 06:50:20 -0800
committer H.J. Lu <hjl.tools@gmail.com>    2015-01-30 12:26:33 -0800
commit    328fc20e5e334a642f0152d9662474789381a897 (patch)
tree      6efc6b6b150d1373c36c8bdaf4a606989c152a9a /sysdeps/x86_64/multiarch/mempcpy.S
parent    f80af76648ed97a76745fad6caa3315a79cb1c7c (diff)
Use AVX unaligned memcpy only if AVX2 is available (hjl/release/2.20/master)
memcpy with unaligned 256-bit AVX register loads/stores is slow on older
processors like Sandy Bridge.  This patch adds bit_AVX_Fast_Unaligned_Load
and sets it only when AVX2 is available.
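
The gist of the change can be sketched in C.  The following is a minimal
sketch, assuming a GCC-style <cpuid.h>; the function and flag names are
illustrative assumptions, not the code added to init-arch.c:

    /* Minimal sketch: gate an "AVX fast unaligned load" flag on an AVX2
       CPUID check.  Names here are illustrative, not glibc's.  */
    #include <cpuid.h>
    #include <stdbool.h>

    static bool avx_fast_unaligned_load;

    static void
    init_cpu_features_sketch (void)
    {
      unsigned int eax, ebx, ecx, edx;

      /* CPUID leaf 7, subleaf 0: EBX bit 5 reports AVX2.  */
      if (__get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx)
          && (ebx & (1 << 5)) != 0)
        /* Only AVX2-capable processors get the flag, so AVX-only parts
           such as Sandy Bridge keep the SSSE3 copy routines.  */
        avx_fast_unaligned_load = true;
    }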

	[BZ #17801]
	* sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features):
	Set the bit_AVX_Fast_Unaligned_Load bit for AVX2.
	* sysdeps/x86_64/multiarch/init-arch.h (bit_AVX_Fast_Unaligned_Load):
	New.
	(index_AVX_Fast_Unaligned_Load): Likewise.
	(HAS_AVX_FAST_UNALIGNED_LOAD): Likewise.
	* sysdeps/x86_64/multiarch/memcpy.S (__new_memcpy): Check the
	bit_AVX_Fast_Unaligned_Load bit instead of the bit_AVX_Usable bit.
	* sysdeps/x86_64/multiarch/memcpy_chk.S (__memcpy_chk): Likewise.
	* sysdeps/x86_64/multiarch/mempcpy.S (__mempcpy): Likewise.
	* sysdeps/x86_64/multiarch/mempcpy_chk.S (__mempcpy_chk): Likewise.
	* sysdeps/x86_64/multiarch/memmove.c (__libc_memmove): Replace
	HAS_AVX with HAS_AVX_FAST_UNALIGNED_LOAD.
	* sysdeps/x86_64/multiarch/memmove_chk.c (__memmove_chk): Likewise.

[cherry picked from commit 56d25c11b64a97255a115901d136d753c86de24e]
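
For readers unfamiliar with the naming scheme the ChangeLog refers to, the
bit_, index_ and HAS_ macros conventionally wrap a bit test against a
feature-word array.  A simplified illustration follows; the bit position,
word index, and struct layout are assumptions, not the real init-arch.h
definitions:

    /* Illustrative sketch of the bit_ / index_ / HAS_ macro pattern.
       Bit position, word index, and the struct below are simplified
       assumptions, not the definitions added to init-arch.h.  */
    #define bit_AVX_Fast_Unaligned_Load   (1 << 11)   /* assumed bit */
    #define index_AVX_Fast_Unaligned_Load 1           /* assumed word */

    struct cpu_features_sketch
    {
      unsigned int feature[2];        /* simplified feature-word array */
    };

    extern struct cpu_features_sketch __cpu_features_sketch;

    #define HAS_AVX_FAST_UNALIGNED_LOAD \
      ((__cpu_features_sketch.feature[index_AVX_Fast_Unaligned_Load] \
        & bit_AVX_Fast_Unaligned_Load) != 0)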
Diffstat (limited to 'sysdeps/x86_64/multiarch/mempcpy.S')
-rw-r--r--	sysdeps/x86_64/multiarch/mempcpy.S	2
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S
index 7589d8c1ec..e205ef5557 100644
--- a/sysdeps/x86_64/multiarch/mempcpy.S
+++ b/sysdeps/x86_64/multiarch/mempcpy.S
@@ -37,7 +37,7 @@ ENTRY(__mempcpy)
 	testl	$bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
 	jz	2f
 	leaq	__mempcpy_ssse3_back(%rip), %rax
-	testl	$bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
+	testl	$bit_AVX_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_AVX_Fast_Unaligned_Load(%rip)
 	jz	2f
 	leaq	__mempcpy_avx_unaligned(%rip), %rax
 2:	ret
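
Read as C, the selector logic in the hunk above roughly amounts to the
following.  This is a hedged paraphrase, not code from the tree; the flag
parameters stand in for the __cpu_features bit tests, and earlier_choice
stands in for whatever %rax held before the hunk:

    #include <stddef.h>

    /* Hedged C paraphrase of the selector hunk above.  */
    void *__mempcpy_ssse3_back (void *, const void *, size_t);
    void *__mempcpy_avx_unaligned (void *, const void *, size_t);

    typedef void *(*mempcpy_fn) (void *, const void *, size_t);

    static mempcpy_fn
    select_mempcpy (mempcpy_fn earlier_choice,
                    int has_fast_copy_backward,
                    int has_avx_fast_unaligned_load)
    {
      mempcpy_fn fn = earlier_choice;
      if (has_fast_copy_backward)          /* testl bit_Fast_Copy_Backward */
        {
          fn = __mempcpy_ssse3_back;
          /* The patch: test bit_AVX_Fast_Unaligned_Load instead of
             bit_AVX_Usable before picking the AVX unaligned routine.  */
          if (has_avx_fast_unaligned_load)
            fn = __mempcpy_avx_unaligned;
        }
      return fn;                           /* 2: ret */
    }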