author		H.J. Lu <hjl.tools@gmail.com>	2016-03-04 08:37:40 -0800
committer	H.J. Lu <hjl.tools@gmail.com>	2016-03-04 08:39:07 -0800
commit		14a1d7cc4c4fd5ee8e4e66b777221dd32a84efe8 (patch)
tree		86611a9511bcc3cafb5de83890af6c0508e569a9
parent		4b230f6a60f3bb9cae92306d016535f40578ff2e (diff)
x86-64: Fix memcpy IFUNC selection
Check Fast_Unaligned_Load, instead of Slow_BSF, and also check for
Fast_Copy_Backward to enable __memcpy_ssse3_back.  The existing selection
order is replaced with the following order (a rough C sketch follows the
list):

1. __memcpy_avx_unaligned if AVX_Fast_Unaligned_Load bit is set.
2. __memcpy_sse2_unaligned if Fast_Unaligned_Load bit is set.
3. __memcpy_sse2 if SSSE3 isn't available.
4. __memcpy_ssse3_back if Fast_Copy_Backward bit is set.
5. __memcpy_ssse3

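For illustration only, the same selection order can be written in plain C.
The select_memcpy function, its parameter names, and the int feature flags
below are hypothetical stand-ins for the HAS_ARCH_FEATURE and
HAS_CPU_FEATURE checks performed in the assembly IFUNC selector; only the
variant function names come from the patch itself.

#include <stddef.h>

/* Declarations of the memcpy variants chosen by the selector.  */
extern void *__memcpy_avx_unaligned (void *, const void *, size_t);
extern void *__memcpy_sse2_unaligned (void *, const void *, size_t);
extern void *__memcpy_sse2 (void *, const void *, size_t);
extern void *__memcpy_ssse3_back (void *, const void *, size_t);
extern void *__memcpy_ssse3 (void *, const void *, size_t);

typedef void *(*memcpy_fn) (void *, const void *, size_t);

/* Return the variant for the given feature bits, mirroring steps 1-5
   in the list above.  */
static memcpy_fn
select_memcpy (int avx_fast_unaligned_load, int fast_unaligned_load,
	       int has_ssse3, int fast_copy_backward)
{
  if (avx_fast_unaligned_load)
    return __memcpy_avx_unaligned;	/* 1. AVX_Fast_Unaligned_Load set.  */
  if (fast_unaligned_load)
    return __memcpy_sse2_unaligned;	/* 2. Fast_Unaligned_Load set.  */
  if (!has_ssse3)
    return __memcpy_sse2;		/* 3. SSSE3 not available.  */
  if (fast_copy_backward)
    return __memcpy_ssse3_back;		/* 4. Fast_Copy_Backward set.  */
  return __memcpy_ssse3;		/* 5. Default SSSE3 variant.  */
}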
	[BZ #18880]
	* sysdeps/x86_64/multiarch/memcpy.S: Check Fast_Unaligned_Load,
	instead of Slow_BSF, and also check for Fast_Copy_Backward to
	enable __memcpy_ssse3_back.
-rw-r--r--	ChangeLog	8
-rw-r--r--	sysdeps/x86_64/multiarch/memcpy.S	27
2 files changed, 22 insertions, 13 deletions
diff --git a/ChangeLog b/ChangeLog
index 7c5ee2dd19..7b36bd732d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2016-03-04  Amit Pawar  <Amit.Pawar@amd.com>
+	    H.J. Lu  <hongjiu.lu@intel.com>
+
+	[BZ #18880]
+	* sysdeps/x86_64/multiarch/memcpy.S: Check Fast_Unaligned_Load,
+	instead of Slow_BSF, and also check for Fast_Copy_Backward to
+	enable __memcpy_ssse3_back.
+
 2016-03-03  H.J. Lu  <hongjiu.lu@intel.com>
 
 	[BZ #19758]
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S
index 64a1bcd137..8882590e51 100644
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ b/sysdeps/x86_64/multiarch/memcpy.S
@@ -35,22 +35,23 @@ ENTRY(__new_memcpy)
 	jz	1f
 	HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
 	jz	1f
-	leaq    __memcpy_avx512_no_vzeroupper(%rip), %rax
+	lea    __memcpy_avx512_no_vzeroupper(%rip), %RAX_LP
 	ret
 #endif
-1:	leaq	__memcpy_avx_unaligned(%rip), %rax
+1:	lea	__memcpy_avx_unaligned(%rip), %RAX_LP
 	HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
-	jz 2f
-	ret
-2:	leaq	__memcpy_sse2(%rip), %rax
-	HAS_ARCH_FEATURE (Slow_BSF)
-	jnz	3f
-	leaq	__memcpy_sse2_unaligned(%rip), %rax
-	ret
-3:	HAS_CPU_FEATURE (SSSE3)
-	jz 4f
-	leaq    __memcpy_ssse3(%rip), %rax
-4:	ret
+	jnz	2f
+	lea	__memcpy_sse2_unaligned(%rip), %RAX_LP
+	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+	jnz	2f
+	lea	__memcpy_sse2(%rip), %RAX_LP
+	HAS_CPU_FEATURE (SSSE3)
+	jz	2f
+	lea    __memcpy_ssse3_back(%rip), %RAX_LP
+	HAS_ARCH_FEATURE (Fast_Copy_Backward)
+	jnz	2f
+	lea	__memcpy_ssse3(%rip), %RAX_LP
+2:	ret
 END(__new_memcpy)
 
 # undef ENTRY