about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog | 8
-rw-r--r-- sysdeps/x86_64/multiarch/memcpy.S | 27
2 files changed, 22 insertions, 13 deletions
diff --git a/ChangeLog b/ChangeLog
index 7c5ee2dd19..7b36bd732d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2016-03-04  Amit Pawar  <Amit.Pawar@amd.com>
+	    H.J. Lu  <hongjiu.lu@intel.com>
+
+	[BZ #18880]
+	* sysdeps/x86_64/multiarch/memcpy.S: Check Fast_Unaligned_Load,
+	instead of Slow_BSF, and also check for Fast_Copy_Backward to
+	enable __memcpy_ssse3_back.
+
 2016-03-03  H.J. Lu  <hongjiu.lu@intel.com>
 
 	[BZ #19758]
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S
index 64a1bcd137..8882590e51 100644
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ b/sysdeps/x86_64/multiarch/memcpy.S
@@ -35,22 +35,23 @@ ENTRY(__new_memcpy)
 	jz	1f
 	HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
 	jz	1f
-	leaq    __memcpy_avx512_no_vzeroupper(%rip), %rax
+	lea    __memcpy_avx512_no_vzeroupper(%rip), %RAX_LP
 	ret
 #endif
-1:	leaq	__memcpy_avx_unaligned(%rip), %rax
+1:	lea	__memcpy_avx_unaligned(%rip), %RAX_LP
 	HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
-	jz 2f
-	ret
-2:	leaq	__memcpy_sse2(%rip), %rax
-	HAS_ARCH_FEATURE (Slow_BSF)
-	jnz	3f
-	leaq	__memcpy_sse2_unaligned(%rip), %rax
-	ret
-3:	HAS_CPU_FEATURE (SSSE3)
-	jz 4f
-	leaq    __memcpy_ssse3(%rip), %rax
-4:	ret
+	jnz	2f
+	lea	__memcpy_sse2_unaligned(%rip), %RAX_LP
+	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+	jnz	2f
+	lea	__memcpy_sse2(%rip), %RAX_LP
+	HAS_CPU_FEATURE (SSSE3)
+	jz	2f
+	lea    __memcpy_ssse3_back(%rip), %RAX_LP
+	HAS_ARCH_FEATURE (Fast_Copy_Backward)
+	jnz	2f
+	lea	__memcpy_ssse3(%rip), %RAX_LP
+2:	ret
 END(__new_memcpy)
 
 # undef ENTRY