 ChangeLog                         |  8 ++++++++
 sysdeps/x86_64/multiarch/memcpy.S | 27 ++++++++++++++-------------
 2 files changed, 22 insertions(+), 13 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 7c5ee2dd19..7b36bd732d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2016-03-04  Amit Pawar  <Amit.Pawar@amd.com>
+	    H.J. Lu  <hongjiu.lu@intel.com>
+
+	[BZ #18880]
+	* sysdeps/x86_64/multiarch/memcpy.S: Check Fast_Unaligned_Load,
+	instead of Slow_BSF, and also check for Fast_Copy_Backward to
+	enable __memcpy_ssse3_back.
+
 2016-03-03  H.J. Lu  <hongjiu.lu@intel.com>
 
 	[BZ #19758]
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S
index 64a1bcd137..8882590e51 100644
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ b/sysdeps/x86_64/multiarch/memcpy.S
@@ -35,22 +35,23 @@ ENTRY(__new_memcpy)
 	jz	1f
 	HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
 	jz	1f
-	leaq	__memcpy_avx512_no_vzeroupper(%rip), %rax
+	lea	__memcpy_avx512_no_vzeroupper(%rip), %RAX_LP
 	ret
 #endif
-1:	leaq	__memcpy_avx_unaligned(%rip), %rax
+1:	lea	__memcpy_avx_unaligned(%rip), %RAX_LP
 	HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
-	jz	2f
-	ret
-2:	leaq	__memcpy_sse2(%rip), %rax
-	HAS_ARCH_FEATURE (Slow_BSF)
-	jnz	3f
-	leaq	__memcpy_sse2_unaligned(%rip), %rax
-	ret
-3:	HAS_CPU_FEATURE (SSSE3)
-	jz	4f
-	leaq	__memcpy_ssse3(%rip), %rax
-4:	ret
+	jnz	2f
+	lea	__memcpy_sse2_unaligned(%rip), %RAX_LP
+	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+	jnz	2f
+	lea	__memcpy_sse2(%rip), %RAX_LP
+	HAS_CPU_FEATURE (SSSE3)
+	jz	2f
+	lea	__memcpy_ssse3_back(%rip), %RAX_LP
+	HAS_ARCH_FEATURE (Fast_Copy_Backward)
+	jnz	2f
+	lea	__memcpy_ssse3(%rip), %RAX_LP
+2:	ret
 END(__new_memcpy)
 
 # undef ENTRY
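For readers who would rather not trace the IFUNC selector's branches by hand, here is a minimal C sketch of the selection order the patched __new_memcpy implements. It is a paraphrase, not glibc source: cpu_has() is a hypothetical stand-in for glibc's HAS_CPU_FEATURE/HAS_ARCH_FEATURE macros, HAVE_AVX512 stands in for whatever preprocessor guard encloses the AVX-512 block, and the feature name in that block's first check is assumed, since the corresponding "jz 1f" falls outside the visible context lines.

#include <stddef.h>

/* Hypothetical feature probe; stands in for glibc's HAS_CPU_FEATURE /
   HAS_ARCH_FEATURE macros, which test bits recorded at startup.  */
extern int cpu_has (const char *feature);

typedef void *(*memcpy_fn) (void *, const void *, size_t);

extern void *__memcpy_avx512_no_vzeroupper (void *, const void *, size_t);
extern void *__memcpy_avx_unaligned (void *, const void *, size_t);
extern void *__memcpy_sse2_unaligned (void *, const void *, size_t);
extern void *__memcpy_sse2 (void *, const void *, size_t);
extern void *__memcpy_ssse3_back (void *, const void *, size_t);
extern void *__memcpy_ssse3 (void *, const void *, size_t);

static memcpy_fn
select_memcpy (void)
{
#ifdef HAVE_AVX512   /* the assembly guards this block with an #ifdef too */
  /* "AVX512F_Usable" is an assumption; only Prefer_No_VZEROUPPER is
     named in the visible hunk.  */
  if (cpu_has ("AVX512F_Usable") && cpu_has ("Prefer_No_VZEROUPPER"))
    return __memcpy_avx512_no_vzeroupper;
#endif
  if (cpu_has ("AVX_Fast_Unaligned_Load"))   /* jnz 2f with avx_unaligned loaded */
    return __memcpy_avx_unaligned;
  if (cpu_has ("Fast_Unaligned_Load"))       /* new: replaces the old Slow_BSF test */
    return __memcpy_sse2_unaligned;
  if (!cpu_has ("SSSE3"))                    /* jz 2f with sse2 loaded */
    return __memcpy_sse2;
  if (cpu_has ("Fast_Copy_Backward"))        /* new: can now pick ssse3_back */
    return __memcpy_ssse3_back;
  return __memcpy_ssse3;                     /* fall-through default */
}

Read this way, the two behavioral changes are exactly the ones the ChangeLog names: unaligned SSE2 copies are now gated on Fast_Unaligned_Load rather than on the absence of Slow_BSF, and Fast_Copy_Backward can now select __memcpy_ssse3_back, a variant the old selector never returned.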