author     H.J. Lu <hjl.tools@gmail.com>    2016-03-04 08:37:40 -0800
committer  H.J. Lu <hjl.tools@gmail.com>    2016-03-04 08:39:07 -0800
commit     14a1d7cc4c4fd5ee8e4e66b777221dd32a84efe8 (patch)
tree       86611a9511bcc3cafb5de83890af6c0508e569a9
parent     4b230f6a60f3bb9cae92306d016535f40578ff2e (diff)
download   glibc-14a1d7cc4c4fd5ee8e4e66b777221dd32a84efe8.tar.gz
           glibc-14a1d7cc4c4fd5ee8e4e66b777221dd32a84efe8.tar.xz
           glibc-14a1d7cc4c4fd5ee8e4e66b777221dd32a84efe8.zip
x86-64: Fix memcpy IFUNC selection
Check Fast_Unaligned_Load, instead of Slow_BSF, and also check for
Fast_Copy_Backward to enable __memcpy_ssse3_back.  The existing selection
order is updated to the following:

1. __memcpy_avx_unaligned if AVX_Fast_Unaligned_Load bit is set.
2. __memcpy_sse2_unaligned if Fast_Unaligned_Load bit is set.
3. __memcpy_sse2 if SSSE3 isn't available.
4. __memcpy_ssse3_back if Fast_Copy_Backward bit is set.
5. __memcpy_ssse3

	[BZ #18880]
	* sysdeps/x86_64/multiarch/memcpy.S: Check Fast_Unaligned_Load,
	instead of Slow_BSF, and also check for Fast_Copy_Backward to
	enable __memcpy_ssse3_back.
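The selection order above can be read as the following minimal C sketch.
It is an illustration, not glibc code: the has_arch_feature ()/
has_cpu_feature () probes and the feature enum are hypothetical stand-ins
for the HAS_ARCH_FEATURE/HAS_CPU_FEATURE macros used by the assembly
selector in the diff below, and the AVX-512 no-vzeroupper path guarded by
HAVE_AVX512_ASM_SUPPORT is omitted.

    #include <stddef.h>

    /* Hypothetical feature probes standing in for the HAS_ARCH_FEATURE
       and HAS_CPU_FEATURE macros; not part of glibc's interface.  */
    enum feature { AVX_Fast_Unaligned_Load, Fast_Unaligned_Load,
                   Fast_Copy_Backward, SSSE3 };
    extern int has_arch_feature (enum feature);
    extern int has_cpu_feature (enum feature);

    typedef void *(*memcpy_fn) (void *, const void *, size_t);

    extern void *__memcpy_avx_unaligned (void *, const void *, size_t);
    extern void *__memcpy_sse2_unaligned (void *, const void *, size_t);
    extern void *__memcpy_sse2 (void *, const void *, size_t);
    extern void *__memcpy_ssse3_back (void *, const void *, size_t);
    extern void *__memcpy_ssse3 (void *, const void *, size_t);

    static memcpy_fn
    select_memcpy (void)
    {
      if (has_arch_feature (AVX_Fast_Unaligned_Load))
        return __memcpy_avx_unaligned;    /* 1. AVX unaligned loads.  */
      if (has_arch_feature (Fast_Unaligned_Load))
        return __memcpy_sse2_unaligned;   /* 2. SSE2 unaligned variant.  */
      if (!has_cpu_feature (SSSE3))
        return __memcpy_sse2;             /* 3. Plain SSE2 fallback.  */
      if (has_arch_feature (Fast_Copy_Backward))
        return __memcpy_ssse3_back;       /* 4. SSSE3 backward copy.  */
      return __memcpy_ssse3;              /* 5. Default SSSE3 variant.  */
    }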
-rw-r--r--  ChangeLog                         |  8
-rw-r--r--  sysdeps/x86_64/multiarch/memcpy.S | 27
2 files changed, 22 insertions, 13 deletions
diff --git a/ChangeLog b/ChangeLog
index 7c5ee2dd19..7b36bd732d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2016-03-04  Amit Pawar  <Amit.Pawar@amd.com>
+	    H.J. Lu  <hongjiu.lu@intel.com>
+
+	[BZ #18880]
+	* sysdeps/x86_64/multiarch/memcpy.S: Check Fast_Unaligned_Load,
+	instead of Slow_BSF, and also check for Fast_Copy_Backward to
+	enable __memcpy_ssse3_back.
+
 2016-03-03  H.J. Lu  <hongjiu.lu@intel.com>
 
 	[BZ #19758]
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S
index 64a1bcd137..8882590e51 100644
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ b/sysdeps/x86_64/multiarch/memcpy.S
@@ -35,22 +35,23 @@ ENTRY(__new_memcpy)
 	jz	1f
 	HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
 	jz	1f
-	leaq	__memcpy_avx512_no_vzeroupper(%rip), %rax
+	lea	__memcpy_avx512_no_vzeroupper(%rip), %RAX_LP
 	ret
 #endif
-1:	leaq	__memcpy_avx_unaligned(%rip), %rax
+1:	lea	__memcpy_avx_unaligned(%rip), %RAX_LP
 	HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
-	jz	2f
-	ret
-2:	leaq	__memcpy_sse2(%rip), %rax
-	HAS_ARCH_FEATURE (Slow_BSF)
-	jnz	3f
-	leaq	__memcpy_sse2_unaligned(%rip), %rax
-	ret
-3:	HAS_CPU_FEATURE (SSSE3)
-	jz	4f
-	leaq	__memcpy_ssse3(%rip), %rax
-4:	ret
+	jnz	2f
+	lea	__memcpy_sse2_unaligned(%rip), %RAX_LP
+	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+	jnz	2f
+	lea	__memcpy_sse2(%rip), %RAX_LP
+	HAS_CPU_FEATURE (SSSE3)
+	jz	2f
+	lea	__memcpy_ssse3_back(%rip), %RAX_LP
+	HAS_ARCH_FEATURE (Fast_Copy_Backward)
+	jnz	2f
+	lea	__memcpy_ssse3(%rip), %RAX_LP
+2:	ret
 END(__new_memcpy)
 
 # undef ENTRY