diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2016-03-04 08:37:40 -0800 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2016-04-02 09:38:04 -0700 |
commit | e455d17680cfaebb12692547422f95ba1ed30e29 (patch) | |
tree | 2d9160af4b59ae6376b06fa01c6b0f61f9619f5e /sysdeps | |
parent | 0eb234232eaf925fe4dca3bd60a3e1b4a7ab2882 (diff) | |
download | glibc-e455d17680cfaebb12692547422f95ba1ed30e29.tar.gz glibc-e455d17680cfaebb12692547422f95ba1ed30e29.tar.xz glibc-e455d17680cfaebb12692547422f95ba1ed30e29.zip |
x86-64: Fix memcpy IFUNC selection
Check Fast_Unaligned_Load, instead of Slow_BSF, and also check for Fast_Copy_Backward to enable __memcpy_ssse3_back. The existing selection order is replaced with the following selection order: 1. __memcpy_avx_unaligned if AVX_Fast_Unaligned_Load bit is set. 2. __memcpy_sse2_unaligned if Fast_Unaligned_Load bit is set. 3. __memcpy_sse2 if SSSE3 isn't available. 4. __memcpy_ssse3_back if Fast_Copy_Backward bit is set. 5. __memcpy_ssse3 otherwise. [BZ #18880] * sysdeps/x86_64/multiarch/memcpy.S: Check Fast_Unaligned_Load, instead of Slow_BSF, and also check for Fast_Copy_Backward to enable __memcpy_ssse3_back. (cherry picked from commit 14a1d7cc4c4fd5ee8e4e66b777221dd32a84efe8)
Diffstat (limited to 'sysdeps')
-rw-r--r-- | sysdeps/x86_64/multiarch/memcpy.S | 27 |
1 files changed, 14 insertions, 13 deletions
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S index 64a1bcd137..8882590e51 100644 --- a/sysdeps/x86_64/multiarch/memcpy.S +++ b/sysdeps/x86_64/multiarch/memcpy.S @@ -35,22 +35,23 @@ ENTRY(__new_memcpy) jz 1f HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER) jz 1f - leaq __memcpy_avx512_no_vzeroupper(%rip), %rax + lea __memcpy_avx512_no_vzeroupper(%rip), %RAX_LP ret #endif -1: leaq __memcpy_avx_unaligned(%rip), %rax +1: lea __memcpy_avx_unaligned(%rip), %RAX_LP HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load) - jz 2f - ret -2: leaq __memcpy_sse2(%rip), %rax - HAS_ARCH_FEATURE (Slow_BSF) - jnz 3f - leaq __memcpy_sse2_unaligned(%rip), %rax - ret -3: HAS_CPU_FEATURE (SSSE3) - jz 4f - leaq __memcpy_ssse3(%rip), %rax -4: ret + jnz 2f + lea __memcpy_sse2_unaligned(%rip), %RAX_LP + HAS_ARCH_FEATURE (Fast_Unaligned_Load) + jnz 2f + lea __memcpy_sse2(%rip), %RAX_LP + HAS_CPU_FEATURE (SSSE3) + jz 2f + lea __memcpy_ssse3_back(%rip), %RAX_LP + HAS_ARCH_FEATURE (Fast_Copy_Backward) + jnz 2f + lea __memcpy_ssse3(%rip), %RAX_LP +2: ret END(__new_memcpy) # undef ENTRY |