diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2016-01-15 12:21:15 -0800 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2016-01-15 12:21:15 -0800 |
commit | 9c4de13284bb910d9169f071218ad35eef1b8a1d (patch) | |
tree | 73c5f4ac5a6876019a4cb8e89583b196948337bf | |
parent | 890a46047fc84a97aec0321c4e52de33ec0f00c1 (diff) | |
download | glibc-hjl/avx512f-mem/old.tar.gz glibc-hjl/avx512f-mem/old.tar.xz glibc-hjl/avx512f-mem/old.zip |
Use prefetchnta with non-temporal stores (ref: hjl/avx512f-mem/old)
-rw-r--r-- | sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S | 32 |
1 file changed, 16 insertions, 16 deletions
```diff
diff --git a/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S
index 2c96046f52..746c1e7862 100644
--- a/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S
+++ b/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S
@@ -340,15 +340,15 @@ L(preloop_large):
  sub %rdi, %r8
  sub %r8, %rsi
  add %r8, %rdx
- prefetcht1 (%rsi)
- prefetcht1 0x40(%rsi)
- prefetcht1 0x80(%rsi)
- prefetcht1 0xC0(%rsi)
+ prefetchnta (%rsi)
+ prefetchnta 0x40(%rsi)
+ prefetchnta 0x80(%rsi)
+ prefetchnta 0xC0(%rsi)
 L(gobble_256bytes_nt_loop):
- prefetcht1 0x100(%rsi)
- prefetcht1 0x140(%rsi)
- prefetcht1 0x180(%rsi)
- prefetcht1 0x1C0(%rsi)
+ prefetchnta 0x100(%rsi)
+ prefetchnta 0x140(%rsi)
+ prefetchnta 0x180(%rsi)
+ prefetchnta 0x1C0(%rsi)
  vmovdqu64 (%rsi), %zmm0
  vmovdqu64 0x40(%rsi), %zmm1
  vmovdqu64 0x80(%rsi), %zmm2
@@ -378,15 +378,15 @@ L(preloop_large_bkw):
  sub %r8, %rcx
  sub %r8, %rdx
  add %r9, %r8
- prefetcht1 -0x100(%rcx)
- prefetcht1 -0xC0(%rcx)
- prefetcht1 -0x80(%rcx)
- prefetcht1 -0x40(%rcx)
+ prefetchnta -0x100(%rcx)
+ prefetchnta -0xC0(%rcx)
+ prefetchnta -0x80(%rcx)
+ prefetchnta -0x40(%rcx)
 L(gobble_256bytes_nt_loop_bkw):
- prefetcht1 -0x200(%rcx)
- prefetcht1 -0x1C0(%rcx)
- prefetcht1 -0x180(%rcx)
- prefetcht1 -0x140(%rcx)
+ prefetchnta -0x200(%rcx)
+ prefetchnta -0x1C0(%rcx)
+ prefetchnta -0x180(%rcx)
+ prefetchnta -0x140(%rcx)
  vmovdqu64 -0x100(%rcx), %zmm0
  vmovdqu64 -0xC0(%rcx), %zmm1
  vmovdqu64 -0x80(%rcx), %zmm2
```