diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2016-01-15 12:14:18 -0800 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2016-01-15 12:14:18 -0800 |
commit | e70cffd77b34d2a02d634d33d11802b50899b78b (patch) | |
tree | 052e3963cf7c33912a98d7d651f750627252f1b7 | |
parent | 754cca3aef2a7b8032156642c28e5d727d8374e2 (diff) | |
download | glibc-e70cffd77b34d2a02d634d33d11802b50899b78b.tar.gz glibc-e70cffd77b34d2a02d634d33d11802b50899b78b.tar.xz glibc-e70cffd77b34d2a02d634d33d11802b50899b78b.zip |
Use vmovdqu64 load with vmovntdq store
-rw-r--r-- | sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S | 16 |
1 files changed, 8 insertions, 8 deletions
```diff
diff --git a/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S
index e1d26a14b9..b12c80e8ad 100644
--- a/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S
+++ b/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S
@@ -345,13 +345,13 @@ L(preloop_large):
 	prefetcht1 0x80(%rsi)
 	prefetcht1 0xC0(%rsi)
 L(gobble_256bytes_nt_loop):
-	vmovups (%rsi), %zmm0
+	vmovdqu64 (%rsi), %zmm0
 	prefetcht1 0x100(%rsi)
-	vmovups 0x40(%rsi), %zmm1
+	vmovdqu64 0x40(%rsi), %zmm1
 	prefetcht1 0x140(%rsi)
-	vmovups 0x80(%rsi), %zmm2
+	vmovdqu64 0x80(%rsi), %zmm2
 	prefetcht1 0x180(%rsi)
-	vmovups 0xC0(%rsi), %zmm3
+	vmovdqu64 0xC0(%rsi), %zmm3
 	prefetcht1 0x1C0(%rsi)
 	vmovntdq %zmm0, (%rdi)
 	vmovntdq %zmm1, 0x40(%rdi)
@@ -383,13 +383,13 @@ L(preloop_large_bkw):
 	prefetcht1 -0x80(%rcx)
 	prefetcht1 -0x40(%rcx)
 L(gobble_256bytes_nt_loop_bkw):
-	vmovups -0x100(%rcx), %zmm0
+	vmovdqu64 -0x100(%rcx), %zmm0
 	prefetcht1 -0x200(%rcx)
-	vmovups -0xC0(%rcx), %zmm1
+	vmovdqu64 -0xC0(%rcx), %zmm1
 	prefetcht1 -0x1C0(%rcx)
-	vmovups -0x80(%rcx), %zmm2
+	vmovdqu64 -0x80(%rcx), %zmm2
 	prefetcht1 -0x180(%rcx)
-	vmovups -0x40(%rcx), %zmm3
+	vmovdqu64 -0x40(%rcx), %zmm3
 	prefetcht1 -0x140(%rcx)
 	vmovntdq %zmm0, -0x100(%r9)
 	vmovntdq %zmm1, -0xC0(%r9)
```