diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2021-03-07 09:44:18 -0800 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2021-03-29 07:40:17 -0700 |
commit | 4e2d8f352774b56078c34648b14a2412c38384f4 (patch) | |
tree | cafad171d93e598b9c6a1a0457853f6cdc98bf1d /sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S | |
parent | 4bd660be40967cd69072f69ebc2ad32bfcc1f206 (diff) | |
download | glibc-4e2d8f352774b56078c34648b14a2412c38384f4.tar.gz glibc-4e2d8f352774b56078c34648b14a2412c38384f4.tar.xz glibc-4e2d8f352774b56078c34648b14a2412c38384f4.zip |
x86-64: Use ZMM16-ZMM31 in AVX512 memset family functions
Update ifunc-memset.h and ifunc-wmemset.h so that, when AVX512VL and AVX512BW are usable, they select the functions optimized with AVX512 instructions that use the ZMM16-ZMM31 registers. Since VZEROUPPER isn't needed at function exit when these registers are used, this avoids an RTM abort.
Diffstat (limited to 'sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S')
-rw-r--r-- | sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S | 16 |
1 file changed, 8 insertions, 8 deletions
```diff
diff --git a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
index 0783979ca5..22e7b187c8 100644
--- a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
@@ -1,22 +1,22 @@
 #if IS_IN (libc)
 # define VEC_SIZE 64
-# define VEC(i) zmm##i
+# define XMM0 xmm16
+# define YMM0 ymm16
+# define VEC0 zmm16
+# define VEC(i) VEC##i
 # define VMOVU vmovdqu64
 # define VMOVA vmovdqa64
+# define VZEROUPPER
 
 # define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
-  vmovd d, %xmm0; \
   movq r, %rax; \
-  vpbroadcastb %xmm0, %xmm0; \
-  vpbroadcastq %xmm0, %zmm0
+  vpbroadcastb d, %VEC0
 
 # define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
-  vmovd d, %xmm0; \
   movq r, %rax; \
-  vpbroadcastd %xmm0, %xmm0; \
-  vpbroadcastq %xmm0, %zmm0
+  vpbroadcastd d, %VEC0
 
-# define SECTION(p) p##.avx512
+# define SECTION(p) p##.evex512
 # define MEMSET_SYMBOL(p,s) p##_avx512_##s
 # define WMEMSET_SYMBOL(p,s) p##_avx512_##s
 
```