diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2022-02-07 05:55:15 -0800 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2022-02-08 15:58:56 -0800 |
commit | 3d9f171bfb5325bd5f427e9fc386453358c6e840 (patch) | |
tree | 5caeddafa6cb494bbe102c34f7f2ee630f5d83cf /sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S | |
parent | dc98eeeb952f59bdbd51da0409e732756fe30170 (diff) | |
download | glibc-3d9f171bfb5325bd5f427e9fc386453358c6e840.tar.gz glibc-3d9f171bfb5325bd5f427e9fc386453358c6e840.tar.xz glibc-3d9f171bfb5325bd5f427e9fc386453358c6e840.zip |
x86-64: Optimize bzero
Zero is by far the most common value passed to memset (99%+ of calls for Python3 and GCC). bzero can be optimized slightly further for this case by using a zero-idiom xor to broadcast the set value to a register (vector or GPR).

Co-developed-by: Noah Goldstein <goldstein.w.n@gmail.com>
Diffstat (limited to 'sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S')
-rw-r--r-- | sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S | 6 |
1 file changed, 6 insertions, 0 deletions
diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
index c0bf2875d0..a093a2831f 100644
--- a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
@@ -14,6 +14,9 @@
   vmovd d, %xmm0; \
   movq r, %rax;
 
+# define BZERO_ZERO_VEC0() \
+  vpxor %xmm0, %xmm0, %xmm0
+
 # define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
   MEMSET_SET_VEC0_AND_SET_RETURN(d, r)
 
@@ -29,6 +32,9 @@
 # ifndef MEMSET_SYMBOL
 # define MEMSET_SYMBOL(p,s) p##_avx2_##s
 # endif
+# ifndef BZERO_SYMBOL
+# define BZERO_SYMBOL(p,s) p##_avx2_##s
+# endif
 # ifndef WMEMSET_SYMBOL
 # define WMEMSET_SYMBOL(p,s) p##_avx2_##s
 # endif