about summary refs log tree commit diff
path: root/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
diff options
context:
space:
mode:
author: H.J. Lu <hjl.tools@gmail.com>, 2022-02-07 05:55:15 -0800
committer: H.J. Lu <hjl.tools@gmail.com>, 2022-02-08 15:58:56 -0800
commit: 3d9f171bfb5325bd5f427e9fc386453358c6e840 (patch)
tree: 5caeddafa6cb494bbe102c34f7f2ee630f5d83cf /sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
parent: dc98eeeb952f59bdbd51da0409e732756fe30170 (diff)
download: glibc-3d9f171bfb5325bd5f427e9fc386453358c6e840.tar.gz
glibc-3d9f171bfb5325bd5f427e9fc386453358c6e840.tar.xz
glibc-3d9f171bfb5325bd5f427e9fc386453358c6e840.zip
x86-64: Optimize bzero
Calls to memset with zero as the value to set are by far the most common
case (99%+ for Python3 and GCC).

bzero can be optimized slightly further for this case by using a zero-idiom
xor to broadcast the set value to a register (vector or GPR).

Co-developed-by: Noah Goldstein <goldstein.w.n@gmail.com>
Diffstat (limited to 'sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S')
-rw-r--r-- sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S 6
1 files changed, 6 insertions, 0 deletions
diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
index c0bf2875d0..a093a2831f 100644
--- a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
@@ -14,6 +14,9 @@
   vmovd d, %xmm0; \
   movq r, %rax;
 
+# define BZERO_ZERO_VEC0() \
+  vpxor %xmm0, %xmm0, %xmm0
+
 # define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
   MEMSET_SET_VEC0_AND_SET_RETURN(d, r)
 
@@ -29,6 +32,9 @@
 # ifndef MEMSET_SYMBOL
 #  define MEMSET_SYMBOL(p,s)	p##_avx2_##s
 # endif
+# ifndef BZERO_SYMBOL
+#  define BZERO_SYMBOL(p,s)	p##_avx2_##s
+# endif
 # ifndef WMEMSET_SYMBOL
 #  define WMEMSET_SYMBOL(p,s)	p##_avx2_##s
 # endif