From 3d9f171bfb5325bd5f427e9fc386453358c6e840 Mon Sep 17 00:00:00 2001
From: "H.J. Lu"
Date: Mon, 7 Feb 2022 05:55:15 -0800
Subject: x86-64: Optimize bzero

memset with zero as the value to set is by far the majority value (99%+ for Python3 and GCC).

bzero can be slightly more optimized for this case by using a zero-idiom xor for broadcasting the set value to a register (vector or GPR).

Co-developed-by: Noah Goldstein
---
 sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S')

diff --git a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
index 6370021506..5d8fa78f05 100644
--- a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
@@ -19,6 +19,9 @@
   vpbroadcastb d, %VEC0; \
   movq r, %rax
 
+# define BZERO_ZERO_VEC0() \
+  vpxorq %XMM0, %XMM0, %XMM0
+
 # define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
   vpbroadcastd d, %VEC0; \
   movq r, %rax
-- 
cgit 1.4.1