about summary refs log tree commit diff
diff options
context:
space:
mode:
author Noah Goldstein <goldstein.w.n@gmail.com> 2022-02-07 00:32:23 -0600
committer Noah Goldstein <goldstein.w.n@gmail.com> 2022-02-07 14:18:29 -0600
commit 1b0c60f95bbe2eded80b2bb5be75c0e45b11cde1 (patch)
tree 35485d9f7af9015366c8f98714d271ce51e6944f
parent 03c9c4fce4fefbb34e65723467d86cb68739a9d1 (diff)
download glibc-1b0c60f95bbe2eded80b2bb5be75c0e45b11cde1.tar.gz
glibc-1b0c60f95bbe2eded80b2bb5be75c0e45b11cde1.tar.xz
glibc-1b0c60f95bbe2eded80b2bb5be75c0e45b11cde1.zip
x86: Remove SSSE3 instruction for broadcast in memset.S (SSE2 Only)
commit b62ace2740a106222e124cc86956448fa07abf4d
Author: Noah Goldstein <goldstein.w.n@gmail.com>
Date:   Sun Feb 6 00:54:18 2022 -0600

    x86: Improve vec generation in memset-vec-unaligned-erms.S

Revert usage of 'pshufb' in broadcast logic as it is an SSSE3
instruction and memset.S is restricted to only SSE2 instructions.
-rw-r--r-- sysdeps/x86_64/memset.S 7
1 files changed, 4 insertions, 3 deletions
diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
index ccf036be53..3f0517bbfc 100644
--- a/sysdeps/x86_64/memset.S
+++ b/sysdeps/x86_64/memset.S
@@ -30,9 +30,10 @@
 
 # define MEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
   movd d, %xmm0; \
-  pxor %xmm1, %xmm1; \
-  pshufb %xmm1, %xmm0; \
-  movq r, %rax
+  movq r, %rax; \
+  punpcklbw %xmm0, %xmm0; \
+  punpcklwd %xmm0, %xmm0; \
+  pshufd $0, %xmm0, %xmm0
 
 # define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
   movd d, %xmm0; \