about summary refs log tree commit diff
path: root/sysdeps/x86_64/multiarch/memrchr-avx2.S
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2021-03-05 07:26:42 -0800
committerH.J. Lu <hjl.tools@gmail.com>2021-03-29 07:40:17 -0700
commit7ebba91361badf7531d4e75050627a88d424872f (patch)
treed99781a37b47b95441ad358d119ec3741960d405 /sysdeps/x86_64/multiarch/memrchr-avx2.S
parent91264fe3577fe887b4860923fa6142b5274c8965 (diff)
downloadglibc-7ebba91361badf7531d4e75050627a88d424872f.tar.gz
glibc-7ebba91361badf7531d4e75050627a88d424872f.tar.xz
glibc-7ebba91361badf7531d4e75050627a88d424872f.zip
x86-64: Add AVX optimized string/memory functions for RTM
Since VZEROUPPER triggers RTM abort while VZEROALL won't, select AVX
optimized string/memory functions with

	xtest
	jz	1f
	vzeroall
	ret
1:
	vzeroupper
	ret

at function exit on processors with usable RTM, but without 256-bit EVEX
instructions to avoid VZEROUPPER inside a transactionally executing RTM
region.
Diffstat (limited to 'sysdeps/x86_64/multiarch/memrchr-avx2.S')
-rw-r--r--sysdeps/x86_64/multiarch/memrchr-avx2.S53
1 files changed, 27 insertions, 26 deletions
diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2.S b/sysdeps/x86_64/multiarch/memrchr-avx2.S
index eddede45be..ac7370cb06 100644
--- a/sysdeps/x86_64/multiarch/memrchr-avx2.S
+++ b/sysdeps/x86_64/multiarch/memrchr-avx2.S
@@ -20,14 +20,22 @@
 
 # include <sysdep.h>
 
+# ifndef MEMRCHR
+#  define MEMRCHR	__memrchr_avx2
+# endif
+
 # ifndef VZEROUPPER
 #  define VZEROUPPER	vzeroupper
 # endif
 
+# ifndef SECTION
+#  define SECTION(p)	p##.avx
+# endif
+
 # define VEC_SIZE 32
 
-	.section .text.avx,"ax",@progbits
-ENTRY (__memrchr_avx2)
+	.section SECTION(.text),"ax",@progbits
+ENTRY (MEMRCHR)
 	/* Broadcast CHAR to YMM0.  */
 	vmovd	%esi, %xmm0
 	vpbroadcastb %xmm0, %ymm0
@@ -134,8 +142,8 @@ L(loop_4x_vec):
 	vpmovmskb %ymm1, %eax
 	bsrl	%eax, %eax
 	addq	%rdi, %rax
-	VZEROUPPER
-	ret
+L(return_vzeroupper):
+	ZERO_UPPER_VEC_REGISTERS_RETURN
 
 	.p2align 4
 L(last_4x_vec_or_less):
@@ -169,8 +177,7 @@ L(last_4x_vec_or_less):
 	addq	%rax, %rdx
 	jl	L(zero)
 	addq	%rdi, %rax
-	VZEROUPPER
-	ret
+	VZEROUPPER_RETURN
 
 	.p2align 4
 L(last_2x_vec):
@@ -191,31 +198,27 @@ L(last_2x_vec):
 	jl	L(zero)
 	addl	$(VEC_SIZE * 2), %eax
 	addq	%rdi, %rax
-	VZEROUPPER
-	ret
+	VZEROUPPER_RETURN
 
 	.p2align 4
 L(last_vec_x0):
 	bsrl	%eax, %eax
 	addq	%rdi, %rax
-	VZEROUPPER
-	ret
+	VZEROUPPER_RETURN
 
 	.p2align 4
 L(last_vec_x1):
 	bsrl	%eax, %eax
 	addl	$VEC_SIZE, %eax
 	addq	%rdi, %rax
-	VZEROUPPER
-	ret
+	VZEROUPPER_RETURN
 
 	.p2align 4
 L(last_vec_x2):
 	bsrl	%eax, %eax
 	addl	$(VEC_SIZE * 2), %eax
 	addq	%rdi, %rax
-	VZEROUPPER
-	ret
+	VZEROUPPER_RETURN
 
 	.p2align 4
 L(last_vec_x3):
@@ -232,8 +235,7 @@ L(last_vec_x1_check):
 	jl	L(zero)
 	addl	$VEC_SIZE, %eax
 	addq	%rdi, %rax
-	VZEROUPPER
-	ret
+	VZEROUPPER_RETURN
 
 	.p2align 4
 L(last_vec_x3_check):
@@ -243,12 +245,14 @@ L(last_vec_x3_check):
 	jl	L(zero)
 	addl	$(VEC_SIZE * 3), %eax
 	addq	%rdi, %rax
-	VZEROUPPER
-	ret
+	VZEROUPPER_RETURN
 
 	.p2align 4
 L(zero):
-	VZEROUPPER
+	xorl	%eax, %eax
+	VZEROUPPER_RETURN
+
+	.p2align 4
 L(null):
 	xorl	%eax, %eax
 	ret
@@ -273,8 +277,7 @@ L(last_vec_or_less_aligned):
 
 	bsrl	%eax, %eax
 	addq	%rdi, %rax
-	VZEROUPPER
-	ret
+	VZEROUPPER_RETURN
 
 	.p2align 4
 L(last_vec_or_less):
@@ -315,8 +318,7 @@ L(last_vec_or_less):
 	bsrl	%eax, %eax
 	addq	%rdi, %rax
 	addq	%r8, %rax
-	VZEROUPPER
-	ret
+	VZEROUPPER_RETURN
 
 	.p2align 4
 L(last_vec_2x_aligned):
@@ -353,7 +355,6 @@ L(last_vec_2x_aligned):
 	bsrl	%eax, %eax
 	addq	%rdi, %rax
 	addq	%r8, %rax
-	VZEROUPPER
-	ret
-END (__memrchr_avx2)
+	VZEROUPPER_RETURN
+END (MEMRCHR)
 #endif