| field | value | date |
|---|---|---|
| author | H.J. Lu <hjl.tools@gmail.com> | 2021-03-05 07:26:42 -0800 |
| committer | H.J. Lu <hjl.tools@gmail.com> | 2021-03-29 07:40:17 -0700 |
| commit | 7ebba91361badf7531d4e75050627a88d424872f (patch) | |
| tree | d99781a37b47b95441ad358d119ec3741960d405 /sysdeps/x86_64/multiarch/memrchr-avx2.S | |
| parent | 91264fe3577fe887b4860923fa6142b5274c8965 (diff) | |
| download | glibc-7ebba91361badf7531d4e75050627a88d424872f.tar.gz, glibc-7ebba91361badf7531d4e75050627a88d424872f.tar.xz, glibc-7ebba91361badf7531d4e75050627a88d424872f.zip | |
x86-64: Add AVX optimized string/memory functions for RTM
Since VZEROUPPER triggers RTM abort while VZEROALL won't, select AVX optimized string/memory functions with

	xtest
	jz	1f
	vzeroall
	ret
1:
	vzeroupper
	ret

at function exit on processors with usable RTM, but without 256-bit EVEX instructions to avoid VZEROUPPER inside a transactionally executing RTM region.
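For reference, this exit sequence is what the RTM flavor of the AVX2 body below reaches through its single `L(return_vzeroupper)` exit. A minimal sketch of how it can be packaged as a preprocessor macro is shown here; the `_XTEST` name and its placement (e.g. in sysdeps/x86_64/sysdep.h) are assumptions, since neither appears in this file-limited diff. XTEST clears ZF inside a transactionally executing RTM/HLE region, so the fall-through path uses VZEROALL (no RTM abort) and the jump target uses the cheaper VZEROUPPER when no transaction is active.

```asm
/* Sketch only: the exit sequence above wrapped as a return macro, in the
   style of the ZERO_UPPER_VEC_REGISTERS_RETURN name used by the diff
   below.  The macro name and its home are assumptions.  */
#define ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST \
	xtest;					\
	jz	1f;				\
	vzeroall;				\
	ret;					\
1:						\
	vzeroupper;				\
	ret
```

The non-RTM build presumably keeps the cheap default, with both return macros expanding to a plain `VZEROUPPER; ret`, so the shared body pays nothing when RTM is not in play.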
Diffstat (limited to 'sysdeps/x86_64/multiarch/memrchr-avx2.S')
-rw-r--r-- | sysdeps/x86_64/multiarch/memrchr-avx2.S | 53 |
1 file changed, 27 insertions, 26 deletions
```diff
diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2.S b/sysdeps/x86_64/multiarch/memrchr-avx2.S
index eddede45be..ac7370cb06 100644
--- a/sysdeps/x86_64/multiarch/memrchr-avx2.S
+++ b/sysdeps/x86_64/multiarch/memrchr-avx2.S
@@ -20,14 +20,22 @@
 
 # include <sysdep.h>
 
+# ifndef MEMRCHR
+#  define MEMRCHR	__memrchr_avx2
+# endif
+
 # ifndef VZEROUPPER
 #  define VZEROUPPER	vzeroupper
 # endif
 
+# ifndef SECTION
+#  define SECTION(p)	p##.avx
+# endif
+
 # define VEC_SIZE 32
 
-	.section .text.avx,"ax",@progbits
-ENTRY (__memrchr_avx2)
+	.section SECTION(.text),"ax",@progbits
+ENTRY (MEMRCHR)
 	/* Broadcast CHAR to YMM0.  */
 	vmovd	%esi, %xmm0
 	vpbroadcastb %xmm0, %ymm0
@@ -134,8 +142,8 @@ L(loop_4x_vec):
 	vpmovmskb %ymm1, %eax
 	bsrl	%eax, %eax
 	addq	%rdi, %rax
-	VZEROUPPER
-	ret
+L(return_vzeroupper):
+	ZERO_UPPER_VEC_REGISTERS_RETURN
 
 	.p2align 4
 L(last_4x_vec_or_less):
@@ -169,8 +177,7 @@ L(last_4x_vec_or_less):
 	addq	%rax, %rdx
 	jl	L(zero)
 	addq	%rdi, %rax
-	VZEROUPPER
-	ret
+	VZEROUPPER_RETURN
 
 	.p2align 4
 L(last_2x_vec):
@@ -191,31 +198,27 @@ L(last_2x_vec):
 	jl	L(zero)
 	addl	$(VEC_SIZE * 2), %eax
 	addq	%rdi, %rax
-	VZEROUPPER
-	ret
+	VZEROUPPER_RETURN
 
 	.p2align 4
 L(last_vec_x0):
 	bsrl	%eax, %eax
 	addq	%rdi, %rax
-	VZEROUPPER
-	ret
+	VZEROUPPER_RETURN
 
 	.p2align 4
 L(last_vec_x1):
 	bsrl	%eax, %eax
 	addl	$VEC_SIZE, %eax
 	addq	%rdi, %rax
-	VZEROUPPER
-	ret
+	VZEROUPPER_RETURN
 
 	.p2align 4
 L(last_vec_x2):
 	bsrl	%eax, %eax
 	addl	$(VEC_SIZE * 2), %eax
 	addq	%rdi, %rax
-	VZEROUPPER
-	ret
+	VZEROUPPER_RETURN
 
 	.p2align 4
 L(last_vec_x3):
@@ -232,8 +235,7 @@ L(last_vec_x1_check):
 	jl	L(zero)
 	addl	$VEC_SIZE, %eax
 	addq	%rdi, %rax
-	VZEROUPPER
-	ret
+	VZEROUPPER_RETURN
 
 	.p2align 4
 L(last_vec_x3_check):
@@ -243,12 +245,14 @@ L(last_vec_x3_check):
 	jl	L(zero)
 	addl	$(VEC_SIZE * 3), %eax
 	addq	%rdi, %rax
-	VZEROUPPER
-	ret
+	VZEROUPPER_RETURN
 
 	.p2align 4
 L(zero):
-	VZEROUPPER
+	xorl	%eax, %eax
+	VZEROUPPER_RETURN
+
+	.p2align 4
 L(null):
 	xorl	%eax, %eax
 	ret
@@ -273,8 +277,7 @@ L(last_vec_or_less_aligned):
 
 	bsrl	%eax, %eax
 	addq	%rdi, %rax
-	VZEROUPPER
-	ret
+	VZEROUPPER_RETURN
 
 	.p2align 4
 L(last_vec_or_less):
@@ -315,8 +318,7 @@ L(last_vec_or_less):
 	bsrl	%eax, %eax
 	addq	%rdi, %rax
 	addq	%r8, %rax
-	VZEROUPPER
-	ret
+	VZEROUPPER_RETURN
 
 	.p2align 4
 L(last_vec_2x_aligned):
@@ -353,7 +355,6 @@ L(last_vec_2x_aligned):
 	bsrl	%eax, %eax
 	addq	%rdi, %rax
 	addq	%r8, %rax
-	VZEROUPPER
-	ret
-END (__memrchr_avx2)
+	VZEROUPPER_RETURN
+END (MEMRCHR)
 #endif
```
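The `#ifndef MEMRCHR` / `#ifndef SECTION` hooks and the single `L(return_vzeroupper)` exit added above let an RTM-safe variant be built from the same source. A hypothetical companion file along the lines of memrchr-avx2-rtm.S is sketched below; the file name and exact macro bodies are assumptions inferred from the hooks visible in this diff, and the default definitions of `VZEROUPPER_RETURN` and `ZERO_UPPER_VEC_REGISTERS_RETURN` would come from sysdep.h changes outside this file-limited view.

```asm
/* Hypothetical memrchr-avx2-rtm.S: build the RTM-safe flavor of the
   shared AVX2 body by overriding the hooks before including it.  */

/* RTM-safe symbol name and text section.  */
#define MEMRCHR		__memrchr_avx2_rtm
#define SECTION(p)	p##.avx.rtm

/* Every VZEROUPPER_RETURN in the body jumps to the one
   L(return_vzeroupper) exit, which runs XTEST and picks VZEROALL
   inside a transaction or VZEROUPPER outside of one.  */
#define ZERO_UPPER_VEC_REGISTERS_RETURN \
	ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
#define VZEROUPPER_RETURN	jmp	L(return_vzeroupper)

#include "memrchr-avx2.S"
```

With the hooks left at their defaults, the ordinary `__memrchr_avx2` build presumably still ends in a plain `VZEROUPPER; ret`, so only the RTM flavor pays for the XTEST check.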