author | Noah Goldstein <goldstein.w.n@gmail.com> | 2022-07-12 12:29:04 -0700
committer | Noah Goldstein <goldstein.w.n@gmail.com> | 2022-07-13 14:55:31 -0700
commit | 425647458b03652526f670da7a0c2605513cf450 (patch)
tree | 675e2583679b0522f3f3f9f30722c1667ad9666d
parent | 08af081ffd3baa371435da0c6906453e9c8be5f5 (diff)
download | glibc-425647458b03652526f670da7a0c2605513cf450.tar.gz, glibc-425647458b03652526f670da7a0c2605513cf450.tar.xz, glibc-425647458b03652526f670da7a0c2605513cf450.zip
x86: Move strrchr SSE2 implementation to multiarch/strrchr-sse2.S
This commit doesn't affect libc.so.6; it's just housekeeping to prepare for adding explicit ISA level support. Tested by building on x86_64 and x86_32 with and without multiarch.
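The mechanics are easiest to see from the include chain: the SSE2 body now lives under multiarch/ and only defaults to its ifunc name when the includer has not already chosen one, while the non-multiarch strrchr.S and wcsrchr.S become thin wrappers. A condensed sketch of that pattern (not the complete files; the full diff follows):

```asm
/* sysdeps/x86_64/multiarch/strrchr-sse2.S -- the implementation now lives
   here; it only falls back to the ifunc name if nothing chose one.  */
#if IS_IN (libc)
# ifndef STRRCHR
#  define STRRCHR	__strrchr_sse2
# endif
#endif
/* ... SSE2 body defining ENTRY(STRRCHR) ... */

/* sysdeps/x86_64/strrchr.S -- non-multiarch build: pick the public name,
   reuse the multiarch file, and keep the aliases here.  */
#define STRRCHR	strrchr
#include "multiarch/strrchr-sse2.S"
weak_alias (strrchr, rindex)
libc_hidden_builtin_def (strrchr)

/* wcsrchr.S -> multiarch/wcsrchr-sse2.S follows the same pattern, with
   USE_AS_WCSRCHR and NO_PMINU defined in wcsrchr-sse2.S.  */
```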
-rw-r--r-- | sysdeps/x86_64/multiarch/strrchr-sse2.S | 358
-rw-r--r-- | sysdeps/x86_64/multiarch/wcsrchr-sse2.S | 10
-rw-r--r-- | sysdeps/x86_64/strrchr.S | 364
-rw-r--r-- | sysdeps/x86_64/wcsrchr.S | 11
4 files changed, 366 insertions(+), 377 deletions(-)
```diff
diff --git a/sysdeps/x86_64/multiarch/strrchr-sse2.S b/sysdeps/x86_64/multiarch/strrchr-sse2.S
index 866396e947..6ee7a5e33a 100644
--- a/sysdeps/x86_64/multiarch/strrchr-sse2.S
+++ b/sysdeps/x86_64/multiarch/strrchr-sse2.S
@@ -17,12 +17,358 @@
    <https://www.gnu.org/licenses/>.  */
 
 #if IS_IN (libc)
-# define STRRCHR __strrchr_sse2
+# ifndef STRRCHR
+#  define STRRCHR	__strrchr_sse2
+# endif
+#endif
+
+#include <sysdep.h>
+
+#ifdef USE_AS_WCSRCHR
+# define PCMPEQ	pcmpeqd
+# define CHAR_SIZE	4
+# define PMINU	pminud
+#else
+# define PCMPEQ	pcmpeqb
+# define CHAR_SIZE	1
+# define PMINU	pminub
+#endif
+
+#define PAGE_SIZE	4096
+#define VEC_SIZE	16
+
+	.text
+ENTRY(STRRCHR)
+	movd	%esi, %xmm0
+	movq	%rdi, %rax
+	andl	$(PAGE_SIZE - 1), %eax
+#ifndef USE_AS_WCSRCHR
+	punpcklbw %xmm0, %xmm0
+	punpcklwd %xmm0, %xmm0
+#endif
+	pshufd	$0, %xmm0, %xmm0
+	cmpl	$(PAGE_SIZE - VEC_SIZE), %eax
+	ja	L(cross_page)
+
+L(cross_page_continue):
+	movups	(%rdi), %xmm1
+	pxor	%xmm2, %xmm2
+	PCMPEQ	%xmm1, %xmm2
+	pmovmskb %xmm2, %ecx
+	testl	%ecx, %ecx
+	jz	L(aligned_more)
+
+	PCMPEQ	%xmm0, %xmm1
+	pmovmskb %xmm1, %eax
+	leal	-1(%rcx), %edx
+	xorl	%edx, %ecx
+	andl	%ecx, %eax
+	jz	L(ret0)
+	bsrl	%eax, %eax
+	addq	%rdi, %rax
+	/* We are off by 3 for wcsrchr if search CHAR is non-zero. If
+	   search CHAR is zero we are correct. Either way `andq
+	   -CHAR_SIZE, %rax` gets the correct result.  */
+#ifdef USE_AS_WCSRCHR
+	andq	$-CHAR_SIZE, %rax
+#endif
+L(ret0):
+	ret
+
+	/* Returns for first vec x1/x2 have hard coded backward search
+	   path for earlier matches.  */
+	.p2align 4
+L(first_vec_x0_test):
+	PCMPEQ	%xmm0, %xmm1
+	pmovmskb %xmm1, %eax
+	testl	%eax, %eax
+	jz	L(ret0)
+	bsrl	%eax, %eax
+	addq	%r8, %rax
+#ifdef USE_AS_WCSRCHR
+	andq	$-CHAR_SIZE, %rax
+#endif
+	ret
+
+	.p2align 4
+L(first_vec_x1):
+	PCMPEQ	%xmm0, %xmm2
+	pmovmskb %xmm2, %eax
+	leal	-1(%rcx), %edx
+	xorl	%edx, %ecx
+	andl	%ecx, %eax
+	jz	L(first_vec_x0_test)
+	bsrl	%eax, %eax
+	leaq	(VEC_SIZE)(%rdi, %rax), %rax
+#ifdef USE_AS_WCSRCHR
+	andq	$-CHAR_SIZE, %rax
+#endif
+	ret
+
+	.p2align 4
+L(first_vec_x1_test):
+	PCMPEQ	%xmm0, %xmm2
+	pmovmskb %xmm2, %eax
+	testl	%eax, %eax
+	jz	L(first_vec_x0_test)
+	bsrl	%eax, %eax
+	leaq	(VEC_SIZE)(%rdi, %rax), %rax
+#ifdef USE_AS_WCSRCHR
+	andq	$-CHAR_SIZE, %rax
+#endif
+	ret
+
+	.p2align 4
+L(first_vec_x2):
+	PCMPEQ	%xmm0, %xmm3
+	pmovmskb %xmm3, %eax
+	leal	-1(%rcx), %edx
+	xorl	%edx, %ecx
+	andl	%ecx, %eax
+	jz	L(first_vec_x1_test)
+	bsrl	%eax, %eax
+	leaq	(VEC_SIZE * 2)(%rdi, %rax), %rax
+#ifdef USE_AS_WCSRCHR
+	andq	$-CHAR_SIZE, %rax
+#endif
+	ret
+
+	.p2align 4
+L(aligned_more):
+	/* Save original pointer if match was in VEC 0.  */
+	movq	%rdi, %r8
+	andq	$-VEC_SIZE, %rdi
+
+	movaps	VEC_SIZE(%rdi), %xmm2
+	pxor	%xmm3, %xmm3
+	PCMPEQ	%xmm2, %xmm3
+	pmovmskb %xmm3, %ecx
+	testl	%ecx, %ecx
+	jnz	L(first_vec_x1)
+
+	movaps	(VEC_SIZE * 2)(%rdi), %xmm3
+	pxor	%xmm4, %xmm4
+	PCMPEQ	%xmm3, %xmm4
+	pmovmskb %xmm4, %ecx
+	testl	%ecx, %ecx
+	jnz	L(first_vec_x2)
+
+	addq	$VEC_SIZE, %rdi
+	/* Save pointer again before realigning.  */
+	movq	%rdi, %rsi
+	andq	$-(VEC_SIZE * 2), %rdi
+	.p2align 4
+L(first_loop):
+	/* Do 2x VEC at a time.  */
+	movaps	(VEC_SIZE * 2)(%rdi), %xmm4
+	movaps	(VEC_SIZE * 3)(%rdi), %xmm5
+	/* Since SSE2 no pminud so wcsrchr needs seperate logic for
+	   detecting zero. Note if this is found to be a bottleneck it
+	   may be worth adding an SSE4.1 wcsrchr implementation.  */
+#ifdef USE_AS_WCSRCHR
+	movaps	%xmm5, %xmm6
+	pxor	%xmm8, %xmm8
+
+	PCMPEQ	%xmm8, %xmm5
+	PCMPEQ	%xmm4, %xmm8
+	por	%xmm5, %xmm8
+#else
+	movaps	%xmm5, %xmm6
+	PMINU	%xmm4, %xmm5
+#endif
+
+	movaps	%xmm4, %xmm9
+	PCMPEQ	%xmm0, %xmm4
+	PCMPEQ	%xmm0, %xmm6
+	movaps	%xmm6, %xmm7
+	por	%xmm4, %xmm6
+#ifndef USE_AS_WCSRCHR
+	pxor	%xmm8, %xmm8
+	PCMPEQ	%xmm5, %xmm8
+#endif
+	pmovmskb %xmm8, %ecx
+	pmovmskb %xmm6, %eax
 
-# undef weak_alias
-# define weak_alias(strrchr, rindex)
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(strrchr)
+	addq	$(VEC_SIZE * 2), %rdi
+	/* Use `addl` 1) so we can undo it with `subl` and 2) it can
+	   macro-fuse with `jz`.  */
+	addl	%ecx, %eax
+	jz	L(first_loop)
+
+	/* Check if there is zero match.  */
+	testl	%ecx, %ecx
+	jz	L(second_loop_match)
+
+	/* Check if there was a match in last iteration.  */
+	subl	%ecx, %eax
+	jnz	L(new_match)
+
+L(first_loop_old_match):
+	PCMPEQ	%xmm0, %xmm2
+	PCMPEQ	%xmm0, %xmm3
+	pmovmskb %xmm2, %ecx
+	pmovmskb %xmm3, %eax
+	addl	%eax, %ecx
+	jz	L(first_vec_x0_test)
+	/* NB: We could move this shift to before the branch and save a
+	   bit of code size / performance on the fall through. The
+	   branch leads to the null case which generally seems hotter
+	   than char in first 3x VEC.  */
+	sall	$16, %eax
+	orl	%ecx, %eax
+
+	bsrl	%eax, %eax
+	addq	%rsi, %rax
+#ifdef USE_AS_WCSRCHR
+	andq	$-CHAR_SIZE, %rax
+#endif
+	ret
+
+	.p2align 4
+L(new_match):
+	pxor	%xmm6, %xmm6
+	PCMPEQ	%xmm9, %xmm6
+	pmovmskb %xmm6, %eax
+	sall	$16, %ecx
+	orl	%eax, %ecx
+
+	/* We can't reuse either of the old comparisons as since we mask
+	   of zeros after first zero (instead of using the full
+	   comparison) we can't gurantee no interference between match
+	   after end of string and valid match.  */
+	pmovmskb %xmm4, %eax
+	pmovmskb %xmm7, %edx
+	sall	$16, %edx
+	orl	%edx, %eax
+
+	leal	-1(%ecx), %edx
+	xorl	%edx, %ecx
+	andl	%ecx, %eax
+	jz	L(first_loop_old_match)
+	bsrl	%eax, %eax
+	addq	%rdi, %rax
+#ifdef USE_AS_WCSRCHR
+	andq	$-CHAR_SIZE, %rax
+#endif
+	ret
+
+	/* Save minimum state for getting most recent match. We can
+	   throw out all previous work.  */
+	.p2align 4
+L(second_loop_match):
+	movq	%rdi, %rsi
+	movaps	%xmm4, %xmm2
+	movaps	%xmm7, %xmm3
+
+	.p2align 4
+L(second_loop):
+	movaps	(VEC_SIZE * 2)(%rdi), %xmm4
+	movaps	(VEC_SIZE * 3)(%rdi), %xmm5
+	/* Since SSE2 no pminud so wcsrchr needs seperate logic for
+	   detecting zero. Note if this is found to be a bottleneck it
+	   may be worth adding an SSE4.1 wcsrchr implementation.  */
+#ifdef USE_AS_WCSRCHR
+	movaps	%xmm5, %xmm6
+	pxor	%xmm8, %xmm8
+
+	PCMPEQ	%xmm8, %xmm5
+	PCMPEQ	%xmm4, %xmm8
+	por	%xmm5, %xmm8
+#else
+	movaps	%xmm5, %xmm6
+	PMINU	%xmm4, %xmm5
+#endif
+
+	movaps	%xmm4, %xmm9
+	PCMPEQ	%xmm0, %xmm4
+	PCMPEQ	%xmm0, %xmm6
+	movaps	%xmm6, %xmm7
+	por	%xmm4, %xmm6
+#ifndef USE_AS_WCSRCHR
+	pxor	%xmm8, %xmm8
+	PCMPEQ	%xmm5, %xmm8
 #endif
 
-#include "../strrchr.S"
+	pmovmskb %xmm8, %ecx
+	pmovmskb %xmm6, %eax
+
+	addq	$(VEC_SIZE * 2), %rdi
+	/* Either null term or new occurence of CHAR.  */
+	addl	%ecx, %eax
+	jz	L(second_loop)
+
+	/* No null term so much be new occurence of CHAR.  */
+	testl	%ecx, %ecx
+	jz	L(second_loop_match)
+
+
+	subl	%ecx, %eax
+	jnz	L(second_loop_new_match)
+
+L(second_loop_old_match):
+	pmovmskb %xmm2, %ecx
+	pmovmskb %xmm3, %eax
+	sall	$16, %eax
+	orl	%ecx, %eax
+	bsrl	%eax, %eax
+	addq	%rsi, %rax
+#ifdef USE_AS_WCSRCHR
+	andq	$-CHAR_SIZE, %rax
+#endif
+	ret
+
+	.p2align 4
+L(second_loop_new_match):
+	pxor	%xmm6, %xmm6
+	PCMPEQ	%xmm9, %xmm6
+	pmovmskb %xmm6, %eax
+	sall	$16, %ecx
+	orl	%eax, %ecx
+
+	/* We can't reuse either of the old comparisons as since we mask
+	   of zeros after first zero (instead of using the full
+	   comparison) we can't gurantee no interference between match
+	   after end of string and valid match.  */
+	pmovmskb %xmm4, %eax
+	pmovmskb %xmm7, %edx
+	sall	$16, %edx
+	orl	%edx, %eax
+
+	leal	-1(%ecx), %edx
+	xorl	%edx, %ecx
+	andl	%ecx, %eax
+	jz	L(second_loop_old_match)
+	bsrl	%eax, %eax
+	addq	%rdi, %rax
+#ifdef USE_AS_WCSRCHR
+	andq	$-CHAR_SIZE, %rax
+#endif
+	ret
+
+	.p2align 4,, 4
+L(cross_page):
+	movq	%rdi, %rsi
+	andq	$-VEC_SIZE, %rsi
+	movaps	(%rsi), %xmm1
+	pxor	%xmm2, %xmm2
+	PCMPEQ	%xmm1, %xmm2
+	pmovmskb %xmm2, %edx
+	movl	%edi, %ecx
+	andl	$(VEC_SIZE - 1), %ecx
+	sarl	%cl, %edx
+	jz	L(cross_page_continue)
+	PCMPEQ	%xmm0, %xmm1
+	pmovmskb %xmm1, %eax
+	sarl	%cl, %eax
+	leal	-1(%rdx), %ecx
+	xorl	%edx, %ecx
+	andl	%ecx, %eax
+	jz	L(ret1)
+	bsrl	%eax, %eax
+	addq	%rdi, %rax
+#ifdef USE_AS_WCSRCHR
+	andq	$-CHAR_SIZE, %rax
+#endif
+L(ret1):
+	ret
+END(STRRCHR)
diff --git a/sysdeps/x86_64/multiarch/wcsrchr-sse2.S b/sysdeps/x86_64/multiarch/wcsrchr-sse2.S
index 69d2f3cdb1..d9259720f8 100644
--- a/sysdeps/x86_64/multiarch/wcsrchr-sse2.S
+++ b/sysdeps/x86_64/multiarch/wcsrchr-sse2.S
@@ -17,6 +17,12 @@
    <https://www.gnu.org/licenses/>.  */
 
 #if IS_IN (libc)
-# define STRRCHR __wcsrchr_sse2
+# ifndef STRRCHR
+#  define STRRCHR	__wcsrchr_sse2
+# endif
 #endif
-#include "../wcsrchr.S"
+
+#define USE_AS_WCSRCHR	1
+#define NO_PMINU	1
+
+#include "strrchr-sse2.S"
diff --git a/sysdeps/x86_64/strrchr.S b/sysdeps/x86_64/strrchr.S
index 4d7ba4ceb2..f39da60454 100644
--- a/sysdeps/x86_64/strrchr.S
+++ b/sysdeps/x86_64/strrchr.S
@@ -16,363 +16,7 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-
-#include <sysdep.h>
-
-#ifndef STRRCHR
-# define STRRCHR	strrchr
-#endif
-
-#ifdef USE_AS_WCSRCHR
-# define PCMPEQ	pcmpeqd
-# define CHAR_SIZE	4
-# define PMINU	pminud
-#else
-# define PCMPEQ	pcmpeqb
-# define CHAR_SIZE	1
-# define PMINU	pminub
-#endif
-
-#define PAGE_SIZE	4096
-#define VEC_SIZE	16
-
-	.text
-ENTRY(STRRCHR)
-	movd	%esi, %xmm0
-	movq	%rdi, %rax
-	andl	$(PAGE_SIZE - 1), %eax
-#ifndef USE_AS_WCSRCHR
-	punpcklbw %xmm0, %xmm0
-	punpcklwd %xmm0, %xmm0
-#endif
-	pshufd	$0, %xmm0, %xmm0
-	cmpl	$(PAGE_SIZE - VEC_SIZE), %eax
-	ja	L(cross_page)
-
-L(cross_page_continue):
-	movups	(%rdi), %xmm1
-	pxor	%xmm2, %xmm2
-	PCMPEQ	%xmm1, %xmm2
-	pmovmskb %xmm2, %ecx
-	testl	%ecx, %ecx
-	jz	L(aligned_more)
-
-	PCMPEQ	%xmm0, %xmm1
-	pmovmskb %xmm1, %eax
-	leal	-1(%rcx), %edx
-	xorl	%edx, %ecx
-	andl	%ecx, %eax
-	jz	L(ret0)
-	bsrl	%eax, %eax
-	addq	%rdi, %rax
-	/* We are off by 3 for wcsrchr if search CHAR is non-zero. If
-	   search CHAR is zero we are correct. Either way `andq
-	   -CHAR_SIZE, %rax` gets the correct result.  */
-#ifdef USE_AS_WCSRCHR
-	andq	$-CHAR_SIZE, %rax
-#endif
-L(ret0):
-	ret
-
-	/* Returns for first vec x1/x2 have hard coded backward search
-	   path for earlier matches.  */
-	.p2align 4
-L(first_vec_x0_test):
-	PCMPEQ	%xmm0, %xmm1
-	pmovmskb %xmm1, %eax
-	testl	%eax, %eax
-	jz	L(ret0)
-	bsrl	%eax, %eax
-	addq	%r8, %rax
-#ifdef USE_AS_WCSRCHR
-	andq	$-CHAR_SIZE, %rax
-#endif
-	ret
-
-	.p2align 4
-L(first_vec_x1):
-	PCMPEQ	%xmm0, %xmm2
-	pmovmskb %xmm2, %eax
-	leal	-1(%rcx), %edx
-	xorl	%edx, %ecx
-	andl	%ecx, %eax
-	jz	L(first_vec_x0_test)
-	bsrl	%eax, %eax
-	leaq	(VEC_SIZE)(%rdi, %rax), %rax
-#ifdef USE_AS_WCSRCHR
-	andq	$-CHAR_SIZE, %rax
-#endif
-	ret
-
-	.p2align 4
-L(first_vec_x1_test):
-	PCMPEQ	%xmm0, %xmm2
-	pmovmskb %xmm2, %eax
-	testl	%eax, %eax
-	jz	L(first_vec_x0_test)
-	bsrl	%eax, %eax
-	leaq	(VEC_SIZE)(%rdi, %rax), %rax
-#ifdef USE_AS_WCSRCHR
-	andq	$-CHAR_SIZE, %rax
-#endif
-	ret
-
-	.p2align 4
-L(first_vec_x2):
-	PCMPEQ	%xmm0, %xmm3
-	pmovmskb %xmm3, %eax
-	leal	-1(%rcx), %edx
-	xorl	%edx, %ecx
-	andl	%ecx, %eax
-	jz	L(first_vec_x1_test)
-	bsrl	%eax, %eax
-	leaq	(VEC_SIZE * 2)(%rdi, %rax), %rax
-#ifdef USE_AS_WCSRCHR
-	andq	$-CHAR_SIZE, %rax
-#endif
-	ret
-
-	.p2align 4
-L(aligned_more):
-	/* Save original pointer if match was in VEC 0.  */
-	movq	%rdi, %r8
-	andq	$-VEC_SIZE, %rdi
-
-	movaps	VEC_SIZE(%rdi), %xmm2
-	pxor	%xmm3, %xmm3
-	PCMPEQ	%xmm2, %xmm3
-	pmovmskb %xmm3, %ecx
-	testl	%ecx, %ecx
-	jnz	L(first_vec_x1)
-
-	movaps	(VEC_SIZE * 2)(%rdi), %xmm3
-	pxor	%xmm4, %xmm4
-	PCMPEQ	%xmm3, %xmm4
-	pmovmskb %xmm4, %ecx
-	testl	%ecx, %ecx
-	jnz	L(first_vec_x2)
-
-	addq	$VEC_SIZE, %rdi
-	/* Save pointer again before realigning.  */
-	movq	%rdi, %rsi
-	andq	$-(VEC_SIZE * 2), %rdi
-	.p2align 4
-L(first_loop):
-	/* Do 2x VEC at a time.  */
-	movaps	(VEC_SIZE * 2)(%rdi), %xmm4
-	movaps	(VEC_SIZE * 3)(%rdi), %xmm5
-	/* Since SSE2 no pminud so wcsrchr needs seperate logic for
-	   detecting zero. Note if this is found to be a bottleneck it
-	   may be worth adding an SSE4.1 wcsrchr implementation.  */
-#ifdef USE_AS_WCSRCHR
-	movaps	%xmm5, %xmm6
-	pxor	%xmm8, %xmm8
-
-	PCMPEQ	%xmm8, %xmm5
-	PCMPEQ	%xmm4, %xmm8
-	por	%xmm5, %xmm8
-#else
-	movaps	%xmm5, %xmm6
-	PMINU	%xmm4, %xmm5
-#endif
-
-	movaps	%xmm4, %xmm9
-	PCMPEQ	%xmm0, %xmm4
-	PCMPEQ	%xmm0, %xmm6
-	movaps	%xmm6, %xmm7
-	por	%xmm4, %xmm6
-#ifndef USE_AS_WCSRCHR
-	pxor	%xmm8, %xmm8
-	PCMPEQ	%xmm5, %xmm8
-#endif
-	pmovmskb %xmm8, %ecx
-	pmovmskb %xmm6, %eax
-
-	addq	$(VEC_SIZE * 2), %rdi
-	/* Use `addl` 1) so we can undo it with `subl` and 2) it can
-	   macro-fuse with `jz`.  */
-	addl	%ecx, %eax
-	jz	L(first_loop)
-
-	/* Check if there is zero match.  */
-	testl	%ecx, %ecx
-	jz	L(second_loop_match)
-
-	/* Check if there was a match in last iteration.  */
-	subl	%ecx, %eax
-	jnz	L(new_match)
-
-L(first_loop_old_match):
-	PCMPEQ	%xmm0, %xmm2
-	PCMPEQ	%xmm0, %xmm3
-	pmovmskb %xmm2, %ecx
-	pmovmskb %xmm3, %eax
-	addl	%eax, %ecx
-	jz	L(first_vec_x0_test)
-	/* NB: We could move this shift to before the branch and save a
-	   bit of code size / performance on the fall through. The
-	   branch leads to the null case which generally seems hotter
-	   than char in first 3x VEC.  */
-	sall	$16, %eax
-	orl	%ecx, %eax
-
-	bsrl	%eax, %eax
-	addq	%rsi, %rax
-#ifdef USE_AS_WCSRCHR
-	andq	$-CHAR_SIZE, %rax
-#endif
-	ret
-
-	.p2align 4
-L(new_match):
-	pxor	%xmm6, %xmm6
-	PCMPEQ	%xmm9, %xmm6
-	pmovmskb %xmm6, %eax
-	sall	$16, %ecx
-	orl	%eax, %ecx
-
-	/* We can't reuse either of the old comparisons as since we mask
-	   of zeros after first zero (instead of using the full
-	   comparison) we can't gurantee no interference between match
-	   after end of string and valid match.  */
-	pmovmskb %xmm4, %eax
-	pmovmskb %xmm7, %edx
-	sall	$16, %edx
-	orl	%edx, %eax
-
-	leal	-1(%ecx), %edx
-	xorl	%edx, %ecx
-	andl	%ecx, %eax
-	jz	L(first_loop_old_match)
-	bsrl	%eax, %eax
-	addq	%rdi, %rax
-#ifdef USE_AS_WCSRCHR
-	andq	$-CHAR_SIZE, %rax
-#endif
-	ret
-
-	/* Save minimum state for getting most recent match. We can
-	   throw out all previous work.  */
-	.p2align 4
-L(second_loop_match):
-	movq	%rdi, %rsi
-	movaps	%xmm4, %xmm2
-	movaps	%xmm7, %xmm3
-
-	.p2align 4
-L(second_loop):
-	movaps	(VEC_SIZE * 2)(%rdi), %xmm4
-	movaps	(VEC_SIZE * 3)(%rdi), %xmm5
-	/* Since SSE2 no pminud so wcsrchr needs seperate logic for
-	   detecting zero. Note if this is found to be a bottleneck it
-	   may be worth adding an SSE4.1 wcsrchr implementation.  */
-#ifdef USE_AS_WCSRCHR
-	movaps	%xmm5, %xmm6
-	pxor	%xmm8, %xmm8
-
-	PCMPEQ	%xmm8, %xmm5
-	PCMPEQ	%xmm4, %xmm8
-	por	%xmm5, %xmm8
-#else
-	movaps	%xmm5, %xmm6
-	PMINU	%xmm4, %xmm5
-#endif
-
-	movaps	%xmm4, %xmm9
-	PCMPEQ	%xmm0, %xmm4
-	PCMPEQ	%xmm0, %xmm6
-	movaps	%xmm6, %xmm7
-	por	%xmm4, %xmm6
-#ifndef USE_AS_WCSRCHR
-	pxor	%xmm8, %xmm8
-	PCMPEQ	%xmm5, %xmm8
-#endif
-
-	pmovmskb %xmm8, %ecx
-	pmovmskb %xmm6, %eax
-
-	addq	$(VEC_SIZE * 2), %rdi
-	/* Either null term or new occurence of CHAR.  */
-	addl	%ecx, %eax
-	jz	L(second_loop)
-
-	/* No null term so much be new occurence of CHAR.  */
-	testl	%ecx, %ecx
-	jz	L(second_loop_match)
-
-
-	subl	%ecx, %eax
-	jnz	L(second_loop_new_match)
-
-L(second_loop_old_match):
-	pmovmskb %xmm2, %ecx
-	pmovmskb %xmm3, %eax
-	sall	$16, %eax
-	orl	%ecx, %eax
-	bsrl	%eax, %eax
-	addq	%rsi, %rax
-#ifdef USE_AS_WCSRCHR
-	andq	$-CHAR_SIZE, %rax
-#endif
-	ret
-
-	.p2align 4
-L(second_loop_new_match):
-	pxor	%xmm6, %xmm6
-	PCMPEQ	%xmm9, %xmm6
-	pmovmskb %xmm6, %eax
-	sall	$16, %ecx
-	orl	%eax, %ecx
-
-	/* We can't reuse either of the old comparisons as since we mask
-	   of zeros after first zero (instead of using the full
-	   comparison) we can't gurantee no interference between match
-	   after end of string and valid match.  */
-	pmovmskb %xmm4, %eax
-	pmovmskb %xmm7, %edx
-	sall	$16, %edx
-	orl	%edx, %eax
-
-	leal	-1(%ecx), %edx
-	xorl	%edx, %ecx
-	andl	%ecx, %eax
-	jz	L(second_loop_old_match)
-	bsrl	%eax, %eax
-	addq	%rdi, %rax
-#ifdef USE_AS_WCSRCHR
-	andq	$-CHAR_SIZE, %rax
-#endif
-	ret
-
-	.p2align 4,, 4
-L(cross_page):
-	movq	%rdi, %rsi
-	andq	$-VEC_SIZE, %rsi
-	movaps	(%rsi), %xmm1
-	pxor	%xmm2, %xmm2
-	PCMPEQ	%xmm1, %xmm2
-	pmovmskb %xmm2, %edx
-	movl	%edi, %ecx
-	andl	$(VEC_SIZE - 1), %ecx
-	sarl	%cl, %edx
-	jz	L(cross_page_continue)
-	PCMPEQ	%xmm0, %xmm1
-	pmovmskb %xmm1, %eax
-	sarl	%cl, %eax
-	leal	-1(%rdx), %ecx
-	xorl	%edx, %ecx
-	andl	%ecx, %eax
-	jz	L(ret1)
-	bsrl	%eax, %eax
-	addq	%rdi, %rax
-#ifdef USE_AS_WCSRCHR
-	andq	$-CHAR_SIZE, %rax
-#endif
-L(ret1):
-	ret
-END(STRRCHR)
-
-#ifndef USE_AS_WCSRCHR
-	weak_alias (STRRCHR, rindex)
-	libc_hidden_builtin_def (STRRCHR)
-#endif
+#define STRRCHR	strrchr
+#include "multiarch/strrchr-sse2.S"
+weak_alias (strrchr, rindex)
+libc_hidden_builtin_def (strrchr)
diff --git a/sysdeps/x86_64/wcsrchr.S b/sysdeps/x86_64/wcsrchr.S
index 2b80efc5ef..1d4b1eb21c 100644
--- a/sysdeps/x86_64/wcsrchr.S
+++ b/sysdeps/x86_64/wcsrchr.S
@@ -16,12 +16,5 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-
-#define USE_AS_WCSRCHR	1
-#define NO_PMINU	1
-
-#ifndef STRRCHR
-# define STRRCHR	wcsrchr
-#endif
-
-#include "../strrchr.S"
+#define STRRCHR	wcsrchr
+#include "multiarch/wcsrchr-sse2.S"
```