diff options
Diffstat (limited to 'sysdeps/i386/i686/multiarch/strrchr-sse2.S')
-rw-r--r-- | sysdeps/i386/i686/multiarch/strrchr-sse2.S | 708 |
1 files changed, 0 insertions, 708 deletions
diff --git a/sysdeps/i386/i686/multiarch/strrchr-sse2.S b/sysdeps/i386/i686/multiarch/strrchr-sse2.S deleted file mode 100644 index 20934288be..0000000000 --- a/sysdeps/i386/i686/multiarch/strrchr-sse2.S +++ /dev/null @@ -1,708 +0,0 @@ -/* strrchr SSE2 without bsf and bsr - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) - -# include <sysdep.h> - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 8 -# define ENTRANCE PUSH(%edi); -# define RETURN POP(%edi); ret; CFI_PUSH(%edi); - -# define STR1 PARMS -# define STR2 STR1+4 - - atom_text_section -ENTRY (__strrchr_sse2) - - ENTRANCE - mov STR1(%esp), %ecx - movd STR2(%esp), %xmm1 - - pxor %xmm2, %xmm2 - mov %ecx, %edi - punpcklbw %xmm1, %xmm1 - punpcklbw %xmm1, %xmm1 - /* ECX has OFFSET. */ - and $63, %ecx - cmp $48, %ecx - pshufd $0, %xmm1, %xmm1 - ja L(crosscache) - -/* unaligned string. */ - movdqu (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - pcmpeqb %xmm1, %xmm0 - /* Find where NULL is. */ - pmovmskb %xmm2, %ecx - /* Check if there is a match. */ - pmovmskb %xmm0, %eax - add $16, %edi - - test %eax, %eax - jnz L(unaligned_match1) - - test %ecx, %ecx - jnz L(return_null) - - and $-16, %edi - - PUSH (%esi) - PUSH (%ebx) - - xor %ebx, %ebx - jmp L(loop) - - CFI_POP (%esi) - CFI_POP (%ebx) - - .p2align 4 -L(unaligned_match1): - test %ecx, %ecx - jnz L(prolog_find_zero_1) - - PUSH (%esi) - PUSH (%ebx) - - mov %eax, %ebx - mov %edi, %esi - and $-16, %edi - jmp L(loop) - - CFI_POP (%esi) - CFI_POP (%ebx) - - .p2align 4 -L(crosscache): -/* Hancle unaligned string. */ - and $15, %ecx - and $-16, %edi - pxor %xmm3, %xmm3 - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm3 - pcmpeqb %xmm1, %xmm0 - /* Find where NULL is. */ - pmovmskb %xmm3, %edx - /* Check if there is a match. */ - pmovmskb %xmm0, %eax - /* Remove the leading bytes. */ - shr %cl, %edx - shr %cl, %eax - add $16, %edi - - test %eax, %eax - jnz L(unaligned_match) - - test %edx, %edx - jnz L(return_null) - - PUSH (%esi) - PUSH (%ebx) - - xor %ebx, %ebx - jmp L(loop) - - CFI_POP (%esi) - CFI_POP (%ebx) - - .p2align 4 -L(unaligned_match): - test %edx, %edx - jnz L(prolog_find_zero) - - PUSH (%esi) - PUSH (%ebx) - - mov %eax, %ebx - lea (%edi, %ecx), %esi - -/* Loop start on aligned string. */ - .p2align 4 -L(loop): - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jnz L(matches) - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jnz L(matches) - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jnz L(matches) - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jz L(loop) - -L(matches): - test %eax, %eax - jnz L(match) -L(return_value): - test %ebx, %ebx - jz L(return_null_1) - mov %ebx, %eax - mov %esi, %edi - - POP (%ebx) - POP (%esi) - - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(return_null_1): - POP (%ebx) - POP (%esi) - - xor %eax, %eax - RETURN - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(match): - pmovmskb %xmm2, %ecx - test %ecx, %ecx - jnz L(find_zero) - mov %eax, %ebx - mov %edi, %esi - jmp L(loop) - - .p2align 4 -L(find_zero): - test %cl, %cl - jz L(find_zero_high) - mov %cl, %dl - and $15, %dl - jz L(find_zero_8) - test $0x01, %cl - jnz L(FindZeroExit1) - test $0x02, %cl - jnz L(FindZeroExit2) - test $0x04, %cl - jnz L(FindZeroExit3) - and $1 << 4 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(find_zero_8): - test $0x10, %cl - jnz L(FindZeroExit5) - test $0x20, %cl - jnz L(FindZeroExit6) - test $0x40, %cl - jnz L(FindZeroExit7) - and $1 << 8 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(find_zero_high): - mov %ch, %dh - and $15, %dh - jz L(find_zero_high_8) - test $0x01, %ch - jnz L(FindZeroExit9) - test $0x02, %ch - jnz L(FindZeroExit10) - test $0x04, %ch - jnz L(FindZeroExit11) - and $1 << 12 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(find_zero_high_8): - test $0x10, %ch - jnz L(FindZeroExit13) - test $0x20, %ch - jnz L(FindZeroExit14) - test $0x40, %ch - jnz L(FindZeroExit15) - and $1 << 16 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit1): - and $1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit2): - and $1 << 2 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit3): - and $1 << 3 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit5): - and $1 << 5 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit6): - and $1 << 6 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit7): - and $1 << 7 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit9): - and $1 << 9 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit10): - and $1 << 10 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit11): - and $1 << 11 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit13): - and $1 << 13 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit14): - and $1 << 14 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit15): - and $1 << 15 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - - .p2align 4 -L(match_exit): - test %ah, %ah - jnz L(match_exit_high) - mov %al, %dl - and $15 << 4, %dl - jnz L(match_exit_8) - test $0x08, %al - jnz L(Exit4) - test $0x04, %al - jnz L(Exit3) - test $0x02, %al - jnz L(Exit2) - lea -16(%edi), %eax - RETURN - - .p2align 4 -L(match_exit_8): - test $0x80, %al - jnz L(Exit8) - test $0x40, %al - jnz L(Exit7) - test $0x20, %al - jnz L(Exit6) - lea -12(%edi), %eax - RETURN - - .p2align 4 -L(match_exit_high): - mov %ah, %dh - and $15 << 4, %dh - jnz L(match_exit_high_8) - test $0x08, %ah - jnz L(Exit12) - test $0x04, %ah - jnz L(Exit11) - test $0x02, %ah - jnz L(Exit10) - lea -8(%edi), %eax - RETURN - - .p2align 4 -L(match_exit_high_8): - test $0x80, %ah - jnz L(Exit16) - test $0x40, %ah - jnz L(Exit15) - test $0x20, %ah - jnz L(Exit14) - lea -4(%edi), %eax - RETURN - - .p2align 4 -L(Exit2): - lea -15(%edi), %eax - RETURN - - .p2align 4 -L(Exit3): - lea -14(%edi), %eax - RETURN - - .p2align 4 -L(Exit4): - lea -13(%edi), %eax - RETURN - - .p2align 4 -L(Exit6): - lea -11(%edi), %eax - RETURN - - .p2align 4 -L(Exit7): - lea -10(%edi), %eax - RETURN - - .p2align 4 -L(Exit8): - lea -9(%edi), %eax - RETURN - - .p2align 4 -L(Exit10): - lea -7(%edi), %eax - RETURN - - .p2align 4 -L(Exit11): - lea -6(%edi), %eax - RETURN - - .p2align 4 -L(Exit12): - lea -5(%edi), %eax - RETURN - - .p2align 4 -L(Exit14): - lea -3(%edi), %eax - RETURN - - .p2align 4 -L(Exit15): - lea -2(%edi), %eax - RETURN - - .p2align 4 -L(Exit16): - lea -1(%edi), %eax - RETURN - -/* Return NULL. */ - .p2align 4 -L(return_null): - xor %eax, %eax - RETURN - - .p2align 4 -L(prolog_find_zero): - add %ecx, %edi - mov %edx, %ecx -L(prolog_find_zero_1): - test %cl, %cl - jz L(prolog_find_zero_high) - mov %cl, %dl - and $15, %dl - jz L(prolog_find_zero_8) - test $0x01, %cl - jnz L(PrologFindZeroExit1) - test $0x02, %cl - jnz L(PrologFindZeroExit2) - test $0x04, %cl - jnz L(PrologFindZeroExit3) - and $1 << 4 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(prolog_find_zero_8): - test $0x10, %cl - jnz L(PrologFindZeroExit5) - test $0x20, %cl - jnz L(PrologFindZeroExit6) - test $0x40, %cl - jnz L(PrologFindZeroExit7) - and $1 << 8 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(prolog_find_zero_high): - mov %ch, %dh - and $15, %dh - jz L(prolog_find_zero_high_8) - test $0x01, %ch - jnz L(PrologFindZeroExit9) - test $0x02, %ch - jnz L(PrologFindZeroExit10) - test $0x04, %ch - jnz L(PrologFindZeroExit11) - and $1 << 12 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(prolog_find_zero_high_8): - test $0x10, %ch - jnz L(PrologFindZeroExit13) - test $0x20, %ch - jnz L(PrologFindZeroExit14) - test $0x40, %ch - jnz L(PrologFindZeroExit15) - and $1 << 16 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit1): - and $1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit2): - and $1 << 2 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit3): - and $1 << 3 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit5): - and $1 << 5 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit6): - and $1 << 6 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit7): - and $1 << 7 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit9): - and $1 << 9 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit10): - and $1 << 10 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit11): - and $1 << 11 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit13): - and $1 << 13 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit14): - and $1 << 14 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit15): - and $1 << 15 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - -END (__strrchr_sse2) -#endif |