diff options
Diffstat (limited to 'sysdeps/i386/i686/multiarch/strcat-sse2.S')
-rw-r--r-- | sysdeps/i386/i686/multiarch/strcat-sse2.S | 1245 |
1 files changed, 0 insertions, 1245 deletions
diff --git a/sysdeps/i386/i686/multiarch/strcat-sse2.S b/sysdeps/i386/i686/multiarch/strcat-sse2.S deleted file mode 100644 index 6359c7330c..0000000000 --- a/sysdeps/i386/i686/multiarch/strcat-sse2.S +++ /dev/null @@ -1,1245 +0,0 @@ -/* strcat with SSE2 - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - - -#if IS_IN (libc) - -# include <sysdep.h> - - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# ifdef SHARED -# define JMPTBL(I, B) I - B - -/* Load an entry in a jump table into ECX and branch to it. TABLE is a - jump table with relative offsets. INDEX is a register contains the - index into the jump table. SCALE is the scale of INDEX. */ - -# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ - /* We first load PC into ECX. */ \ - SETUP_PIC_REG(cx); \ - /* Get the address of the jump table. */ \ - addl $(TABLE - .), %ecx; \ - /* Get the entry and convert the relative offset to the \ - absolute address. */ \ - addl (%ecx,INDEX,SCALE), %ecx; \ - /* We loaded the jump table and adjusted ECX. Go. */ \ - jmp *%ecx -# else -# define JMPTBL(I, B) I - -/* Branch to an entry in a jump table. TABLE is a jump table with - absolute offsets. INDEX is a register contains the index into the - jump table. SCALE is the scale of INDEX. */ - -# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ - jmp *TABLE(,INDEX,SCALE) -# endif - -# ifndef STRCAT -# define STRCAT __strcat_sse2 -# endif - -# define PARMS 4 -# define STR1 PARMS+4 -# define STR2 STR1+4 - -# ifdef USE_AS_STRNCAT -# define LEN STR2+8 -# define STR3 STR1+4 -# else -# define STR3 STR1 -# endif - -# define USE_AS_STRCAT -# ifdef USE_AS_STRNCAT -# define RETURN POP(%ebx); POP(%esi); ret; CFI_PUSH(%ebx); CFI_PUSH(%esi); -# else -# define RETURN POP(%esi); ret; CFI_PUSH(%esi); -# endif - -.text -ENTRY (STRCAT) - PUSH (%esi) - mov STR1(%esp), %eax - mov STR2(%esp), %esi -# ifdef USE_AS_STRNCAT - PUSH (%ebx) - movl LEN(%esp), %ebx - test %ebx, %ebx - jz L(ExitZero) -# endif - cmpb $0, (%esi) - mov %esi, %ecx - mov %eax, %edx - jz L(ExitZero) - - and $63, %ecx - and $63, %edx - cmp $32, %ecx - ja L(StrlenCore7_1) - cmp $48, %edx - ja L(alignment_prolog) - - pxor %xmm0, %xmm0 - pxor %xmm4, %xmm4 - pxor %xmm7, %xmm7 - movdqu (%eax), %xmm1 - movdqu (%esi), %xmm5 - pcmpeqb %xmm1, %xmm0 - movdqu 16(%esi), %xmm6 - pmovmskb %xmm0, %ecx - pcmpeqb %xmm5, %xmm4 - pcmpeqb %xmm6, %xmm7 - test %ecx, %ecx - jnz L(exit_less16_) - mov %eax, %ecx - and $-16, %eax - jmp L(loop_prolog) - -L(alignment_prolog): - pxor %xmm0, %xmm0 - pxor %xmm4, %xmm4 - mov %edx, %ecx - pxor %xmm7, %xmm7 - and $15, %ecx - and $-16, %eax - pcmpeqb (%eax), %xmm0 - movdqu (%esi), %xmm5 - movdqu 16(%esi), %xmm6 - pmovmskb %xmm0, %edx - pcmpeqb %xmm5, %xmm4 - shr %cl, %edx - pcmpeqb %xmm6, %xmm7 - test %edx, %edx - jnz L(exit_less16) - add %eax, %ecx - - pxor %xmm0, %xmm0 -L(loop_prolog): - pxor %xmm1, %xmm1 - pxor %xmm2, %xmm2 - pxor %xmm3, %xmm3 - .p2align 4 -L(align16_loop): - pcmpeqb 16(%eax), %xmm0 - pmovmskb %xmm0, %edx - test %edx, %edx - jnz L(exit16) - - pcmpeqb 32(%eax), %xmm1 - pmovmskb %xmm1, %edx - test %edx, %edx - jnz L(exit32) - - pcmpeqb 48(%eax), %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - jnz L(exit48) - - pcmpeqb 64(%eax), %xmm3 - pmovmskb %xmm3, %edx - lea 64(%eax), %eax - test %edx, %edx - jz L(align16_loop) - bsf %edx, %edx - add %edx, %eax - jmp L(StartStrcpyPart) - - .p2align 4 -L(exit16): - bsf %edx, %edx - lea 16(%eax, %edx), %eax - jmp L(StartStrcpyPart) - - .p2align 4 -L(exit32): - bsf %edx, %edx - lea 32(%eax, %edx), %eax - jmp L(StartStrcpyPart) - - .p2align 4 -L(exit48): - bsf %edx, %edx - lea 48(%eax, %edx), %eax - jmp L(StartStrcpyPart) - - .p2align 4 -L(exit_less16): - bsf %edx, %edx - add %ecx, %eax - add %edx, %eax - jmp L(StartStrcpyPart) - - .p2align 4 -L(exit_less16_): - bsf %ecx, %ecx - add %ecx, %eax - - .p2align 4 -L(StartStrcpyPart): - pmovmskb %xmm4, %edx -# ifdef USE_AS_STRNCAT - cmp $16, %ebx - jbe L(CopyFrom1To16BytesTail1Case2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To16BytesTail1) - - movdqu %xmm5, (%eax) - pmovmskb %xmm7, %edx -# ifdef USE_AS_STRNCAT - cmp $32, %ebx - jbe L(CopyFrom1To32Bytes1Case2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To32Bytes1) - - mov %esi, %ecx - and $-16, %esi - and $15, %ecx - pxor %xmm0, %xmm0 -# ifdef USE_AS_STRNCAT - add %ecx, %ebx - sbb %edx, %edx - or %edx, %ebx -# endif - sub %ecx, %eax - jmp L(Unalign16Both) - -L(StrlenCore7_1): - mov %eax, %ecx - pxor %xmm0, %xmm0 - and $15, %ecx - and $-16, %eax - pcmpeqb (%eax), %xmm0 - pmovmskb %xmm0, %edx - shr %cl, %edx - test %edx, %edx - jnz L(exit_less16_1) - add %eax, %ecx - - pxor %xmm0, %xmm0 - pxor %xmm1, %xmm1 - pxor %xmm2, %xmm2 - pxor %xmm3, %xmm3 - - .p2align 4 -L(align16_loop_1): - pcmpeqb 16(%eax), %xmm0 - pmovmskb %xmm0, %edx - test %edx, %edx - jnz L(exit16_1) - - pcmpeqb 32(%eax), %xmm1 - pmovmskb %xmm1, %edx - test %edx, %edx - jnz L(exit32_1) - - pcmpeqb 48(%eax), %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - jnz L(exit48_1) - - pcmpeqb 64(%eax), %xmm3 - pmovmskb %xmm3, %edx - lea 64(%eax), %eax - test %edx, %edx - jz L(align16_loop_1) - bsf %edx, %edx - add %edx, %eax - jmp L(StartStrcpyPart_1) - - .p2align 4 -L(exit16_1): - bsf %edx, %edx - lea 16(%eax, %edx), %eax - jmp L(StartStrcpyPart_1) - - .p2align 4 -L(exit32_1): - bsf %edx, %edx - lea 32(%eax, %edx), %eax - jmp L(StartStrcpyPart_1) - - .p2align 4 -L(exit48_1): - bsf %edx, %edx - lea 48(%eax, %edx), %eax - jmp L(StartStrcpyPart_1) - - .p2align 4 -L(exit_less16_1): - bsf %edx, %edx - add %ecx, %eax - add %edx, %eax - - .p2align 4 -L(StartStrcpyPart_1): - mov %esi, %ecx - and $15, %ecx - and $-16, %esi - pxor %xmm0, %xmm0 - pxor %xmm1, %xmm1 - -# ifdef USE_AS_STRNCAT - cmp $48, %ebx - ja L(BigN) -# endif - pcmpeqb (%esi), %xmm1 -# ifdef USE_AS_STRNCAT - add %ecx, %ebx -# endif - pmovmskb %xmm1, %edx - shr %cl, %edx -# ifdef USE_AS_STRNCAT - cmp $16, %ebx - jbe L(CopyFrom1To16BytesTailCase2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To16BytesTail) - - pcmpeqb 16(%esi), %xmm0 - pmovmskb %xmm0, %edx -# ifdef USE_AS_STRNCAT - cmp $32, %ebx - jbe L(CopyFrom1To32BytesCase2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To32Bytes) - - movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */ - movdqu %xmm1, (%eax) - sub %ecx, %eax - - .p2align 4 -L(Unalign16Both): - mov $16, %ecx - movdqa (%esi, %ecx), %xmm1 - movaps 16(%esi, %ecx), %xmm2 - movdqu %xmm1, (%eax, %ecx) - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx -# ifdef USE_AS_STRNCAT - sub $48, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To16Bytes) -L(Unalign16BothBigN): - movaps 16(%esi, %ecx), %xmm3 - movdqu %xmm2, (%eax, %ecx) - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx -# ifdef USE_AS_STRNCAT - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To16Bytes) - - movaps 16(%esi, %ecx), %xmm4 - movdqu %xmm3, (%eax, %ecx) - pcmpeqb %xmm4, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx -# ifdef USE_AS_STRNCAT - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To16Bytes) - - movaps 16(%esi, %ecx), %xmm1 - movdqu %xmm4, (%eax, %ecx) - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx -# ifdef USE_AS_STRNCAT - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To16Bytes) - - movaps 16(%esi, %ecx), %xmm2 - movdqu %xmm1, (%eax, %ecx) - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx -# ifdef USE_AS_STRNCAT - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To16Bytes) - - movaps 16(%esi, %ecx), %xmm3 - movdqu %xmm2, (%eax, %ecx) - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx -# ifdef USE_AS_STRNCAT - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To16Bytes) - - movdqu %xmm3, (%eax, %ecx) - mov %esi, %edx - lea 16(%esi, %ecx), %esi - and $-0x40, %esi - sub %esi, %edx - sub %edx, %eax -# ifdef USE_AS_STRNCAT - lea 128(%ebx, %edx), %ebx -# endif - movaps (%esi), %xmm2 - movaps %xmm2, %xmm4 - movaps 16(%esi), %xmm5 - movaps 32(%esi), %xmm3 - movaps %xmm3, %xmm6 - movaps 48(%esi), %xmm7 - pminub %xmm5, %xmm2 - pminub %xmm7, %xmm3 - pminub %xmm2, %xmm3 - pcmpeqb %xmm0, %xmm3 - pmovmskb %xmm3, %edx -# ifdef USE_AS_STRNCAT - sub $64, %ebx - jbe L(UnalignedLeaveCase2OrCase3) -# endif - test %edx, %edx - jnz L(Unaligned64Leave) - - .p2align 4 -L(Unaligned64Loop_start): - add $64, %eax - add $64, %esi - movdqu %xmm4, -64(%eax) - movaps (%esi), %xmm2 - movdqa %xmm2, %xmm4 - movdqu %xmm5, -48(%eax) - movaps 16(%esi), %xmm5 - pminub %xmm5, %xmm2 - movaps 32(%esi), %xmm3 - movdqu %xmm6, -32(%eax) - movaps %xmm3, %xmm6 - movdqu %xmm7, -16(%eax) - movaps 48(%esi), %xmm7 - pminub %xmm7, %xmm3 - pminub %xmm2, %xmm3 - pcmpeqb %xmm0, %xmm3 - pmovmskb %xmm3, %edx -# ifdef USE_AS_STRNCAT - sub $64, %ebx - jbe L(UnalignedLeaveCase2OrCase3) -# endif - test %edx, %edx - jz L(Unaligned64Loop_start) - -L(Unaligned64Leave): - pxor %xmm1, %xmm1 - - pcmpeqb %xmm4, %xmm0 - pcmpeqb %xmm5, %xmm1 - pmovmskb %xmm0, %edx - pmovmskb %xmm1, %ecx - test %edx, %edx - jnz L(CopyFrom1To16BytesUnaligned_0) - test %ecx, %ecx - jnz L(CopyFrom1To16BytesUnaligned_16) - - pcmpeqb %xmm6, %xmm0 - pcmpeqb %xmm7, %xmm1 - pmovmskb %xmm0, %edx - pmovmskb %xmm1, %ecx - test %edx, %edx - jnz L(CopyFrom1To16BytesUnaligned_32) - - bsf %ecx, %edx - movdqu %xmm4, (%eax) - movdqu %xmm5, 16(%eax) - movdqu %xmm6, 32(%eax) - add $48, %esi - add $48, %eax - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - -# ifdef USE_AS_STRNCAT - .p2align 4 -L(BigN): - pcmpeqb (%esi), %xmm1 - pmovmskb %xmm1, %edx - shr %cl, %edx - test %edx, %edx - jnz L(CopyFrom1To16BytesTail) - - pcmpeqb 16(%esi), %xmm0 - pmovmskb %xmm0, %edx - test %edx, %edx - jnz L(CopyFrom1To32Bytes) - - movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */ - movdqu %xmm1, (%eax) - sub %ecx, %eax - sub $48, %ebx - add %ecx, %ebx - - mov $16, %ecx - movdqa (%esi, %ecx), %xmm1 - movaps 16(%esi, %ecx), %xmm2 - movdqu %xmm1, (%eax, %ecx) - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx - test %edx, %edx - jnz L(CopyFrom1To16Bytes) - jmp L(Unalign16BothBigN) -# endif - -/*------------end of main part-------------------------------*/ - -/* Case1 */ - .p2align 4 -L(CopyFrom1To16Bytes): - add %ecx, %eax - add %ecx, %esi - bsf %edx, %edx - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - - .p2align 4 -L(CopyFrom1To16BytesTail): - add %ecx, %esi - bsf %edx, %edx - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - - .p2align 4 -L(CopyFrom1To32Bytes1): - add $16, %esi - add $16, %eax -L(CopyFrom1To16BytesTail1): - bsf %edx, %edx - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - - .p2align 4 -L(CopyFrom1To32Bytes): - bsf %edx, %edx - add %ecx, %esi - add $16, %edx - sub %ecx, %edx - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - - .p2align 4 -L(CopyFrom1To16BytesUnaligned_0): - bsf %edx, %edx - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - - .p2align 4 -L(CopyFrom1To16BytesUnaligned_16): - bsf %ecx, %edx - movdqu %xmm4, (%eax) - add $16, %esi - add $16, %eax - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - - .p2align 4 -L(CopyFrom1To16BytesUnaligned_32): - bsf %edx, %edx - movdqu %xmm4, (%eax) - movdqu %xmm5, 16(%eax) - add $32, %esi - add $32, %eax - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - -# ifdef USE_AS_STRNCAT - - .p2align 4 -L(CopyFrom1To16BytesExit): - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - -/* Case2 */ - - .p2align 4 -L(CopyFrom1To16BytesCase2): - add $16, %ebx - add %ecx, %eax - add %ecx, %esi - bsf %edx, %edx - cmp %ebx, %edx - jb L(CopyFrom1To16BytesExit) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) - - .p2align 4 -L(CopyFrom1To32BytesCase2): - sub %ecx, %ebx - add %ecx, %esi - bsf %edx, %edx - add $16, %edx - sub %ecx, %edx - cmp %ebx, %edx - jb L(CopyFrom1To16BytesExit) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) - -L(CopyFrom1To16BytesTailCase2): - sub %ecx, %ebx - add %ecx, %esi - bsf %edx, %edx - cmp %ebx, %edx - jb L(CopyFrom1To16BytesExit) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) - -L(CopyFrom1To16BytesTail1Case2): - bsf %edx, %edx - cmp %ebx, %edx - jb L(CopyFrom1To16BytesExit) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) - -/* Case2 or Case3, Case3 */ - - .p2align 4 -L(CopyFrom1To16BytesCase2OrCase3): - test %edx, %edx - jnz L(CopyFrom1To16BytesCase2) -L(CopyFrom1To16BytesCase3): - add $16, %ebx - add %ecx, %eax - add %ecx, %esi - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) - - .p2align 4 -L(CopyFrom1To32BytesCase2OrCase3): - test %edx, %edx - jnz L(CopyFrom1To32BytesCase2) - sub %ecx, %ebx - add %ecx, %esi - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) - - .p2align 4 -L(CopyFrom1To16BytesTailCase2OrCase3): - test %edx, %edx - jnz L(CopyFrom1To16BytesTailCase2) - sub %ecx, %ebx - add %ecx, %esi - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) - - .p2align 4 -L(CopyFrom1To32Bytes1Case2OrCase3): - add $16, %eax - add $16, %esi - sub $16, %ebx -L(CopyFrom1To16BytesTail1Case2OrCase3): - test %edx, %edx - jnz L(CopyFrom1To16BytesTail1Case2) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) - -# endif - -# ifdef USE_AS_STRNCAT - .p2align 4 -L(StrncatExit0): - movb %bh, (%eax) - mov STR3(%esp), %eax - RETURN -# endif - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit1): - movb %bh, 1(%eax) -# endif -L(Exit1): -# ifdef USE_AS_STRNCAT - movb (%esi), %dh -# endif - movb %dh, (%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit2): - movb %bh, 2(%eax) -# endif -L(Exit2): - movw (%esi), %dx - movw %dx, (%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit3): - movb %bh, 3(%eax) -# endif -L(Exit3): - movw (%esi), %cx - movw %cx, (%eax) -# ifdef USE_AS_STRNCAT - movb 2(%esi), %dh -# endif - movb %dh, 2(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit4): - movb %bh, 4(%eax) -# endif -L(Exit4): - movl (%esi), %edx - movl %edx, (%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit5): - movb %bh, 5(%eax) -# endif -L(Exit5): - movl (%esi), %ecx -# ifdef USE_AS_STRNCAT - movb 4(%esi), %dh -# endif - movb %dh, 4(%eax) - movl %ecx, (%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit6): - movb %bh, 6(%eax) -# endif -L(Exit6): - movl (%esi), %ecx - movw 4(%esi), %dx - movl %ecx, (%eax) - movw %dx, 4(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit7): - movb %bh, 7(%eax) -# endif -L(Exit7): - movl (%esi), %ecx - movl 3(%esi), %edx - movl %ecx, (%eax) - movl %edx, 3(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit8): - movb %bh, 8(%eax) -# endif -L(Exit8): - movlpd (%esi), %xmm0 - movlpd %xmm0, (%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit9): - movb %bh, 9(%eax) -# endif -L(Exit9): - movlpd (%esi), %xmm0 -# ifdef USE_AS_STRNCAT - movb 8(%esi), %dh -# endif - movb %dh, 8(%eax) - movlpd %xmm0, (%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit10): - movb %bh, 10(%eax) -# endif -L(Exit10): - movlpd (%esi), %xmm0 - movw 8(%esi), %dx - movlpd %xmm0, (%eax) - movw %dx, 8(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit11): - movb %bh, 11(%eax) -# endif -L(Exit11): - movlpd (%esi), %xmm0 - movl 7(%esi), %edx - movlpd %xmm0, (%eax) - movl %edx, 7(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit12): - movb %bh, 12(%eax) -# endif -L(Exit12): - movlpd (%esi), %xmm0 - movl 8(%esi), %edx - movlpd %xmm0, (%eax) - movl %edx, 8(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit13): - movb %bh, 13(%eax) -# endif -L(Exit13): - movlpd (%esi), %xmm0 - movlpd 5(%esi), %xmm1 - movlpd %xmm0, (%eax) - movlpd %xmm1, 5(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit14): - movb %bh, 14(%eax) -# endif -L(Exit14): - movlpd (%esi), %xmm0 - movlpd 6(%esi), %xmm1 - movlpd %xmm0, (%eax) - movlpd %xmm1, 6(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit15): - movb %bh, 15(%eax) -# endif -L(Exit15): - movlpd (%esi), %xmm0 - movlpd 7(%esi), %xmm1 - movlpd %xmm0, (%eax) - movlpd %xmm1, 7(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit16): - movb %bh, 16(%eax) -# endif -L(Exit16): - movdqu (%esi), %xmm0 - movdqu %xmm0, (%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit17): - movb %bh, 17(%eax) -# endif -L(Exit17): - movdqu (%esi), %xmm0 -# ifdef USE_AS_STRNCAT - movb 16(%esi), %dh -# endif - movdqu %xmm0, (%eax) - movb %dh, 16(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit18): - movb %bh, 18(%eax) -# endif -L(Exit18): - movdqu (%esi), %xmm0 - movw 16(%esi), %cx - movdqu %xmm0, (%eax) - movw %cx, 16(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit19): - movb %bh, 19(%eax) -# endif -L(Exit19): - movdqu (%esi), %xmm0 - movl 15(%esi), %ecx - movdqu %xmm0, (%eax) - movl %ecx, 15(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit20): - movb %bh, 20(%eax) -# endif -L(Exit20): - movdqu (%esi), %xmm0 - movl 16(%esi), %ecx - movdqu %xmm0, (%eax) - movl %ecx, 16(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit21): - movb %bh, 21(%eax) -# endif -L(Exit21): - movdqu (%esi), %xmm0 - movl 16(%esi), %ecx -# ifdef USE_AS_STRNCAT - movb 20(%esi), %dh -# endif - movdqu %xmm0, (%eax) - movl %ecx, 16(%eax) - movb %dh, 20(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit22): - movb %bh, 22(%eax) -# endif -L(Exit22): - movdqu (%esi), %xmm0 - movlpd 14(%esi), %xmm3 - movdqu %xmm0, (%eax) - movlpd %xmm3, 14(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit23): - movb %bh, 23(%eax) -# endif -L(Exit23): - movdqu (%esi), %xmm0 - movlpd 15(%esi), %xmm3 - movdqu %xmm0, (%eax) - movlpd %xmm3, 15(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit24): - movb %bh, 24(%eax) -# endif -L(Exit24): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movdqu %xmm0, (%eax) - movlpd %xmm2, 16(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit25): - movb %bh, 25(%eax) -# endif -L(Exit25): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 -# ifdef USE_AS_STRNCAT - movb 24(%esi), %dh -# endif - movdqu %xmm0, (%eax) - movlpd %xmm2, 16(%eax) - movb %dh, 24(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit26): - movb %bh, 26(%eax) -# endif -L(Exit26): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movw 24(%esi), %cx - movdqu %xmm0, (%eax) - movlpd %xmm2, 16(%eax) - movw %cx, 24(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit27): - movb %bh, 27(%eax) -# endif -L(Exit27): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movl 23(%esi), %ecx - movdqu %xmm0, (%eax) - movlpd %xmm2, 16(%eax) - movl %ecx, 23(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit28): - movb %bh, 28(%eax) -# endif -L(Exit28): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movl 24(%esi), %ecx - movdqu %xmm0, (%eax) - movlpd %xmm2, 16(%eax) - movl %ecx, 24(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit29): - movb %bh, 29(%eax) -# endif -L(Exit29): - movdqu (%esi), %xmm0 - movdqu 13(%esi), %xmm2 - movdqu %xmm0, (%eax) - movdqu %xmm2, 13(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit30): - movb %bh, 30(%eax) -# endif -L(Exit30): - movdqu (%esi), %xmm0 - movdqu 14(%esi), %xmm2 - movdqu %xmm0, (%eax) - movdqu %xmm2, 14(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit31): - movb %bh, 31(%eax) -# endif -L(Exit31): - movdqu (%esi), %xmm0 - movdqu 15(%esi), %xmm2 - movdqu %xmm0, (%eax) - movdqu %xmm2, 15(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit32): - movb %bh, 32(%eax) -# endif -L(Exit32): - movdqu (%esi), %xmm0 - movdqu 16(%esi), %xmm2 - movdqu %xmm0, (%eax) - movdqu %xmm2, 16(%eax) - mov STR3(%esp), %eax - RETURN - -# ifdef USE_AS_STRNCAT - - .p2align 4 -L(UnalignedLeaveCase2OrCase3): - test %edx, %edx - jnz L(Unaligned64LeaveCase2) -L(Unaligned64LeaveCase3): - lea 64(%ebx), %ecx - and $-16, %ecx - add $48, %ebx - jl L(CopyFrom1To16BytesCase3) - movdqu %xmm4, (%eax) - sub $16, %ebx - jb L(CopyFrom1To16BytesCase3) - movdqu %xmm5, 16(%eax) - sub $16, %ebx - jb L(CopyFrom1To16BytesCase3) - movdqu %xmm6, 32(%eax) - sub $16, %ebx - jb L(CopyFrom1To16BytesCase3) - movdqu %xmm7, 48(%eax) - xor %bh, %bh - movb %bh, 64(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -L(Unaligned64LeaveCase2): - xor %ecx, %ecx - pcmpeqb %xmm4, %xmm0 - pmovmskb %xmm0, %edx - add $48, %ebx - jle L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16Bytes) - - pcmpeqb %xmm5, %xmm0 - pmovmskb %xmm0, %edx - movdqu %xmm4, (%eax) - add $16, %ecx - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16Bytes) - - pcmpeqb %xmm6, %xmm0 - pmovmskb %xmm0, %edx - movdqu %xmm5, 16(%eax) - add $16, %ecx - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16Bytes) - - pcmpeqb %xmm7, %xmm0 - pmovmskb %xmm0, %edx - movdqu %xmm6, 32(%eax) - lea 16(%eax, %ecx), %eax - lea 16(%esi, %ecx), %esi - bsf %edx, %edx - cmp %ebx, %edx - jb L(CopyFrom1To16BytesExit) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) -# endif - .p2align 4 -L(ExitZero): - RETURN - -END (STRCAT) - - .p2align 4 - .section .rodata -L(ExitTable): - .int JMPTBL(L(Exit1), L(ExitTable)) - .int JMPTBL(L(Exit2), L(ExitTable)) - .int JMPTBL(L(Exit3), L(ExitTable)) - .int JMPTBL(L(Exit4), L(ExitTable)) - .int JMPTBL(L(Exit5), L(ExitTable)) - .int JMPTBL(L(Exit6), L(ExitTable)) - .int JMPTBL(L(Exit7), L(ExitTable)) - .int JMPTBL(L(Exit8), L(ExitTable)) - .int JMPTBL(L(Exit9), L(ExitTable)) - .int JMPTBL(L(Exit10), L(ExitTable)) - .int JMPTBL(L(Exit11), L(ExitTable)) - .int JMPTBL(L(Exit12), L(ExitTable)) - .int JMPTBL(L(Exit13), L(ExitTable)) - .int JMPTBL(L(Exit14), L(ExitTable)) - .int JMPTBL(L(Exit15), L(ExitTable)) - .int JMPTBL(L(Exit16), L(ExitTable)) - .int JMPTBL(L(Exit17), L(ExitTable)) - .int JMPTBL(L(Exit18), L(ExitTable)) - .int JMPTBL(L(Exit19), L(ExitTable)) - .int JMPTBL(L(Exit20), L(ExitTable)) - .int JMPTBL(L(Exit21), L(ExitTable)) - .int JMPTBL(L(Exit22), L(ExitTable)) - .int JMPTBL(L(Exit23), L(ExitTable)) - .int JMPTBL(L(Exit24), L(ExitTable)) - .int JMPTBL(L(Exit25), L(ExitTable)) - .int JMPTBL(L(Exit26), L(ExitTable)) - .int JMPTBL(L(Exit27), L(ExitTable)) - .int JMPTBL(L(Exit28), L(ExitTable)) - .int JMPTBL(L(Exit29), L(ExitTable)) - .int JMPTBL(L(Exit30), L(ExitTable)) - .int JMPTBL(L(Exit31), L(ExitTable)) - .int JMPTBL(L(Exit32), L(ExitTable)) -# ifdef USE_AS_STRNCAT -L(ExitStrncatTable): - .int JMPTBL(L(StrncatExit0), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit1), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit2), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit3), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit4), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit5), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit6), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit7), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit8), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit9), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit10), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit11), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit12), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit13), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit14), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit15), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit16), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit17), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit18), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit19), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit20), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit21), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit22), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit23), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit24), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit25), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit26), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit27), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit28), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit29), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit30), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit31), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit32), L(ExitStrncatTable)) -# endif -#endif |