diff options
Diffstat (limited to 'sysdeps/i386/i686/multiarch/strcpy-sse2.S')
-rw-r--r-- | sysdeps/i386/i686/multiarch/strcpy-sse2.S | 2250 |
1 files changed, 0 insertions, 2250 deletions
diff --git a/sysdeps/i386/i686/multiarch/strcpy-sse2.S b/sysdeps/i386/i686/multiarch/strcpy-sse2.S deleted file mode 100644 index ed627a5f62..0000000000 --- a/sysdeps/i386/i686/multiarch/strcpy-sse2.S +++ /dev/null @@ -1,2250 +0,0 @@ -/* strcpy with SSE2 and unaligned load - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - - -#if IS_IN (libc) - -# include <sysdep.h> - - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# ifndef STRCPY -# define STRCPY __strcpy_sse2 -# endif - -# define STR1 PARMS -# define STR2 STR1+4 -# define LEN STR2+4 - -# ifdef USE_AS_STRNCPY -# define PARMS 16 /* 3 registers pushed by ENTRANCE (12 bytes) + 4-byte return address */ -# define ENTRANCE PUSH(%ebx); PUSH(%esi); PUSH(%edi) -# define RETURN POP(%edi); POP(%esi); POP(%ebx); ret; \ - CFI_PUSH(%ebx); CFI_PUSH(%esi); CFI_PUSH(%edi); - -# ifdef SHARED -# define JMPTBL(I, B) I - B - -/* Load an entry in a jump table into ECX and branch to it. TABLE is a - jump table with relative offsets. - INDEX is a register that contains the index into the jump table. - SCALE is the scale of INDEX. 
*/ - -# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ - /* We first load PC into ECX. */ \ - SETUP_PIC_REG(cx); \ - /* Get the address of the jump table. */ \ - addl $(TABLE - .), %ecx; \ - /* Get the entry and convert the relative offset to the \ - absolute address. */ \ - addl (%ecx,INDEX,SCALE), %ecx; \ - /* We loaded the jump table and adjusted ECX. Go. */ \ - jmp *%ecx -# else -# define JMPTBL(I, B) I - -/* Branch to an entry in a jump table. TABLE is a jump table with - absolute offsets. INDEX is a register that contains the index into the - jump table. SCALE is the scale of INDEX. */ - -# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ - jmp *TABLE(,INDEX,SCALE) -# endif - -.text -ENTRY (STRCPY) - ENTRANCE /* loaded below: %edi = STR1 (stores go here), %esi = STR2 (loads come from here), %ebx = LEN (byte limit) */ - mov STR1(%esp), %edi - mov STR2(%esp), %esi - movl LEN(%esp), %ebx - test %ebx, %ebx - jz L(ExitZero) /* LEN == 0: nothing to copy */ - - mov %esi, %ecx -# ifndef USE_AS_STPCPY - mov %edi, %eax /* save result */ -# endif - and $15, %ecx /* %ecx = offset of the source within its 16-byte block */ - jz L(SourceStringAlignmentZero) - - and $-16, %esi - pxor %xmm0, %xmm0 - pxor %xmm1, %xmm1 - - pcmpeqb (%esi), %xmm1 - add %ecx, %ebx - pmovmskb %xmm1, %edx - shr %cl, %edx /* drop mask bits for bytes that precede the real start of the source */ -# ifdef USE_AS_STPCPY - cmp $16, %ebx - jbe L(CopyFrom1To16BytesTailCase2OrCase3) -# else - cmp $17, %ebx - jbe L(CopyFrom1To16BytesTailCase2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To16BytesTail) - - pcmpeqb 16(%esi), %xmm0 - pmovmskb %xmm0, %edx -# ifdef USE_AS_STPCPY - cmp $32, %ebx - jbe L(CopyFrom1To32BytesCase2OrCase3) -# else - cmp $33, %ebx - jbe L(CopyFrom1To32BytesCase2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To32Bytes) - - movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */ - movdqu %xmm1, (%edi) - - sub %ecx, %edi - -/* If source address alignment != destination address alignment */ - .p2align 4 -L(Unalign16Both): /* aligned 16-byte loads from %esi, unaligned stores to %edi; on every fall-through %edx was 0, so %xmm0 is still all-zero for the next pcmpeqb */ - mov $16, %ecx - movdqa (%esi, %ecx), %xmm1 - movaps 16(%esi, %ecx), %xmm2 - movdqu %xmm1, (%edi, %ecx) - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx - sub $48, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test 
%edx, %edx - jnz L(CopyFrom1To16BytesUnalignedXmm2) - - movaps 16(%esi, %ecx), %xmm3 - movdqu %xmm2, (%edi, %ecx) - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16BytesUnalignedXmm3) - - movaps 16(%esi, %ecx), %xmm4 - movdqu %xmm3, (%edi, %ecx) - pcmpeqb %xmm4, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16BytesUnalignedXmm4) - - movaps 16(%esi, %ecx), %xmm1 - movdqu %xmm4, (%edi, %ecx) - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16BytesUnalignedXmm1) - - movaps 16(%esi, %ecx), %xmm2 - movdqu %xmm1, (%edi, %ecx) - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16BytesUnalignedXmm2) - - movaps 16(%esi, %ecx), %xmm3 - movdqu %xmm2, (%edi, %ecx) - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16BytesUnalignedXmm3) - - movdqu %xmm3, (%edi, %ecx) - mov %esi, %edx - lea 16(%esi, %ecx), %esi - and $-0x40, %esi /* round the source pointer down to a 64-byte boundary for the loop; %edi and %ebx are rebased by the same delta in the next three instructions */ - sub %esi, %edx - sub %edx, %edi - lea 128(%ebx, %edx), %ebx - -L(Unaligned64Loop): /* 64 bytes per pass: pminub folds the four chunks so a single pcmpeqb/pmovmskb detects a zero byte in any of them */ - movaps (%esi), %xmm2 - movaps %xmm2, %xmm4 - movaps 16(%esi), %xmm5 - movaps 32(%esi), %xmm3 - movaps %xmm3, %xmm6 - movaps 48(%esi), %xmm7 - pminub %xmm5, %xmm2 - pminub %xmm7, %xmm3 - pminub %xmm2, %xmm3 - pcmpeqb %xmm0, %xmm3 - pmovmskb %xmm3, %edx - sub $64, %ebx - jbe L(UnalignedLeaveCase2OrCase3) - test %edx, %edx - jnz L(Unaligned64Leave) -L(Unaligned64Loop_start): - add $64, %edi - add $64, %esi - movdqu %xmm4, -64(%edi) - movaps (%esi), %xmm2 - movdqa %xmm2, %xmm4 - movdqu %xmm5, -48(%edi) - movaps 16(%esi), %xmm5 - pminub %xmm5, %xmm2 - movaps 32(%esi), 
%xmm3 - movdqu %xmm6, -32(%edi) - movaps %xmm3, %xmm6 - movdqu %xmm7, -16(%edi) - movaps 48(%esi), %xmm7 - pminub %xmm7, %xmm3 - pminub %xmm2, %xmm3 - pcmpeqb %xmm0, %xmm3 - pmovmskb %xmm3, %edx - sub $64, %ebx - jbe L(UnalignedLeaveCase2OrCase3) - test %edx, %edx - jz L(Unaligned64Loop_start) -L(Unaligned64Leave): /* one of %xmm4..%xmm7 holds a zero byte; test the chunks in order to find which */ - pxor %xmm1, %xmm1 - - pcmpeqb %xmm4, %xmm0 - pcmpeqb %xmm5, %xmm1 - pmovmskb %xmm0, %edx - pmovmskb %xmm1, %ecx - test %edx, %edx - jnz L(CopyFrom1To16BytesUnaligned_0) - test %ecx, %ecx - jnz L(CopyFrom1To16BytesUnaligned_16) - - pcmpeqb %xmm6, %xmm0 - pcmpeqb %xmm7, %xmm1 - pmovmskb %xmm0, %edx - pmovmskb %xmm1, %ecx - test %edx, %edx - jnz L(CopyFrom1To16BytesUnaligned_32) - - bsf %ecx, %edx - movdqu %xmm4, (%edi) - movdqu %xmm5, 16(%edi) - movdqu %xmm6, 32(%edi) -# ifdef USE_AS_STPCPY - lea 48(%edi, %edx), %eax -# endif - movdqu %xmm7, 48(%edi) - add $15, %ebx - sub %edx, %ebx - lea 49(%edi, %edx), %edi - jmp L(StrncpyFillTailWithZero) - -/* If source address alignment == destination address alignment */ - -L(SourceStringAlignmentZero): /* NOTE(review): reached when %esi is 16-byte aligned (%ecx & 15 == 0 at the jz above); the destination alignment is not tested on this path */ - pxor %xmm0, %xmm0 - movdqa (%esi), %xmm1 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %edx -# ifdef USE_AS_STPCPY - cmp $16, %ebx - jbe L(CopyFrom1To16BytesTail1Case2OrCase3) -# else - cmp $17, %ebx - jbe L(CopyFrom1To16BytesTail1Case2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To16BytesTail1) - - pcmpeqb 16(%esi), %xmm0 - movdqu %xmm1, (%edi) - pmovmskb %xmm0, %edx -# ifdef USE_AS_STPCPY - cmp $32, %ebx - jbe L(CopyFrom1To32Bytes1Case2OrCase3) -# else - cmp $33, %ebx - jbe L(CopyFrom1To32Bytes1Case2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To32Bytes1) - - jmp L(Unalign16Both) - -/*-----------------End of main part---------------------------*/ - -/* Case1 */ - .p2align 4 -L(CopyFrom1To16BytesTail): /* undo the alignment bias applied to %ebx/%esi at entry, then dispatch on the index of the first zero byte */ - sub %ecx, %ebx - add %ecx, %esi - bsf %edx, %edx - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - - .p2align 4 -L(CopyFrom1To32Bytes1): - add $16, %esi - add $16, %edi - sub $16, %ebx 
-L(CopyFrom1To16BytesTail1): /* %edx = zero-byte mask; bsf turns it into the index of the first zero byte, which selects the L(ExitTable) handler */ - bsf %edx, %edx - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - - .p2align 4 -L(CopyFrom1To32Bytes): /* mask in %edx describes the second 16-byte block: undo the alignment bias on %ebx/%esi and rebase the index by 16 - %ecx */ - sub %ecx, %ebx - bsf %edx, %edx - add %ecx, %esi - add $16, %edx - sub %ecx, %edx - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - - .p2align 4 -L(CopyFrom1To16BytesUnaligned_0): /* zero byte is in %xmm4: store that chunk, then continue at L(StrncpyFillTailWithZero) with %edi one past the zero byte */ - bsf %edx, %edx -# ifdef USE_AS_STPCPY - lea (%edi, %edx), %eax -# endif - movdqu %xmm4, (%edi) - add $63, %ebx - sub %edx, %ebx - lea 1(%edi, %edx), %edi - jmp L(StrncpyFillTailWithZero) - - .p2align 4 -L(CopyFrom1To16BytesUnaligned_16): /* zero byte is in %xmm5 (mask arrives in %ecx) */ - bsf %ecx, %edx - movdqu %xmm4, (%edi) -# ifdef USE_AS_STPCPY - lea 16(%edi, %edx), %eax -# endif - movdqu %xmm5, 16(%edi) - add $47, %ebx - sub %edx, %ebx - lea 17(%edi, %edx), %edi - jmp L(StrncpyFillTailWithZero) - - .p2align 4 -L(CopyFrom1To16BytesUnaligned_32): /* zero byte is in %xmm6 */ - bsf %edx, %edx - movdqu %xmm4, (%edi) - movdqu %xmm5, 16(%edi) -# ifdef USE_AS_STPCPY - lea 32(%edi, %edx), %eax -# endif - movdqu %xmm6, 32(%edi) - add $31, %ebx - sub %edx, %ebx - lea 33(%edi, %edx), %edi - jmp L(StrncpyFillTailWithZero) - - .p2align 4 -L(CopyFrom1To16BytesUnalignedXmm6): /* the UnalignedXmmN stubs store the pending chunk at (%edi,%ecx) and finish in L(CopyFrom1To16BytesXmmExit) */ - movdqu %xmm6, (%edi, %ecx) - jmp L(CopyFrom1To16BytesXmmExit) - - .p2align 4 -L(CopyFrom1To16BytesUnalignedXmm5): - movdqu %xmm5, (%edi, %ecx) - jmp L(CopyFrom1To16BytesXmmExit) - - .p2align 4 -L(CopyFrom1To16BytesUnalignedXmm4): - movdqu %xmm4, (%edi, %ecx) - jmp L(CopyFrom1To16BytesXmmExit) - - .p2align 4 -L(CopyFrom1To16BytesUnalignedXmm3): - movdqu %xmm3, (%edi, %ecx) - jmp L(CopyFrom1To16BytesXmmExit) - - .p2align 4 -L(CopyFrom1To16BytesUnalignedXmm1): - movdqu %xmm1, (%edi, %ecx) - jmp L(CopyFrom1To16BytesXmmExit) - - .p2align 4 -L(CopyFrom1To16BytesExit): /* %edx already holds the zero-byte index */ - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - -/* Case2 */ - - .p2align 4 -L(CopyFrom1To16BytesCase2): /* a zero byte was seen and the length limit falls in the same chunk: index < %ebx goes through L(ExitTable), otherwise exactly %ebx bytes are copied via L(ExitStrncpyTable) */ - add $16, %ebx - add %ecx, %edi - add %ecx, %esi - bsf %edx, %edx - cmp %ebx, %edx - jb L(CopyFrom1To16BytesExit) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) - - .p2align 4 
-L(CopyFrom1To32BytesCase2): /* zero byte found and the limit lies in the same span: index < %ebx goes through L(ExitTable), otherwise exactly %ebx bytes are copied */ - sub %ecx, %ebx - add %ecx, %esi - bsf %edx, %edx - add $16, %edx - sub %ecx, %edx - cmp %ebx, %edx - jb L(CopyFrom1To16BytesExit) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) - -L(CopyFrom1To16BytesTailCase2): - sub %ecx, %ebx - add %ecx, %esi - bsf %edx, %edx - cmp %ebx, %edx - jb L(CopyFrom1To16BytesExit) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) - -L(CopyFrom1To16BytesTail1Case2): - bsf %edx, %edx - cmp %ebx, %edx - jb L(CopyFrom1To16BytesExit) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) - -/* Case2 or Case3, Case3 */ - - .p2align 4 -L(CopyFrom1To16BytesCase2OrCase3): - test %edx, %edx - jnz L(CopyFrom1To16BytesCase2) -L(CopyFrom1To16BytesCase3): /* no zero byte in the mask: copy exactly %ebx bytes via L(ExitStrncpyTable) */ - add $16, %ebx - add %ecx, %edi - add %ecx, %esi - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) - - .p2align 4 -L(CopyFrom1To32BytesCase2OrCase3): - test %edx, %edx - jnz L(CopyFrom1To32BytesCase2) - sub %ecx, %ebx - add %ecx, %esi - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) - - .p2align 4 -L(CopyFrom1To16BytesTailCase2OrCase3): - test %edx, %edx - jnz L(CopyFrom1To16BytesTailCase2) - sub %ecx, %ebx - add %ecx, %esi - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) - - .p2align 4 -L(CopyFrom1To32Bytes1Case2OrCase3): - add $16, %edi - add $16, %esi - sub $16, %ebx -L(CopyFrom1To16BytesTail1Case2OrCase3): - test %edx, %edx - jnz L(CopyFrom1To16BytesTail1Case2) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) - - .p2align 4 -L(Exit0): /* slot 0 of L(ExitStrncpyTable): no bytes left to copy */ -# ifdef USE_AS_STPCPY - mov %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit1): /* ExitN: copy N bytes, then branch to the zero fill when %ebx - N is nonzero (lea leaves the flags set by sub untouched). NOTE(review): %dh is stored as the final byte in several handlers; this presumably relies on the dispatch index in %edx being below 256 so %dh == 0 — confirm */ - movb %dh, (%edi) -# ifdef USE_AS_STPCPY - lea (%edi), %eax -# endif - sub $1, %ebx - lea 1(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit2): - movw (%esi), %dx - movw %dx, (%edi) -# ifdef USE_AS_STPCPY - lea 1(%edi), %eax -# endif - sub $2, %ebx - lea 2(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit3): - movw (%esi), %cx - movw %cx, (%edi) - movb 
%dh, 2(%edi) -# ifdef USE_AS_STPCPY - lea 2(%edi), %eax -# endif - sub $3, %ebx - lea 3(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit4): - movl (%esi), %edx - movl %edx, (%edi) -# ifdef USE_AS_STPCPY - lea 3(%edi), %eax -# endif - sub $4, %ebx - lea 4(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit5): - movl (%esi), %ecx - movb %dh, 4(%edi) - movl %ecx, (%edi) -# ifdef USE_AS_STPCPY - lea 4(%edi), %eax -# endif - sub $5, %ebx - lea 5(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit6): - movl (%esi), %ecx - movw 4(%esi), %dx - movl %ecx, (%edi) - movw %dx, 4(%edi) -# ifdef USE_AS_STPCPY - lea 5(%edi), %eax -# endif - sub $6, %ebx - lea 6(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit7): /* two overlapping 4-byte moves (offsets 0 and 3) cover the 7 bytes */ - movl (%esi), %ecx - movl 3(%esi), %edx - movl %ecx, (%edi) - movl %edx, 3(%edi) -# ifdef USE_AS_STPCPY - lea 6(%edi), %eax -# endif - sub $7, %ebx - lea 7(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit8): - movlpd (%esi), %xmm0 - movlpd %xmm0, (%edi) -# ifdef USE_AS_STPCPY - lea 7(%edi), %eax -# endif - sub $8, %ebx - lea 8(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit9): - movlpd (%esi), %xmm0 - movb %dh, 8(%edi) - movlpd %xmm0, (%edi) -# ifdef USE_AS_STPCPY - lea 8(%edi), %eax -# endif - sub $9, %ebx - lea 9(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit10): - movlpd (%esi), %xmm0 - movw 8(%esi), %dx - movlpd %xmm0, (%edi) - movw %dx, 8(%edi) -# ifdef USE_AS_STPCPY - lea 9(%edi), %eax -# endif - sub $10, %ebx - lea 10(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit11): - movlpd (%esi), %xmm0 - movl 7(%esi), %edx - movlpd %xmm0, (%edi) - movl %edx, 7(%edi) -# ifdef USE_AS_STPCPY - lea 10(%edi), %eax -# endif - sub $11, %ebx - lea 11(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit12): - movlpd (%esi), %xmm0 - movl 
8(%esi), %edx - movlpd %xmm0, (%edi) - movl %edx, 8(%edi) -# ifdef USE_AS_STPCPY - lea 11(%edi), %eax -# endif - sub $12, %ebx - lea 12(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit13): - movlpd (%esi), %xmm0 - movlpd 5(%esi), %xmm1 - movlpd %xmm0, (%edi) - movlpd %xmm1, 5(%edi) -# ifdef USE_AS_STPCPY - lea 12(%edi), %eax -# endif - sub $13, %ebx - lea 13(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit14): - movlpd (%esi), %xmm0 - movlpd 6(%esi), %xmm1 - movlpd %xmm0, (%edi) - movlpd %xmm1, 6(%edi) -# ifdef USE_AS_STPCPY - lea 13(%edi), %eax -# endif - sub $14, %ebx - lea 14(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit15): - movlpd (%esi), %xmm0 - movlpd 7(%esi), %xmm1 - movlpd %xmm0, (%edi) - movlpd %xmm1, 7(%edi) -# ifdef USE_AS_STPCPY - lea 14(%edi), %eax -# endif - sub $15, %ebx - lea 15(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit16): - movdqu (%esi), %xmm0 - movdqu %xmm0, (%edi) -# ifdef USE_AS_STPCPY - lea 15(%edi), %eax -# endif - sub $16, %ebx - lea 16(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit17): - movdqu (%esi), %xmm0 - movdqu %xmm0, (%edi) - movb %dh, 16(%edi) -# ifdef USE_AS_STPCPY - lea 16(%edi), %eax -# endif - sub $17, %ebx - lea 17(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit18): - movdqu (%esi), %xmm0 - movw 16(%esi), %cx - movdqu %xmm0, (%edi) - movw %cx, 16(%edi) -# ifdef USE_AS_STPCPY - lea 17(%edi), %eax -# endif - sub $18, %ebx - lea 18(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit19): - movdqu (%esi), %xmm0 - movl 15(%esi), %ecx - movdqu %xmm0, (%edi) - movl %ecx, 15(%edi) -# ifdef USE_AS_STPCPY - lea 18(%edi), %eax -# endif - sub $19, %ebx - lea 19(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit20): - movdqu (%esi), %xmm0 - movl 16(%esi), %ecx - movdqu %xmm0, (%edi) - movl %ecx, 16(%edi) -# 
ifdef USE_AS_STPCPY - lea 19(%edi), %eax -# endif - sub $20, %ebx - lea 20(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit21): - movdqu (%esi), %xmm0 - movl 16(%esi), %ecx - movdqu %xmm0, (%edi) - movl %ecx, 16(%edi) - movb %dh, 20(%edi) -# ifdef USE_AS_STPCPY - lea 20(%edi), %eax -# endif - sub $21, %ebx - lea 21(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit22): - movdqu (%esi), %xmm0 - movlpd 14(%esi), %xmm3 - movdqu %xmm0, (%edi) - movlpd %xmm3, 14(%edi) -# ifdef USE_AS_STPCPY - lea 21(%edi), %eax -# endif - sub $22, %ebx - lea 22(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit23): - movdqu (%esi), %xmm0 - movlpd 15(%esi), %xmm3 - movdqu %xmm0, (%edi) - movlpd %xmm3, 15(%edi) -# ifdef USE_AS_STPCPY - lea 22(%edi), %eax -# endif - sub $23, %ebx - lea 23(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit24): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movdqu %xmm0, (%edi) - movlpd %xmm2, 16(%edi) -# ifdef USE_AS_STPCPY - lea 23(%edi), %eax -# endif - sub $24, %ebx - lea 24(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit25): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movdqu %xmm0, (%edi) - movlpd %xmm2, 16(%edi) - movb %dh, 24(%edi) -# ifdef USE_AS_STPCPY - lea 24(%edi), %eax -# endif - sub $25, %ebx - lea 25(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit26): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movw 24(%esi), %cx - movdqu %xmm0, (%edi) - movlpd %xmm2, 16(%edi) - movw %cx, 24(%edi) -# ifdef USE_AS_STPCPY - lea 25(%edi), %eax -# endif - sub $26, %ebx - lea 26(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit27): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movl 23(%esi), %ecx - movdqu %xmm0, (%edi) - movlpd %xmm2, 16(%edi) - movl %ecx, 23(%edi) -# ifdef USE_AS_STPCPY - lea 26(%edi), %eax -# endif - sub $27, %ebx - lea 27(%edi), %edi - jnz 
L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit28): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movl 24(%esi), %ecx - movdqu %xmm0, (%edi) - movlpd %xmm2, 16(%edi) - movl %ecx, 24(%edi) -# ifdef USE_AS_STPCPY - lea 27(%edi), %eax -# endif - sub $28, %ebx - lea 28(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit29): - movdqu (%esi), %xmm0 - movdqu 13(%esi), %xmm2 - movdqu %xmm0, (%edi) - movdqu %xmm2, 13(%edi) -# ifdef USE_AS_STPCPY - lea 28(%edi), %eax -# endif - sub $29, %ebx - lea 29(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit30): - movdqu (%esi), %xmm0 - movdqu 14(%esi), %xmm2 - movdqu %xmm0, (%edi) - movdqu %xmm2, 14(%edi) -# ifdef USE_AS_STPCPY - lea 29(%edi), %eax -# endif - sub $30, %ebx - lea 30(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - - .p2align 4 -L(Exit31): - movdqu (%esi), %xmm0 - movdqu 15(%esi), %xmm2 - movdqu %xmm0, (%edi) - movdqu %xmm2, 15(%edi) -# ifdef USE_AS_STPCPY - lea 30(%edi), %eax -# endif - sub $31, %ebx - lea 31(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit32): - movdqu (%esi), %xmm0 - movdqu 16(%esi), %xmm2 - movdqu %xmm0, (%edi) - movdqu %xmm2, 16(%edi) -# ifdef USE_AS_STPCPY - lea 31(%edi), %eax -# endif - sub $32, %ebx - lea 32(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(StrncpyExit1): /* StrncpyExitN: copy exactly N bytes from (%esi) to (%edi) and return; no fill branch follows */ - movb (%esi), %dl - movb %dl, (%edi) -# ifdef USE_AS_STPCPY - lea 1(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit2): - movw (%esi), %dx - movw %dx, (%edi) -# ifdef USE_AS_STPCPY - lea 2(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit3): - movw (%esi), %cx - movb 2(%esi), %dl - movw %cx, (%edi) - movb %dl, 2(%edi) -# ifdef USE_AS_STPCPY - lea 3(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit4): - movl (%esi), %edx - movl %edx, (%edi) -# ifdef USE_AS_STPCPY - lea 4(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit5): - movl (%esi), %ecx - movb 4(%esi), %dl 
- movl %ecx, (%edi) - movb %dl, 4(%edi) -# ifdef USE_AS_STPCPY - lea 5(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit6): - movl (%esi), %ecx - movw 4(%esi), %dx - movl %ecx, (%edi) - movw %dx, 4(%edi) -# ifdef USE_AS_STPCPY - lea 6(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit7): /* two overlapping 4-byte moves (offsets 0 and 3) cover the 7 bytes */ - movl (%esi), %ecx - movl 3(%esi), %edx - movl %ecx, (%edi) - movl %edx, 3(%edi) -# ifdef USE_AS_STPCPY - lea 7(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit8): - movlpd (%esi), %xmm0 - movlpd %xmm0, (%edi) -# ifdef USE_AS_STPCPY - lea 8(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit9): - movlpd (%esi), %xmm0 - movb 8(%esi), %dl - movlpd %xmm0, (%edi) - movb %dl, 8(%edi) -# ifdef USE_AS_STPCPY - lea 9(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit10): - movlpd (%esi), %xmm0 - movw 8(%esi), %dx - movlpd %xmm0, (%edi) - movw %dx, 8(%edi) -# ifdef USE_AS_STPCPY - lea 10(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit11): - movlpd (%esi), %xmm0 - movl 7(%esi), %edx - movlpd %xmm0, (%edi) - movl %edx, 7(%edi) -# ifdef USE_AS_STPCPY - lea 11(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit12): - movlpd (%esi), %xmm0 - movl 8(%esi), %edx - movlpd %xmm0, (%edi) - movl %edx, 8(%edi) -# ifdef USE_AS_STPCPY - lea 12(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit13): - movlpd (%esi), %xmm0 - movlpd 5(%esi), %xmm1 - movlpd %xmm0, (%edi) - movlpd %xmm1, 5(%edi) -# ifdef USE_AS_STPCPY - lea 13(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit14): - movlpd (%esi), %xmm0 - movlpd 6(%esi), %xmm1 - movlpd %xmm0, (%edi) - movlpd %xmm1, 6(%edi) -# ifdef USE_AS_STPCPY - lea 14(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit15): - movlpd (%esi), %xmm0 - movlpd 7(%esi), %xmm1 - movlpd %xmm0, (%edi) - movlpd %xmm1, 7(%edi) -# ifdef USE_AS_STPCPY - lea 15(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit16): - movdqu (%esi), %xmm0 - movdqu %xmm0, (%edi) -# ifdef USE_AS_STPCPY - lea 
16(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit17): - movdqu (%esi), %xmm0 - movb 16(%esi), %cl - movdqu %xmm0, (%edi) - movb %cl, 16(%edi) -# ifdef USE_AS_STPCPY - lea 17(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit18): - movdqu (%esi), %xmm0 - movw 16(%esi), %cx - movdqu %xmm0, (%edi) - movw %cx, 16(%edi) -# ifdef USE_AS_STPCPY - lea 18(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit19): - movdqu (%esi), %xmm0 - movl 15(%esi), %ecx - movdqu %xmm0, (%edi) - movl %ecx, 15(%edi) -# ifdef USE_AS_STPCPY - lea 19(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit20): - movdqu (%esi), %xmm0 - movl 16(%esi), %ecx - movdqu %xmm0, (%edi) - movl %ecx, 16(%edi) -# ifdef USE_AS_STPCPY - lea 20(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit21): - movdqu (%esi), %xmm0 - movl 16(%esi), %ecx - movb 20(%esi), %dl - movdqu %xmm0, (%edi) - movl %ecx, 16(%edi) - movb %dl, 20(%edi) -# ifdef USE_AS_STPCPY - lea 21(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit22): - movdqu (%esi), %xmm0 - movlpd 14(%esi), %xmm3 - movdqu %xmm0, (%edi) - movlpd %xmm3, 14(%edi) -# ifdef USE_AS_STPCPY - lea 22(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit23): - movdqu (%esi), %xmm0 - movlpd 15(%esi), %xmm3 - movdqu %xmm0, (%edi) - movlpd %xmm3, 15(%edi) -# ifdef USE_AS_STPCPY - lea 23(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit24): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movdqu %xmm0, (%edi) - movlpd %xmm2, 16(%edi) -# ifdef USE_AS_STPCPY - lea 24(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit25): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movb 24(%esi), %cl - movdqu %xmm0, (%edi) - movlpd %xmm2, 16(%edi) - movb %cl, 24(%edi) -# ifdef USE_AS_STPCPY - lea 25(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit26): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movw 24(%esi), %cx - movdqu %xmm0, (%edi) - movlpd %xmm2, 16(%edi) - movw %cx, 24(%edi) -# ifdef 
USE_AS_STPCPY - lea 26(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit27): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movl 23(%esi), %ecx - movdqu %xmm0, (%edi) - movlpd %xmm2, 16(%edi) - movl %ecx, 23(%edi) -# ifdef USE_AS_STPCPY - lea 27(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit28): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movl 24(%esi), %ecx - movdqu %xmm0, (%edi) - movlpd %xmm2, 16(%edi) - movl %ecx, 24(%edi) -# ifdef USE_AS_STPCPY - lea 28(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit29): - movdqu (%esi), %xmm0 - movdqu 13(%esi), %xmm2 - movdqu %xmm0, (%edi) - movdqu %xmm2, 13(%edi) -# ifdef USE_AS_STPCPY - lea 29(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit30): - movdqu (%esi), %xmm0 - movdqu 14(%esi), %xmm2 - movdqu %xmm0, (%edi) - movdqu %xmm2, 14(%edi) -# ifdef USE_AS_STPCPY - lea 30(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit31): - movdqu (%esi), %xmm0 - movdqu 15(%esi), %xmm2 - movdqu %xmm0, (%edi) - movdqu %xmm2, 15(%edi) -# ifdef USE_AS_STPCPY - lea 31(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit32): - movdqu (%esi), %xmm0 - movdqu 16(%esi), %xmm2 - movdqu %xmm0, (%edi) - movdqu %xmm2, 16(%edi) -# ifdef USE_AS_STPCPY - lea 32(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit33): /* unlike StrncpyExit1..32 there is no USE_AS_STPCPY update of %eax here; the USE_AS_STPCPY entry checks compare against 16/32 rather than 17/33 */ - movdqu (%esi), %xmm0 - movdqu 16(%esi), %xmm2 - movb 32(%esi), %cl - movdqu %xmm0, (%edi) - movdqu %xmm2, 16(%edi) - movb %cl, 32(%edi) - RETURN - - .p2align 4 -L(Fill0): /* FillN: store the last N bytes from %edx / %xmm0, both of which L(StrncpyFillTailWithZero) clears before dispatching here */ - RETURN - - .p2align 4 -L(Fill1): - movb %dl, (%edi) - RETURN - - .p2align 4 -L(Fill2): - movw %dx, (%edi) - RETURN - - .p2align 4 -L(Fill3): /* one 4-byte store starting at -1(%edi) covers the 3 bytes and also rewrites the byte just before %edi */ - movl %edx, -1(%edi) - RETURN - - .p2align 4 -L(Fill4): - movl %edx, (%edi) - RETURN - - .p2align 4 -L(Fill5): - movl %edx, (%edi) - movb %dl, 4(%edi) - RETURN - - .p2align 4 -L(Fill6): - movl %edx, (%edi) - movw %dx, 4(%edi) - RETURN - - .p2align 4 -L(Fill7): - movlpd %xmm0, -1(%edi) - RETURN - - .p2align 4 -L(Fill8): - movlpd %xmm0, (%edi) - RETURN - 
- .p2align 4 -L(Fill9): - movlpd %xmm0, (%edi) - movb %dl, 8(%edi) - RETURN - - .p2align 4 -L(Fill10): - movlpd %xmm0, (%edi) - movw %dx, 8(%edi) - RETURN - - .p2align 4 -L(Fill11): - movlpd %xmm0, (%edi) - movl %edx, 7(%edi) - RETURN - - .p2align 4 -L(Fill12): - movlpd %xmm0, (%edi) - movl %edx, 8(%edi) - RETURN - - .p2align 4 -L(Fill13): - movlpd %xmm0, (%edi) - movlpd %xmm0, 5(%edi) - RETURN - - .p2align 4 -L(Fill14): - movlpd %xmm0, (%edi) - movlpd %xmm0, 6(%edi) - RETURN - - .p2align 4 -L(Fill15): - movdqu %xmm0, -1(%edi) - RETURN - - .p2align 4 -L(Fill16): - movdqu %xmm0, (%edi) - RETURN - - .p2align 4 -L(CopyFrom1To16BytesUnalignedXmm2): - movdqu %xmm2, (%edi, %ecx) - - .p2align 4 -L(CopyFrom1To16BytesXmmExit): - bsf %edx, %edx - add $15, %ebx - add %ecx, %edi -# ifdef USE_AS_STPCPY - lea (%edi, %edx), %eax -# endif - sub %edx, %ebx - lea 1(%edi, %edx), %edi - - .p2align 4 -L(StrncpyFillTailWithZero): - pxor %xmm0, %xmm0 - xor %edx, %edx - sub $16, %ebx - jbe L(StrncpyFillExit) - - movdqu %xmm0, (%edi) - add $16, %edi - - mov %edi, %esi - and $0xf, %esi - sub %esi, %edi - add %esi, %ebx - sub $64, %ebx - jb L(StrncpyFillLess64) - -L(StrncpyFillLoopMovdqa): - movdqa %xmm0, (%edi) - movdqa %xmm0, 16(%edi) - movdqa %xmm0, 32(%edi) - movdqa %xmm0, 48(%edi) - add $64, %edi - sub $64, %ebx - jae L(StrncpyFillLoopMovdqa) - -L(StrncpyFillLess64): - add $32, %ebx - jl L(StrncpyFillLess32) - movdqa %xmm0, (%edi) - movdqa %xmm0, 16(%edi) - add $32, %edi - sub $16, %ebx - jl L(StrncpyFillExit) - movdqa %xmm0, (%edi) - add $16, %edi - BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4) - -L(StrncpyFillLess32): - add $16, %ebx - jl L(StrncpyFillExit) - movdqa %xmm0, (%edi) - add $16, %edi - BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4) - -L(StrncpyFillExit): - add $16, %ebx - BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4) - - .p2align 4 -L(UnalignedLeaveCase2OrCase3): - test %edx, %edx - jnz L(Unaligned64LeaveCase2) -L(Unaligned64LeaveCase3): - lea 64(%ebx), %ecx - and $-16, 
%ecx - add $48, %ebx - jl L(CopyFrom1To16BytesCase3) - movdqu %xmm4, (%edi) - sub $16, %ebx - jb L(CopyFrom1To16BytesCase3) - movdqu %xmm5, 16(%edi) - sub $16, %ebx - jb L(CopyFrom1To16BytesCase3) - movdqu %xmm6, 32(%edi) - sub $16, %ebx - jb L(CopyFrom1To16BytesCase3) - movdqu %xmm7, 48(%edi) -# ifdef USE_AS_STPCPY - lea 64(%edi), %eax -# endif - RETURN - - .p2align 4 -L(Unaligned64LeaveCase2): - xor %ecx, %ecx - pcmpeqb %xmm4, %xmm0 - pmovmskb %xmm0, %edx - add $48, %ebx - jle L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16BytesUnalignedXmm4) - - pcmpeqb %xmm5, %xmm0 - pmovmskb %xmm0, %edx - movdqu %xmm4, (%edi) - add $16, %ecx - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16BytesUnalignedXmm5) - - pcmpeqb %xmm6, %xmm0 - pmovmskb %xmm0, %edx - movdqu %xmm5, 16(%edi) - add $16, %ecx - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16BytesUnalignedXmm6) - - pcmpeqb %xmm7, %xmm0 - pmovmskb %xmm0, %edx - movdqu %xmm6, 32(%edi) - lea 16(%edi, %ecx), %edi - lea 16(%esi, %ecx), %esi - bsf %edx, %edx - cmp %ebx, %edx - jb L(CopyFrom1To16BytesExit) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) - - .p2align 4 -L(ExitZero): - movl %edi, %eax - RETURN - -END (STRCPY) - - .p2align 4 - .section .rodata -L(ExitTable): - .int JMPTBL(L(Exit1), L(ExitTable)) - .int JMPTBL(L(Exit2), L(ExitTable)) - .int JMPTBL(L(Exit3), L(ExitTable)) - .int JMPTBL(L(Exit4), L(ExitTable)) - .int JMPTBL(L(Exit5), L(ExitTable)) - .int JMPTBL(L(Exit6), L(ExitTable)) - .int JMPTBL(L(Exit7), L(ExitTable)) - .int JMPTBL(L(Exit8), L(ExitTable)) - .int JMPTBL(L(Exit9), L(ExitTable)) - .int JMPTBL(L(Exit10), L(ExitTable)) - .int JMPTBL(L(Exit11), L(ExitTable)) - .int JMPTBL(L(Exit12), L(ExitTable)) - .int JMPTBL(L(Exit13), L(ExitTable)) - .int JMPTBL(L(Exit14), L(ExitTable)) - .int JMPTBL(L(Exit15), L(ExitTable)) - .int JMPTBL(L(Exit16), L(ExitTable)) - .int JMPTBL(L(Exit17), 
L(ExitTable)) - .int JMPTBL(L(Exit18), L(ExitTable)) - .int JMPTBL(L(Exit19), L(ExitTable)) - .int JMPTBL(L(Exit20), L(ExitTable)) - .int JMPTBL(L(Exit21), L(ExitTable)) - .int JMPTBL(L(Exit22), L(ExitTable)) - .int JMPTBL(L(Exit23), L(ExitTable)) - .int JMPTBL(L(Exit24), L(ExitTable)) - .int JMPTBL(L(Exit25), L(ExitTable)) - .int JMPTBL(L(Exit26), L(ExitTable)) - .int JMPTBL(L(Exit27), L(ExitTable)) - .int JMPTBL(L(Exit28), L(ExitTable)) - .int JMPTBL(L(Exit29), L(ExitTable)) - .int JMPTBL(L(Exit30), L(ExitTable)) - .int JMPTBL(L(Exit31), L(ExitTable)) - .int JMPTBL(L(Exit32), L(ExitTable)) - -L(ExitStrncpyTable): - .int JMPTBL(L(Exit0), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable)) - .int 
JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
-	/* L(FillTable): seventeen JMPTBL entries naming L(Fill0) .. L(Fill16)
-	   in ascending order, so the entry at index k names L(Fillk).
-	   NOTE(review): the code that indexes this table is outside this
-	   chunk; presumably the index is a count of bytes still to fill.  */
-	.p2align 4
-L(FillTable):
-	.int	JMPTBL(L(Fill0), L(FillTable))
-	.int	JMPTBL(L(Fill1), L(FillTable))
-	.int	JMPTBL(L(Fill2), L(FillTable))
-	.int	JMPTBL(L(Fill3), L(FillTable))
-	.int	JMPTBL(L(Fill4), L(FillTable))
-	.int	JMPTBL(L(Fill5), L(FillTable))
-	.int	JMPTBL(L(Fill6), L(FillTable))
-	.int	JMPTBL(L(Fill7), L(FillTable))
-	.int	JMPTBL(L(Fill8), L(FillTable))
-	.int	JMPTBL(L(Fill9), L(FillTable))
-	.int	JMPTBL(L(Fill10), L(FillTable))
-	.int	JMPTBL(L(Fill11), L(FillTable))
-	.int	JMPTBL(L(Fill12), L(FillTable))
-	.int	JMPTBL(L(Fill13), L(FillTable))
-	.int	JMPTBL(L(Fill14), L(FillTable))
-	.int	JMPTBL(L(Fill15), L(FillTable))
-	.int	JMPTBL(L(Fill16), L(FillTable))
-# else /* presumably the branch taken when USE_AS_STRNCPY is not defined; the matching #ifdef is outside this chunk */
-# define PARMS 4 /* offset from %esp to the first stack argument at entry; STR1 is defined as PARMS */
-# define ENTRANCE /* expands to nothing: no register is pushed at entry */
-# define RETURN POP (%edi); ret; CFI_PUSH (%edi) /* restore %edi and return; the CFI_PUSH re-establishes the unwind state for the code that follows */
-# define RETURN1 ret /* plain return, used on paths that never pushed %edi */
-	/* STRCPY (dst, src): copy the NUL-terminated string at src to dst.
-	   dst is read from STR1(%esp) into %edx and src from STR2(%esp)
-	   into %ecx.  The result is returned in %eax: dst in the plain
-	   build, or the address of the NUL byte written to the destination
-	   when USE_AS_STPCPY is defined.
-	   Registers: %ecx = source cursor, %edx = destination cursor,
-	   %edi = saved dst (pushed and restored), %ebx = block offset
-	   (pushed and restored), %eax = scratch / byte mask,
-	   %xmm0 = zero reference / compare result, %xmm1-%xmm7 = data.  */
-	.text
-ENTRY (STRCPY)
-	ENTRANCE
-	mov	STR1(%esp), %edx
-	mov	STR2(%esp), %ecx
-	/* Probe the first 16 source bytes one at a time.  A NUL at offset
-	   k-1 is handled by L(ExitTailk), before any register is saved.  */
-	cmpb	$0, (%ecx)
-	jz	L(ExitTail1)
-	cmpb	$0, 1(%ecx)
-	jz	L(ExitTail2)
-	cmpb	$0, 2(%ecx)
-	jz	L(ExitTail3)
-	cmpb	$0, 3(%ecx)
-	jz	L(ExitTail4)
-	cmpb	$0, 4(%ecx)
-	jz	L(ExitTail5)
-	cmpb	$0, 5(%ecx)
-	jz	L(ExitTail6)
-	cmpb	$0, 6(%ecx)
-	jz	L(ExitTail7)
-	cmpb	$0, 7(%ecx)
-	jz	L(ExitTail8)
-	cmpb	$0, 8(%ecx)
-	jz	L(ExitTail9)
-	cmpb	$0, 9(%ecx)
-	jz	L(ExitTail10)
-	cmpb	$0, 10(%ecx)
-	jz	L(ExitTail11)
-	cmpb	$0, 11(%ecx)
-	jz	L(ExitTail12)
-	cmpb	$0, 12(%ecx)
-	jz	L(ExitTail13)
-	cmpb	$0, 13(%ecx)
-	jz	L(ExitTail14)
-	cmpb	$0, 14(%ecx)
-	jz	L(ExitTail15)
-	cmpb	$0, 15(%ecx)
-	jz	L(ExitTail16)
-	/* No NUL in the first 16 bytes.  %edi and %ebx are used from here
-	   on, so save them first.  */
-	PUSH (%edi)
-	PUSH (%ebx)
-	/* Keep dst in %edi for the return value, copy the first 16 bytes
-	   with unaligned moves, and test the first 16-byte-aligned block
-	   above src for a NUL.  %ebx ends up as the distance from src to
-	   that aligned block.  */
-	mov	%edx, %edi
-	lea	16(%ecx), %ebx
-	and	$-16, %ebx
-	pxor	%xmm0, %xmm0		/* %xmm0 = 0, the reference for pcmpeqb */
-	movdqu	(%ecx), %xmm1
-	movdqu	%xmm1, (%edx)
-	pcmpeqb	(%ebx), %xmm0
-	pmovmskb %xmm0, %eax		/* one mask bit per byte that equals zero */
-	sub	%ecx, %ebx
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-	/* Round %ecx up to that aligned block and move %edx forward by the
-	   same distance so both cursors stay in step.  %ebx restarts at 0
-	   as the running block offset.  */
-	mov	%ecx, %eax
-	lea	16(%ecx), %ecx
-	and	$-16, %ecx
-	sub	%ecx, %eax		/* %eax = old %ecx - new %ecx */
-	sub	%eax, %edx		/* so this adds the advance to %edx */
-	xor	%ebx, %ebx
-	/* Six unrolled steps.  Each one loads the next aligned 16 bytes,
-	   stores the block loaded by the previous step, and leaves through
-	   L(CopyFrom1To16Bytes) if the new block contains a NUL.  %xmm0 is
-	   still zero after every compare that found nothing.  */
-	.p2align 4
-	movdqa	(%ecx), %xmm1
-	movaps	16(%ecx), %xmm2
-	movdqu	%xmm1, (%edx)
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	add	$16, %ebx
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %ebx), %xmm3
-	movdqu	%xmm2, (%edx, %ebx)
-	pcmpeqb	%xmm3, %xmm0
-	pmovmskb %xmm0, %eax
-	add	$16, %ebx
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %ebx), %xmm4
-	movdqu	%xmm3, (%edx, %ebx)
-	pcmpeqb	%xmm4, %xmm0
-	pmovmskb %xmm0, %eax
-	add	$16, %ebx
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %ebx), %xmm1
-	movdqu	%xmm4, (%edx, %ebx)
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %eax
-	add	$16, %ebx
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %ebx), %xmm2
-	movdqu	%xmm1, (%edx, %ebx)
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	add	$16, %ebx
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %ebx), %xmm3
-	movdqu	%xmm2, (%edx, %ebx)
-	pcmpeqb	%xmm3, %xmm0
-	pmovmskb %xmm0, %eax
-	add	$16, %ebx
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-	/* Store the last checked block, then round %ecx down to a 64-byte
-	   boundary and shift %edx by the same distance.  %ebx stays at
-	   0x60 (six steps of 16) until L(Aligned64Leave).  */
-	movdqu	%xmm3, (%edx, %ebx)
-	mov	%ecx, %eax
-	lea	16(%ecx, %ebx), %ecx
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	sub	%eax, %edx
-	/* Main loop, 64 bytes per pass.  %xmm4..%xmm7 hold the four loaded
-	   blocks.  pminub folds them into one register, so a single
-	   pcmpeqb/pmovmskb detects a zero byte anywhere in the 64 bytes.
-	   Both cursors are advanced before the test, which is why the
-	   stores use negative displacements.  */
-L(Aligned64Loop):
-	movaps	(%ecx), %xmm2
-	movaps	%xmm2, %xmm4
-	movaps	16(%ecx), %xmm5
-	movaps	32(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	48(%ecx), %xmm7
-	pminub	%xmm5, %xmm2
-	add	$64, %ecx
-	pminub	%xmm7, %xmm3
-	add	$64, %edx
-	pminub	%xmm2, %xmm3
-	pcmpeqb	%xmm0, %xmm3		/* result goes to %xmm3 here, so %xmm0 stays zero */
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(Aligned64Leave)
-L(Aligned64Loop_start):	/* store the previous four blocks while loading the next four */
-	movdqu	%xmm4, -64(%edx)
-	movaps	(%ecx), %xmm2
-	movdqa	%xmm2, %xmm4
-	movdqu	%xmm5, -48(%edx)
-	movaps	16(%ecx), %xmm5
-	pminub	%xmm5, %xmm2
-	movaps	32(%ecx), %xmm3
-	movdqu	%xmm6, -32(%edx)
-	movaps	%xmm3, %xmm6
-	movdqu	%xmm7, -16(%edx)
-	movaps	48(%ecx), %xmm7
-	pminub	%xmm7, %xmm3
-	pminub	%xmm2, %xmm3
-	pcmpeqb	%xmm3, %xmm0		/* result goes to %xmm0; it is still zero whenever the loop repeats */
-	pmovmskb %xmm0, %eax
-	add	$64, %edx
-	add	$64, %ecx
-	test	%eax, %eax
-	jz	L(Aligned64Loop_start)
-L(Aligned64Leave):	/* a NUL lies somewhere in %xmm4..%xmm7; find the block */
-	sub	$0xa0, %ebx		/* 0x60 - 0xa0 = -64: the pending blocks start 64 bytes behind the cursors */
-	pxor	%xmm0, %xmm0		/* re-zero: the loop exit may have left a compare result here */
-	pcmpeqb	%xmm4, %xmm0
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	pcmpeqb	%xmm5, %xmm0
-	pmovmskb %xmm0, %eax
-	movdqu	%xmm4, -64(%edx)
-	test	%eax, %eax
-	lea	16(%ebx), %ebx		/* lea rather than add: the flags from test must reach jnz */
-	jnz	L(CopyFrom1To16Bytes)
-
-	pcmpeqb	%xmm6, %xmm0
-	pmovmskb %xmm0, %eax
-	movdqu	%xmm5, -48(%edx)
-	test	%eax, %eax
-	lea	16(%ebx), %ebx
-	jnz	L(CopyFrom1To16Bytes)
-
-	movdqu	%xmm6, -32(%edx)
-	pcmpeqb	%xmm7, %xmm0		/* only %xmm7 is left, so no test: fall through with its mask */
-	pmovmskb %xmm0, %eax
-	lea	16(%ebx), %ebx
-
-/*-----------------End of main part---------------------------*/
-	/* Common exit.  On entry %eax holds the pmovmskb mask of the block
-	   that contains the NUL and %ebx the offset of that block from
-	   %ecx/%edx.  The lowest set mask bit selects L(ExitN), which
-	   copies the final N bytes, terminator included.  */
-	.p2align 4
-L(CopyFrom1To16Bytes):
-	add	%ebx, %edx
-	add	%ebx, %ecx
-	/* %ebx is not needed any more.  %edi stays saved until RETURN.  */
-	POP (%ebx)
-	test	%al, %al
-	jz	L(ExitHigh)		/* low mask byte clear: NUL is in bytes 8..15 */
-	test	$0x01, %al
-	jnz	L(Exit1)
-	test	$0x02, %al
-	jnz	L(Exit2)
-	test	$0x04, %al
-	jnz	L(Exit3)
-	test	$0x08, %al
-	jnz	L(Exit4)
-	test	$0x10, %al
-	jnz	L(Exit5)
-	test	$0x20, %al
-	jnz	L(Exit6)
-	test	$0x40, %al
-	jnz	L(Exit7)
-	/* Exit 8 */
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	4(%ecx), %eax
-	movl	%eax, 4(%edx)
-# ifdef USE_AS_STPCPY
-	lea	7(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(ExitHigh):
-	test	$0x01, %ah
-	jnz	L(Exit9)
-	test	$0x02, %ah
-	jnz	L(Exit10)
-	test	$0x04, %ah
-	jnz	L(Exit11)
-	test	$0x08, %ah
-	jnz	L(Exit12)
-	test	$0x10, %ah
-	jnz	L(Exit13)
-	test	$0x20, %ah
-	jnz	L(Exit14)
-	test	$0x40, %ah
-	jnz	L(Exit15)
-	/* Exit 16 */
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	8(%ecx), %xmm0
-	movlpd	%xmm0, 8(%edx)
-# ifdef USE_AS_STPCPY
-	lea	15(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-	/* L(Exit1) .. L(Exit15): copy exactly N bytes from (%ecx) to (%edx).
-	   Odd sizes use two moves that overlap instead of a byte loop.
-	   The result is %edx + N - 1 for stpcpy, the saved dst otherwise.  */
-	.p2align 4
-L(Exit1):
-	movb	(%ecx), %al
-	movb	%al, (%edx)
-# ifdef USE_AS_STPCPY
-	lea	(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit2):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-# ifdef USE_AS_STPCPY
-	lea	1(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit3):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movb	2(%ecx), %al
-	movb	%al, 2(%edx)
-# ifdef USE_AS_STPCPY
-	lea	2(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit4):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-# ifdef USE_AS_STPCPY
-	lea	3(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit5):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movb	4(%ecx), %al
-	movb	%al, 4(%edx)
-# ifdef USE_AS_STPCPY
-	lea	4(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit6):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movw	4(%ecx), %ax
-	movw	%ax, 4(%edx)
-# ifdef USE_AS_STPCPY
-	lea	5(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit7):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	3(%ecx), %eax		/* bytes 3..6, overlapping byte 3 of the first move */
-	movl	%eax, 3(%edx)
-# ifdef USE_AS_STPCPY
-	lea	6(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit9):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	4(%ecx), %eax
-	movl	%eax, 4(%edx)
-	movb	8(%ecx), %al
-	movb	%al, 8(%edx)
-# ifdef USE_AS_STPCPY
-	lea	8(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit10):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	4(%ecx), %eax
-	movl	%eax, 4(%edx)
-	movw	8(%ecx), %ax
-	movw	%ax, 8(%edx)
-# ifdef USE_AS_STPCPY
-	lea	9(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit11):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	4(%ecx), %eax
-	movl	%eax, 4(%edx)
-	movl	7(%ecx), %eax		/* bytes 7..10, overlapping byte 7 */
-	movl	%eax, 7(%edx)
-# ifdef USE_AS_STPCPY
-	lea	10(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit12):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	4(%ecx), %eax
-	movl	%eax, 4(%edx)
-	movl	8(%ecx), %eax
-	movl	%eax, 8(%edx)
-# ifdef USE_AS_STPCPY
-	lea	11(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit13):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	5(%ecx), %xmm0		/* bytes 5..12, overlapping bytes 5..7 */
-	movlpd	%xmm0, 5(%edx)
-# ifdef USE_AS_STPCPY
-	lea	12(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit14):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	6(%ecx), %xmm0		/* bytes 6..13, overlapping bytes 6..7 */
-	movlpd	%xmm0, 6(%edx)
-# ifdef USE_AS_STPCPY
-	lea	13(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit15):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	7(%ecx), %xmm0		/* bytes 7..14, overlapping byte 7 */
-	movlpd	%xmm0, 7(%edx)
-# ifdef USE_AS_STPCPY
-	lea	14(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-CFI_POP (%edi)	/* the ExitTail paths below are reached before %edi is pushed */
-	/* L(ExitTail1) .. L(ExitTail16): the same copies as L(Exit1) ..
-	   L(Exit16), reached straight from the byte probes at entry.
-	   Nothing has been pushed and dst is still in %edx, so the plain
-	   build returns %edx and every path ends with RETURN1.  */
-	.p2align 4
-L(ExitTail1):
-	movb	(%ecx), %al
-	movb	%al, (%edx)
-	movl	%edx, %eax		/* dst and the address of the NUL coincide, so no #ifdef */
-	RETURN1
-
-	.p2align 4
-L(ExitTail2):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-# ifdef USE_AS_STPCPY
-	lea	1(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail3):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movb	2(%ecx), %al
-	movb	%al, 2(%edx)
-# ifdef USE_AS_STPCPY
-	lea	2(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail4):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-# ifdef USE_AS_STPCPY
-	lea	3(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail5):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movb	4(%ecx), %al
-	movb	%al, 4(%edx)
-# ifdef USE_AS_STPCPY
-	lea	4(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail6):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movw	4(%ecx), %ax
-	movw	%ax, 4(%edx)
-# ifdef USE_AS_STPCPY
-	lea	5(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail7):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	3(%ecx), %eax
-	movl	%eax, 3(%edx)
-# ifdef USE_AS_STPCPY
-	lea	6(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail8):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	4(%ecx), %eax
-	movl	%eax, 4(%edx)
-# ifdef USE_AS_STPCPY
-	lea	7(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail9):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	4(%ecx), %eax
-	movl	%eax, 4(%edx)
-	movb	8(%ecx), %al
-	movb	%al, 8(%edx)
-# ifdef USE_AS_STPCPY
-	lea	8(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail10):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	4(%ecx), %eax
-	movl	%eax, 4(%edx)
-	movw	8(%ecx), %ax
-	movw	%ax, 8(%edx)
-# ifdef USE_AS_STPCPY
-	lea	9(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail11):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	4(%ecx), %eax
-	movl	%eax, 4(%edx)
-	movl	7(%ecx), %eax
-	movl	%eax, 7(%edx)
-# ifdef USE_AS_STPCPY
-	lea	10(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail12):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	4(%ecx), %eax
-	movl	%eax, 4(%edx)
-	movl	8(%ecx), %eax
-	movl	%eax, 8(%edx)
-# ifdef USE_AS_STPCPY
-	lea	11(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail13):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	5(%ecx), %xmm0
-	movlpd	%xmm0, 5(%edx)
-# ifdef USE_AS_STPCPY
-	lea	12(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail14):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	6(%ecx), %xmm0
-	movlpd	%xmm0, 6(%edx)
-# ifdef USE_AS_STPCPY
-	lea	13(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail15):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	7(%ecx), %xmm0
-	movlpd	%xmm0, 7(%edx)
-# ifdef USE_AS_STPCPY
-	lea	14(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail16):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	8(%ecx), %xmm0
-	movlpd	%xmm0, 8(%edx)
-# ifdef USE_AS_STPCPY
-	lea	15(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-END (STRCPY)
-# endif
-
-#endif |