/* strcpy with SSSE3 Copyright (C) 2011 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ #ifndef NOT_IN_libc # ifndef USE_AS_STRCAT # include # define CFI_PUSH(REG) \ cfi_adjust_cfa_offset (4); \ cfi_rel_offset (REG, 0) # define CFI_POP(REG) \ cfi_adjust_cfa_offset (-4); \ cfi_restore (REG) # define PUSH(REG) pushl REG; CFI_PUSH (REG) # define POP(REG) popl REG; CFI_POP (REG) # ifndef STRCPY # define STRCPY __strcpy_ssse3 # endif # ifdef USE_AS_STRNCPY # define PARMS 8 # define ENTRANCE PUSH(%ebx) # define RETURN POP(%ebx); ret; CFI_PUSH(%ebx); # define RETURN1 POP(%edi); POP(%ebx); ret; CFI_PUSH(%ebx); CFI_PUSH(%edi) # else # define PARMS 4 # define ENTRANCE # define RETURN ret # define RETURN1 POP(%edi); ret; CFI_PUSH(%edi) # endif # define STR1 PARMS # define STR2 STR1+4 # define LEN STR2+4 /* In this code the following instructions are used for copying: movb - 1 byte movw - 2 bytes movl - 4 bytes movlpd - 8 bytes movaps - 16 bytes - requires 16-byte alignment of the source and destination addresses. 16-byte alignment: an address is a 32-bit value whose low four bits must be 0. 
*/ .text ENTRY (STRCPY) ENTRANCE mov STR1(%esp), %edx mov STR2(%esp), %ecx # ifdef USE_AS_STRNCPY movl LEN(%esp), %ebx test %ebx, %ebx jz L(ExitTail0) cmp $8, %ebx jbe L(StrncpyExit8Bytes) # endif cmpb $0, (%ecx) jz L(ExitTail1) cmpb $0, 1(%ecx) jz L(ExitTail2) cmpb $0, 2(%ecx) jz L(ExitTail3) cmpb $0, 3(%ecx) jz L(ExitTail4) cmpb $0, 4(%ecx) jz L(ExitTail5) cmpb $0, 5(%ecx) jz L(ExitTail6) cmpb $0, 6(%ecx) jz L(ExitTail7) cmpb $0, 7(%ecx) jz L(ExitTail8) # ifdef USE_AS_STRNCPY cmp $16, %ebx jb L(StrncpyExit15Bytes) # endif cmpb $0, 8(%ecx) jz L(ExitTail9) cmpb $0, 9(%ecx) jz L(ExitTail10) cmpb $0, 10(%ecx) jz L(ExitTail11) cmpb $0, 11(%ecx) jz L(ExitTail12) cmpb $0, 12(%ecx) jz L(ExitTail13) cmpb $0, 13(%ecx) jz L(ExitTail14) cmpb $0, 14(%ecx) jz L(ExitTail15) # ifdef USE_AS_STRNCPY cmp $16, %ebx je L(ExitTail16) # endif cmpb $0, 15(%ecx) jz L(ExitTail16) PUSH (%edi) mov %edx, %edi # endif PUSH (%esi) # ifdef USE_AS_STRNCPY mov %ecx, %esi sub $16, %ebx and $0xf, %esi /* add 16 bytes ecx_shift to ebx */ add %esi, %ebx # endif lea 16(%ecx), %esi /* Now: esi = alignment_16(ecx) + ecx_shift + 16; ecx_shift = ecx - alignment_16(ecx) */ and $-16, %esi /* Now: esi = alignment_16(ecx) + 16 */ pxor %xmm0, %xmm0 movlpd (%ecx), %xmm1 movlpd %xmm1, (%edx) /* look if there is zero symbol in next 16 bytes of string from esi to esi + 15 and form mask in xmm0 */ pcmpeqb (%esi), %xmm0 movlpd 8(%ecx), %xmm1 movlpd %xmm1, 8(%edx) /* convert byte mask in xmm0 to bit mask */ pmovmskb %xmm0, %eax sub %ecx, %esi /* esi = 16 - ecx_shift */ /* eax = 0: there isn't end of string from position esi to esi+15 */ # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(CopyFrom1To16BytesCase2OrCase3) # endif test %eax, %eax jnz L(CopyFrom1To16Bytes) mov %edx, %eax lea 16(%edx), %edx /* Now: edx = edx + 16 = alignment_16(edx) + edx_shift + 16 */ and $-16, %edx /* Now: edx = alignment_16(edx) + 16 */ sub %edx, %eax /* Now: eax = edx_shift - 16 */ # ifdef USE_AS_STRNCPY add %eax, %esi lea -1(%esi), %esi and 
$1<<31, %esi test %esi, %esi jnz L(ContinueCopy) lea 16(%ebx), %ebx L(ContinueCopy): # endif sub %eax, %ecx /* Now: case ecx_shift >= edx_shift: ecx = alignment_16(ecx) + (ecx_shift - edx_shift) + 16 case ecx_shift < edx_shift: ecx = alignment_16(ecx) + (16 + ecx_shift - edx_shift) */ mov %ecx, %eax and $0xf, %eax /* Now: case ecx_shift >= edx_shift: eax = ecx_shift - edx_shift case ecx_shift < edx_shift: eax = (16 + ecx_shift - edx_shift) eax can be 0, 1, ..., 15 */ mov $0, %esi /* case: ecx_shift == edx_shift */ jz L(Align16Both) cmp $8, %eax jae L(ShlHigh8) cmp $1, %eax je L(Shl1) cmp $2, %eax je L(Shl2) cmp $3, %eax je L(Shl3) cmp $4, %eax je L(Shl4) cmp $5, %eax je L(Shl5) cmp $6, %eax je L(Shl6) jmp L(Shl7) L(ShlHigh8): je L(Shl8) cmp $9, %eax je L(Shl9) cmp $10, %eax je L(Shl10) cmp $11, %eax je L(Shl11) cmp $12, %eax je L(Shl12) cmp $13, %eax je L(Shl13) cmp $14, %eax je L(Shl14) jmp L(Shl15) L(Align16Both): movaps (%ecx), %xmm1 movaps 16(%ecx), %xmm2 movaps %xmm1, (%edx) pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %eax lea 16(%esi), %esi # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(CopyFrom1To16BytesCase2OrCase3) # endif test %eax, %eax jnz L(CopyFrom1To16Bytes) movaps 16(%ecx, %esi), %xmm3 movaps %xmm2, (%edx, %esi) pcmpeqb %xmm3, %xmm0 pmovmskb %xmm0, %eax lea 16(%esi), %esi # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(CopyFrom1To16BytesCase2OrCase3) # endif test %eax, %eax jnz L(CopyFrom1To16Bytes) movaps 16(%ecx, %esi), %xmm4 movaps %xmm3, (%edx, %esi) pcmpeqb %xmm4, %xmm0 pmovmskb %xmm0, %eax lea 16(%esi), %esi # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(CopyFrom1To16BytesCase2OrCase3) # endif test %eax, %eax jnz L(CopyFrom1To16Bytes) movaps 16(%ecx, %esi), %xmm1 movaps %xmm4, (%edx, %esi) pcmpeqb %xmm1, %xmm0 pmovmskb %xmm0, %eax lea 16(%esi), %esi # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(CopyFrom1To16BytesCase2OrCase3) # endif test %eax, %eax jnz L(CopyFrom1To16Bytes) movaps 16(%ecx, %esi), %xmm2 movaps %xmm1, (%edx, %esi) pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, 
%eax lea 16(%esi), %esi # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(CopyFrom1To16BytesCase2OrCase3) # endif test %eax, %eax jnz L(CopyFrom1To16Bytes) movaps 16(%ecx, %esi), %xmm3 movaps %xmm2, (%edx, %esi) pcmpeqb %xmm3, %xmm0 pmovmskb %xmm0, %eax lea 16(%esi), %esi # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(CopyFrom1To16BytesCase2OrCase3) # endif test %eax, %eax jnz L(CopyFrom1To16Bytes) movaps %xmm3, (%edx, %esi) mov %ecx, %eax lea 16(%ecx, %esi), %ecx and $-0x40, %ecx sub %ecx, %eax sub %eax, %edx # ifdef USE_AS_STRNCPY lea 48+64(%ebx, %eax), %ebx # endif mov $-0x40, %esi L(Aligned64Loop): movaps (%ecx), %xmm2 movaps 32(%ecx), %xmm3 movaps %xmm2, %xmm4 movaps 16(%ecx), %xmm5 movaps %xmm3, %xmm6 movaps 48(%ecx), %xmm7 pminub %xmm5, %xmm2 pminub %xmm7, %xmm3 pminub %xmm2, %xmm3 lea 64(%edx), %edx pcmpeqb %xmm0, %xmm3 lea 64(%ecx), %ecx pmovmskb %xmm3, %eax # ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(StrncpyLeaveCase2OrCase3) # endif test %eax, %eax jnz L(Aligned64Leave) movaps %xmm4, -64(%edx) movaps %xmm5, -48(%edx) movaps %xmm6, -32(%edx) movaps %xmm7, -16(%edx) jmp L(Aligned64Loop) L(Aligned64Leave): # ifdef USE_AS_STRNCPY lea 48(%ebx), %ebx # endif pcmpeqb %xmm4, %xmm0 pmovmskb %xmm0, %eax test %eax, %eax jnz L(CopyFrom1To16Bytes) pcmpeqb %xmm5, %xmm0 # ifdef USE_AS_STRNCPY lea -16(%ebx), %ebx # endif pmovmskb %xmm0, %eax movaps %xmm4, -64(%edx) test %eax, %eax lea 16(%esi), %esi jnz L(CopyFrom1To16Bytes) pcmpeqb %xmm6, %xmm0 # ifdef USE_AS_STRNCPY lea -16(%ebx), %ebx # endif pmovmskb %xmm0, %eax movaps %xmm5, -48(%edx) test %eax, %eax lea 16(%esi), %esi jnz L(CopyFrom1To16Bytes) movaps %xmm6, -32(%edx) pcmpeqb %xmm7, %xmm0 # ifdef USE_AS_STRNCPY lea -16(%ebx), %ebx # endif pmovmskb %xmm0, %eax lea 16(%esi), %esi jmp L(CopyFrom1To16Bytes) .p2align 4 L(Shl1): movaps -1(%ecx), %xmm1 movaps 15(%ecx), %xmm2 L(Shl1Start): pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %eax movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit1Case2OrCase3) # endif test 
%eax, %eax jnz L(Shl1LoopExit) palignr $1, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 31(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit1Case2OrCase3) # endif test %eax, %eax jnz L(Shl1LoopExit) palignr $1, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 31(%ecx), %xmm2 movaps %xmm3, %xmm1 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit1Case2OrCase3) # endif test %eax, %eax jnz L(Shl1LoopExit) palignr $1, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 31(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit1Case2OrCase3) # endif test %eax, %eax jnz L(Shl1LoopExit) palignr $1, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) lea 31(%ecx), %ecx lea 16(%edx), %edx mov %ecx, %eax and $-0x40, %ecx sub %ecx, %eax lea -15(%ecx), %ecx sub %eax, %edx # ifdef USE_AS_STRNCPY add %eax, %ebx # endif movaps -1(%ecx), %xmm1 L(Shl1LoopStart): movaps 15(%ecx), %xmm2 movaps 31(%ecx), %xmm3 movaps %xmm3, %xmm6 movaps 47(%ecx), %xmm4 movaps %xmm4, %xmm7 movaps 63(%ecx), %xmm5 pminub %xmm2, %xmm6 pminub %xmm5, %xmm7 pminub %xmm6, %xmm7 pcmpeqb %xmm0, %xmm7 pmovmskb %xmm7, %eax movaps %xmm5, %xmm7 palignr $1, %xmm4, %xmm5 test %eax, %eax palignr $1, %xmm3, %xmm4 jnz L(Shl1Start) # ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(StrncpyLeave1) # endif palignr $1, %xmm2, %xmm3 lea 64(%ecx), %ecx palignr $1, %xmm1, %xmm2 movaps %xmm7, %xmm1 movaps %xmm5, 48(%edx) movaps %xmm4, 32(%edx) movaps %xmm3, 16(%edx) movaps %xmm2, (%edx) lea 64(%edx), %edx jmp L(Shl1LoopStart) L(Shl1LoopExit): movaps (%edx), %xmm6 psrldq $15, %xmm6 mov $15, %esi palignr $1, %xmm1, %xmm6 movaps %xmm6, (%edx) jmp L(CopyFrom1To16Bytes) .p2align 4 L(Shl2): 
movaps -2(%ecx), %xmm1 movaps 14(%ecx), %xmm2 L(Shl2Start): pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %eax movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit2Case2OrCase3) # endif test %eax, %eax jnz L(Shl2LoopExit) palignr $2, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 30(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit2Case2OrCase3) # endif test %eax, %eax jnz L(Shl2LoopExit) palignr $2, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 30(%ecx), %xmm2 movaps %xmm3, %xmm1 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit2Case2OrCase3) # endif test %eax, %eax jnz L(Shl2LoopExit) palignr $2, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 30(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit2Case2OrCase3) # endif test %eax, %eax jnz L(Shl2LoopExit) palignr $2, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) lea 30(%ecx), %ecx lea 16(%edx), %edx mov %ecx, %eax and $-0x40, %ecx sub %ecx, %eax lea -14(%ecx), %ecx sub %eax, %edx # ifdef USE_AS_STRNCPY add %eax, %ebx # endif movaps -2(%ecx), %xmm1 L(Shl2LoopStart): movaps 14(%ecx), %xmm2 movaps 30(%ecx), %xmm3 movaps %xmm3, %xmm6 movaps 46(%ecx), %xmm4 movaps %xmm4, %xmm7 movaps 62(%ecx), %xmm5 pminub %xmm2, %xmm6 pminub %xmm5, %xmm7 pminub %xmm6, %xmm7 pcmpeqb %xmm0, %xmm7 pmovmskb %xmm7, %eax movaps %xmm5, %xmm7 palignr $2, %xmm4, %xmm5 test %eax, %eax palignr $2, %xmm3, %xmm4 jnz L(Shl2Start) # ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(StrncpyLeave2) # endif palignr $2, %xmm2, %xmm3 lea 64(%ecx), %ecx palignr $2, %xmm1, %xmm2 movaps %xmm7, %xmm1 movaps %xmm5, 48(%edx) movaps %xmm4, 32(%edx) movaps %xmm3, 16(%edx) movaps %xmm2, (%edx) lea 
64(%edx), %edx jmp L(Shl2LoopStart) L(Shl2LoopExit): movaps (%edx), %xmm6 psrldq $14, %xmm6 mov $14, %esi palignr $2, %xmm1, %xmm6 movaps %xmm6, (%edx) jmp L(CopyFrom1To16Bytes) .p2align 4 L(Shl3): movaps -3(%ecx), %xmm1 movaps 13(%ecx), %xmm2 L(Shl3Start): pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %eax movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit3Case2OrCase3) # endif test %eax, %eax jnz L(Shl3LoopExit) palignr $3, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 29(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit3Case2OrCase3) # endif test %eax, %eax jnz L(Shl3LoopExit) palignr $3, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 29(%ecx), %xmm2 movaps %xmm3, %xmm1 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit3Case2OrCase3) # endif test %eax, %eax jnz L(Shl3LoopExit) palignr $3, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 29(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit3Case2OrCase3) # endif test %eax, %eax jnz L(Shl3LoopExit) palignr $3, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) lea 29(%ecx), %ecx lea 16(%edx), %edx mov %ecx, %eax and $-0x40, %ecx sub %ecx, %eax lea -13(%ecx), %ecx sub %eax, %edx # ifdef USE_AS_STRNCPY add %eax, %ebx # endif movaps -3(%ecx), %xmm1 L(Shl3LoopStart): movaps 13(%ecx), %xmm2 movaps 29(%ecx), %xmm3 movaps %xmm3, %xmm6 movaps 45(%ecx), %xmm4 movaps %xmm4, %xmm7 movaps 61(%ecx), %xmm5 pminub %xmm2, %xmm6 pminub %xmm5, %xmm7 pminub %xmm6, %xmm7 pcmpeqb %xmm0, %xmm7 pmovmskb %xmm7, %eax movaps %xmm5, %xmm7 palignr $3, %xmm4, %xmm5 test %eax, %eax palignr $3, %xmm3, %xmm4 jnz L(Shl3Start) # ifdef USE_AS_STRNCPY sub $64, %ebx jbe 
L(StrncpyLeave3) # endif palignr $3, %xmm2, %xmm3 lea 64(%ecx), %ecx palignr $3, %xmm1, %xmm2 movaps %xmm7, %xmm1 movaps %xmm5, 48(%edx) movaps %xmm4, 32(%edx) movaps %xmm3, 16(%edx) movaps %xmm2, (%edx) lea 64(%edx), %edx jmp L(Shl3LoopStart) L(Shl3LoopExit): movaps (%edx), %xmm6 psrldq $13, %xmm6 mov $13, %esi palignr $3, %xmm1, %xmm6 movaps %xmm6, (%edx) jmp L(CopyFrom1To16Bytes) .p2align 4 L(Shl4): movaps -4(%ecx), %xmm1 movaps 12(%ecx), %xmm2 L(Shl4Start): pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %eax movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit4Case2OrCase3) # endif test %eax, %eax jnz L(Shl4LoopExit) palignr $4, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 28(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit4Case2OrCase3) # endif test %eax, %eax jnz L(Shl4LoopExit) palignr $4, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 28(%ecx), %xmm2 movaps %xmm3, %xmm1 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit4Case2OrCase3) # endif test %eax, %eax jnz L(Shl4LoopExit) palignr $4, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 28(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit4Case2OrCase3) # endif test %eax, %eax jnz L(Shl4LoopExit) palignr $4, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) lea 28(%ecx), %ecx lea 16(%edx), %edx mov %ecx, %eax and $-0x40, %ecx sub %ecx, %eax lea -12(%ecx), %ecx sub %eax, %edx # ifdef USE_AS_STRNCPY add %eax, %ebx # endif movaps -4(%ecx), %xmm1 L(Shl4LoopStart): movaps 12(%ecx), %xmm2 movaps 28(%ecx), %xmm3 movaps %xmm3, %xmm6 movaps 44(%ecx), %xmm4 movaps %xmm4, %xmm7 movaps 60(%ecx), %xmm5 pminub %xmm2, %xmm6 pminub %xmm5, %xmm7 pminub 
%xmm6, %xmm7 pcmpeqb %xmm0, %xmm7 pmovmskb %xmm7, %eax movaps %xmm5, %xmm7 palignr $4, %xmm4, %xmm5 test %eax, %eax palignr $4, %xmm3, %xmm4 jnz L(Shl4Start) # ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(StrncpyLeave4) # endif palignr $4, %xmm2, %xmm3 lea 64(%ecx), %ecx palignr $4, %xmm1, %xmm2 movaps %xmm7, %xmm1 movaps %xmm5, 48(%edx) movaps %xmm4, 32(%edx) movaps %xmm3, 16(%edx) movaps %xmm2, (%edx) lea 64(%edx), %edx jmp L(Shl4LoopStart) L(Shl4LoopExit): movaps (%edx), %xmm6 psrldq $12, %xmm6 mov $12, %esi palignr $4, %xmm1, %xmm6 movaps %xmm6, (%edx) jmp L(CopyFrom1To16Bytes) .p2align 4 L(Shl5): movaps -5(%ecx), %xmm1 movaps 11(%ecx), %xmm2 L(Shl5Start): pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %eax movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit5Case2OrCase3) # endif test %eax, %eax jnz L(Shl5LoopExit) palignr $5, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 27(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit5Case2OrCase3) # endif test %eax, %eax jnz L(Shl5LoopExit) palignr $5, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 27(%ecx), %xmm2 movaps %xmm3, %xmm1 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit5Case2OrCase3) # endif test %eax, %eax jnz L(Shl5LoopExit) palignr $5, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 27(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit5Case2OrCase3) # endif test %eax, %eax jnz L(Shl5LoopExit) palignr $5, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) lea 27(%ecx), %ecx lea 16(%edx), %edx mov %ecx, %eax and $-0x40, %ecx sub %ecx, %eax lea -11(%ecx), %ecx sub %eax, %edx # ifdef USE_AS_STRNCPY add %eax, %ebx # endif movaps -5(%ecx), 
%xmm1 L(Shl5LoopStart): movaps 11(%ecx), %xmm2 movaps 27(%ecx), %xmm3 movaps %xmm3, %xmm6 movaps 43(%ecx), %xmm4 movaps %xmm4, %xmm7 movaps 59(%ecx), %xmm5 pminub %xmm2, %xmm6 pminub %xmm5, %xmm7 pminub %xmm6, %xmm7 pcmpeqb %xmm0, %xmm7 pmovmskb %xmm7, %eax movaps %xmm5, %xmm7 palignr $5, %xmm4, %xmm5 test %eax, %eax palignr $5, %xmm3, %xmm4 jnz L(Shl5Start) # ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(StrncpyLeave5) # endif palignr $5, %xmm2, %xmm3 lea 64(%ecx), %ecx palignr $5, %xmm1, %xmm2 movaps %xmm7, %xmm1 movaps %xmm5, 48(%edx) movaps %xmm4, 32(%edx) movaps %xmm3, 16(%edx) movaps %xmm2, (%edx) lea 64(%edx), %edx jmp L(Shl5LoopStart) L(Shl5LoopExit): movaps (%edx), %xmm6 psrldq $11, %xmm6 mov $11, %esi palignr $5, %xmm1, %xmm6 movaps %xmm6, (%edx) jmp L(CopyFrom1To16Bytes) .p2align 4 L(Shl6): movaps -6(%ecx), %xmm1 movaps 10(%ecx), %xmm2 L(Shl6Start): pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %eax movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit6Case2OrCase3) # endif test %eax, %eax jnz L(Shl6LoopExit) palignr $6, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 26(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit6Case2OrCase3) # endif test %eax, %eax jnz L(Shl6LoopExit) palignr $6, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 26(%ecx), %xmm2 movaps %xmm3, %xmm1 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit6Case2OrCase3) # endif test %eax, %eax jnz L(Shl6LoopExit) palignr $6, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 26(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit6Case2OrCase3) # endif test %eax, %eax jnz L(Shl6LoopExit) palignr $6, %xmm1, %xmm2 movaps %xmm3, %xmm1 
movaps %xmm2, (%edx) lea 26(%ecx), %ecx lea 16(%edx), %edx mov %ecx, %eax and $-0x40, %ecx sub %ecx, %eax lea -10(%ecx), %ecx sub %eax, %edx # ifdef USE_AS_STRNCPY add %eax, %ebx # endif movaps -6(%ecx), %xmm1 L(Shl6LoopStart): movaps 10(%ecx), %xmm2 movaps 26(%ecx), %xmm3 movaps %xmm3, %xmm6 movaps 42(%ecx), %xmm4 movaps %xmm4, %xmm7 movaps 58(%ecx), %xmm5 pminub %xmm2, %xmm6 pminub %xmm5, %xmm7 pminub %xmm6, %xmm7 pcmpeqb %xmm0, %xmm7 pmovmskb %xmm7, %eax movaps %xmm5, %xmm7 palignr $6, %xmm4, %xmm5 test %eax, %eax palignr $6, %xmm3, %xmm4 jnz L(Shl6Start) # ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(StrncpyLeave6) # endif palignr $6, %xmm2, %xmm3 lea 64(%ecx), %ecx palignr $6, %xmm1, %xmm2 movaps %xmm7, %xmm1 movaps %xmm5, 48(%edx) movaps %xmm4, 32(%edx) movaps %xmm3, 16(%edx) movaps %xmm2, (%edx) lea 64(%edx), %edx jmp L(Shl6LoopStart) L(Shl6LoopExit): movaps (%edx), %xmm6 psrldq $10, %xmm6 mov $10, %esi palignr $6, %xmm1, %xmm6 movaps %xmm6, (%edx) jmp L(CopyFrom1To16Bytes) .p2align 4 L(Shl7): movaps -7(%ecx), %xmm1 movaps 9(%ecx), %xmm2 L(Shl7Start): pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %eax movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit7Case2OrCase3) # endif test %eax, %eax jnz L(Shl7LoopExit) palignr $7, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 25(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit7Case2OrCase3) # endif test %eax, %eax jnz L(Shl7LoopExit) palignr $7, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 25(%ecx), %xmm2 movaps %xmm3, %xmm1 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit7Case2OrCase3) # endif test %eax, %eax jnz L(Shl7LoopExit) palignr $7, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 25(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax 
lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit7Case2OrCase3) # endif test %eax, %eax jnz L(Shl7LoopExit) palignr $7, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) lea 25(%ecx), %ecx lea 16(%edx), %edx mov %ecx, %eax and $-0x40, %ecx sub %ecx, %eax lea -9(%ecx), %ecx sub %eax, %edx # ifdef USE_AS_STRNCPY add %eax, %ebx # endif movaps -7(%ecx), %xmm1 L(Shl7LoopStart): movaps 9(%ecx), %xmm2 movaps 25(%ecx), %xmm3 movaps %xmm3, %xmm6 movaps 41(%ecx), %xmm4 movaps %xmm4, %xmm7 movaps 57(%ecx), %xmm5 pminub %xmm2, %xmm6 pminub %xmm5, %xmm7 pminub %xmm6, %xmm7 pcmpeqb %xmm0, %xmm7 pmovmskb %xmm7, %eax movaps %xmm5, %xmm7 palignr $7, %xmm4, %xmm5 test %eax, %eax palignr $7, %xmm3, %xmm4 jnz L(Shl7Start) # ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(StrncpyLeave7) # endif palignr $7, %xmm2, %xmm3 lea 64(%ecx), %ecx palignr $7, %xmm1, %xmm2 movaps %xmm7, %xmm1 movaps %xmm5, 48(%edx) movaps %xmm4, 32(%edx) movaps %xmm3, 16(%edx) movaps %xmm2, (%edx) lea 64(%edx), %edx jmp L(Shl7LoopStart) L(Shl7LoopExit): movaps (%edx), %xmm6 psrldq $9, %xmm6 mov $9, %esi palignr $7, %xmm1, %xmm6 movaps %xmm6, (%edx) jmp L(CopyFrom1To16Bytes) .p2align 4 L(Shl8): movaps -8(%ecx), %xmm1 movaps 8(%ecx), %xmm2 L(Shl8Start): pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %eax movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit8Case2OrCase3) # endif test %eax, %eax jnz L(Shl8LoopExit) palignr $8, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 24(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit8Case2OrCase3) # endif test %eax, %eax jnz L(Shl8LoopExit) palignr $8, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 24(%ecx), %xmm2 movaps %xmm3, %xmm1 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit8Case2OrCase3) 
# endif test %eax, %eax jnz L(Shl8LoopExit) palignr $8, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 24(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit8Case2OrCase3) # endif test %eax, %eax jnz L(Shl8LoopExit) palignr $8, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) lea 24(%ecx), %ecx lea 16(%edx), %edx mov %ecx, %eax and $-0x40, %ecx sub %ecx, %eax lea -8(%ecx), %ecx sub %eax, %edx # ifdef USE_AS_STRNCPY add %eax, %ebx # endif movaps -8(%ecx), %xmm1 L(Shl8LoopStart): movaps 8(%ecx), %xmm2 movaps 24(%ecx), %xmm3 movaps %xmm3, %xmm6 movaps 40(%ecx), %xmm4 movaps %xmm4, %xmm7 movaps 56(%ecx), %xmm5 pminub %xmm2, %xmm6 pminub %xmm5, %xmm7 pminub %xmm6, %xmm7 pcmpeqb %xmm0, %xmm7 pmovmskb %xmm7, %eax movaps %xmm5, %xmm7 palignr $8, %xmm4, %xmm5 test %eax, %eax palignr $8, %xmm3, %xmm4 jnz L(Shl8Start) # ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(StrncpyLeave8) # endif palignr $8, %xmm2, %xmm3 lea 64(%ecx), %ecx palignr $8, %xmm1, %xmm2 movaps %xmm7, %xmm1 movaps %xmm5, 48(%edx) movaps %xmm4, 32(%edx) movaps %xmm3, 16(%edx) movaps %xmm2, (%edx) lea 64(%edx), %edx jmp L(Shl8LoopStart) L(Shl8LoopExit): movaps (%edx), %xmm6 psrldq $8, %xmm6 mov $8, %esi palignr $8, %xmm1, %xmm6 movaps %xmm6, (%edx) jmp L(CopyFrom1To16Bytes) .p2align 4 L(Shl9): movaps -9(%ecx), %xmm1 movaps 7(%ecx), %xmm2 L(Shl9Start): pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %eax movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit9Case2OrCase3) # endif test %eax, %eax jnz L(Shl9LoopExit) palignr $9, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 23(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit9Case2OrCase3) # endif test %eax, %eax jnz L(Shl9LoopExit) palignr $9, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 23(%ecx), 
%xmm2 movaps %xmm3, %xmm1 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit9Case2OrCase3) # endif test %eax, %eax jnz L(Shl9LoopExit) palignr $9, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 23(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit9Case2OrCase3) # endif test %eax, %eax jnz L(Shl9LoopExit) palignr $9, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) lea 23(%ecx), %ecx lea 16(%edx), %edx mov %ecx, %eax and $-0x40, %ecx sub %ecx, %eax lea -7(%ecx), %ecx sub %eax, %edx # ifdef USE_AS_STRNCPY add %eax, %ebx # endif movaps -9(%ecx), %xmm1 L(Shl9LoopStart): movaps 7(%ecx), %xmm2 movaps 23(%ecx), %xmm3 movaps %xmm3, %xmm6 movaps 39(%ecx), %xmm4 movaps %xmm4, %xmm7 movaps 55(%ecx), %xmm5 pminub %xmm2, %xmm6 pminub %xmm5, %xmm7 pminub %xmm6, %xmm7 pcmpeqb %xmm0, %xmm7 pmovmskb %xmm7, %eax movaps %xmm5, %xmm7 palignr $9, %xmm4, %xmm5 test %eax, %eax palignr $9, %xmm3, %xmm4 jnz L(Shl9Start) # ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(StrncpyLeave9) # endif palignr $9, %xmm2, %xmm3 lea 64(%ecx), %ecx palignr $9, %xmm1, %xmm2 movaps %xmm7, %xmm1 movaps %xmm5, 48(%edx) movaps %xmm4, 32(%edx) movaps %xmm3, 16(%edx) movaps %xmm2, (%edx) lea 64(%edx), %edx jmp L(Shl9LoopStart) L(Shl9LoopExit): movaps (%edx), %xmm6 psrldq $7, %xmm6 mov $7, %esi palignr $9, %xmm1, %xmm6 movaps %xmm6, (%edx) jmp L(CopyFrom1To16Bytes) .p2align 4 L(Shl10): movaps -10(%ecx), %xmm1 movaps 6(%ecx), %xmm2 L(Shl10Start): pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %eax movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit10Case2OrCase3) # endif test %eax, %eax jnz L(Shl10LoopExit) palignr $10, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 22(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), 
%ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit10Case2OrCase3) # endif test %eax, %eax jnz L(Shl10LoopExit) palignr $10, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 22(%ecx), %xmm2 movaps %xmm3, %xmm1 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit10Case2OrCase3) # endif test %eax, %eax jnz L(Shl10LoopExit) palignr $10, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 22(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit10Case2OrCase3) # endif test %eax, %eax jnz L(Shl10LoopExit) palignr $10, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) lea 22(%ecx), %ecx lea 16(%edx), %edx mov %ecx, %eax and $-0x40, %ecx sub %ecx, %eax lea -6(%ecx), %ecx sub %eax, %edx # ifdef USE_AS_STRNCPY add %eax, %ebx # endif movaps -10(%ecx), %xmm1 L(Shl10LoopStart): movaps 6(%ecx), %xmm2 movaps 22(%ecx), %xmm3 movaps %xmm3, %xmm6 movaps 38(%ecx), %xmm4 movaps %xmm4, %xmm7 movaps 54(%ecx), %xmm5 pminub %xmm2, %xmm6 pminub %xmm5, %xmm7 pminub %xmm6, %xmm7 pcmpeqb %xmm0, %xmm7 pmovmskb %xmm7, %eax movaps %xmm5, %xmm7 palignr $10, %xmm4, %xmm5 test %eax, %eax palignr $10, %xmm3, %xmm4 jnz L(Shl10Start) # ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(StrncpyLeave10) # endif palignr $10, %xmm2, %xmm3 lea 64(%ecx), %ecx palignr $10, %xmm1, %xmm2 movaps %xmm7, %xmm1 movaps %xmm5, 48(%edx) movaps %xmm4, 32(%edx) movaps %xmm3, 16(%edx) movaps %xmm2, (%edx) lea 64(%edx), %edx jmp L(Shl10LoopStart) L(Shl10LoopExit): movaps (%edx), %xmm6 psrldq $6, %xmm6 mov $6, %esi palignr $10, %xmm1, %xmm6 movaps %xmm6, (%edx) jmp L(CopyFrom1To16Bytes) .p2align 4 L(Shl11): movaps -11(%ecx), %xmm1 movaps 5(%ecx), %xmm2 L(Shl11Start): pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %eax movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe 
L(StrncpyExit11Case2OrCase3) # endif test %eax, %eax jnz L(Shl11LoopExit) palignr $11, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 21(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit11Case2OrCase3) # endif test %eax, %eax jnz L(Shl11LoopExit) palignr $11, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 21(%ecx), %xmm2 movaps %xmm3, %xmm1 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit11Case2OrCase3) # endif test %eax, %eax jnz L(Shl11LoopExit) palignr $11, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 21(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit11Case2OrCase3) # endif test %eax, %eax jnz L(Shl11LoopExit) palignr $11, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) lea 21(%ecx), %ecx lea 16(%edx), %edx mov %ecx, %eax and $-0x40, %ecx sub %ecx, %eax lea -5(%ecx), %ecx sub %eax, %edx # ifdef USE_AS_STRNCPY add %eax, %ebx # endif movaps -11(%ecx), %xmm1 L(Shl11LoopStart): movaps 5(%ecx), %xmm2 movaps 21(%ecx), %xmm3 movaps %xmm3, %xmm6 movaps 37(%ecx), %xmm4 movaps %xmm4, %xmm7 movaps 53(%ecx), %xmm5 pminub %xmm2, %xmm6 pminub %xmm5, %xmm7 pminub %xmm6, %xmm7 pcmpeqb %xmm0, %xmm7 pmovmskb %xmm7, %eax movaps %xmm5, %xmm7 palignr $11, %xmm4, %xmm5 test %eax, %eax palignr $11, %xmm3, %xmm4 jnz L(Shl11Start) # ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(StrncpyLeave11) # endif palignr $11, %xmm2, %xmm3 lea 64(%ecx), %ecx palignr $11, %xmm1, %xmm2 movaps %xmm7, %xmm1 movaps %xmm5, 48(%edx) movaps %xmm4, 32(%edx) movaps %xmm3, 16(%edx) movaps %xmm2, (%edx) lea 64(%edx), %edx jmp L(Shl11LoopStart) L(Shl11LoopExit): movaps (%edx), %xmm6 psrldq $5, %xmm6 mov $5, %esi palignr $11, %xmm1, %xmm6 movaps 
%xmm6, (%edx) jmp L(CopyFrom1To16Bytes) .p2align 4 L(Shl12): movaps -12(%ecx), %xmm1 movaps 4(%ecx), %xmm2 L(Shl12Start): pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %eax movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit12Case2OrCase3) # endif test %eax, %eax jnz L(Shl12LoopExit) palignr $12, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 20(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit12Case2OrCase3) # endif test %eax, %eax jnz L(Shl12LoopExit) palignr $12, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 20(%ecx), %xmm2 movaps %xmm3, %xmm1 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit12Case2OrCase3) # endif test %eax, %eax jnz L(Shl12LoopExit) palignr $12, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 20(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 # ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit12Case2OrCase3) # endif test %eax, %eax jnz L(Shl12LoopExit) palignr $12, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) lea 20(%ecx), %ecx lea 16(%edx), %edx mov %ecx, %eax and $-0x40, %ecx sub %ecx, %eax lea -4(%ecx), %ecx sub %eax, %edx # ifdef USE_AS_STRNCPY add %eax, %ebx # endif movaps -12(%ecx), %xmm1 L(Shl12LoopStart): movaps 4(%ecx), %xmm2 movaps 20(%ecx), %xmm3 movaps %xmm3, %xmm6 movaps 36(%ecx), %xmm4 movaps %xmm4, %xmm7 movaps 52(%ecx), %xmm5 pminub %xmm2, %xmm6 pminub %xmm5, %xmm7 pminub %xmm6, %xmm7 pcmpeqb %xmm0, %xmm7 pmovmskb %xmm7, %eax movaps %xmm5, %xmm7 palignr $12, %xmm4, %xmm5 test %eax, %eax palignr $12, %xmm3, %xmm4 jnz L(Shl12Start) # ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(StrncpyLeave12) # endif palignr $12, %xmm2, %xmm3 lea 64(%ecx), %ecx palignr $12, %xmm1, %xmm2 movaps %xmm7, %xmm1 movaps %xmm5, 
48(%edx)
	/* Remainder of the Shl12 64-byte loop body: store the other three
	   realigned chunks and advance the destination by 64.  */
	movaps %xmm4, 32(%edx)
	movaps %xmm3, 16(%edx)
	movaps %xmm2, (%edx)
	lea 64(%edx), %edx
	jmp L(Shl12LoopStart)

/* Shl12LoopExit: the mask in %eax is non-zero.  Write the top 4 bytes of
   %xmm1 to the first 4 bytes at (%edx) while keeping the other 12
   destination bytes (read-modify-write through %xmm6), then let
   CopyFrom1To16Bytes finish with both pointers advanced by esi = 4.  */
L(Shl12LoopExit):
	movaps (%edx), %xmm6
	psrldq $4, %xmm6
	mov $4, %esi
	palignr $12, %xmm1, %xmm6
	movaps %xmm6, (%edx)
	jmp L(CopyFrom1To16Bytes)

/* Shl13: 16-bytes-per-step copy for the case where the source chunks
   that can be read with movaps (16-byte aligned loads) sit at
   -13(%ecx) and 3(%ecx).  "palignr $13" joins two neighbouring chunks
   into the 16 source bytes that begin at (%ecx); they are stored with
   movaps to (%edx).  %xmm1 always carries the previous raw chunk.
   NOTE(review): the zero-byte detection presumes %xmm0 is all-zero
   whenever a pcmpeqb executes -- confirm at the dispatch site, which is
   not visible here.  */
	.p2align 4
L(Shl13):
	movaps -13(%ecx), %xmm1
	movaps 3(%ecx), %xmm2
L(Shl13Start):
	pcmpeqb %xmm2, %xmm0
	pmovmskb %xmm0, %eax	/* eax = one bit per matching byte of the new chunk */
	movaps %xmm2, %xmm3	/* keep the raw chunk; palignr below overwrites xmm2 */
# ifdef USE_AS_STRNCPY
	/* The strncpy length counter in %ebx runs out within this chunk.  */
	sub $16, %ebx
	jbe L(StrncpyExit13Case2OrCase3)
# endif
	test %eax, %eax
	jnz L(Shl13LoopExit)
	palignr $13, %xmm1, %xmm2
	movaps %xmm3, %xmm1
	movaps %xmm2, (%edx)
	/* Unrolled step 2.  */
	movaps 19(%ecx), %xmm2
	pcmpeqb %xmm2, %xmm0
	lea 16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea 16(%ecx), %ecx
	movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit13Case2OrCase3)
# endif
	test %eax, %eax
	jnz L(Shl13LoopExit)
	palignr $13, %xmm1, %xmm2
	movaps %xmm2, (%edx)
	/* Unrolled step 3.  */
	movaps 19(%ecx), %xmm2
	movaps %xmm3, %xmm1
	pcmpeqb %xmm2, %xmm0
	lea 16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea 16(%ecx), %ecx
	movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit13Case2OrCase3)
# endif
	test %eax, %eax
	jnz L(Shl13LoopExit)
	palignr $13, %xmm1, %xmm2
	movaps %xmm3, %xmm1
	movaps %xmm2, (%edx)
	/* Unrolled step 4.  */
	movaps 19(%ecx), %xmm2
	pcmpeqb %xmm2, %xmm0
	lea 16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea 16(%ecx), %ecx
	movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit13Case2OrCase3)
# endif
	test %eax, %eax
	jnz L(Shl13LoopExit)
	palignr $13, %xmm1, %xmm2
	movaps %xmm3, %xmm1
	movaps %xmm2, (%edx)
	/* Switch to the 64-byte loop: step %ecx to the next aligned chunk,
	   round it down to a multiple of 64 (eax = number of bytes given
	   back), restore the -13/+3 bias, and move %edx (and, for strncpy,
	   the counter in %ebx) back by the same amount.  */
	lea 19(%ecx), %ecx
	lea 16(%edx), %edx
	mov %ecx, %eax
	and $-0x40, %ecx
	sub %ecx, %eax
	lea -3(%ecx), %ecx
	sub %eax, %edx
# ifdef USE_AS_STRNCPY
	add %eax, %ebx
# endif
	movaps -13(%ecx), %xmm1
L(Shl13LoopStart):
	/* Load four aligned chunks.  Their byte-wise minimum (pminub) has a
	   zero byte exactly when one of the 64 bytes is zero; it is compared
	   against %xmm0 (presumed zero, see above).  */
	movaps 3(%ecx), %xmm2
	movaps 19(%ecx), %xmm3
	movaps %xmm3, %xmm6
	movaps 35(%ecx), %xmm4
	movaps %xmm4, %xmm7
	movaps 51(%ecx), %xmm5
	pminub %xmm2, %xmm6
	pminub %xmm5, %xmm7
	pminub %xmm6, %xmm7
	pcmpeqb %xmm0, %xmm7
	pmovmskb %xmm7, %eax
	movaps %xmm5, %xmm7	/* raw last chunk, becomes xmm1 for the next round */
	palignr $13, %xmm4, %xmm5
	test %eax,
%eax
	/* Remainder of the Shl13 64-byte loop body.  palignr is an SSE op
	   and leaves the flags of the preceding test intact for jnz.  */
	palignr $13, %xmm3, %xmm4
	jnz L(Shl13Start)	/* hit somewhere in the 64 bytes: redo them 16 at a time */
# ifdef USE_AS_STRNCPY
	sub $64, %ebx
	jbe L(StrncpyLeave13)
# endif
	palignr $13, %xmm2, %xmm3
	lea 64(%ecx), %ecx
	palignr $13, %xmm1, %xmm2
	movaps %xmm7, %xmm1
	movaps %xmm5, 48(%edx)
	movaps %xmm4, 32(%edx)
	movaps %xmm3, 16(%edx)
	movaps %xmm2, (%edx)
	lea 64(%edx), %edx
	jmp L(Shl13LoopStart)

/* Shl13LoopExit: the mask in %eax is non-zero.  Write the top 3 bytes of
   %xmm1 to the first 3 bytes at (%edx) while keeping the other 13
   destination bytes (read-modify-write through %xmm6), then let
   CopyFrom1To16Bytes finish with both pointers advanced by esi = 3.  */
L(Shl13LoopExit):
	movaps (%edx), %xmm6
	psrldq $3, %xmm6
	mov $3, %esi
	palignr $13, %xmm1, %xmm6
	movaps %xmm6, (%edx)
	jmp L(CopyFrom1To16Bytes)

/* Shl14: 16-bytes-per-step copy for the case where the source chunks
   that can be read with movaps (16-byte aligned loads) sit at
   -14(%ecx) and 2(%ecx).  "palignr $14" joins two neighbouring chunks
   into the 16 source bytes that begin at (%ecx); they are stored with
   movaps to (%edx).  %xmm1 always carries the previous raw chunk.
   NOTE(review): the zero-byte detection presumes %xmm0 is all-zero
   whenever a pcmpeqb executes -- confirm at the dispatch site, which is
   not visible here.  */
	.p2align 4
L(Shl14):
	movaps -14(%ecx), %xmm1
	movaps 2(%ecx), %xmm2
L(Shl14Start):
	pcmpeqb %xmm2, %xmm0
	pmovmskb %xmm0, %eax	/* eax = one bit per matching byte of the new chunk */
	movaps %xmm2, %xmm3	/* keep the raw chunk; palignr below overwrites xmm2 */
# ifdef USE_AS_STRNCPY
	/* The strncpy length counter in %ebx runs out within this chunk.  */
	sub $16, %ebx
	jbe L(StrncpyExit14Case2OrCase3)
# endif
	test %eax, %eax
	jnz L(Shl14LoopExit)
	palignr $14, %xmm1, %xmm2
	movaps %xmm3, %xmm1
	movaps %xmm2, (%edx)
	/* Unrolled step 2.  */
	movaps 18(%ecx), %xmm2
	pcmpeqb %xmm2, %xmm0
	lea 16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea 16(%ecx), %ecx
	movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit14Case2OrCase3)
# endif
	test %eax, %eax
	jnz L(Shl14LoopExit)
	palignr $14, %xmm1, %xmm2
	movaps %xmm2, (%edx)
	/* Unrolled step 3.  */
	movaps 18(%ecx), %xmm2
	movaps %xmm3, %xmm1
	pcmpeqb %xmm2, %xmm0
	lea 16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea 16(%ecx), %ecx
	movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit14Case2OrCase3)
# endif
	test %eax, %eax
	jnz L(Shl14LoopExit)
	palignr $14, %xmm1, %xmm2
	movaps %xmm3, %xmm1
	movaps %xmm2, (%edx)
	/* Unrolled step 4.  */
	movaps 18(%ecx), %xmm2
	pcmpeqb %xmm2, %xmm0
	lea 16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea 16(%ecx), %ecx
	movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit14Case2OrCase3)
# endif
	test %eax, %eax
	jnz L(Shl14LoopExit)
	palignr $14, %xmm1, %xmm2
	movaps %xmm3, %xmm1
	movaps %xmm2, (%edx)
	/* Switch to the 64-byte loop: step %ecx to the next aligned chunk,
	   round it down to a multiple of 64 (eax = number of bytes given
	   back), restore the -14/+2 bias, and move %edx (and, for strncpy,
	   the counter in %ebx) back by the same amount.  */
	lea 18(%ecx), %ecx
	lea 16(%edx), %edx
	mov %ecx, %eax
	and $-0x40, %ecx
	sub %ecx, %eax
	lea -2(%ecx), %ecx
	sub %eax, %edx
# ifdef USE_AS_STRNCPY
	add %eax, %ebx
# endif
	movaps -14(%ecx), %xmm1
L(Shl14LoopStart):
	/* Start of the 64-byte loop body: load the aligned chunks; %xmm6
	   receives a copy of the second one.  */
	movaps 2(%ecx), %xmm2
	movaps 18(%ecx), %xmm3
	movaps %xmm3, %xmm6
	/* Remainder of the Shl14 64-byte loop body.  The byte-wise minimum
	   (pminub) of the four chunks has a zero byte exactly when one of
	   the 64 bytes is zero; it is compared against %xmm0.
	   NOTE(review): presumes %xmm0 is all-zero here -- confirm.  */
	movaps 34(%ecx), %xmm4
	movaps %xmm4, %xmm7
	movaps 50(%ecx), %xmm5
	pminub %xmm2, %xmm6
	pminub %xmm5, %xmm7
	pminub %xmm6, %xmm7
	pcmpeqb %xmm0, %xmm7
	pmovmskb %xmm7, %eax
	movaps %xmm5, %xmm7	/* raw last chunk, becomes xmm1 for the next round */
	palignr $14, %xmm4, %xmm5
	test %eax, %eax
	palignr $14, %xmm3, %xmm4	/* SSE op: leaves the flags from test intact */
	jnz L(Shl14Start)	/* hit somewhere in the 64 bytes: redo them 16 at a time */
# ifdef USE_AS_STRNCPY
	sub $64, %ebx
	jbe L(StrncpyLeave14)
# endif
	palignr $14, %xmm2, %xmm3
	lea 64(%ecx), %ecx
	palignr $14, %xmm1, %xmm2
	movaps %xmm7, %xmm1
	movaps %xmm5, 48(%edx)
	movaps %xmm4, 32(%edx)
	movaps %xmm3, 16(%edx)
	movaps %xmm2, (%edx)
	lea 64(%edx), %edx
	jmp L(Shl14LoopStart)

/* Shl14LoopExit: the mask in %eax is non-zero.  Write the top 2 bytes of
   %xmm1 to the first 2 bytes at (%edx) while keeping the other 14
   destination bytes (read-modify-write through %xmm6), then let
   CopyFrom1To16Bytes finish with both pointers advanced by esi = 2.  */
L(Shl14LoopExit):
	movaps (%edx), %xmm6
	psrldq $2, %xmm6
	mov $2, %esi
	palignr $14, %xmm1, %xmm6
	movaps %xmm6, (%edx)
	jmp L(CopyFrom1To16Bytes)

/* Shl15: 16-bytes-per-step copy for the case where the source chunks
   that can be read with movaps (16-byte aligned loads) sit at
   -15(%ecx) and 1(%ecx).  "palignr $15" joins two neighbouring chunks
   into the 16 source bytes that begin at (%ecx); they are stored with
   movaps to (%edx).  %xmm1 always carries the previous raw chunk.  */
	.p2align 4
L(Shl15):
	movaps -15(%ecx), %xmm1
	movaps 1(%ecx), %xmm2
L(Shl15Start):
	pcmpeqb %xmm2, %xmm0
	pmovmskb %xmm0, %eax	/* eax = one bit per matching byte of the new chunk */
	movaps %xmm2, %xmm3	/* keep the raw chunk; palignr below overwrites xmm2 */
# ifdef USE_AS_STRNCPY
	/* The strncpy length counter in %ebx runs out within this chunk.  */
	sub $16, %ebx
	jbe L(StrncpyExit15Case2OrCase3)
# endif
	test %eax, %eax
	jnz L(Shl15LoopExit)
	palignr $15, %xmm1, %xmm2
	movaps %xmm3, %xmm1
	movaps %xmm2, (%edx)
	/* Unrolled step 2.  */
	movaps 17(%ecx), %xmm2
	pcmpeqb %xmm2, %xmm0
	lea 16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea 16(%ecx), %ecx
	movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit15Case2OrCase3)
# endif
	test %eax, %eax
	jnz L(Shl15LoopExit)
	palignr $15, %xmm1, %xmm2
	movaps %xmm2, (%edx)
	/* Unrolled step 3.  */
	movaps 17(%ecx), %xmm2
	movaps %xmm3, %xmm1
	pcmpeqb %xmm2, %xmm0
	lea 16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea 16(%ecx), %ecx
	movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit15Case2OrCase3)
# endif
	test %eax, %eax
	jnz L(Shl15LoopExit)
	palignr $15, %xmm1, %xmm2
	movaps %xmm3, %xmm1
	movaps %xmm2, (%edx)
	/* Unrolled step 4.  */
	movaps 17(%ecx), %xmm2
	pcmpeqb %xmm2, %xmm0
	lea 16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea 16(%ecx), %ecx
	movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit15Case2OrCase3)
# endif
	test %eax, %eax
	jnz L(Shl15LoopExit)
	palignr $15, %xmm1, %xmm2
	movaps %xmm3, %xmm1
	movaps %xmm2, (%edx)
	/* Begin the switch to the 64-byte loop: step %ecx to the next
	   aligned chunk and remember that address in %eax.  */
	lea 17(%ecx), %ecx
	lea 16(%edx), %edx
	mov %ecx, %eax
	/* Rest of the Shl15 switch to the 64-byte loop: round %ecx down to
	   a multiple of 64 (eax = number of bytes given back), restore the
	   -15/+1 bias, and move %edx (and, for strncpy, the counter in
	   %ebx) back by the same amount.  */
	and $-0x40, %ecx
	sub %ecx, %eax
	lea -1(%ecx), %ecx
	sub %eax, %edx
# ifdef USE_AS_STRNCPY
	add %eax, %ebx
# endif
	movaps -15(%ecx), %xmm1
L(Shl15LoopStart):
	/* Load four aligned chunks.  Their byte-wise minimum (pminub) has a
	   zero byte exactly when one of the 64 bytes is zero; it is compared
	   against %xmm0.
	   NOTE(review): presumes %xmm0 is all-zero here -- confirm.  */
	movaps 1(%ecx), %xmm2
	movaps 17(%ecx), %xmm3
	movaps %xmm3, %xmm6
	movaps 33(%ecx), %xmm4
	movaps %xmm4, %xmm7
	movaps 49(%ecx), %xmm5
	pminub %xmm2, %xmm6
	pminub %xmm5, %xmm7
	pminub %xmm6, %xmm7
	pcmpeqb %xmm0, %xmm7
	pmovmskb %xmm7, %eax
	movaps %xmm5, %xmm7	/* raw last chunk, becomes xmm1 for the next round */
	palignr $15, %xmm4, %xmm5
	test %eax, %eax
	palignr $15, %xmm3, %xmm4	/* SSE op: leaves the flags from test intact */
	jnz L(Shl15Start)	/* hit somewhere in the 64 bytes: redo them 16 at a time */
# ifdef USE_AS_STRNCPY
	sub $64, %ebx
	jbe L(StrncpyLeave15)
# endif
	palignr $15, %xmm2, %xmm3
	lea 64(%ecx), %ecx
	palignr $15, %xmm1, %xmm2
	movaps %xmm7, %xmm1
	movaps %xmm5, 48(%edx)
	movaps %xmm4, 32(%edx)
	movaps %xmm3, 16(%edx)
	movaps %xmm2, (%edx)
	lea 64(%edx), %edx
	jmp L(Shl15LoopStart)

/* Shl15LoopExit: the mask in %eax is non-zero.  Write the top byte of
   %xmm1 to the first byte at (%edx) while keeping the other 15
   destination bytes (read-modify-write through %xmm6), then continue in
   CopyFrom1To16Bytes with esi = 1.  Without USE_AS_STRCAT that label
   follows directly, so no jump is emitted.  */
L(Shl15LoopExit):
	movaps (%edx), %xmm6
	psrldq $1, %xmm6
	mov $1, %esi
	palignr $15, %xmm1, %xmm6
	movaps %xmm6, (%edx)
# ifdef USE_AS_STRCAT
	jmp L(CopyFrom1To16Bytes)
# endif

# ifndef USE_AS_STRCAT

/* CopyFrom1To16Bytes: finish the copy once the byte mask in %eax is
   non-zero.  In: eax = pmovmskb mask, esi = offset added to both %edx
   and %ecx; the saved %esi is popped here.  The lowest set mask bit k
   (counting from 0) selects Exit<k+1>, which copies k+1 bytes.  Bit 7
   and bit 15 are not tested: control falls through to Exit8 / Exit16.  */
	.p2align 4
L(CopyFrom1To16Bytes):
# ifdef USE_AS_STRNCPY
	add $16, %ebx		/* give back the 16 subtracted before the mask test */
# endif
	add %esi, %edx
	add %esi, %ecx

	POP (%esi)
	test %al, %al
	jz L(ExitHigh)		/* nothing in the low 8 bytes: look at mask bits 8..15 */
	test $0x01, %al
	jnz L(Exit1)
	test $0x02, %al
	jnz L(Exit2)
	test $0x04, %al
	jnz L(Exit3)
	test $0x08, %al
	jnz L(Exit4)
	test $0x10, %al
	jnz L(Exit5)
	test $0x20, %al
	jnz L(Exit6)
	test $0x40, %al
	jnz L(Exit7)

/* Exit8: copy the final 8 bytes from (%ecx) to (%edx) and return through
   RETURN1 (which pops the saved %edi).  Return value: stpcpy builds use
   the address of the last byte copied, other builds the destination
   saved in %edi.  strncpy builds zero-fill from %ecx (just past the
   copied bytes) when %ebx is not used up; lea does not touch the flags,
   so jnz tests the result of the sub.  stpncpy finally adds one to %eax
   when the byte it points at is non-zero (cmpb sets carry only for a
   zero byte, and sbb $-1 adds 1 minus carry).  */
	.p2align 4
L(Exit8):
	movlpd (%ecx), %xmm0
	movlpd %xmm0, (%edx)
# ifdef USE_AS_STPCPY
	lea 7(%edx), %eax
# else
	movl %edi, %eax
# endif
# ifdef USE_AS_STRNCPY
	sub $8, %ebx
	lea 8(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1

/* ExitHigh: same dispatch as above for mask bits 8..14 (in %ah); if none
   of them is set control falls through to Exit16.  */
	.p2align 4
L(ExitHigh):
	test $0x01, %ah
	jnz L(Exit9)
	test $0x02, %ah
	jnz L(Exit10)
	test $0x04, %ah
	jnz L(Exit11)
	test $0x08, %ah
	jnz L(Exit12)
	test $0x10, %ah
	jnz L(Exit13)
	test $0x20, %ah
	jnz L(Exit14)
	test $0x40, %ah
	jnz L(Exit15)

/* Exit16: copy the final 16 bytes as two 8-byte moves.  */
	.p2align 4
L(Exit16):
	movlpd (%ecx), %xmm0
	movlpd %xmm0, (%edx)
	movlpd 8(%ecx), %xmm0
	movlpd %xmm0, 8(%edx)
# ifdef USE_AS_STPCPY
	lea
15(%edx), %eax
# else
	movl %edi, %eax
# endif
	/* Tail of Exit16 (16 bytes already copied): strncpy builds
	   zero-fill from %ecx when %ebx is not used up; stpncpy adds one to
	   %eax when the byte it points at is non-zero.  */
# ifdef USE_AS_STRNCPY
	sub $16, %ebx
	lea 16(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1

# ifdef USE_AS_STRNCPY

	CFI_PUSH(%esi)		/* unwind bookkeeping only: %esi is still on the stack here */

/* CopyFrom1To16BytesCase2: the strncpy counter ran out within the
   current 16 bytes and the mask in %eax is non-zero as well.  After the
   `add $16' %ebx is the number of bytes still allowed; Exit<k> is taken
   for the smallest k where either %ebx == k or mask bit k-1 is set.
   %dh is built as bit 15 of (ebx - 9) OR'ed with %al, so it is zero only
   when ebx - 9 has that bit clear and no mask bit 0..7 is set; in that
   case only positions 9..16 need checking (ExitHighCase2).  The lea
   that restores %edx does not disturb the flags of `test %dh, %dh'.  */
	.p2align 4
L(CopyFrom1To16BytesCase2):
	add $16, %ebx
	add %esi, %ecx
	lea (%esi, %edx), %esi	/* esi = final destination pointer, parked while edx is scratch */
	lea -9(%ebx), %edx
	and $1<<7, %dh
	or %al, %dh
	test %dh, %dh
	lea (%esi), %edx
	POP (%esi)
	jz L(ExitHighCase2)

	cmp $1, %ebx
	je L(Exit1)
	test $0x01, %al
	jnz L(Exit1)
	cmp $2, %ebx
	je L(Exit2)
	test $0x02, %al
	jnz L(Exit2)
	cmp $3, %ebx
	je L(Exit3)
	test $0x04, %al
	jnz L(Exit3)
	cmp $4, %ebx
	je L(Exit4)
	test $0x08, %al
	jnz L(Exit4)
	cmp $5, %ebx
	je L(Exit5)
	test $0x10, %al
	jnz L(Exit5)
	cmp $6, %ebx
	je L(Exit6)
	test $0x20, %al
	jnz L(Exit6)
	cmp $7, %ebx
	je L(Exit7)
	test $0x40, %al
	jnz L(Exit7)
	jmp L(Exit8)

/* ExitHighCase2: same search for positions 9..15 using %ah; Exit16 is
   the fallback.  */
	.p2align 4
L(ExitHighCase2):
	cmp $9, %ebx
	je L(Exit9)
	test $0x01, %ah
	jnz L(Exit9)
	cmp $10, %ebx
	je L(Exit10)
	test $0x02, %ah
	jnz L(Exit10)
	cmp $11, %ebx
	je L(Exit11)
	test $0x04, %ah
	jnz L(Exit11)
	cmp $12, %ebx
	je L(Exit12)
	test $0x8, %ah
	jnz L(Exit12)
	cmp $13, %ebx
	je L(Exit13)
	test $0x10, %ah
	jnz L(Exit13)
	cmp $14, %ebx
	je L(Exit14)
	test $0x20, %ah
	jnz L(Exit14)
	cmp $15, %ebx
	je L(Exit15)
	test $0x40, %ah
	jnz L(Exit15)
	jmp L(Exit16)

	CFI_PUSH(%esi)		/* unwind bookkeeping only */

/* CopyFrom1To16BytesCase2OrCase3: the counter ran out; a non-zero mask
   in %eax selects Case2, otherwise control falls into Case3.  */
L(CopyFrom1To16BytesCase2OrCase3):
	test %eax, %eax
	jnz L(CopyFrom1To16BytesCase2)

/* CopyFrom1To16BytesCase3: the counter ran out and the mask is zero.
   After the `add $16' %ebx is the number of bytes to copy; the
   comparison tree below picks the matching Exit<ebx>.  */
	.p2align 4
L(CopyFrom1To16BytesCase3):
	add $16, %ebx
	add %esi, %edx
	add %esi, %ecx

	POP (%esi)
	cmp $16, %ebx
	je L(Exit16)
	cmp $8, %ebx
	je L(Exit8)
	jg L(More8Case3)
	cmp $4, %ebx
	je L(Exit4)
	jg L(More4Case3)
	cmp $2, %ebx
	jl L(Exit1)
	je L(Exit2)
	jg L(Exit3)
L(More8Case3): /* but less than 16 */
	cmp $12, %ebx
	je L(Exit12)
	jl L(Less12Case3)
	cmp $14, %ebx
	jl L(Exit13)
	je L(Exit14)
	jg L(Exit15)
L(More4Case3): /* but less than 8 */
	cmp $6, %ebx
	jl L(Exit5)
	je L(Exit6)
	jg L(Exit7)
L(Less12Case3): /* but more than 8 */
	cmp $10, %ebx
	jl L(Exit9)
	je L(Exit10)
	jg L(Exit11)
# endif

/* Exit1: copy the final byte from (%ecx) to (%edx).  */
	.p2align 4
L(Exit1):
	movb (%ecx), %al
	movb %al,
(%edx)
	/* Tail of Exit1 (1 byte already copied).  All Exit<k> routines
	   below share this ending and return through RETURN1 (which pops
	   the saved %edi): the return value is the address of the last byte
	   copied for stpcpy builds, else the destination saved in %edi.
	   strncpy builds zero-fill from %ecx (just past the copied bytes)
	   when %ebx is not used up -- lea leaves the flags of the sub
	   intact for jnz.  stpncpy finally adds one to %eax when the byte
	   it points at is non-zero (cmpb sets carry only for a zero byte,
	   and sbb $-1 adds 1 minus carry).  */
# ifdef USE_AS_STPCPY
	lea (%edx), %eax
# else
	movl %edi, %eax
# endif
# ifdef USE_AS_STRNCPY
	sub $1, %ebx
	lea 1(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1

/* Exit2: copy the final 2 bytes.  */
	.p2align 4
L(Exit2):
	movw (%ecx), %ax
	movw %ax, (%edx)
# ifdef USE_AS_STPCPY
	lea 1(%edx), %eax
# else
	movl %edi, %eax
# endif
# ifdef USE_AS_STRNCPY
	sub $2, %ebx
	lea 2(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1

/* Exit3: copy the final 3 bytes (2 + 1).  */
	.p2align 4
L(Exit3):
	movw (%ecx), %ax
	movw %ax, (%edx)
	movb 2(%ecx), %al
	movb %al, 2(%edx)
# ifdef USE_AS_STPCPY
	lea 2(%edx), %eax
# else
	movl %edi, %eax
# endif
# ifdef USE_AS_STRNCPY
	sub $3, %ebx
	lea 3(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1

/* Exit4: copy the final 4 bytes.  */
	.p2align 4
L(Exit4):
	movl (%ecx), %eax
	movl %eax, (%edx)
# ifdef USE_AS_STPCPY
	lea 3(%edx), %eax
# else
	movl %edi, %eax
# endif
# ifdef USE_AS_STRNCPY
	sub $4, %ebx
	lea 4(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1

/* Exit5: copy the final 5 bytes (4 + 1).  */
	.p2align 4
L(Exit5):
	movl (%ecx), %eax
	movl %eax, (%edx)
	movb 4(%ecx), %al
	movb %al, 4(%edx)
# ifdef USE_AS_STPCPY
	lea 4(%edx), %eax
# else
	movl %edi, %eax
# endif
# ifdef USE_AS_STRNCPY
	sub $5, %ebx
	lea 5(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1

/* Exit6: copy the final 6 bytes (4 + 2).  */
	.p2align 4
L(Exit6):
	movl (%ecx), %eax
	movl %eax, (%edx)
	movw 4(%ecx), %ax
	movw %ax, 4(%edx)
# ifdef USE_AS_STPCPY
	lea 5(%edx), %eax
# else
	movl %edi, %eax
# endif
# ifdef USE_AS_STRNCPY
	sub $6, %ebx
	lea 6(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1

/* Exit7: copy the final 7 bytes as two 4-byte moves that overlap by one
   byte (offsets 0 and 3).  */
	.p2align 4
L(Exit7):
	movl (%ecx), %eax
	movl %eax, (%edx)
	movl 3(%ecx), %eax
	movl %eax, 3(%edx)
# ifdef USE_AS_STPCPY
	lea 6(%edx), %eax
# else
	movl %edi, %eax
# endif
# ifdef USE_AS_STRNCPY
	sub $7, %ebx
	lea
7(%edx), %ecx
	/* Tail of Exit7 (7 bytes already copied).  All Exit<k> routines
	   below share this ending and return through RETURN1 (which pops
	   the saved %edi): the return value is the address of the last byte
	   copied for stpcpy builds, else the destination saved in %edi.
	   strncpy builds zero-fill from %ecx (just past the copied bytes)
	   when %ebx is not used up -- lea leaves the flags of the sub
	   intact for jnz.  stpncpy finally adds one to %eax when the byte
	   it points at is non-zero.  */
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1

/* Exit9: copy the final 9 bytes (8 via %xmm0, then 1).  */
	.p2align 4
L(Exit9):
	movlpd (%ecx), %xmm0
	movlpd %xmm0, (%edx)
	movb 8(%ecx), %al
	movb %al, 8(%edx)
# ifdef USE_AS_STPCPY
	lea 8(%edx), %eax
# else
	movl %edi, %eax
# endif
# ifdef USE_AS_STRNCPY
	sub $9, %ebx
	lea 9(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1

/* Exit10: copy the final 10 bytes (8 + 2).  */
	.p2align 4
L(Exit10):
	movlpd (%ecx), %xmm0
	movlpd %xmm0, (%edx)
	movw 8(%ecx), %ax
	movw %ax, 8(%edx)
# ifdef USE_AS_STPCPY
	lea 9(%edx), %eax
# else
	movl %edi, %eax
# endif
# ifdef USE_AS_STRNCPY
	sub $10, %ebx
	lea 10(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1

/* Exit11: copy the final 11 bytes (8, then 4 at offset 7, overlapping
   by one byte).  */
	.p2align 4
L(Exit11):
	movlpd (%ecx), %xmm0
	movlpd %xmm0, (%edx)
	movl 7(%ecx), %eax
	movl %eax, 7(%edx)
# ifdef USE_AS_STPCPY
	lea 10(%edx), %eax
# else
	movl %edi, %eax
# endif
# ifdef USE_AS_STRNCPY
	sub $11, %ebx
	lea 11(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1

/* Exit12: copy the final 12 bytes (8 + 4).  */
	.p2align 4
L(Exit12):
	movlpd (%ecx), %xmm0
	movlpd %xmm0, (%edx)
	movl 8(%ecx), %eax
	movl %eax, 8(%edx)
# ifdef USE_AS_STPCPY
	lea 11(%edx), %eax
# else
	movl %edi, %eax
# endif
# ifdef USE_AS_STRNCPY
	sub $12, %ebx
	lea 12(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1

/* Exit13: copy the final 13 bytes as two 8-byte moves at offsets 0 and
   5 (overlapping).  */
	.p2align 4
L(Exit13):
	movlpd (%ecx), %xmm0
	movlpd %xmm0, (%edx)
	movlpd 5(%ecx), %xmm0
	movlpd %xmm0, 5(%edx)
# ifdef USE_AS_STPCPY
	lea 12(%edx), %eax
# else
	movl %edi, %eax
# endif
# ifdef USE_AS_STRNCPY
	sub $13, %ebx
	lea 13(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1

/* Exit14: copy the final 14 bytes as two 8-byte moves at offsets 0 and
   6 (overlapping).  */
	.p2align 4
L(Exit14):
	movlpd (%ecx), %xmm0
	movlpd %xmm0, (%edx)
	movlpd 6(%ecx), %xmm0
	movlpd %xmm0, 6(%edx)
# ifdef USE_AS_STPCPY
	lea 13(%edx), %eax
# else
	movl %edi, %eax
# endif
# ifdef
USE_AS_STRNCPY
	/* Tail of Exit14 (14 bytes already copied): zero-fill from %ecx
	   when %ebx is not used up (lea leaves the flags of the sub intact
	   for jnz); stpncpy adds one to %eax when the byte it points at is
	   non-zero.  */
	sub $14, %ebx
	lea 14(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1

/* Exit15: copy the final 15 bytes as two 8-byte moves at offsets 0 and
   7 (overlapping), then finish exactly like the tail above.  */
	.p2align 4
L(Exit15):
	movlpd (%ecx), %xmm0
	movlpd %xmm0, (%edx)
	movlpd 7(%ecx), %xmm0
	movlpd %xmm0, 7(%edx)
# ifdef USE_AS_STPCPY
	lea 14(%edx), %eax
# else
	movl %edi, %eax
# endif
# ifdef USE_AS_STRNCPY
	sub $15, %ebx
	lea 15(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1

	CFI_POP (%edi)		/* unwind bookkeeping only: %edi is no longer on the stack below */

# ifdef USE_AS_STRNCPY
/* Fill<k>: store k bytes at (%ecx) from %edx / %xmm0 and return through
   RETURN.  Both registers are cleared by L(StrncpyFillTailWithZero)
   before control gets here, so the stores write zero bytes.  Sizes that
   are not 1, 2, 4, 8 or 16 use two stores; for 7, 11, 13, 14 and 15 the
   two stores overlap.  */
	.p2align 4
L(Fill0):
	RETURN

	.p2align 4
L(Fill1):
	movb %dl, (%ecx)
	RETURN

	.p2align 4
L(Fill2):
	movw %dx, (%ecx)
	RETURN

	.p2align 4
L(Fill3):
	movw %dx, (%ecx)
	movb %dl, 2(%ecx)
	RETURN

	.p2align 4
L(Fill4):
	movl %edx, (%ecx)
	RETURN

	.p2align 4
L(Fill5):
	movl %edx, (%ecx)
	movb %dl, 4(%ecx)
	RETURN

	.p2align 4
L(Fill6):
	movl %edx, (%ecx)
	movw %dx, 4(%ecx)
	RETURN

	.p2align 4
L(Fill7):
	movl %edx, (%ecx)
	movl %edx, 3(%ecx)
	RETURN

	.p2align 4
L(Fill8):
	movlpd %xmm0, (%ecx)
	RETURN

	.p2align 4
L(Fill9):
	movlpd %xmm0, (%ecx)
	movb %dl, 8(%ecx)
	RETURN

	.p2align 4
L(Fill10):
	movlpd %xmm0, (%ecx)
	movw %dx, 8(%ecx)
	RETURN

	.p2align 4
L(Fill11):
	movlpd %xmm0, (%ecx)
	movl %edx, 7(%ecx)
	RETURN

	.p2align 4
L(Fill12):
	movlpd %xmm0, (%ecx)
	movl %edx, 8(%ecx)
	RETURN

	.p2align 4
L(Fill13):
	movlpd %xmm0, (%ecx)
	movlpd %xmm0, 5(%ecx)
	RETURN

	.p2align 4
L(Fill14):
	movlpd %xmm0, (%ecx)
	movlpd %xmm0, 6(%ecx)
	RETURN

	.p2align 4
L(Fill15):
	movlpd %xmm0, (%ecx)
	movlpd %xmm0, 7(%ecx)
	RETURN

	.p2align 4
L(Fill16):
	movlpd %xmm0, (%ecx)
	movlpd %xmm0, 8(%ecx)
	RETURN

/* StrncpyFillExit1: %ebx holds (bytes left - 16); add the 16 back and
   fall into the dispatcher.  */
	.p2align 4
L(StrncpyFillExit1):
	lea 16(%ebx), %ebx
/* FillFrom1To16Bytes: jump to Fill<ebx> through a comparison tree.  */
L(FillFrom1To16Bytes):
	test %ebx, %ebx
	jz L(Fill0)
	cmp $16, %ebx
	je L(Fill16)
	cmp $8, %ebx
	je L(Fill8)
	jg L(FillMore8)
	cmp $4, %ebx
	je L(Fill4)
	jg L(FillMore4)
	cmp $2, %ebx
	jl L(Fill1)
	je L(Fill2)
	jg L(Fill3)
L(FillMore8): /* but less than 16 */
	cmp $12, %ebx
	je L(Fill12)
	jl L(FillLess12)
	cmp $14, %ebx
	jl L(Fill13)
	je L(Fill14)
	jg L(Fill15)
L(FillMore4): /* but less than 8 */
	/* Remaining branches of the fill dispatcher (sizes 5..7).  */
	cmp $6, %ebx
	jl L(Fill5)
	je L(Fill6)
	jg L(Fill7)
L(FillLess12): /* but more than 8 */
	cmp $10, %ebx
	jl L(Fill9)
	je L(Fill10)
	jmp L(Fill11)

	CFI_PUSH (%edi)		/* unwind bookkeeping only: this entry still has %edi on the stack */

/* StrncpyFillTailWithZero1: entry used by the Exit<k> routines; restores
   the saved %edi and falls into the common fill code.  */
	.p2align 4
L(StrncpyFillTailWithZero1):
	POP (%edi)
/* StrncpyFillTailWithZero: write %ebx zero bytes starting at %ecx, then
   return through one of the Fill<k> stubs.  %eax is not modified here.  */
L(StrncpyFillTailWithZero):
	pxor %xmm0, %xmm0
	xor %edx, %edx
	sub $16, %ebx
	jbe L(StrncpyFillExit1)		/* 16 bytes or fewer: the stubs do it all */

	/* More than 16: zero the first 16 bytes with unaligned-safe 8-byte
	   stores, then round %ecx down to a 16-byte boundary and add the
	   bytes stepped back to the count, so the aligned stores below may
	   rewrite a few bytes that are already zero.  */
	movlpd %xmm0, (%ecx)
	movlpd %xmm0, 8(%ecx)

	lea 16(%ecx), %ecx

	mov %ecx, %edx
	and $0xf, %edx
	sub %edx, %ecx
	add %edx, %ebx
	xor %edx, %edx		/* the Fill stubs store %edx, so it must be zero again */
	sub $64, %ebx
	jb L(StrncpyFillLess64)

	/* 64 zero bytes per iteration while at least 64 remain.  */
L(StrncpyFillLoopMovdqa):
	movdqa %xmm0, (%ecx)
	movdqa %xmm0, 16(%ecx)
	movdqa %xmm0, 32(%ecx)
	movdqa %xmm0, 48(%ecx)
	lea 64(%ecx), %ecx
	sub $64, %ebx
	jae L(StrncpyFillLoopMovdqa)

	/* %ebx = (bytes left - 64), negative here.  */
L(StrncpyFillLess64):
	add $32, %ebx		/* ebx = bytes left - 32 */
	jl L(StrncpyFillLess32)
	movdqa %xmm0, (%ecx)
	movdqa %xmm0, 16(%ecx)
	lea 32(%ecx), %ecx
	sub $16, %ebx
	jl L(StrncpyFillExit1)
	movdqa %xmm0, (%ecx)
	lea 16(%ecx), %ecx
	jmp L(FillFrom1To16Bytes)

L(StrncpyFillLess32):
	add $16, %ebx		/* ebx = bytes left - 16 */
	jl L(StrncpyFillExit1)
	movdqa %xmm0, (%ecx)
	lea 16(%ecx), %ecx
	jmp L(FillFrom1To16Bytes)
# endif

/* ExitTail<k>: copy the final k bytes from (%ecx) to (%edx) and return
   through RETURN.  These differ from Exit<k> in three ways: the
   non-stpcpy return value comes from %edx itself, the zero-fill entry
   is L(StrncpyFillTailWithZero) (no %edi to pop), and the epilogue is
   RETURN rather than RETURN1.  Return value and fill handling otherwise
   follow the same pattern: stpcpy builds return the address of the last
   byte copied; strncpy builds zero-fill from %ecx when %ebx is not used
   up (lea keeps the flags of the sub for jnz); stpncpy adds one to %eax
   when the byte it points at is non-zero.  */
	.p2align 4
L(ExitTail1):
	movb (%ecx), %al
	movb %al, (%edx)
# ifdef USE_AS_STPCPY
	lea (%edx), %eax
# else
	movl %edx, %eax
# endif
# ifdef USE_AS_STRNCPY
	sub $1, %ebx
	lea 1(%edx), %ecx
	jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN

/* ExitTail2: 2 bytes.  */
	.p2align 4
L(ExitTail2):
	movw (%ecx), %ax
	movw %ax, (%edx)
# ifdef USE_AS_STPCPY
	lea 1(%edx), %eax
# else
	movl %edx, %eax
# endif
# ifdef USE_AS_STRNCPY
	sub $2, %ebx
	lea 2(%edx), %ecx
	jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN

/* ExitTail3: 3 bytes (2 + 1).  */
	.p2align 4
L(ExitTail3):
	movw (%ecx), %ax
	movw %ax, (%edx)
	movb 2(%ecx), %al
	movb %al, 2(%edx)
# ifdef USE_AS_STPCPY
	lea 2(%edx), %eax
# else
	movl %edx, %eax
# endif
# ifdef USE_AS_STRNCPY
	sub $3, %ebx
	lea 3(%edx), %ecx
	jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN

/* ExitTail4: 4 bytes.  */
	.p2align 4
L(ExitTail4):
	movl (%ecx), %eax
	movl %eax, (%edx)
# ifdef
USE_AS_STPCPY
	/* Tail of ExitTail4 (4 bytes already copied).  Every ExitTail<k>
	   ends the same way and returns through RETURN: the return value is
	   the address of the last byte copied for stpcpy builds, else the
	   destination pointer still held in %edx.  strncpy builds zero-fill
	   from %ecx (just past the copied bytes) when %ebx is not used up
	   -- lea keeps the flags of the sub for jnz.  stpncpy finally adds
	   one to %eax when the byte it points at is non-zero.  */
	lea 3(%edx), %eax
# else
	movl %edx, %eax
# endif
# ifdef USE_AS_STRNCPY
	sub $4, %ebx
	lea 4(%edx), %ecx
	jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN

/* ExitTail5: 5 bytes (4 + 1).  */
	.p2align 4
L(ExitTail5):
	movl (%ecx), %eax
	movl %eax, (%edx)
	movb 4(%ecx), %al
	movb %al, 4(%edx)
# ifdef USE_AS_STPCPY
	lea 4(%edx), %eax
# else
	movl %edx, %eax
# endif
# ifdef USE_AS_STRNCPY
	sub $5, %ebx
	lea 5(%edx), %ecx
	jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN

/* ExitTail6: 6 bytes (4 + 2).  */
	.p2align 4
L(ExitTail6):
	movl (%ecx), %eax
	movl %eax, (%edx)
	movw 4(%ecx), %ax
	movw %ax, 4(%edx)
# ifdef USE_AS_STPCPY
	lea 5(%edx), %eax
# else
	movl %edx, %eax
# endif
# ifdef USE_AS_STRNCPY
	sub $6, %ebx
	lea 6(%edx), %ecx
	jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN

/* ExitTail7: 7 bytes as two 4-byte moves at offsets 0 and 3
   (overlapping by one byte).  */
	.p2align 4
L(ExitTail7):
	movl (%ecx), %eax
	movl %eax, (%edx)
	movl 3(%ecx), %eax
	movl %eax, 3(%edx)
# ifdef USE_AS_STPCPY
	lea 6(%edx), %eax
# else
	movl %edx, %eax
# endif
# ifdef USE_AS_STRNCPY
	sub $7, %ebx
	lea 7(%edx), %ecx
	jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN

/* ExitTail8: 8 bytes via %xmm0.  */
	.p2align 4
L(ExitTail8):
	movlpd (%ecx), %xmm0
	movlpd %xmm0, (%edx)
# ifdef USE_AS_STPCPY
	lea 7(%edx), %eax
# else
	movl %edx, %eax
# endif
# ifdef USE_AS_STRNCPY
	sub $8, %ebx
	lea 8(%edx), %ecx
	jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN

/* ExitTail9: 9 bytes (8 + 1).  */
	.p2align 4
L(ExitTail9):
	movlpd (%ecx), %xmm0
	movlpd %xmm0, (%edx)
	movb 8(%ecx), %al
	movb %al, 8(%edx)
# ifdef USE_AS_STPCPY
	lea 8(%edx), %eax
# else
	movl %edx, %eax
# endif
# ifdef USE_AS_STRNCPY
	sub $9, %ebx
	lea 9(%edx), %ecx
	jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN

/* ExitTail10: 10 bytes (8 + 2).  */
	.p2align 4
L(ExitTail10):
	movlpd (%ecx), %xmm0
	movlpd %xmm0, (%edx)
	movw 8(%ecx), %ax
	movw %ax, 8(%edx)
# ifdef USE_AS_STPCPY
	lea 9(%edx), %eax
# else
	movl
%edx, %eax
# endif
	/* Tail of ExitTail10 (10 bytes already copied).  Every ExitTail<k>
	   ends the same way and returns through RETURN: the return value is
	   the address of the last byte copied for stpcpy builds, else the
	   destination pointer still held in %edx.  strncpy builds zero-fill
	   from %ecx (just past the copied bytes) when %ebx is not used up
	   -- lea keeps the flags of the sub for jnz.  stpncpy finally adds
	   one to %eax when the byte it points at is non-zero.  */
# ifdef USE_AS_STRNCPY
	sub $10, %ebx
	lea 10(%edx), %ecx
	jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN

/* ExitTail11: 11 bytes (8, then 4 at offset 7, overlapping by one).  */
	.p2align 4
L(ExitTail11):
	movlpd (%ecx), %xmm0
	movlpd %xmm0, (%edx)
	movl 7(%ecx), %eax
	movl %eax, 7(%edx)
# ifdef USE_AS_STPCPY
	lea 10(%edx), %eax
# else
	movl %edx, %eax
# endif
# ifdef USE_AS_STRNCPY
	sub $11, %ebx
	lea 11(%edx), %ecx
	jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN

/* ExitTail12: 12 bytes (8 + 4).  */
	.p2align 4
L(ExitTail12):
	movlpd (%ecx), %xmm0
	movlpd %xmm0, (%edx)
	movl 8(%ecx), %eax
	movl %eax, 8(%edx)
# ifdef USE_AS_STPCPY
	lea 11(%edx), %eax
# else
	movl %edx, %eax
# endif
# ifdef USE_AS_STRNCPY
	sub $12, %ebx
	lea 12(%edx), %ecx
	jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN

/* ExitTail13: 13 bytes as two 8-byte moves at offsets 0 and 5.  */
	.p2align 4
L(ExitTail13):
	movlpd (%ecx), %xmm0
	movlpd %xmm0, (%edx)
	movlpd 5(%ecx), %xmm0
	movlpd %xmm0, 5(%edx)
# ifdef USE_AS_STPCPY
	lea 12(%edx), %eax
# else
	movl %edx, %eax
# endif
# ifdef USE_AS_STRNCPY
	sub $13, %ebx
	lea 13(%edx), %ecx
	jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN

/* ExitTail14: 14 bytes as two 8-byte moves at offsets 0 and 6.  */
	.p2align 4
L(ExitTail14):
	movlpd (%ecx), %xmm0
	movlpd %xmm0, (%edx)
	movlpd 6(%ecx), %xmm0
	movlpd %xmm0, 6(%edx)
# ifdef USE_AS_STPCPY
	lea 13(%edx), %eax
# else
	movl %edx, %eax
# endif
# ifdef USE_AS_STRNCPY
	sub $14, %ebx
	lea 14(%edx), %ecx
	jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN

/* ExitTail15: 15 bytes as two 8-byte moves at offsets 0 and 7.  */
	.p2align 4
L(ExitTail15):
	movlpd (%ecx), %xmm0
	movlpd %xmm0, (%edx)
	movlpd 7(%ecx), %xmm0
	movlpd %xmm0, 7(%edx)
# ifdef USE_AS_STPCPY
	lea 14(%edx), %eax
# else
	movl %edx, %eax
# endif
# ifdef USE_AS_STRNCPY
	sub $15, %ebx
	lea 15(%edx), %ecx
	jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN

/* ExitTail16: 16 bytes as two 8-byte moves.  */
	.p2align 4
L(ExitTail16):
	movlpd (%ecx), %xmm0
	movlpd %xmm0, (%edx)
	movlpd 8(%ecx), %xmm0
	movlpd
%xmm0, 8(%edx)
	/* Tail of ExitTail16 (16 bytes copied): return value and optional
	   strncpy zero-fill, then RETURN.  */
# ifdef USE_AS_STPCPY
	lea 15(%edx), %eax
# else
	movl %edx, %eax
# endif
# ifdef USE_AS_STRNCPY
	sub $16, %ebx
	lea 16(%edx), %ecx
	jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN
#endif

# ifdef USE_AS_STRNCPY
# ifndef USE_AS_STRCAT
	CFI_PUSH (%esi)		/* unwind bookkeeping only: the code below runs */
	CFI_PUSH (%edi)		/* with %esi and %edi still on the stack */
# endif

/* StrncpyLeaveCase2OrCase3: the strncpy counter ran out in a 64-byte
   step whose four chunks are in %xmm4..%xmm7.
   NOTE(review): the jump site is not visible here; it presumably has
   already advanced %edx past the 64 bytes (hence the negative store
   offsets) and set %eax to the combined zero-byte mask -- confirm.
   Each stage stores one more chunk, advances the offset in %esi by 16
   and hands over to the 1..16-byte copy routines as soon as the counter
   in %ebx is exhausted.  */
L(StrncpyLeaveCase2OrCase3):
	test %eax, %eax
	jnz L(Aligned64LeaveCase2)

/* No zero byte in the 64 bytes: only the counter limits the copy.  */
L(Aligned64LeaveCase3):
	add $48, %ebx
	jle L(CopyFrom1To16BytesCase3)
	movaps %xmm4, -64(%edx)
	lea 16(%esi), %esi
	sub $16, %ebx
	jbe L(CopyFrom1To16BytesCase3)
	movaps %xmm5, -48(%edx)
	lea 16(%esi), %esi
	sub $16, %ebx
	jbe L(CopyFrom1To16BytesCase3)
	movaps %xmm6, -32(%edx)
	lea 16(%esi), %esi
	lea -16(%ebx), %ebx
	jmp L(CopyFrom1To16BytesCase3)

/* A zero byte exists somewhere in the 64 bytes: test chunk by chunk
   (pcmpeqb against %xmm0, presumed zero) and stop at whichever comes
   first, the zero byte or the end of the counter.  */
L(Aligned64LeaveCase2):
	pcmpeqb %xmm4, %xmm0
	pmovmskb %xmm0, %eax
	add $48, %ebx
	jle L(CopyFrom1To16BytesCase2OrCase3)
	test %eax, %eax
	jnz L(CopyFrom1To16Bytes)

	pcmpeqb %xmm5, %xmm0
	pmovmskb %xmm0, %eax
	movaps %xmm4, -64(%edx)
	lea 16(%esi), %esi
	sub $16, %ebx
	jbe L(CopyFrom1To16BytesCase2OrCase3)
	test %eax, %eax
	jnz L(CopyFrom1To16Bytes)

	pcmpeqb %xmm6, %xmm0
	pmovmskb %xmm0, %eax
	movaps %xmm5, -48(%edx)
	lea 16(%esi), %esi
	sub $16, %ebx
	jbe L(CopyFrom1To16BytesCase2OrCase3)
	test %eax, %eax
	jnz L(CopyFrom1To16Bytes)

	pcmpeqb %xmm7, %xmm0
	pmovmskb %xmm0, %eax
	movaps %xmm6, -32(%edx)
	lea 16(%esi), %esi
	lea -16(%ebx), %ebx
	jmp L(CopyFrom1To16BytesCase2)
/* -------------------------------------------------- */

/* StrncpyExit<N>Case2OrCase3: counter ran out in the 16-byte steps of
   the Shl<N> path.  The top 16-N bytes of %xmm1 are written to the start
   of the chunk at (%edx) while its last N bytes are kept
   (read-modify-write through %xmm6); %esi = 16-N is the offset the
   1..16-byte copy routines add to both pointers.  A non-zero mask in
   %eax selects Case2, otherwise Case3.  */
L(StrncpyExit1Case2OrCase3):
	movaps (%edx), %xmm6
	psrldq $15, %xmm6
	mov $15, %esi
	palignr $1, %xmm1, %xmm6
	movaps %xmm6, (%edx)
	test %eax, %eax
	jnz L(CopyFrom1To16BytesCase2)
	jmp L(CopyFrom1To16BytesCase3)

L(StrncpyExit2Case2OrCase3):
	movaps (%edx), %xmm6
	psrldq $14, %xmm6
	mov $14, %esi
	palignr $2, %xmm1, %xmm6
	movaps %xmm6, (%edx)
	test %eax, %eax
	jnz L(CopyFrom1To16BytesCase2)
	jmp L(CopyFrom1To16BytesCase3)

L(StrncpyExit3Case2OrCase3):
	movaps (%edx), %xmm6
	psrldq $13, %xmm6
	mov $13, %esi
	palignr $3, %xmm1, %xmm6
	movaps %xmm6,
(%edx)
	/* End of StrncpyExit3Case2OrCase3: a non-zero mask in %eax selects
	   Case2, otherwise Case3.  */
	test %eax, %eax
	jnz L(CopyFrom1To16BytesCase2)
	jmp L(CopyFrom1To16BytesCase3)

/* StrncpyExit<N>Case2OrCase3 (N = 4..13 below): counter ran out in the
   16-byte steps of the Shl<N> path.  The top 16-N bytes of %xmm1 are
   written to the start of the chunk at (%edx) while its last N bytes
   are kept (read-modify-write through %xmm6); %esi = 16-N is the offset
   the 1..16-byte copy routines add to both pointers.  A non-zero mask
   in %eax selects Case2, otherwise Case3.  */
L(StrncpyExit4Case2OrCase3):
	movaps (%edx), %xmm6
	psrldq $12, %xmm6
	mov $12, %esi
	palignr $4, %xmm1, %xmm6
	movaps %xmm6, (%edx)
	test %eax, %eax
	jnz L(CopyFrom1To16BytesCase2)
	jmp L(CopyFrom1To16BytesCase3)

L(StrncpyExit5Case2OrCase3):
	movaps (%edx), %xmm6
	psrldq $11, %xmm6
	mov $11, %esi
	palignr $5, %xmm1, %xmm6
	movaps %xmm6, (%edx)
	test %eax, %eax
	jnz L(CopyFrom1To16BytesCase2)
	jmp L(CopyFrom1To16BytesCase3)

L(StrncpyExit6Case2OrCase3):
	movaps (%edx), %xmm6
	psrldq $10, %xmm6
	mov $10, %esi
	palignr $6, %xmm1, %xmm6
	movaps %xmm6, (%edx)
	test %eax, %eax
	jnz L(CopyFrom1To16BytesCase2)
	jmp L(CopyFrom1To16BytesCase3)

L(StrncpyExit7Case2OrCase3):
	movaps (%edx), %xmm6
	psrldq $9, %xmm6
	mov $9, %esi
	palignr $7, %xmm1, %xmm6
	movaps %xmm6, (%edx)
	test %eax, %eax
	jnz L(CopyFrom1To16BytesCase2)
	jmp L(CopyFrom1To16BytesCase3)

L(StrncpyExit8Case2OrCase3):
	movaps (%edx), %xmm6
	psrldq $8, %xmm6
	mov $8, %esi
	palignr $8, %xmm1, %xmm6
	movaps %xmm6, (%edx)
	test %eax, %eax
	jnz L(CopyFrom1To16BytesCase2)
	jmp L(CopyFrom1To16BytesCase3)

L(StrncpyExit9Case2OrCase3):
	movaps (%edx), %xmm6
	psrldq $7, %xmm6
	mov $7, %esi
	palignr $9, %xmm1, %xmm6
	movaps %xmm6, (%edx)
	test %eax, %eax
	jnz L(CopyFrom1To16BytesCase2)
	jmp L(CopyFrom1To16BytesCase3)

L(StrncpyExit10Case2OrCase3):
	movaps (%edx), %xmm6
	psrldq $6, %xmm6
	mov $6, %esi
	palignr $10, %xmm1, %xmm6
	movaps %xmm6, (%edx)
	test %eax, %eax
	jnz L(CopyFrom1To16BytesCase2)
	jmp L(CopyFrom1To16BytesCase3)

L(StrncpyExit11Case2OrCase3):
	movaps (%edx), %xmm6
	psrldq $5, %xmm6
	mov $5, %esi
	palignr $11, %xmm1, %xmm6
	movaps %xmm6, (%edx)
	test %eax, %eax
	jnz L(CopyFrom1To16BytesCase2)
	jmp L(CopyFrom1To16BytesCase3)

L(StrncpyExit12Case2OrCase3):
	movaps (%edx), %xmm6
	psrldq $4, %xmm6
	mov $4, %esi
	palignr $12, %xmm1, %xmm6
	movaps %xmm6, (%edx)
	test %eax, %eax
	jnz L(CopyFrom1To16BytesCase2)
	jmp L(CopyFrom1To16BytesCase3)

L(StrncpyExit13Case2OrCase3):
	movaps (%edx), %xmm6
	psrldq $3,
%xmm6
	/* Rest of StrncpyExit13Case2OrCase3: write the top 3 bytes of %xmm1
	   to the start of the chunk at (%edx), keep its last 13 bytes, set
	   %esi = 3 and pick Case2 (mask in %eax non-zero) or Case3.  */
	mov $3, %esi
	palignr $13, %xmm1, %xmm6
	movaps %xmm6, (%edx)
	test %eax, %eax
	jnz L(CopyFrom1To16BytesCase2)
	jmp L(CopyFrom1To16BytesCase3)

/* Same for N = 14: top 2 bytes of %xmm1, %esi = 2.  */
L(StrncpyExit14Case2OrCase3):
	movaps (%edx), %xmm6
	psrldq $2, %xmm6
	mov $2, %esi
	palignr $14, %xmm1, %xmm6
	movaps %xmm6, (%edx)
	test %eax, %eax
	jnz L(CopyFrom1To16BytesCase2)
	jmp L(CopyFrom1To16BytesCase3)

/* Same for N = 15: top byte of %xmm1, %esi = 1.  */
L(StrncpyExit15Case2OrCase3):
	movaps (%edx), %xmm6
	psrldq $1, %xmm6
	mov $1, %esi
	palignr $15, %xmm1, %xmm6
	movaps %xmm6, (%edx)
	test %eax, %eax
	jnz L(CopyFrom1To16BytesCase2)
	jmp L(CopyFrom1To16BytesCase3)

/* StrncpyLeave<N>: the strncpy counter ran out in the 64-byte loop of
   the Shl<N> path (reached after `sub $64, %ebx').  On entry %xmm1 is
   the previous raw chunk, %xmm2 the first raw chunk of this round, and
   %xmm4 / %xmm5 were already realigned by the loop; `add $48' leaves
   %ebx at its pre-subtraction value minus 16.  Each stage stores one
   realigned 16-byte chunk, keeps the newest raw chunk in %xmm1 and adds
   16 to the running offset in %esi, until the counter is exhausted.
   NOTE(review): the value of %esi on entry is set outside this part of
   the file -- confirm.
   NOTE(review): when entered from that `sub $64; jbe', the counter
   looks unable to survive the third `sub $16', so the stage that stores
   %xmm5 appears unreachable -- confirm before relying on it.
   StrncpyExit<N>: merge the top 16-N bytes of %xmm1 into the start of
   the chunk at (%edx, %esi), keep that chunk's last N bytes, add 16-N
   to %esi and let Case3 copy the remaining 1..16 bytes.  */
L(StrncpyLeave1):
	movaps %xmm2, %xmm3
	add $48, %ebx
	jle L(StrncpyExit1)
	palignr $1, %xmm1, %xmm2
	movaps %xmm3, %xmm1
	movaps %xmm2, (%edx)
	movaps 31(%ecx), %xmm2
	lea 16(%esi), %esi
	movaps %xmm2, %xmm3
	sub $16, %ebx
	jbe L(StrncpyExit1)
	palignr $1, %xmm1, %xmm2
	movaps %xmm2, 16(%edx)
	movaps 31+16(%ecx), %xmm2
	movaps %xmm3, %xmm1
	lea 16(%esi), %esi
	sub $16, %ebx
	jbe L(StrncpyExit1)
	movaps %xmm2, %xmm1
	movaps %xmm4, 32(%edx)
	lea 16(%esi), %esi
	sub $16, %ebx
	jbe L(StrncpyExit1)
	movaps %xmm7, %xmm1
	movaps %xmm5, 48(%edx)
	lea 16(%esi), %esi
	lea -16(%ebx), %ebx

L(StrncpyExit1):
	movaps (%edx, %esi), %xmm6
	psrldq $15, %xmm6
	palignr $1, %xmm1, %xmm6
	movaps %xmm6, (%edx, %esi)
	lea 15(%esi), %esi
	jmp L(CopyFrom1To16BytesCase3)

/* N = 2.  */
L(StrncpyLeave2):
	movaps %xmm2, %xmm3
	add $48, %ebx
	jle L(StrncpyExit2)
	palignr $2, %xmm1, %xmm2
	movaps %xmm3, %xmm1
	movaps %xmm2, (%edx)
	movaps 30(%ecx), %xmm2
	lea 16(%esi), %esi
	movaps %xmm2, %xmm3
	sub $16, %ebx
	jbe L(StrncpyExit2)
	palignr $2, %xmm1, %xmm2
	movaps %xmm2, 16(%edx)
	movaps 30+16(%ecx), %xmm2
	movaps %xmm3, %xmm1
	lea 16(%esi), %esi
	sub $16, %ebx
	jbe L(StrncpyExit2)
	movaps %xmm2, %xmm1
	movaps %xmm4, 32(%edx)
	lea 16(%esi), %esi
	sub $16, %ebx
	jbe L(StrncpyExit2)
	movaps %xmm7, %xmm1
	movaps %xmm5, 48(%edx)
	lea 16(%esi), %esi
	lea -16(%ebx), %ebx

L(StrncpyExit2):
	movaps (%edx, %esi), %xmm6
	psrldq $14, %xmm6
	palignr $2, %xmm1, %xmm6
	movaps %xmm6, (%edx, %esi)
	lea 14(%esi), %esi
	jmp
L(CopyFrom1To16BytesCase3)

/* StrncpyLeave<N> (N = 3..5 below): the strncpy counter ran out in the
   64-byte loop of the Shl<N> path (reached after `sub $64, %ebx').  On
   entry %xmm1 is the previous raw chunk, %xmm2 the first raw chunk of
   this round, and %xmm4 / %xmm5 were already realigned by the loop;
   `add $48' leaves %ebx at its pre-subtraction value minus 16.  Each
   stage stores one realigned 16-byte chunk, keeps the newest raw chunk
   in %xmm1 and adds 16 to the running offset in %esi, until the counter
   is exhausted.
   NOTE(review): the value of %esi on entry is set outside this part of
   the file -- confirm.
   StrncpyExit<N>: merge the top 16-N bytes of %xmm1 into the start of
   the chunk at (%edx, %esi), keep that chunk's last N bytes, add 16-N
   to %esi and let Case3 copy the remaining 1..16 bytes.  */
L(StrncpyLeave3):
	movaps %xmm2, %xmm3
	add $48, %ebx
	jle L(StrncpyExit3)
	palignr $3, %xmm1, %xmm2
	movaps %xmm3, %xmm1
	movaps %xmm2, (%edx)
	movaps 29(%ecx), %xmm2
	lea 16(%esi), %esi
	movaps %xmm2, %xmm3
	sub $16, %ebx
	jbe L(StrncpyExit3)
	palignr $3, %xmm1, %xmm2
	movaps %xmm2, 16(%edx)
	movaps 29+16(%ecx), %xmm2
	movaps %xmm3, %xmm1
	lea 16(%esi), %esi
	sub $16, %ebx
	jbe L(StrncpyExit3)
	movaps %xmm2, %xmm1
	movaps %xmm4, 32(%edx)
	lea 16(%esi), %esi
	sub $16, %ebx
	jbe L(StrncpyExit3)
	movaps %xmm7, %xmm1
	movaps %xmm5, 48(%edx)
	lea 16(%esi), %esi
	lea -16(%ebx), %ebx

L(StrncpyExit3):
	movaps (%edx, %esi), %xmm6
	psrldq $13, %xmm6
	palignr $3, %xmm1, %xmm6
	movaps %xmm6, (%edx, %esi)
	lea 13(%esi), %esi
	jmp L(CopyFrom1To16BytesCase3)

/* N = 4.  */
L(StrncpyLeave4):
	movaps %xmm2, %xmm3
	add $48, %ebx
	jle L(StrncpyExit4)
	palignr $4, %xmm1, %xmm2
	movaps %xmm3, %xmm1
	movaps %xmm2, (%edx)
	movaps 28(%ecx), %xmm2
	lea 16(%esi), %esi
	movaps %xmm2, %xmm3
	sub $16, %ebx
	jbe L(StrncpyExit4)
	palignr $4, %xmm1, %xmm2
	movaps %xmm2, 16(%edx)
	movaps 28+16(%ecx), %xmm2
	movaps %xmm3, %xmm1
	lea 16(%esi), %esi
	sub $16, %ebx
	jbe L(StrncpyExit4)
	movaps %xmm2, %xmm1
	movaps %xmm4, 32(%edx)
	lea 16(%esi), %esi
	sub $16, %ebx
	jbe L(StrncpyExit4)
	movaps %xmm7, %xmm1
	movaps %xmm5, 48(%edx)
	lea 16(%esi), %esi
	lea -16(%ebx), %ebx

L(StrncpyExit4):
	movaps (%edx, %esi), %xmm6
	psrldq $12, %xmm6
	palignr $4, %xmm1, %xmm6
	movaps %xmm6, (%edx, %esi)
	lea 12(%esi), %esi
	jmp L(CopyFrom1To16BytesCase3)

/* N = 5.  */
L(StrncpyLeave5):
	movaps %xmm2, %xmm3
	add $48, %ebx
	jle L(StrncpyExit5)
	palignr $5, %xmm1, %xmm2
	movaps %xmm3, %xmm1
	movaps %xmm2, (%edx)
	movaps 27(%ecx), %xmm2
	lea 16(%esi), %esi
	movaps %xmm2, %xmm3
	sub $16, %ebx
	jbe L(StrncpyExit5)
	palignr $5, %xmm1, %xmm2
	movaps %xmm2, 16(%edx)
	movaps 27+16(%ecx), %xmm2
	movaps %xmm3, %xmm1
	lea 16(%esi), %esi
	sub $16, %ebx
	jbe L(StrncpyExit5)
	movaps %xmm2, %xmm1
	movaps %xmm4, 32(%edx)
	lea 16(%esi), %esi
	sub $16, %ebx
	jbe L(StrncpyExit5)
	movaps %xmm7, %xmm1
	movaps %xmm5, 48(%edx)
	lea
16(%esi), %esi
	lea -16(%ebx), %ebx

/* StrncpyExit5: merge the top 11 bytes of %xmm1 into the start of the
   chunk at (%edx, %esi), keep that chunk's last 5 bytes, add 11 to %esi
   and let Case3 copy the remaining 1..16 bytes.  */
L(StrncpyExit5):
	movaps (%edx, %esi), %xmm6
	psrldq $11, %xmm6
	palignr $5, %xmm1, %xmm6
	movaps %xmm6, (%edx, %esi)
	lea 11(%esi), %esi
	jmp L(CopyFrom1To16BytesCase3)

/* StrncpyLeave<N> (N = 6..8 below): the strncpy counter ran out in the
   64-byte loop of the Shl<N> path (reached after `sub $64, %ebx').  On
   entry %xmm1 is the previous raw chunk, %xmm2 the first raw chunk of
   this round, and %xmm4 / %xmm5 were already realigned by the loop;
   `add $48' leaves %ebx at its pre-subtraction value minus 16.  Each
   stage stores one realigned 16-byte chunk, keeps the newest raw chunk
   in %xmm1 and adds 16 to the running offset in %esi, until the counter
   is exhausted.
   NOTE(review): the value of %esi on entry is set outside this part of
   the file -- confirm.
   StrncpyExit<N>: merge the top 16-N bytes of %xmm1 into the start of
   the chunk at (%edx, %esi), keep that chunk's last N bytes, add 16-N
   to %esi and let Case3 copy the remaining 1..16 bytes.  */
L(StrncpyLeave6):
	movaps %xmm2, %xmm3
	add $48, %ebx
	jle L(StrncpyExit6)
	palignr $6, %xmm1, %xmm2
	movaps %xmm3, %xmm1
	movaps %xmm2, (%edx)
	movaps 26(%ecx), %xmm2
	lea 16(%esi), %esi
	movaps %xmm2, %xmm3
	sub $16, %ebx
	jbe L(StrncpyExit6)
	palignr $6, %xmm1, %xmm2
	movaps %xmm2, 16(%edx)
	movaps 26+16(%ecx), %xmm2
	movaps %xmm3, %xmm1
	lea 16(%esi), %esi
	sub $16, %ebx
	jbe L(StrncpyExit6)
	movaps %xmm2, %xmm1
	movaps %xmm4, 32(%edx)
	lea 16(%esi), %esi
	sub $16, %ebx
	jbe L(StrncpyExit6)
	movaps %xmm7, %xmm1
	movaps %xmm5, 48(%edx)
	lea 16(%esi), %esi
	lea -16(%ebx), %ebx

L(StrncpyExit6):
	movaps (%edx, %esi), %xmm6
	psrldq $10, %xmm6
	palignr $6, %xmm1, %xmm6
	movaps %xmm6, (%edx, %esi)
	lea 10(%esi), %esi
	jmp L(CopyFrom1To16BytesCase3)

/* N = 7.  */
L(StrncpyLeave7):
	movaps %xmm2, %xmm3
	add $48, %ebx
	jle L(StrncpyExit7)
	palignr $7, %xmm1, %xmm2
	movaps %xmm3, %xmm1
	movaps %xmm2, (%edx)
	movaps 25(%ecx), %xmm2
	lea 16(%esi), %esi
	movaps %xmm2, %xmm3
	sub $16, %ebx
	jbe L(StrncpyExit7)
	palignr $7, %xmm1, %xmm2
	movaps %xmm2, 16(%edx)
	movaps 25+16(%ecx), %xmm2
	movaps %xmm3, %xmm1
	lea 16(%esi), %esi
	sub $16, %ebx
	jbe L(StrncpyExit7)
	movaps %xmm2, %xmm1
	movaps %xmm4, 32(%edx)
	lea 16(%esi), %esi
	sub $16, %ebx
	jbe L(StrncpyExit7)
	movaps %xmm7, %xmm1
	movaps %xmm5, 48(%edx)
	lea 16(%esi), %esi
	lea -16(%ebx), %ebx

L(StrncpyExit7):
	movaps (%edx, %esi), %xmm6
	psrldq $9, %xmm6
	palignr $7, %xmm1, %xmm6
	movaps %xmm6, (%edx, %esi)
	lea 9(%esi), %esi
	jmp L(CopyFrom1To16BytesCase3)

/* N = 8.  */
L(StrncpyLeave8):
	movaps %xmm2, %xmm3
	add $48, %ebx
	jle L(StrncpyExit8)
	palignr $8, %xmm1, %xmm2
	movaps %xmm3, %xmm1
	movaps %xmm2, (%edx)
	movaps 24(%ecx), %xmm2
	lea 16(%esi), %esi
	movaps %xmm2, %xmm3
	sub $16, %ebx
	jbe L(StrncpyExit8)
	palignr $8, %xmm1, %xmm2
	movaps %xmm2, 16(%edx)
	movaps 24+16(%ecx), %xmm2
	movaps %xmm3, %xmm1
	lea 16(%esi), %esi
	sub $16,
%ebx
	/* Remaining stages of StrncpyLeave8: store the chunks the loop had
	   already realigned (%xmm4, %xmm5) while the counter lasts, keeping
	   the newest raw chunk in %xmm1 and the running offset in %esi.  */
	jbe L(StrncpyExit8)
	movaps %xmm2, %xmm1
	movaps %xmm4, 32(%edx)
	lea 16(%esi), %esi
	sub $16, %ebx
	jbe L(StrncpyExit8)
	movaps %xmm7, %xmm1
	movaps %xmm5, 48(%edx)
	lea 16(%esi), %esi
	lea -16(%ebx), %ebx

/* StrncpyExit8: merge the top 8 bytes of %xmm1 into the start of the
   chunk at (%edx, %esi), keep that chunk's last 8 bytes, add 8 to %esi
   and let Case3 copy the remaining 1..16 bytes.  */
L(StrncpyExit8):
	movaps (%edx, %esi), %xmm6
	psrldq $8, %xmm6
	palignr $8, %xmm1, %xmm6
	movaps %xmm6, (%edx, %esi)
	lea 8(%esi), %esi
	jmp L(CopyFrom1To16BytesCase3)

/* StrncpyLeave<N> (N = 9..11 below): the strncpy counter ran out in the
   64-byte loop of the Shl<N> path (reached after `sub $64, %ebx').  On
   entry %xmm1 is the previous raw chunk, %xmm2 the first raw chunk of
   this round, and %xmm4 / %xmm5 were already realigned by the loop;
   `add $48' leaves %ebx at its pre-subtraction value minus 16.  Each
   stage stores one realigned 16-byte chunk, keeps the newest raw chunk
   in %xmm1 and adds 16 to the running offset in %esi, until the counter
   is exhausted.
   NOTE(review): the value of %esi on entry is set outside this part of
   the file -- confirm.
   StrncpyExit<N>: merge the top 16-N bytes of %xmm1 into the start of
   the chunk at (%edx, %esi), keep that chunk's last N bytes, add 16-N
   to %esi and let Case3 copy the remaining 1..16 bytes.  */
L(StrncpyLeave9):
	movaps %xmm2, %xmm3
	add $48, %ebx
	jle L(StrncpyExit9)
	palignr $9, %xmm1, %xmm2
	movaps %xmm3, %xmm1
	movaps %xmm2, (%edx)
	movaps 23(%ecx), %xmm2
	lea 16(%esi), %esi
	movaps %xmm2, %xmm3
	sub $16, %ebx
	jbe L(StrncpyExit9)
	palignr $9, %xmm1, %xmm2
	movaps %xmm2, 16(%edx)
	movaps 23+16(%ecx), %xmm2
	movaps %xmm3, %xmm1
	lea 16(%esi), %esi
	sub $16, %ebx
	jbe L(StrncpyExit9)
	movaps %xmm2, %xmm1
	movaps %xmm4, 32(%edx)
	lea 16(%esi), %esi
	sub $16, %ebx
	jbe L(StrncpyExit9)
	movaps %xmm7, %xmm1
	movaps %xmm5, 48(%edx)
	lea 16(%esi), %esi
	lea -16(%ebx), %ebx

L(StrncpyExit9):
	movaps (%edx, %esi), %xmm6
	psrldq $7, %xmm6
	palignr $9, %xmm1, %xmm6
	movaps %xmm6, (%edx, %esi)
	lea 7(%esi), %esi
	jmp L(CopyFrom1To16BytesCase3)

/* N = 10.  */
L(StrncpyLeave10):
	movaps %xmm2, %xmm3
	add $48, %ebx
	jle L(StrncpyExit10)
	palignr $10, %xmm1, %xmm2
	movaps %xmm3, %xmm1
	movaps %xmm2, (%edx)
	movaps 22(%ecx), %xmm2
	lea 16(%esi), %esi
	movaps %xmm2, %xmm3
	sub $16, %ebx
	jbe L(StrncpyExit10)
	palignr $10, %xmm1, %xmm2
	movaps %xmm2, 16(%edx)
	movaps 22+16(%ecx), %xmm2
	movaps %xmm3, %xmm1
	lea 16(%esi), %esi
	sub $16, %ebx
	jbe L(StrncpyExit10)
	movaps %xmm2, %xmm1
	movaps %xmm4, 32(%edx)
	lea 16(%esi), %esi
	sub $16, %ebx
	jbe L(StrncpyExit10)
	movaps %xmm7, %xmm1
	movaps %xmm5, 48(%edx)
	lea 16(%esi), %esi
	lea -16(%ebx), %ebx

L(StrncpyExit10):
	movaps (%edx, %esi), %xmm6
	psrldq $6, %xmm6
	palignr $10, %xmm1, %xmm6
	movaps %xmm6, (%edx, %esi)
	lea 6(%esi), %esi
	jmp L(CopyFrom1To16BytesCase3)

/* N = 11.  */
L(StrncpyLeave11):
	movaps %xmm2, %xmm3
	add $48, %ebx
	jle L(StrncpyExit11)
	palignr $11, %xmm1, %xmm2
	movaps %xmm3, %xmm1
	movaps %xmm2, (%edx)
	movaps 21(%ecx), %xmm2
	lea 16(%esi), %esi
/* Remainder of StrncpyLeave11: second, third and fourth 16-byte steps.
   Before each store the length counter in ebx is reduced by 16 and the
   code bails out to StrncpyExit11 on borrow or zero (jbe).  */
	movaps	%xmm2, %xmm3
	sub	$16, %ebx
	jbe	L(StrncpyExit11)
	palignr	$11, %xmm1, %xmm2	/* xmm2 = xmm1[11..15] : xmm2[0..10] */
	movaps	%xmm2, 16(%edx)
	movaps	21+16(%ecx), %xmm2
	movaps	%xmm3, %xmm1
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit11)
	movaps	%xmm2, %xmm1
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit11)
	movaps	%xmm7, %xmm1
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx		/* ebx -= 16; lea leaves the flags alone */

/* StrncpyExit11: merge bytes 11..15 of xmm1 (5 bytes) into the 16
   bytes at edx+esi, keeping bytes 5..15 already in memory, then
   continue in CopyFrom1To16BytesCase3 with esi advanced by 5.  */
L(StrncpyExit11):
	movaps	(%edx, %esi), %xmm6	/* current 16 bytes at edx+esi */
	psrldq	$5, %xmm6		/* xmm6[0..10] = old bytes 5..15 */
	palignr	$11, %xmm1, %xmm6	/* xmm6 = xmm1[11..15] : old[5..15] */
	movaps	%xmm6, (%edx, %esi)	/* bytes 0..4 replaced, 5..15 kept */
	lea	5(%esi), %esi
	jmp	L(CopyFrom1To16BytesCase3)

/* StrncpyLeave12: store up to four more 16-byte blocks at (%edx),
   16(%edx), 32(%edx) and 48(%edx), checking the length counter in ebx
   before each one and bailing out to StrncpyExit12 when it runs out.
   The first two blocks are stitched from two neighbouring source
   blocks with a 12-byte palignr; the last two are stored straight
   from xmm4 and xmm5.
   The register contents on entry are set up before this point and are
   not visible here -- presumably xmm1 = previous source block, xmm2 =
   next source block, xmm4/xmm5 = already merged third/fourth output
   blocks, xmm7 = last raw source block; TODO confirm against the main
   loop.  The loads use movaps, which needs a 16-byte aligned address,
   so 20(%ecx) is assumed to be aligned here.  */
L(StrncpyLeave12):
	movaps	%xmm2, %xmm3		/* keep an unmerged copy of xmm2 */
	add	$48, %ebx
	/* NOTE(review): jle is a signed test while every later check uses
	   jbe; presumably ebx can be negative here after an earlier
	   subtraction -- confirm.  */
	jle	L(StrncpyExit12)
	palignr	$12, %xmm1, %xmm2	/* xmm2 = xmm1[12..15] : xmm2[0..11] */
	movaps	%xmm3, %xmm1		/* unmerged block becomes the previous one */
	movaps	%xmm2, (%edx)		/* output block 0 */
	movaps	20(%ecx), %xmm2		/* next source block */
	lea	16(%esi), %esi
	movaps	%xmm2, %xmm3
	sub	$16, %ebx
	jbe	L(StrncpyExit12)
	palignr	$12, %xmm1, %xmm2
	movaps	%xmm2, 16(%edx)		/* output block 1 */
	movaps	20+16(%ecx), %xmm2
	movaps	%xmm3, %xmm1
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit12)
	movaps	%xmm2, %xmm1
	movaps	%xmm4, 32(%edx)		/* output block 2, taken as is from xmm4 */
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit12)
	movaps	%xmm7, %xmm1
	movaps	%xmm5, 48(%edx)		/* output block 3, taken as is from xmm5 */
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx

/* StrncpyExit12: merge bytes 12..15 of xmm1 (4 bytes) into the 16
   bytes at edx+esi, keeping bytes 4..15 already in memory.  */
L(StrncpyExit12):
	movaps	(%edx, %esi), %xmm6
	psrldq	$4, %xmm6		/* xmm6[0..11] = old bytes 4..15 */
	palignr	$12, %xmm1, %xmm6	/* xmm6 = xmm1[12..15] : old[4..15] */
	movaps	%xmm6, (%edx, %esi)
	lea	4(%esi), %esi
	jmp	L(CopyFrom1To16BytesCase3)

/* StrncpyLeave13: same sequence as StrncpyLeave12 with a 13-byte
   palignr and source loads at 19(%ecx) and 19+16(%ecx).  */
L(StrncpyLeave13):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit13)	/* signed test, see StrncpyLeave12 */
	palignr	$13, %xmm1, %xmm2	/* xmm2 = xmm1[13..15] : xmm2[0..12] */
	movaps	%xmm3, %xmm1
	movaps	%xmm2, (%edx)
	movaps	19(%ecx), %xmm2
	lea	16(%esi), %esi
	movaps	%xmm2, %xmm3
	sub	$16, %ebx
	jbe	L(StrncpyExit13)
	palignr	$13, %xmm1, %xmm2
	movaps	%xmm2, 16(%edx)
	movaps	19+16(%ecx), %xmm2
	movaps	%xmm3, %xmm1
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit13)
	movaps	%xmm2, %xmm1
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit13)
	movaps	%xmm7, %xmm1
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx

/* StrncpyExit13: merge bytes 13..15 of xmm1 (3 bytes) into the 16
   bytes at edx+esi, keeping bytes 3..15 already in memory.  */
L(StrncpyExit13):
	movaps	(%edx, %esi), %xmm6
	psrldq	$3, %xmm6		/* xmm6[0..12] = old bytes 3..15 */
	palignr	$13, %xmm1, %xmm6	/* xmm6 = xmm1[13..15] : old[3..15] */
	movaps	%xmm6, (%edx, %esi)
	lea	3(%esi), %esi
	jmp	L(CopyFrom1To16BytesCase3)

/* StrncpyLeave14: same sequence as StrncpyLeave12 with a 14-byte
   palignr and source loads at 18(%ecx) and 18+16(%ecx).  */
L(StrncpyLeave14):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit14)	/* signed test, see StrncpyLeave12 */
	palignr	$14, %xmm1, %xmm2	/* xmm2 = xmm1[14..15] : xmm2[0..13] */
	movaps	%xmm3, %xmm1
	movaps	%xmm2, (%edx)
	movaps	18(%ecx), %xmm2
	lea	16(%esi), %esi
	movaps	%xmm2, %xmm3
	sub	$16, %ebx
	jbe	L(StrncpyExit14)
	palignr	$14, %xmm1, %xmm2
	movaps	%xmm2, 16(%edx)
	movaps	18+16(%ecx), %xmm2
	movaps	%xmm3, %xmm1
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit14)
	movaps	%xmm2, %xmm1
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit14)
	movaps	%xmm7, %xmm1
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx

/* StrncpyExit14: merge bytes 14..15 of xmm1 (2 bytes) into the 16
   bytes at edx+esi, keeping bytes 2..15 already in memory.  */
L(StrncpyExit14):
	movaps	(%edx, %esi), %xmm6
	psrldq	$2, %xmm6		/* xmm6[0..13] = old bytes 2..15 */
	palignr	$14, %xmm1, %xmm6	/* xmm6 = xmm1[14..15] : old[2..15] */
	movaps	%xmm6, (%edx, %esi)
	lea	2(%esi), %esi
	jmp	L(CopyFrom1To16BytesCase3)

/* StrncpyLeave15: same sequence as StrncpyLeave12 with a 15-byte
   palignr and source loads at 17(%ecx) and 17+16(%ecx).  */
L(StrncpyLeave15):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit15)	/* signed test, see StrncpyLeave12 */
	palignr	$15, %xmm1, %xmm2	/* xmm2 = xmm1[15] : xmm2[0..14] */
	movaps	%xmm3, %xmm1
	movaps	%xmm2, (%edx)
	movaps	17(%ecx), %xmm2
	lea	16(%esi), %esi
	movaps	%xmm2, %xmm3
	sub	$16, %ebx
	jbe	L(StrncpyExit15)
	palignr	$15, %xmm1, %xmm2
	movaps	%xmm2, 16(%edx)
	movaps	17+16(%ecx), %xmm2
	movaps	%xmm3, %xmm1
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit15)
	movaps	%xmm2, %xmm1
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit15)
	movaps	%xmm7, %xmm1
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx

/* StrncpyExit15: merge byte 15 of xmm1 (1 byte) into the 16 bytes at
   edx+esi, keeping bytes 1..15 already in memory.  */
L(StrncpyExit15):
	movaps	(%edx, %esi), %xmm6
	psrldq	$1, %xmm6		/* xmm6[0..14] = old bytes 1..15 */
	palignr	$15, %xmm1, %xmm6	/* xmm6 = xmm1[15] : old[1..15] */
	movaps	%xmm6, (%edx, %esi)
	lea	1(%esi), %esi
	jmp	L(CopyFrom1To16BytesCase3)
# endif	/* closes a conditional opened earlier in the file */

# ifndef USE_AS_STRCAT
#  ifdef USE_AS_STRNCPY
	/* Unwind bookkeeping only: CFI_POP expands to CFA/restore
	   annotations, not to a pop instruction.  The entry code jumps to
	   the labels below before %edi and %esi are pushed.  */
	CFI_POP (%esi)
	CFI_POP (%edi)

	/* ExitTail0: reached from the entry code when the length argument
	   is zero.  Nothing is copied; the destination pointer is
	   returned.  */
	.p2align 4
L(ExitTail0):
	movl	%edx, %eax
	RETURN

	/* StrncpyExit15Bytes: reached from the entry code when the length
	   in ebx is below 16, after lengths up to 8 and a NUL in source
	   bytes 0..7 have been handled.  For k = 9..14, finish through
	   ExitTail<k> if the length is exactly k or source byte k-1 is
	   NUL.  */
	.p2align 4
L(StrncpyExit15Bytes):
	cmp	$9, %ebx
	je	L(ExitTail9)
	cmpb	$0, 8(%ecx)
	jz	L(ExitTail9)
	cmp	$10, %ebx
	je	L(ExitTail10)
	cmpb	$0, 9(%ecx)
	jz	L(ExitTail10)
	cmp	$11, %ebx
	je	L(ExitTail11)
	cmpb	$0, 10(%ecx)
	jz	L(ExitTail11)
	cmp	$12, %ebx
	je	L(ExitTail12)
	cmpb	$0, 11(%ecx)
	jz	L(ExitTail12)
	cmp	$13, %ebx
	je	L(ExitTail13)
	cmpb	$0, 12(%ecx)
	jz	L(ExitTail13)
	cmp	$14, %ebx
	je	L(ExitTail14)
	cmpb	$0, 13(%ecx)
	jz	L(ExitTail14)
	/* No early exit matched, which with the entry checks leaves a
	   length of 15.  Copy bytes 0..14 with two overlapping 8-byte
	   moves (0..7 and 7..14).  */
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movlpd	7(%ecx), %xmm0
	movlpd	%xmm0, 7(%edx)
#   ifdef USE_AS_STPCPY
	/* eax = edx+14 if the byte stored there is NUL, else edx+15:
	   cmpb $1 sets the carry flag only for a zero byte, and
	   sbb $-1 adds 1 minus the carry.  */
	lea	14(%edx), %eax
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   else
	movl	%edx, %eax		/* return the destination pointer */
#   endif
	RETURN

	/* StrncpyExit8Bytes: reached from the entry code when the length
	   in ebx is 1..8.  For k = 1..7, finish through ExitTail<k> if
	   the length is exactly k or source byte k-1 is NUL.  */
	.p2align 4
L(StrncpyExit8Bytes):
	cmp	$1, %ebx
	je	L(ExitTail1)
	cmpb	$0, (%ecx)
	jz	L(ExitTail1)
	cmp	$2, %ebx
	je	L(ExitTail2)
	cmpb	$0, 1(%ecx)
	jz	L(ExitTail2)
	cmp	$3, %ebx
	je	L(ExitTail3)
	cmpb	$0, 2(%ecx)
	jz	L(ExitTail3)
	cmp	$4, %ebx
	je	L(ExitTail4)
	cmpb	$0, 3(%ecx)
	jz	L(ExitTail4)
	cmp	$5, %ebx
	je	L(ExitTail5)
	cmpb	$0, 4(%ecx)
	jz	L(ExitTail5)
	cmp	$6, %ebx
	je	L(ExitTail6)
	cmpb	$0, 5(%ecx)
	jz	L(ExitTail6)
	cmp	$7, %ebx
	je	L(ExitTail7)
	cmpb	$0, 6(%ecx)
	jz	L(ExitTail7)
	/* No early exit matched, which leaves a length of 8: copy bytes
	   0..7 with one 8-byte move.  */
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
#   ifdef USE_AS_STPCPY
	/* eax = edx+7 if the byte stored there is NUL, else edx+8
	   (carry from cmpb $1 is set only for a zero byte).  */
	lea	7(%edx), %eax
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   else
	movl	%edx, %eax		/* return the destination pointer */
#   endif
	RETURN
#  endif	/* USE_AS_STRNCPY */

END (STRCPY)
# endif	/* !USE_AS_STRCAT */
#endif	/* presumably closes the NOT_IN_libc guard at the top of the file -- confirm */