about summary refs log tree commit diff
path: root/sysdeps/i386/i686/multiarch/strcat-sse2.S
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/i386/i686/multiarch/strcat-sse2.S')
-rw-r--r-- sysdeps/i386/i686/multiarch/strcat-sse2.S 1245
1 files changed, 0 insertions, 1245 deletions
diff --git a/sysdeps/i386/i686/multiarch/strcat-sse2.S b/sysdeps/i386/i686/multiarch/strcat-sse2.S
deleted file mode 100644
index 6359c7330c..0000000000
--- a/sysdeps/i386/i686/multiarch/strcat-sse2.S
+++ /dev/null
@@ -1,1245 +0,0 @@
-/* strcat with SSE2
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-
-# define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)	/* unwind bookkeeping for a 4-byte push of REG */
-
-# define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)	/* unwind bookkeeping for the matching 4-byte pop of REG */
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)	/* push REG and emit the matching CFI */
-# define POP(REG) popl REG; CFI_POP (REG)	/* pop REG and emit the matching CFI */
-
-# ifdef SHARED
-#  define JMPTBL(I, B) I - B	/* SHARED build: each table entry is the offset of I from B */
-
-/* Load an entry in a jump table into ECX and branch to it.  TABLE is a
-	jump table with relative offsets.  INDEX is a register containing the
-	index into the jump table.  SCALE is the scale of INDEX.  */
-
-#  define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)	\
-	/* We first load PC into ECX.  */	\
-	SETUP_PIC_REG(cx);	\
-	/* Get the address of the jump table.  */	\
-	addl	$(TABLE - .), %ecx;	\
-	/* Get the entry and convert the relative offset to the	\
-	absolute address.  */	\
-	addl	(%ecx,INDEX,SCALE), %ecx;	\
-	/* We loaded the jump table and adjusted ECX. Go.  */	\
-	jmp	*%ecx
-# else
-#  define JMPTBL(I, B) I	/* non-SHARED build: each table entry is the address of I itself */
-
-/* Branch to an entry in a jump table.  TABLE is a jump table with
-	absolute addresses.  INDEX is a register containing the index into
-	the jump table.  SCALE is the scale of INDEX.  */
-
-#  define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)	\
-	jmp	*TABLE(,INDEX,SCALE)
-# endif
-
-# ifndef STRCAT
-#  define STRCAT  __strcat_sse2	/* default symbol name when the includer has not chosen one */
-# endif
-
-# define PARMS  4	/* presumably the 4 bytes of %esi pushed at entry before the arguments are read */
-# define STR1  PARMS+4	/* first stack argument (past the return address); loaded into %eax at entry */
-# define STR2  STR1+4	/* second stack argument; loaded into %esi at entry */
-
-# ifdef USE_AS_STRNCAT
-#  define LEN    STR2+8	/* third stack argument; read after %ebx is pushed as well, hence +8 */
-#  define STR3   STR1+4	/* first argument as addressed once %ebx is on the stack too */
-# else
-#  define STR3   STR1	/* first argument; the exit stubs reload it as the return value */
-# endif
-
-# define USE_AS_STRCAT	/* not referenced in this file as shown; presumably for shared code - TODO confirm */
-# ifdef USE_AS_STRNCAT
-#  define RETURN  POP(%ebx); POP(%esi); ret; CFI_PUSH(%ebx); CFI_PUSH(%esi);	/* CFI_PUSHes after ret re-establish unwind state for code that follows */
-# else
-#  define RETURN  POP(%esi); ret; CFI_PUSH(%esi);	/* CFI_PUSH after ret re-establishes unwind state for code that follows */
-# endif
-
-.text
-ENTRY (STRCAT)
-	PUSH	(%esi)	/* %esi will hold the source pointer; save the caller's value */
-	mov	STR1(%esp), %eax	/* %eax = destination string */
-	mov	STR2(%esp), %esi	/* %esi = source string */
-# ifdef USE_AS_STRNCAT
-	PUSH	(%ebx)
-	movl	LEN(%esp), %ebx	/* %ebx = length limit */
-	test	%ebx, %ebx
-	jz	L(ExitZero)	/* limit of 0: nothing to append */
-# endif
-	cmpb	$0, (%esi)	/* flags are consumed by the jz below; the two movs do not alter them */
-	mov	%esi, %ecx
-	mov	%eax, %edx
-	jz	L(ExitZero)	/* empty source: return with %eax still the destination */
-
-	and	$63, %ecx	/* %ecx = source offset within its 64-byte block */
-	and	$63, %edx	/* %edx = destination offset within its 64-byte block */
-	cmp	$32, %ecx
-	ja	L(StrlenCore7_1)	/* source offset > 32: use the path that does not preload 32 source bytes */
-	cmp	$48, %edx
-	ja	L(alignment_prolog)	/* destination offset > 48: use the path that reads the destination aligned */
-
-	pxor	%xmm0, %xmm0
-	pxor	%xmm4, %xmm4
-	pxor	%xmm7, %xmm7
-	movdqu	(%eax), %xmm1	/* first 16 destination bytes, unaligned load */
-	movdqu	(%esi), %xmm5	/* source bytes 0..15, kept for the copy phase */
-	pcmpeqb	%xmm1, %xmm0	/* 0xff in every lane where a destination byte is NUL */
-	movdqu	16(%esi), %xmm6	/* source bytes 16..31 */
-	pmovmskb %xmm0, %ecx
-	pcmpeqb	%xmm5, %xmm4	/* xmm4 = NUL mask of source bytes 0..15 */
-	pcmpeqb	%xmm6, %xmm7	/* xmm7 = NUL mask of source bytes 16..31 */
-	test	%ecx, %ecx
-	jnz	L(exit_less16_)	/* destination ends within its first 16 bytes */
-	mov	%eax, %ecx
-	and	$-16, %eax	/* round the destination down to 16 for the aligned scan */
-	jmp	L(loop_prolog)	/* xmm0 is still all-zero here because no lane matched */
-
-L(alignment_prolog):	/* same set-up as the fast path, but the destination is read from its aligned block */
-	pxor	%xmm0, %xmm0
-	pxor	%xmm4, %xmm4
-	mov	%edx, %ecx
-	pxor	%xmm7, %xmm7
-	and	$15, %ecx	/* %ecx = destination misalignment within 16 bytes */
-	and	$-16, %eax	/* %eax = destination rounded down to 16 */
-	pcmpeqb	(%eax), %xmm0	/* NUL mask of the aligned block containing the destination start */
-	movdqu	(%esi), %xmm5	/* source bytes 0..15 */
-	movdqu	16(%esi), %xmm6	/* source bytes 16..31 */
-	pmovmskb %xmm0, %edx
-	pcmpeqb	%xmm5, %xmm4	/* xmm4 = NUL mask of source bytes 0..15 */
-	shr	%cl, %edx	/* drop mask bits for bytes that precede the destination start */
-	pcmpeqb	%xmm6, %xmm7	/* xmm7 = NUL mask of source bytes 16..31 */
-	test	%edx, %edx
-	jnz	L(exit_less16)	/* destination NUL lies in this first aligned block */
-	add	%eax, %ecx
-
-	pxor	%xmm0, %xmm0	/* re-zero: the compare may have matched bytes before the start */
-L(loop_prolog):	/* entered with xmm0 all-zero and %eax 16-byte aligned */
-	pxor	%xmm1, %xmm1
-	pxor	%xmm2, %xmm2
-	pxor	%xmm3, %xmm3
-	.p2align 4
-L(align16_loop):	/* scan the destination for its NUL, four 16-byte blocks per iteration */
-	pcmpeqb	16(%eax), %xmm0	/* register stays all-zero unless this block contains a NUL */
-	pmovmskb %xmm0, %edx
-	test	%edx, %edx
-	jnz	L(exit16)
-
-	pcmpeqb	32(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	test	%edx, %edx
-	jnz	L(exit32)
-
-	pcmpeqb	48(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	test	%edx, %edx
-	jnz	L(exit48)
-
-	pcmpeqb	64(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	64(%eax), %eax	/* advance to the block just tested */
-	test	%edx, %edx
-	jz	L(align16_loop)
-	bsf	%edx, %edx	/* index of the first NUL inside the block */
-	add	%edx, %eax	/* %eax = address of the destination's NUL */
-	jmp	L(StartStrcpyPart)
-
-	.p2align 4
-L(exit16):	/* NUL found in the block at 16(%eax) */
-	bsf	%edx, %edx
-	lea	16(%eax, %edx), %eax	/* %eax = address of the destination's NUL */
-	jmp	L(StartStrcpyPart)
-
-	.p2align 4
-L(exit32):	/* NUL found in the block at 32(%eax) */
-	bsf	%edx, %edx
-	lea	32(%eax, %edx), %eax
-	jmp	L(StartStrcpyPart)
-
-	.p2align 4
-L(exit48):	/* NUL found in the block at 48(%eax) */
-	bsf	%edx, %edx
-	lea	48(%eax, %edx), %eax
-	jmp	L(StartStrcpyPart)
-
-	.p2align 4
-L(exit_less16):	/* from alignment_prolog: %eax is aligned, %ecx is the misalignment */
-	bsf	%edx, %edx	/* NUL index relative to the original destination (mask was shifted) */
-	add	%ecx, %eax
-	add	%edx, %eax	/* %eax = address of the destination's NUL */
-	jmp	L(StartStrcpyPart)
-
-	.p2align 4
-L(exit_less16_):	/* from the fast path: %eax is still the unaligned destination */
-	bsf	%ecx, %ecx
-	add	%ecx, %eax	/* %eax = address of the destination's NUL; falls through */
-	.p2align 4
-L(StartStrcpyPart):	/* %eax = end of destination; xmm5/xmm6 = source bytes 0..31, xmm4/xmm7 = their NUL masks */
-	pmovmskb %xmm4, %edx	/* NUL bits for source bytes 0..15 */
-# ifdef USE_AS_STRNCAT
-	cmp	$16, %ebx
-	jbe	L(CopyFrom1To16BytesTail1Case2OrCase3)	/* limit <= 16: the limit decides within this block */
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesTail1)	/* source ends within its first 16 bytes */
-
-	movdqu	%xmm5, (%eax)	/* append source bytes 0..15 */
-	pmovmskb %xmm7, %edx	/* NUL bits for source bytes 16..31 */
-# ifdef USE_AS_STRNCAT
-	cmp	$32, %ebx
-	jbe	L(CopyFrom1To32Bytes1Case2OrCase3)	/* limit <= 32 */
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To32Bytes1)	/* source ends within bytes 16..31 */
-
-	mov	%esi, %ecx
-	and	$-16, %esi	/* %esi = source rounded down to 16 */
-	and	$15, %ecx	/* %ecx = source misalignment */
-	pxor	%xmm0, %xmm0
-# ifdef USE_AS_STRNCAT
-	add	%ecx, %ebx	/* count the limit from the aligned source base */
-	sbb	%edx, %edx	/* %edx = -1 if the add carried, else 0 */
-	or	%edx, %ebx	/* saturate %ebx to 0xffffffff on overflow */
-# endif
-	sub	%ecx, %eax	/* bias %eax so (%eax,k) pairs with (%esi,k) */
-	jmp	L(Unalign16Both)
-
-L(StrlenCore7_1):	/* destination scan used when the source offset within 64 bytes exceeds 32; no source preload */
-	mov	%eax, %ecx
-	pxor	%xmm0, %xmm0
-	and	$15, %ecx	/* %ecx = destination misalignment within 16 bytes */
-	and	$-16, %eax	/* %eax = destination rounded down to 16 */
-	pcmpeqb	(%eax), %xmm0	/* NUL mask of the aligned block containing the destination start */
-	pmovmskb %xmm0, %edx
-	shr	%cl, %edx	/* drop mask bits for bytes that precede the destination start */
-	test	%edx, %edx
-	jnz	L(exit_less16_1)
-	add	%eax, %ecx
-
-	pxor	%xmm0, %xmm0	/* re-zero: the compare may have matched bytes before the start */
-	pxor	%xmm1, %xmm1
-	pxor	%xmm2, %xmm2
-	pxor	%xmm3, %xmm3
-
-	.p2align 4
-L(align16_loop_1):	/* scan the destination for its NUL, four 16-byte blocks per iteration */
-	pcmpeqb	16(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	test	%edx, %edx
-	jnz	L(exit16_1)
-
-	pcmpeqb	32(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	test	%edx, %edx
-	jnz	L(exit32_1)
-
-	pcmpeqb	48(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	test	%edx, %edx
-	jnz	L(exit48_1)
-
-	pcmpeqb	64(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	64(%eax), %eax	/* advance to the block just tested */
-	test	%edx, %edx
-	jz	L(align16_loop_1)
-	bsf	%edx, %edx	/* index of the first NUL inside the block */
-	add	%edx, %eax	/* %eax = address of the destination's NUL */
-	jmp	L(StartStrcpyPart_1)
-
-	.p2align 4
-L(exit16_1):	/* NUL found in the block at 16(%eax) */
-	bsf	%edx, %edx
-	lea	16(%eax, %edx), %eax
-	jmp	L(StartStrcpyPart_1)
-
-	.p2align 4
-L(exit32_1):	/* NUL found in the block at 32(%eax) */
-	bsf	%edx, %edx
-	lea	32(%eax, %edx), %eax
-	jmp	L(StartStrcpyPart_1)
-
-	.p2align 4
-L(exit48_1):	/* NUL found in the block at 48(%eax) */
-	bsf	%edx, %edx
-	lea	48(%eax, %edx), %eax
-	jmp	L(StartStrcpyPart_1)
-
-	.p2align 4
-L(exit_less16_1):	/* NUL found in the first aligned block */
-	bsf	%edx, %edx	/* NUL index relative to the original destination (mask was shifted) */
-	add	%ecx, %eax
-	add	%edx, %eax	/* %eax = address of the destination's NUL; falls through */
-	.p2align 4
-L(StartStrcpyPart_1):	/* %eax = end of destination; the source has not been examined beyond its first byte */
-	mov	%esi, %ecx
-	and	$15, %ecx	/* %ecx = source misalignment within 16 bytes */
-	and	$-16, %esi	/* %esi = source rounded down to 16 */
-	pxor	%xmm0, %xmm0
-	pxor	%xmm1, %xmm1
-
-# ifdef USE_AS_STRNCAT
-	cmp	$48, %ebx
-	ja      L(BigN)	/* limit > 48: run the first steps without per-step limit checks */
-# endif
-	pcmpeqb	(%esi), %xmm1	/* NUL mask of the aligned block containing the source start */
-# ifdef USE_AS_STRNCAT
-	add	%ecx, %ebx	/* count the limit from the aligned source base */
-# endif
-	pmovmskb %xmm1, %edx
-	shr	%cl, %edx	/* drop mask bits for bytes that precede the source start */
-# ifdef USE_AS_STRNCAT
-	cmp	$16, %ebx
-	jbe	L(CopyFrom1To16BytesTailCase2OrCase3)	/* limit falls inside this first block */
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesTail)	/* source ends inside this first block */
-
-	pcmpeqb	16(%esi), %xmm0	/* NUL mask of the second aligned block */
-	pmovmskb %xmm0, %edx
-# ifdef USE_AS_STRNCAT
-	cmp	$32, %ebx
-	jbe	L(CopyFrom1To32BytesCase2OrCase3)	/* limit falls inside the second block */
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To32Bytes)	/* source ends inside the second block */
-
-	movdqu	(%esi, %ecx), %xmm1   /* copy 16 bytes from the unaligned source start */
-	movdqu	%xmm1, (%eax)
-	sub	%ecx, %eax	/* bias %eax so (%eax,k) pairs with (%esi,k) */
-
-	.p2align 4
-L(Unalign16Both):	/* %esi is 16-byte aligned; %eax is biased so (%eax,k) pairs with (%esi,k) */
-	mov	$16, %ecx	/* %ecx = running offset applied to both pointers */
-	movdqa	(%esi, %ecx), %xmm1
-	movaps	16(%esi, %ecx), %xmm2	/* load one block ahead of the one being stored */
-	movdqu	%xmm1, (%eax, %ecx)
-	pcmpeqb	%xmm2, %xmm0	/* xmm0 was zero: becomes the NUL mask of the block just loaded */
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-# ifdef USE_AS_STRNCAT
-	sub	$48, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)	/* limit reached at or before the end of this block */
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16Bytes)	/* NUL in the loaded block: finish through the exit table */
-L(Unalign16BothBigN):	/* re-entry point used by L(BigN) */
-	movaps	16(%esi, %ecx), %xmm3
-	movdqu	%xmm2, (%eax, %ecx)	/* store the previous block, now known to be NUL-free */
-	pcmpeqb	%xmm3, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-# ifdef USE_AS_STRNCAT
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%esi, %ecx), %xmm4
-	movdqu	%xmm3, (%eax, %ecx)
-	pcmpeqb	%xmm4, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-# ifdef USE_AS_STRNCAT
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%esi, %ecx), %xmm1
-	movdqu	%xmm4, (%eax, %ecx)
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-# ifdef USE_AS_STRNCAT
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%esi, %ecx), %xmm2
-	movdqu	%xmm1, (%eax, %ecx)
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-# ifdef USE_AS_STRNCAT
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%esi, %ecx), %xmm3
-	movdqu	%xmm2, (%eax, %ecx)
-	pcmpeqb	%xmm3, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-# ifdef USE_AS_STRNCAT
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16Bytes)
-
-	movdqu	%xmm3, (%eax, %ecx)	/* last block of the unrolled 16-byte phase */
-	mov	%esi, %edx
-	lea	16(%esi, %ecx), %esi
-	and	$-0x40, %esi	/* round the source cursor down to a 64-byte boundary */
-	sub	%esi, %edx	/* %edx = old %esi - new %esi */
-	sub	%edx, %eax	/* move %eax by the same distance as %esi */
-# ifdef USE_AS_STRNCAT
-	lea	128(%ebx, %edx), %ebx	/* rebase the remaining-length counter for the 64-byte loop */
-# endif
-	movaps	(%esi), %xmm2
-	movaps	%xmm2, %xmm4	/* xmm4..xmm7 keep the four 16-byte pieces for the later stores */
-	movaps	16(%esi), %xmm5
-	movaps	32(%esi), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	48(%esi), %xmm7
-	pminub	%xmm5, %xmm2
-	pminub	%xmm7, %xmm3
-	pminub	%xmm2, %xmm3	/* byte-wise minimum of all four pieces: a lane is 0 iff some piece has a NUL there */
-	pcmpeqb	%xmm0, %xmm3	/* xmm0 is zero here, so this flags the zero lanes */
-	pmovmskb %xmm3, %edx	/* non-zero iff the 64-byte block contains a NUL */
-# ifdef USE_AS_STRNCAT
-	sub	$64, %ebx
-	jbe	L(UnalignedLeaveCase2OrCase3)	/* limit reached inside this 64-byte block */
-# endif
-	test	%edx, %edx
-	jnz	L(Unaligned64Leave)
-
-	.p2align 4
-L(Unaligned64Loop_start):	/* store the NUL-free 64 bytes held in xmm4..xmm7 while loading and testing the next 64 */
-	add	$64, %eax
-	add	$64, %esi
-	movdqu	%xmm4, -64(%eax)	/* previous block, piece 0 */
-	movaps	(%esi), %xmm2
-	movdqa	%xmm2, %xmm4
-	movdqu	%xmm5, -48(%eax)	/* previous block, piece 1 */
-	movaps	16(%esi), %xmm5
-	pminub	%xmm5, %xmm2
-	movaps	32(%esi), %xmm3
-	movdqu	%xmm6, -32(%eax)	/* previous block, piece 2 */
-	movaps	%xmm3, %xmm6
-	movdqu	%xmm7, -16(%eax)	/* previous block, piece 3 */
-	movaps	48(%esi), %xmm7
-	pminub	%xmm7, %xmm3
-	pminub	%xmm2, %xmm3	/* byte-wise minimum of the four new pieces */
-	pcmpeqb	%xmm0, %xmm3	/* xmm0 is zero: flag lanes whose minimum is 0 */
-	pmovmskb %xmm3, %edx
-# ifdef USE_AS_STRNCAT
-	sub	$64, %ebx
-	jbe	L(UnalignedLeaveCase2OrCase3)	/* limit reached inside this 64-byte block */
-# endif
-	test	%edx, %edx
-	jz	L(Unaligned64Loop_start)	/* no NUL yet: keep going */
-
-L(Unaligned64Leave):	/* the 64 bytes in xmm4..xmm7 contain the source NUL; find which piece */
-	pxor	%xmm1, %xmm1
-
-	pcmpeqb	%xmm4, %xmm0	/* xmm0 was zero: NUL mask of piece 0 */
-	pcmpeqb	%xmm5, %xmm1	/* NUL mask of piece 1 */
-	pmovmskb %xmm0, %edx
-	pmovmskb %xmm1, %ecx
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesUnaligned_0)
-	test	%ecx, %ecx
-	jnz	L(CopyFrom1To16BytesUnaligned_16)
-
-	pcmpeqb	%xmm6, %xmm0	/* both masks were empty, so xmm0/xmm1 are zero again here */
-	pcmpeqb	%xmm7, %xmm1
-	pmovmskb %xmm0, %edx
-	pmovmskb %xmm1, %ecx
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesUnaligned_32)
-
-	bsf	%ecx, %edx	/* NUL is in piece 3: %edx = its index within that piece */
-	movdqu	%xmm4, (%eax)
-	movdqu	%xmm5, 16(%eax)
-	movdqu	%xmm6, 32(%eax)
-	add	$48, %esi	/* point both cursors at piece 3 for the exit stub */
-	add	$48, %eax
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-# ifdef USE_AS_STRNCAT
-	.p2align 4
-L(BigN):	/* reached from L(StartStrcpyPart_1) when the limit is above 48; only NUL checks are needed here */
-	pcmpeqb	(%esi), %xmm1	/* NUL mask of the aligned block containing the source start */
-	pmovmskb %xmm1, %edx
-	shr	%cl, %edx	/* drop mask bits for bytes that precede the source start */
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesTail)
-
-	pcmpeqb	16(%esi), %xmm0
-	pmovmskb %xmm0, %edx
-	test	%edx, %edx
-	jnz	L(CopyFrom1To32Bytes)
-
-	movdqu	(%esi, %ecx), %xmm1   /* copy 16 bytes from the unaligned source start */
-	movdqu	%xmm1, (%eax)
-	sub	%ecx, %eax	/* bias %eax so (%eax,k) pairs with (%esi,k) */
-	sub     $48, %ebx	/* the same 48-byte debit that L(Unalign16Both) applies ... */
-	add     %ecx, %ebx	/* ... with the limit counted from the aligned source base */
-
-	mov	$16, %ecx
-	movdqa	(%esi, %ecx), %xmm1
-	movaps	16(%esi, %ecx), %xmm2
-	movdqu	%xmm1, (%eax, %ecx)
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16Bytes)
-	jmp	L(Unalign16BothBigN)	/* continue in the shared unrolled loop */
-# endif
-
-/*------------end of main part-------------------------------*/
-
-/* Case1: a source NUL was found; copy the tail through an ExitTable stub.  */
-	.p2align 4
-L(CopyFrom1To16Bytes):	/* NUL is in the block at offset %ecx from both cursors */
-	add	%ecx, %eax
-	add	%ecx, %esi
-	bsf	%edx, %edx	/* %edx = index of the NUL; table entry %edx copies %edx + 1 bytes */
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-	.p2align 4
-L(CopyFrom1To16BytesTail):	/* NUL in the first aligned source block; %eax is not biased yet */
-	add	%ecx, %esi	/* restore the unaligned source start */
-	bsf	%edx, %edx	/* the mask was already shifted, so this index is relative to the source start */
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-	.p2align 4
-L(CopyFrom1To32Bytes1):	/* source bytes 0..15 were already stored; the NUL is in bytes 16..31 */
-	add	$16, %esi
-	add	$16, %eax
-L(CopyFrom1To16BytesTail1):
-	bsf	%edx, %edx
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-	.p2align 4
-L(CopyFrom1To32Bytes):	/* NUL in the second aligned source block; nothing has been stored yet */
-	bsf	%edx, %edx
-	add	%ecx, %esi	/* restore the unaligned source start */
-	add	$16, %edx
-	sub	%ecx, %edx	/* NUL index relative to the unaligned source start */
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-	.p2align 4
-L(CopyFrom1To16BytesUnaligned_0):	/* NUL in piece 0 of the 64-byte block */
-	bsf	%edx, %edx
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-	.p2align 4
-L(CopyFrom1To16BytesUnaligned_16):	/* NUL in piece 1: store piece 0, then finish from piece 1 */
-	bsf	%ecx, %edx
-	movdqu	%xmm4, (%eax)
-	add	$16, %esi
-	add	$16, %eax
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-	.p2align 4
-L(CopyFrom1To16BytesUnaligned_32):	/* NUL in piece 2: store pieces 0 and 1, then finish from piece 2 */
-	bsf	%edx, %edx
-	movdqu	%xmm4, (%eax)
-	movdqu	%xmm5, 16(%eax)
-	add	$32, %esi
-	add	$32, %eax
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-# ifdef USE_AS_STRNCAT
-
-	.p2align 4
-L(CopyFrom1To16BytesExit):	/* the NUL comes before the limit: %edx = NUL index, copy through it */
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-/* Case2: the length limit and a source NUL fall in the same block.  */
-
-	.p2align 4
-L(CopyFrom1To16BytesCase2):
-	add	$16, %ebx	/* rebias %ebx to the number of bytes still allowed from this block */
-	add	%ecx, %eax
-	add	%ecx, %esi
-	bsf	%edx, %edx	/* %edx = index of the first NUL in the block */
-	cmp	%ebx, %edx
-	jb	L(CopyFrom1To16BytesExit)	/* NUL index below the limit: ordinary exit */
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)	/* else copy %ebx bytes and terminate */
-
-	.p2align 4
-L(CopyFrom1To32BytesCase2):
-	sub	%ecx, %ebx	/* remove the source misalignment that was added to the limit */
-	add	%ecx, %esi	/* restore the unaligned source start */
-	bsf	%edx, %edx
-	add	$16, %edx
-	sub	%ecx, %edx	/* NUL index relative to the unaligned source start */
-	cmp	%ebx, %edx
-	jb	L(CopyFrom1To16BytesExit)
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-L(CopyFrom1To16BytesTailCase2):
-	sub	%ecx, %ebx	/* remove the source misalignment that was added to the limit */
-	add	%ecx, %esi	/* restore the unaligned source start */
-	bsf	%edx, %edx
-	cmp	%ebx, %edx
-	jb	L(CopyFrom1To16BytesExit)
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-L(CopyFrom1To16BytesTail1Case2):
-	bsf	%edx, %edx
-	cmp	%ebx, %edx
-	jb	L(CopyFrom1To16BytesExit)
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-/* Case2-or-Case3 dispatch.  Case3: the limit is reached with no NUL in the block.  */
-
-	.p2align 4
-L(CopyFrom1To16BytesCase2OrCase3):
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesCase2)	/* a NUL is also present in this block */
-L(CopyFrom1To16BytesCase3):
-	add	$16, %ebx	/* rebias %ebx to the number of bytes still allowed from this block */
-	add	%ecx, %eax
-	add	%ecx, %esi
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-	.p2align 4
-L(CopyFrom1To32BytesCase2OrCase3):
-	test	%edx, %edx
-	jnz	L(CopyFrom1To32BytesCase2)
-	sub	%ecx, %ebx	/* remove the source misalignment that was added to the limit */
-	add	%ecx, %esi	/* restore the unaligned source start */
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-	.p2align 4
-L(CopyFrom1To16BytesTailCase2OrCase3):
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesTailCase2)
-	sub	%ecx, %ebx
-	add	%ecx, %esi
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-	.p2align 4
-L(CopyFrom1To32Bytes1Case2OrCase3):	/* source bytes 0..15 were already stored; step past them */
-	add	$16, %eax
-	add	$16, %esi
-	sub	$16, %ebx
-L(CopyFrom1To16BytesTail1Case2OrCase3):
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesTail1Case2)
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-# endif
-
-# ifdef USE_AS_STRNCAT
-	.p2align 4
-L(StrncatExit0):	/* limit reached with no bytes left to copy: only terminate */
-	movb	%bh, (%eax)	/* %bh is 0 whenever %ebx is a valid index (0..32) into the 33-entry table */
-	mov	STR3(%esp), %eax	/* return value = original destination */
-	RETURN
-# endif
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit1):	/* StrncatExit<k>: store a terminator at offset k, then fall into Exit<k> */
-	movb	%bh, 1(%eax)
-# endif
-L(Exit1):	/* Exit<k>: copy k bytes from (%esi) to (%eax) and return the original destination */
-# ifdef USE_AS_STRNCAT
-	movb	(%esi), %dh	/* strncat build: the byte may be non-NUL, so load it */
-# endif
-	movb	%dh, (%eax)	/* strcat build: %dh is the high byte of the table index, presumably 0 (the NUL) */
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit2):
-	movb	%bh, 2(%eax)
-# endif
-L(Exit2):	/* copy 2 bytes */
-	movw	(%esi), %dx
-	movw	%dx, (%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit3):
-	movb	%bh, 3(%eax)
-# endif
-L(Exit3):	/* copy 3 bytes: a word plus one byte */
-	movw	(%esi), %cx
-	movw	%cx, (%eax)
-# ifdef USE_AS_STRNCAT
-	movb	2(%esi), %dh
-# endif
-	movb	%dh, 2(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit4):
-	movb	%bh, 4(%eax)
-# endif
-L(Exit4):	/* copy 4 bytes */
-	movl	(%esi), %edx
-	movl	%edx, (%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit5):
-	movb	%bh, 5(%eax)
-# endif
-L(Exit5):	/* copy 5 bytes: a dword plus one byte */
-	movl	(%esi), %ecx
-# ifdef USE_AS_STRNCAT
-	movb	4(%esi), %dh
-# endif
-	movb	%dh, 4(%eax)
-	movl	%ecx, (%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit6):
-	movb	%bh, 6(%eax)
-# endif
-L(Exit6):	/* copy 6 bytes: a dword plus a word */
-	movl	(%esi), %ecx
-	movw	4(%esi), %dx
-	movl	%ecx, (%eax)
-	movw	%dx, 4(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit7):
-	movb	%bh, 7(%eax)
-# endif
-L(Exit7):	/* copy 7 bytes with two overlapping dword moves */
-	movl	(%esi), %ecx
-	movl	3(%esi), %edx	/* bytes 3..6; byte 3 overlaps the first move */
-	movl	%ecx, (%eax)
-	movl	%edx, 3(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit8):
-	movb	%bh, 8(%eax)
-# endif
-L(Exit8):	/* copy 8 bytes through the low half of xmm0 */
-	movlpd	(%esi), %xmm0
-	movlpd	%xmm0, (%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit9):	/* StrncatExit<k>: store a terminator at offset k, then fall into Exit<k> */
-	movb	%bh, 9(%eax)
-# endif
-L(Exit9):	/* copy 9 bytes: 8 bytes plus one byte */
-	movlpd	(%esi), %xmm0
-# ifdef USE_AS_STRNCAT
-	movb	8(%esi), %dh	/* strncat build: the byte may be non-NUL, so load it */
-# endif
-	movb	%dh, 8(%eax)
-	movlpd	%xmm0, (%eax)
-	mov	STR3(%esp), %eax	/* return value = original destination */
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit10):
-	movb	%bh, 10(%eax)
-# endif
-L(Exit10):	/* copy 10 bytes: 8 bytes plus a word */
-	movlpd	(%esi), %xmm0
-	movw	8(%esi), %dx
-	movlpd	%xmm0, (%eax)
-	movw	%dx, 8(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit11):
-	movb	%bh, 11(%eax)
-# endif
-L(Exit11):	/* copy 11 bytes: 8 bytes plus an overlapping dword */
-	movlpd	(%esi), %xmm0
-	movl	7(%esi), %edx	/* bytes 7..10; byte 7 overlaps the first move */
-	movlpd	%xmm0, (%eax)
-	movl	%edx, 7(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit12):
-	movb	%bh, 12(%eax)
-# endif
-L(Exit12):	/* copy 12 bytes: 8 bytes plus a dword */
-	movlpd	(%esi), %xmm0
-	movl	8(%esi), %edx
-	movlpd	%xmm0, (%eax)
-	movl	%edx, 8(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit13):
-	movb	%bh, 13(%eax)
-# endif
-L(Exit13):	/* copy 13 bytes with two overlapping 8-byte moves */
-	movlpd	(%esi), %xmm0
-	movlpd	5(%esi), %xmm1	/* bytes 5..12 */
-	movlpd	%xmm0, (%eax)
-	movlpd	%xmm1, 5(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit14):
-	movb	%bh, 14(%eax)
-# endif
-L(Exit14):	/* copy 14 bytes with two overlapping 8-byte moves */
-	movlpd	(%esi), %xmm0
-	movlpd	6(%esi), %xmm1	/* bytes 6..13 */
-	movlpd	%xmm0, (%eax)
-	movlpd	%xmm1, 6(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit15):
-	movb	%bh, 15(%eax)
-# endif
-L(Exit15):	/* copy 15 bytes with two overlapping 8-byte moves */
-	movlpd	(%esi), %xmm0
-	movlpd	7(%esi), %xmm1	/* bytes 7..14 */
-	movlpd	%xmm0, (%eax)
-	movlpd	%xmm1, 7(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit16):
-	movb	%bh, 16(%eax)
-# endif
-L(Exit16):	/* copy 16 bytes with one unaligned move */
-	movdqu	(%esi), %xmm0
-	movdqu	%xmm0, (%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit17):	/* StrncatExit<k>: store a terminator at offset k, then fall into Exit<k> */
-	movb	%bh, 17(%eax)
-# endif
-L(Exit17):	/* copy 17 bytes: 16 bytes plus one byte */
-	movdqu	(%esi), %xmm0
-# ifdef USE_AS_STRNCAT
-	movb	16(%esi), %dh	/* strncat build: the byte may be non-NUL, so load it */
-# endif
-	movdqu	%xmm0, (%eax)
-	movb	%dh, 16(%eax)
-	mov	STR3(%esp), %eax	/* return value = original destination */
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit18):
-	movb	%bh, 18(%eax)
-# endif
-L(Exit18):	/* copy 18 bytes: 16 bytes plus a word */
-	movdqu	(%esi), %xmm0
-	movw	16(%esi), %cx
-	movdqu	%xmm0, (%eax)
-	movw	%cx, 16(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit19):
-	movb	%bh, 19(%eax)
-# endif
-L(Exit19):	/* copy 19 bytes: 16 bytes plus an overlapping dword */
-	movdqu	(%esi), %xmm0
-	movl	15(%esi), %ecx	/* bytes 15..18; byte 15 overlaps the first move */
-	movdqu	%xmm0, (%eax)
-	movl	%ecx, 15(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit20):
-	movb	%bh, 20(%eax)
-# endif
-L(Exit20):	/* copy 20 bytes: 16 bytes plus a dword */
-	movdqu	(%esi), %xmm0
-	movl	16(%esi), %ecx
-	movdqu	%xmm0, (%eax)
-	movl	%ecx, 16(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit21):
-	movb	%bh, 21(%eax)
-# endif
-L(Exit21):	/* copy 21 bytes: 16 bytes, a dword and one byte */
-	movdqu	(%esi), %xmm0
-	movl	16(%esi), %ecx
-# ifdef USE_AS_STRNCAT
-	movb	20(%esi), %dh
-# endif
-	movdqu	%xmm0, (%eax)
-	movl	%ecx, 16(%eax)
-	movb	%dh, 20(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit22):
-	movb	%bh, 22(%eax)
-# endif
-L(Exit22):	/* copy 22 bytes: 16 bytes plus an overlapping 8-byte move */
-	movdqu	(%esi), %xmm0
-	movlpd	14(%esi), %xmm3	/* bytes 14..21 */
-	movdqu	%xmm0, (%eax)
-	movlpd	%xmm3, 14(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit23):
-	movb	%bh, 23(%eax)
-# endif
-L(Exit23):	/* copy 23 bytes: 16 bytes plus an overlapping 8-byte move */
-	movdqu	(%esi), %xmm0
-	movlpd	15(%esi), %xmm3	/* bytes 15..22 */
-	movdqu	%xmm0, (%eax)
-	movlpd	%xmm3, 15(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit24):
-	movb	%bh, 24(%eax)
-# endif
-L(Exit24):	/* copy 24 bytes: 16 bytes plus 8 bytes */
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movdqu	%xmm0, (%eax)
-	movlpd	%xmm2, 16(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit25):	/* StrncatExit<k>: store a terminator at offset k, then fall into Exit<k> */
-	movb	%bh, 25(%eax)
-# endif
-L(Exit25):	/* copy 25 bytes: 16 bytes, 8 bytes and one byte */
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-# ifdef USE_AS_STRNCAT
-	movb	24(%esi), %dh	/* strncat build: the byte may be non-NUL, so load it */
-# endif
-	movdqu	%xmm0, (%eax)
-	movlpd	%xmm2, 16(%eax)
-	movb	%dh, 24(%eax)
-	mov	STR3(%esp), %eax	/* return value = original destination */
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit26):
-	movb	%bh, 26(%eax)
-# endif
-L(Exit26):	/* copy 26 bytes: 16 bytes, 8 bytes and a word */
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movw	24(%esi), %cx
-	movdqu	%xmm0, (%eax)
-	movlpd	%xmm2, 16(%eax)
-	movw	%cx, 24(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit27):
-	movb	%bh, 27(%eax)
-# endif
-L(Exit27):	/* copy 27 bytes: 16 bytes, 8 bytes and an overlapping dword */
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movl	23(%esi), %ecx	/* bytes 23..26; byte 23 overlaps the 8-byte move */
-	movdqu	%xmm0, (%eax)
-	movlpd	%xmm2, 16(%eax)
-	movl	%ecx, 23(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit28):
-	movb	%bh, 28(%eax)
-# endif
-L(Exit28):	/* copy 28 bytes: 16 bytes, 8 bytes and a dword */
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movl	24(%esi), %ecx
-	movdqu	%xmm0, (%eax)
-	movlpd	%xmm2, 16(%eax)
-	movl	%ecx, 24(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit29):
-	movb	%bh, 29(%eax)
-# endif
-L(Exit29):	/* copy 29 bytes with two overlapping 16-byte moves */
-	movdqu	(%esi), %xmm0
-	movdqu	13(%esi), %xmm2	/* bytes 13..28 */
-	movdqu	%xmm0, (%eax)
-	movdqu	%xmm2, 13(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit30):
-	movb	%bh, 30(%eax)
-# endif
-L(Exit30):	/* copy 30 bytes with two overlapping 16-byte moves */
-	movdqu	(%esi), %xmm0
-	movdqu	14(%esi), %xmm2	/* bytes 14..29 */
-	movdqu	%xmm0, (%eax)
-	movdqu	%xmm2, 14(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit31):
-	movb	%bh, 31(%eax)
-# endif
-L(Exit31):	/* copy 31 bytes with two overlapping 16-byte moves */
-	movdqu	(%esi), %xmm0
-	movdqu	15(%esi), %xmm2	/* bytes 15..30 */
-	movdqu	%xmm0, (%eax)
-	movdqu	%xmm2, 15(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit32):
-	movb	%bh, 32(%eax)
-# endif
-L(Exit32):	/* copy 32 bytes with two 16-byte moves */
-	movdqu	(%esi), %xmm0
-	movdqu	16(%esi), %xmm2
-	movdqu	%xmm0, (%eax)
-	movdqu	%xmm2, 16(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-# ifdef USE_AS_STRNCAT
-
-	.p2align 4
-L(UnalignedLeaveCase2OrCase3):	/* the length limit is reached within the 64 bytes held in xmm4..xmm7 */
-	test	%edx, %edx
-	jnz	L(Unaligned64LeaveCase2)	/* the block also contains a NUL */
-L(Unaligned64LeaveCase3):	/* no NUL in the block: copy up to the limit, then terminate */
-	lea	64(%ebx), %ecx
-	and	$-16, %ecx	/* %ecx = (%ebx + 64) rounded down to a multiple of 16 */
-	add	$48, %ebx
-	jl	L(CopyFrom1To16BytesCase3)	/* signed test: %ebx is negative when the limit lies in piece 0 */
-	movdqu	%xmm4, (%eax)
-	sub	$16, %ebx
-	jb	L(CopyFrom1To16BytesCase3)	/* limit lies in piece 1 */
-	movdqu	%xmm5, 16(%eax)
-	sub	$16, %ebx
-	jb	L(CopyFrom1To16BytesCase3)	/* limit lies in piece 2 */
-	movdqu	%xmm6, 32(%eax)
-	sub	$16, %ebx
-	jb	L(CopyFrom1To16BytesCase3)	/* limit lies in piece 3 */
-	movdqu	%xmm7, 48(%eax)	/* limit is exactly the end of the block: store all 64 bytes */
-	xor	%bh, %bh
-	movb	%bh, 64(%eax)	/* terminator right after the 64 copied bytes */
-	mov	STR3(%esp), %eax	/* return value = original destination */
-	RETURN
-
-	.p2align 4
-L(Unaligned64LeaveCase2):	/* limit and NUL both in this block: examine the pieces one at a time */
-	xor	%ecx, %ecx	/* %ecx = offset of the piece being examined */
-	pcmpeqb	%xmm4, %xmm0	/* NUL mask of piece 0 */
-	pmovmskb %xmm0, %edx
-	add	$48, %ebx
-	jle	L(CopyFrom1To16BytesCase2OrCase3)	/* limit lies in piece 0 */
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16Bytes)	/* NUL in piece 0, before the limit */
-
-	pcmpeqb	%xmm5, %xmm0
-	pmovmskb %xmm0, %edx
-	movdqu	%xmm4, (%eax)
-	add	$16, %ecx
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)	/* limit lies in piece 1 */
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16Bytes)
-
-	pcmpeqb	%xmm6, %xmm0
-	pmovmskb %xmm0, %edx
-	movdqu	%xmm5, 16(%eax)
-	add	$16, %ecx
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)	/* limit lies in piece 2 */
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16Bytes)
-
-	pcmpeqb	%xmm7, %xmm0
-	pmovmskb %xmm0, %edx
-	movdqu	%xmm6, 32(%eax)
-	lea	16(%eax, %ecx), %eax	/* point both cursors at piece 3 */
-	lea	16(%esi, %ecx), %esi
-	bsf	%edx, %edx	/* %edx = NUL index within piece 3 */
-	cmp	%ebx, %edx
-	jb	L(CopyFrom1To16BytesExit)	/* NUL comes before the limit */
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)	/* else copy %ebx bytes and terminate */
-# endif
-	.p2align 4
-L(ExitZero):	/* nothing to append; %eax still holds the destination loaded at entry */
-	RETURN
-
-END (STRCAT)
-
-	.p2align 4	/* NOTE(review): emitted before the section switch, so it aligns the preceding section, not the table - confirm intent */
-	.section .rodata
-L(ExitTable):	/* entry i dispatches to L(Exit<i+1>), which copies i + 1 bytes (i = index of the source NUL) */
-	.int	JMPTBL(L(Exit1), L(ExitTable))
-	.int	JMPTBL(L(Exit2), L(ExitTable))
-	.int	JMPTBL(L(Exit3), L(ExitTable))
-	.int	JMPTBL(L(Exit4), L(ExitTable))
-	.int	JMPTBL(L(Exit5), L(ExitTable))
-	.int	JMPTBL(L(Exit6), L(ExitTable))
-	.int	JMPTBL(L(Exit7), L(ExitTable))
-	.int	JMPTBL(L(Exit8), L(ExitTable))
-	.int	JMPTBL(L(Exit9), L(ExitTable))
-	.int	JMPTBL(L(Exit10), L(ExitTable))
-	.int	JMPTBL(L(Exit11), L(ExitTable))
-	.int	JMPTBL(L(Exit12), L(ExitTable))
-	.int	JMPTBL(L(Exit13), L(ExitTable))
-	.int	JMPTBL(L(Exit14), L(ExitTable))
-	.int	JMPTBL(L(Exit15), L(ExitTable))
-	.int	JMPTBL(L(Exit16), L(ExitTable))
-	.int	JMPTBL(L(Exit17), L(ExitTable))
-	.int	JMPTBL(L(Exit18), L(ExitTable))
-	.int	JMPTBL(L(Exit19), L(ExitTable))
-	.int	JMPTBL(L(Exit20), L(ExitTable))
-	.int	JMPTBL(L(Exit21), L(ExitTable))
-	.int	JMPTBL(L(Exit22), L(ExitTable))
-	.int	JMPTBL(L(Exit23), L(ExitTable))
-	.int	JMPTBL(L(Exit24), L(ExitTable))
-	.int	JMPTBL(L(Exit25), L(ExitTable))
-	.int	JMPTBL(L(Exit26), L(ExitTable))
-	.int	JMPTBL(L(Exit27), L(ExitTable))
-	.int	JMPTBL(L(Exit28), L(ExitTable))
-	.int	JMPTBL(L(Exit29), L(ExitTable))
-	.int	JMPTBL(L(Exit30), L(ExitTable))
-	.int	JMPTBL(L(Exit31), L(ExitTable))
-	.int	JMPTBL(L(Exit32), L(ExitTable))
-# ifdef USE_AS_STRNCAT
-L(ExitStrncatTable):	/* entry k dispatches to L(StrncatExit<k>): copy k bytes and store a terminator at offset k */
-	.int	JMPTBL(L(StrncatExit0), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit1), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit2), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit3), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit4), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit5), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit6), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit7), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit8), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit9), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit10), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit11), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit12), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit13), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit14), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit15), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit16), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit17), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit18), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit19), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit20), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit21), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit22), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit23), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit24), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit25), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit26), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit27), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit28), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit29), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit30), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit31), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit32), L(ExitStrncatTable))
-# endif
-#endif