about summary refs log tree commit diff
path: root/sysdeps/i386/i686/multiarch
diff options
context:
space:
mode:
author H.J. Lu <hjl.tools@gmail.com> 2015-08-20 12:05:50 -0700
committer H.J. Lu <hjl.tools@gmail.com> 2015-08-27 09:35:11 -0700
commit 4fadad5ce5208d12fd65538ecbec1b97ae83b440 (patch)
tree 78866f58140f82a6332733a99fc9ab17de32d904 /sysdeps/i386/i686/multiarch
parent 377adacbaaa3974408b708a20912474e45cd383b (diff)
download glibc-4fadad5ce5208d12fd65538ecbec1b97ae83b440.tar.gz
glibc-4fadad5ce5208d12fd65538ecbec1b97ae83b440.tar.xz
glibc-4fadad5ce5208d12fd65538ecbec1b97ae83b440.zip
Add i386 strcpy family multiarch functions
Diffstat (limited to 'sysdeps/i386/i686/multiarch')
-rw-r--r-- sysdeps/i386/i686/multiarch/Makefile 5
-rw-r--r-- sysdeps/i386/i686/multiarch/stpcpy-sse2.S 3
-rw-r--r-- sysdeps/i386/i686/multiarch/stpcpy-ssse3.S 3
-rw-r--r-- sysdeps/i386/i686/multiarch/stpcpy.S 9
-rw-r--r-- sysdeps/i386/i686/multiarch/stpncpy-sse2.S 4
-rw-r--r-- sysdeps/i386/i686/multiarch/stpncpy-ssse3.S 4
-rw-r--r-- sysdeps/i386/i686/multiarch/stpncpy.S 8
-rw-r--r-- sysdeps/i386/i686/multiarch/strcpy-sse2.S 2250
-rw-r--r-- sysdeps/i386/i686/multiarch/strcpy-ssse3.S 3901
-rw-r--r-- sysdeps/i386/i686/multiarch/strcpy.S 116
-rw-r--r-- sysdeps/i386/i686/multiarch/strncpy-c.c 8
-rw-r--r-- sysdeps/i386/i686/multiarch/strncpy-sse2.S 3
-rw-r--r-- sysdeps/i386/i686/multiarch/strncpy-ssse3.S 3
-rw-r--r-- sysdeps/i386/i686/multiarch/strncpy.S 5
14 files changed, 2 insertions, 6320 deletions
diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile
index 101bb7d4c3..2a14fe8983 100644
--- a/sysdeps/i386/i686/multiarch/Makefile
+++ b/sysdeps/i386/i686/multiarch/Makefile
@@ -2,9 +2,8 @@ ifeq ($(subdir),string)
 sysdep_routines += strcmp-ssse3 \
 		   strcmp-sse4 strncmp-c strncmp-ssse3 strncmp-sse4 \
 		   varshift \
-		   strlen-sse2 strlen-sse2-bsf strncpy-c strcpy-ssse3 \
-		   strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 strcpy-sse2 \
-		   strncpy-sse2 stpcpy-sse2 stpncpy-sse2 strcat-ssse3 \
+		   strlen-sse2 strlen-sse2-bsf \
+		   strcat-ssse3 \
 		   strcat-sse2 strncat-ssse3 strncat-sse2 strncat-c \
 		   strchr-sse2 strrchr-sse2 strchr-sse2-bsf strrchr-sse2-bsf \
 		   strnlen-sse2 strnlen-c \
diff --git a/sysdeps/i386/i686/multiarch/stpcpy-sse2.S b/sysdeps/i386/i686/multiarch/stpcpy-sse2.S
deleted file mode 100644
index 46ca1b3074..0000000000
--- a/sysdeps/i386/i686/multiarch/stpcpy-sse2.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_STPCPY
-#define STRCPY __stpcpy_sse2
-#include "strcpy-sse2.S"
diff --git a/sysdeps/i386/i686/multiarch/stpcpy-ssse3.S b/sysdeps/i386/i686/multiarch/stpcpy-ssse3.S
deleted file mode 100644
index d971c2da38..0000000000
--- a/sysdeps/i386/i686/multiarch/stpcpy-ssse3.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_STPCPY
-#define STRCPY __stpcpy_ssse3
-#include "strcpy-ssse3.S"
diff --git a/sysdeps/i386/i686/multiarch/stpcpy.S b/sysdeps/i386/i686/multiarch/stpcpy.S
deleted file mode 100644
index ee81ab6ae3..0000000000
--- a/sysdeps/i386/i686/multiarch/stpcpy.S
+++ /dev/null
@@ -1,9 +0,0 @@
-/* Multiple versions of stpcpy
-   All versions must be listed in ifunc-impl-list.c.  */
-#define USE_AS_STPCPY
-#define STRCPY __stpcpy
-#include "strcpy.S"
-
-weak_alias (__stpcpy, stpcpy)
-libc_hidden_def (__stpcpy)
-libc_hidden_builtin_def (stpcpy)
diff --git a/sysdeps/i386/i686/multiarch/stpncpy-sse2.S b/sysdeps/i386/i686/multiarch/stpncpy-sse2.S
deleted file mode 100644
index 37a703cb76..0000000000
--- a/sysdeps/i386/i686/multiarch/stpncpy-sse2.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_STPCPY
-#define USE_AS_STRNCPY
-#define STRCPY __stpncpy_sse2
-#include "strcpy-sse2.S"
diff --git a/sysdeps/i386/i686/multiarch/stpncpy-ssse3.S b/sysdeps/i386/i686/multiarch/stpncpy-ssse3.S
deleted file mode 100644
index 14ed16f6b5..0000000000
--- a/sysdeps/i386/i686/multiarch/stpncpy-ssse3.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_STPCPY
-#define USE_AS_STRNCPY
-#define STRCPY __stpncpy_ssse3
-#include "strcpy-ssse3.S"
diff --git a/sysdeps/i386/i686/multiarch/stpncpy.S b/sysdeps/i386/i686/multiarch/stpncpy.S
deleted file mode 100644
index 2698ca6a8c..0000000000
--- a/sysdeps/i386/i686/multiarch/stpncpy.S
+++ /dev/null
@@ -1,8 +0,0 @@
-/* Multiple versions of stpncpy
-   All versions must be listed in ifunc-impl-list.c.  */
-#define STRCPY __stpncpy
-#define USE_AS_STPCPY
-#define USE_AS_STRNCPY
-#include "strcpy.S"
-
-weak_alias (__stpncpy, stpncpy)
diff --git a/sysdeps/i386/i686/multiarch/strcpy-sse2.S b/sysdeps/i386/i686/multiarch/strcpy-sse2.S
deleted file mode 100644
index a37c44530d..0000000000
--- a/sysdeps/i386/i686/multiarch/strcpy-sse2.S
+++ /dev/null
@@ -1,2250 +0,0 @@
-/* strcpy with SSE2 and unaligned load
-   Copyright (C) 2011-2015 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-
-# define CFI_PUSH(REG)                  \
-	cfi_adjust_cfa_offset (4);     \
-	cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG)                   \
-	cfi_adjust_cfa_offset (-4);    \
-	cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# ifndef STRCPY
-#  define STRCPY  __strcpy_sse2
-# endif
-
-# define STR1  PARMS
-# define STR2  STR1+4
-# define LEN  STR2+4
-
-# ifdef USE_AS_STRNCPY
-#  define PARMS  16
-#  define ENTRANCE PUSH(%ebx); PUSH(%esi); PUSH(%edi)
-#  define RETURN  POP(%edi); POP(%esi); POP(%ebx); ret;          \
-	CFI_PUSH(%ebx); CFI_PUSH(%esi); CFI_PUSH(%edi);
-
-# ifdef SHARED
-#  define JMPTBL(I, B)	I - B
-
-/* Load an entry in a jump table into ECX and branch to it. TABLE is a
-	jump table with relative offsets.
-	INDEX is a register contains the index into the jump table.
-	SCALE is the scale of INDEX. */
-
-#  define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)            \
-	/* We first load PC into ECX.  */                       \
-	SETUP_PIC_REG(cx);                                      \
-	/* Get the address of the jump table.  */               \
-	addl	$(TABLE - .), %ecx;                             \
-	/* Get the entry and convert the relative offset to the \
-	absolute	address.  */                            \
-	addl	(%ecx,INDEX,SCALE), %ecx;                       \
-	/* We loaded the jump table and adjusted ECX. Go.  */  \
-	jmp	*%ecx
-# else
-#  define JMPTBL(I, B)	I
-
-/* Branch to an entry in a jump table.  TABLE is a jump table with
-	absolute	offsets.  INDEX is a register contains the index into the
-	jump	table.  SCALE is the scale of INDEX. */
-
-#  define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)		\
-	jmp	*TABLE(,INDEX,SCALE)
-# endif
-
-.text
-ENTRY (STRCPY)
-	ENTRANCE
-	mov	STR1(%esp), %edi
-	mov	STR2(%esp), %esi
-	movl	LEN(%esp), %ebx
-	test	%ebx, %ebx
-	jz	L(ExitZero)
-
-	mov	%esi, %ecx
-# ifndef USE_AS_STPCPY
-	mov	%edi, %eax      /* save result */
-# endif
-	and	$15, %ecx
-	jz	L(SourceStringAlignmentZero)
-
-	and	$-16, %esi
-	pxor	%xmm0, %xmm0
-	pxor	%xmm1, %xmm1
-
-	pcmpeqb	(%esi), %xmm1
-	add	%ecx, %ebx
-	pmovmskb %xmm1, %edx
-	shr	%cl, %edx
-# ifdef USE_AS_STPCPY
-	cmp	$16, %ebx
-	jbe	L(CopyFrom1To16BytesTailCase2OrCase3)
-# else
-	cmp	$17, %ebx
-	jbe	L(CopyFrom1To16BytesTailCase2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesTail)
-
-	pcmpeqb	16(%esi), %xmm0
-	pmovmskb %xmm0, %edx
-# ifdef USE_AS_STPCPY
-	cmp	$32, %ebx
-	jbe	L(CopyFrom1To32BytesCase2OrCase3)
-# else
-	cmp	$33, %ebx
-	jbe	L(CopyFrom1To32BytesCase2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To32Bytes)
-
-	movdqu	(%esi, %ecx), %xmm1   /* copy 16 bytes */
-	movdqu	%xmm1, (%edi)
-
-	sub	%ecx, %edi
-
-/* If source address alignment != destination address alignment */
-	.p2align 4
-L(Unalign16Both):
-	mov	$16, %ecx
-	movdqa	(%esi, %ecx), %xmm1
-	movaps	16(%esi, %ecx), %xmm2
-	movdqu	%xmm1, (%edi, %ecx)
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-	sub	$48, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesUnalignedXmm2)
-
-	movaps	16(%esi, %ecx), %xmm3
-	movdqu	%xmm2, (%edi, %ecx)
-	pcmpeqb	%xmm3, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesUnalignedXmm3)
-
-	movaps	16(%esi, %ecx), %xmm4
-	movdqu	%xmm3, (%edi, %ecx)
-	pcmpeqb	%xmm4, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesUnalignedXmm4)
-
-	movaps	16(%esi, %ecx), %xmm1
-	movdqu	%xmm4, (%edi, %ecx)
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesUnalignedXmm1)
-
-	movaps	16(%esi, %ecx), %xmm2
-	movdqu	%xmm1, (%edi, %ecx)
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesUnalignedXmm2)
-
-	movaps	16(%esi, %ecx), %xmm3
-	movdqu	%xmm2, (%edi, %ecx)
-	pcmpeqb	%xmm3, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesUnalignedXmm3)
-
-	movdqu	%xmm3, (%edi, %ecx)
-	mov	%esi, %edx
-	lea	16(%esi, %ecx), %esi
-	and	$-0x40, %esi
-	sub	%esi, %edx
-	sub	%edx, %edi
-	lea	128(%ebx, %edx), %ebx
-
-L(Unaligned64Loop):
-	movaps	(%esi), %xmm2
-	movaps	%xmm2, %xmm4
-	movaps	16(%esi), %xmm5
-	movaps	32(%esi), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	48(%esi), %xmm7
-	pminub	%xmm5, %xmm2
-	pminub	%xmm7, %xmm3
-	pminub	%xmm2, %xmm3
-	pcmpeqb	%xmm0, %xmm3
-	pmovmskb %xmm3, %edx
-	sub	$64, %ebx
-	jbe	L(UnalignedLeaveCase2OrCase3)
-	test	%edx, %edx
-	jnz	L(Unaligned64Leave)
-L(Unaligned64Loop_start):
-	add	$64, %edi
-	add	$64, %esi
-	movdqu	%xmm4, -64(%edi)
-	movaps	(%esi), %xmm2
-	movdqa	%xmm2, %xmm4
-	movdqu	%xmm5, -48(%edi)
-	movaps	16(%esi), %xmm5
-	pminub	%xmm5, %xmm2
-	movaps	32(%esi), %xmm3
-	movdqu	%xmm6, -32(%edi)
-	movaps	%xmm3, %xmm6
-	movdqu	%xmm7, -16(%edi)
-	movaps	48(%esi), %xmm7
-	pminub	%xmm7, %xmm3
-	pminub	%xmm2, %xmm3
-	pcmpeqb	%xmm0, %xmm3
-	pmovmskb %xmm3, %edx
-	sub	$64, %ebx
-	jbe	L(UnalignedLeaveCase2OrCase3)
-	test	%edx, %edx
-	jz	L(Unaligned64Loop_start)
-L(Unaligned64Leave):
-	pxor	%xmm1, %xmm1
-
-	pcmpeqb	%xmm4, %xmm0
-	pcmpeqb	%xmm5, %xmm1
-	pmovmskb %xmm0, %edx
-	pmovmskb %xmm1, %ecx
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesUnaligned_0)
-	test	%ecx, %ecx
-	jnz	L(CopyFrom1To16BytesUnaligned_16)
-
-	pcmpeqb	%xmm6, %xmm0
-	pcmpeqb	%xmm7, %xmm1
-	pmovmskb %xmm0, %edx
-	pmovmskb %xmm1, %ecx
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesUnaligned_32)
-
-	bsf	%ecx, %edx
-	movdqu	%xmm4, (%edi)
-	movdqu	%xmm5, 16(%edi)
-	movdqu	%xmm6, 32(%edi)
-# ifdef USE_AS_STPCPY
-	lea	48(%edi, %edx), %eax
-# endif
-	movdqu	%xmm7, 48(%edi)
-	add	$15, %ebx
-	sub	%edx, %ebx
-	lea	49(%edi, %edx), %edi
-	jmp	L(StrncpyFillTailWithZero)
-
-/* If source address alignment == destination address alignment */
-
-L(SourceStringAlignmentZero):
-	pxor	%xmm0, %xmm0
-	movdqa	(%esi), %xmm1
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %edx
-# ifdef USE_AS_STPCPY
-	cmp	$16, %ebx
-	jbe	L(CopyFrom1To16BytesTail1Case2OrCase3)
-# else
-	cmp	$17, %ebx
-	jbe	L(CopyFrom1To16BytesTail1Case2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesTail1)
-
-	pcmpeqb	16(%esi), %xmm0
-	movdqu	%xmm1, (%edi)
-	pmovmskb %xmm0, %edx
-# ifdef USE_AS_STPCPY
-	cmp	$32, %ebx
-	jbe	L(CopyFrom1To32Bytes1Case2OrCase3)
-# else
-	cmp	$33, %ebx
-	jbe	L(CopyFrom1To32Bytes1Case2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To32Bytes1)
-
-	jmp	L(Unalign16Both)
-
-/*-----------------End of main part---------------------------*/
-
-/* Case1 */
-	.p2align 4
-L(CopyFrom1To16BytesTail):
-	sub	%ecx, %ebx
-	add	%ecx, %esi
-	bsf	%edx, %edx
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-	.p2align 4
-L(CopyFrom1To32Bytes1):
-	add	$16, %esi
-	add	$16, %edi
-	sub	$16, %ebx
-L(CopyFrom1To16BytesTail1):
-	bsf	%edx, %edx
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-	.p2align 4
-L(CopyFrom1To32Bytes):
-	sub	%ecx, %ebx
-	bsf	%edx, %edx
-	add	%ecx, %esi
-	add	$16, %edx
-	sub	%ecx, %edx
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-	.p2align 4
-L(CopyFrom1To16BytesUnaligned_0):
-	bsf	%edx, %edx
-# ifdef USE_AS_STPCPY
-	lea	(%edi, %edx), %eax
-# endif
-	movdqu	%xmm4, (%edi)
-	add	$63, %ebx
-	sub	%edx, %ebx
-	lea	1(%edi, %edx), %edi
-	jmp	L(StrncpyFillTailWithZero)
-
-	.p2align 4
-L(CopyFrom1To16BytesUnaligned_16):
-	bsf	%ecx, %edx
-	movdqu	%xmm4, (%edi)
-# ifdef USE_AS_STPCPY
-	lea	16(%edi, %edx), %eax
-# endif
-	movdqu	%xmm5, 16(%edi)
-	add	$47, %ebx
-	sub	%edx, %ebx
-	lea	17(%edi, %edx), %edi
-	jmp	L(StrncpyFillTailWithZero)
-
-	.p2align 4
-L(CopyFrom1To16BytesUnaligned_32):
-	bsf	%edx, %edx
-	movdqu	%xmm4, (%edi)
-	movdqu	%xmm5, 16(%edi)
-# ifdef USE_AS_STPCPY
-	lea	32(%edi, %edx), %eax
-# endif
-	movdqu	%xmm6, 32(%edi)
-	add	$31, %ebx
-	sub	%edx, %ebx
-	lea	33(%edi, %edx), %edi
-	jmp	L(StrncpyFillTailWithZero)
-
-	.p2align 4
-L(CopyFrom1To16BytesUnalignedXmm6):
-	movdqu	%xmm6, (%edi, %ecx)
-	jmp	L(CopyFrom1To16BytesXmmExit)
-
-	.p2align 4
-L(CopyFrom1To16BytesUnalignedXmm5):
-	movdqu	%xmm5, (%edi, %ecx)
-	jmp	L(CopyFrom1To16BytesXmmExit)
-
-	.p2align 4
-L(CopyFrom1To16BytesUnalignedXmm4):
-	movdqu	%xmm4, (%edi, %ecx)
-	jmp	L(CopyFrom1To16BytesXmmExit)
-
-	.p2align 4
-L(CopyFrom1To16BytesUnalignedXmm3):
-	movdqu	%xmm3, (%edi, %ecx)
-	jmp	L(CopyFrom1To16BytesXmmExit)
-
-	.p2align 4
-L(CopyFrom1To16BytesUnalignedXmm1):
-	movdqu	%xmm1, (%edi, %ecx)
-	jmp	L(CopyFrom1To16BytesXmmExit)
-
-	.p2align 4
-L(CopyFrom1To16BytesExit):
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-/* Case2 */
-
-	.p2align 4
-L(CopyFrom1To16BytesCase2):
-	add	$16, %ebx
-	add	%ecx, %edi
-	add	%ecx, %esi
-	bsf	%edx, %edx
-	cmp	%ebx, %edx
-	jb	L(CopyFrom1To16BytesExit)
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-
-	.p2align 4
-L(CopyFrom1To32BytesCase2):
-	sub	%ecx, %ebx
-	add	%ecx, %esi
-	bsf	%edx, %edx
-	add	$16, %edx
-	sub	%ecx, %edx
-	cmp	%ebx, %edx
-	jb	L(CopyFrom1To16BytesExit)
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-
-L(CopyFrom1To16BytesTailCase2):
-	sub	%ecx, %ebx
-	add	%ecx, %esi
-	bsf	%edx, %edx
-	cmp	%ebx, %edx
-	jb	L(CopyFrom1To16BytesExit)
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-
-L(CopyFrom1To16BytesTail1Case2):
-	bsf	%edx, %edx
-	cmp	%ebx, %edx
-	jb	L(CopyFrom1To16BytesExit)
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-
-/* Case2 or Case3,  Case3 */
-
-	.p2align 4
-L(CopyFrom1To16BytesCase2OrCase3):
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesCase2)
-L(CopyFrom1To16BytesCase3):
-	add	$16, %ebx
-	add	%ecx, %edi
-	add	%ecx, %esi
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-
-	.p2align 4
-L(CopyFrom1To32BytesCase2OrCase3):
-	test	%edx, %edx
-	jnz	L(CopyFrom1To32BytesCase2)
-	sub	%ecx, %ebx
-	add	%ecx, %esi
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-
-	.p2align 4
-L(CopyFrom1To16BytesTailCase2OrCase3):
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesTailCase2)
-	sub	%ecx, %ebx
-	add	%ecx, %esi
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-
-	.p2align 4
-L(CopyFrom1To32Bytes1Case2OrCase3):
-	add	$16, %edi
-	add	$16, %esi
-	sub	$16, %ebx
-L(CopyFrom1To16BytesTail1Case2OrCase3):
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesTail1Case2)
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-
-	.p2align 4
-L(Exit0):
-# ifdef USE_AS_STPCPY
-	mov	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit1):
-	movb	%dh, (%edi)
-# ifdef USE_AS_STPCPY
-	lea	(%edi), %eax
-# endif
-	sub	$1, %ebx
-	lea	1(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit2):
-	movw	(%esi), %dx
-	movw	%dx, (%edi)
-# ifdef USE_AS_STPCPY
-	lea	1(%edi), %eax
-# endif
-	sub	$2, %ebx
-	lea	2(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit3):
-	movw	(%esi), %cx
-	movw	%cx, (%edi)
-	movb	%dh, 2(%edi)
-# ifdef USE_AS_STPCPY
-	lea	2(%edi), %eax
-# endif
-	sub	$3, %ebx
-	lea	3(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit4):
-	movl	(%esi), %edx
-	movl	%edx, (%edi)
-# ifdef USE_AS_STPCPY
-	lea	3(%edi), %eax
-# endif
-	sub	$4, %ebx
-	lea	4(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit5):
-	movl	(%esi), %ecx
-	movb	%dh, 4(%edi)
-	movl	%ecx, (%edi)
-# ifdef USE_AS_STPCPY
-	lea	4(%edi), %eax
-# endif
-	sub	$5, %ebx
-	lea	5(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit6):
-	movl	(%esi), %ecx
-	movw	4(%esi), %dx
-	movl	%ecx, (%edi)
-	movw	%dx, 4(%edi)
-# ifdef USE_AS_STPCPY
-	lea	5(%edi), %eax
-# endif
-	sub	$6, %ebx
-	lea	6(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit7):
-	movl	(%esi), %ecx
-	movl	3(%esi), %edx
-	movl	%ecx, (%edi)
-	movl	%edx, 3(%edi)
-# ifdef USE_AS_STPCPY
-	lea	6(%edi), %eax
-# endif
-	sub	$7, %ebx
-	lea	7(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit8):
-	movlpd	(%esi), %xmm0
-	movlpd	%xmm0, (%edi)
-# ifdef USE_AS_STPCPY
-	lea	7(%edi), %eax
-# endif
-	sub	$8, %ebx
-	lea	8(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit9):
-	movlpd	(%esi), %xmm0
-	movb	%dh, 8(%edi)
-	movlpd	%xmm0, (%edi)
-# ifdef USE_AS_STPCPY
-	lea	8(%edi), %eax
-# endif
-	sub	$9, %ebx
-	lea	9(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit10):
-	movlpd	(%esi), %xmm0
-	movw	8(%esi), %dx
-	movlpd	%xmm0, (%edi)
-	movw	%dx, 8(%edi)
-# ifdef USE_AS_STPCPY
-	lea	9(%edi), %eax
-# endif
-	sub	$10, %ebx
-	lea	10(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit11):
-	movlpd	(%esi), %xmm0
-	movl	7(%esi), %edx
-	movlpd	%xmm0, (%edi)
-	movl	%edx, 7(%edi)
-# ifdef USE_AS_STPCPY
-	lea	10(%edi), %eax
-# endif
-	sub	$11, %ebx
-	lea	11(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit12):
-	movlpd	(%esi), %xmm0
-	movl	8(%esi), %edx
-	movlpd	%xmm0, (%edi)
-	movl	%edx, 8(%edi)
-# ifdef USE_AS_STPCPY
-	lea	11(%edi), %eax
-# endif
-	sub	$12, %ebx
-	lea	12(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit13):
-	movlpd	(%esi), %xmm0
-	movlpd	5(%esi), %xmm1
-	movlpd	%xmm0, (%edi)
-	movlpd	%xmm1, 5(%edi)
-# ifdef USE_AS_STPCPY
-	lea	12(%edi), %eax
-# endif
-	sub	$13, %ebx
-	lea	13(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit14):
-	movlpd	(%esi), %xmm0
-	movlpd	6(%esi), %xmm1
-	movlpd	%xmm0, (%edi)
-	movlpd	%xmm1, 6(%edi)
-# ifdef USE_AS_STPCPY
-	lea	13(%edi), %eax
-# endif
-	sub	$14, %ebx
-	lea	14(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit15):
-	movlpd	(%esi), %xmm0
-	movlpd	7(%esi), %xmm1
-	movlpd	%xmm0, (%edi)
-	movlpd	%xmm1, 7(%edi)
-# ifdef USE_AS_STPCPY
-	lea	14(%edi), %eax
-# endif
-	sub	$15, %ebx
-	lea	15(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit16):
-	movdqu	(%esi), %xmm0
-	movdqu	%xmm0, (%edi)
-# ifdef USE_AS_STPCPY
-	lea	15(%edi), %eax
-# endif
-	sub	$16, %ebx
-	lea	16(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit17):
-	movdqu	(%esi), %xmm0
-	movdqu	%xmm0, (%edi)
-	movb	%dh, 16(%edi)
-# ifdef USE_AS_STPCPY
-	lea	16(%edi), %eax
-# endif
-	sub	$17, %ebx
-	lea	17(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit18):
-	movdqu	(%esi), %xmm0
-	movw	16(%esi), %cx
-	movdqu	%xmm0, (%edi)
-	movw	%cx, 16(%edi)
-# ifdef USE_AS_STPCPY
-	lea	17(%edi), %eax
-# endif
-	sub	$18, %ebx
-	lea	18(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit19):
-	movdqu	(%esi), %xmm0
-	movl	15(%esi), %ecx
-	movdqu	%xmm0, (%edi)
-	movl	%ecx, 15(%edi)
-# ifdef USE_AS_STPCPY
-	lea	18(%edi), %eax
-# endif
-	sub	$19, %ebx
-	lea	19(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit20):
-	movdqu	(%esi), %xmm0
-	movl	16(%esi), %ecx
-	movdqu	%xmm0, (%edi)
-	movl	%ecx, 16(%edi)
-# ifdef USE_AS_STPCPY
-	lea	19(%edi), %eax
-# endif
-	sub	$20, %ebx
-	lea	20(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit21):
-	movdqu	(%esi), %xmm0
-	movl	16(%esi), %ecx
-	movdqu	%xmm0, (%edi)
-	movl	%ecx, 16(%edi)
-	movb	%dh, 20(%edi)
-# ifdef USE_AS_STPCPY
-	lea	20(%edi), %eax
-# endif
-	sub	$21, %ebx
-	lea	21(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit22):
-	movdqu	(%esi), %xmm0
-	movlpd	14(%esi), %xmm3
-	movdqu	%xmm0, (%edi)
-	movlpd	%xmm3, 14(%edi)
-# ifdef USE_AS_STPCPY
-	lea	21(%edi), %eax
-# endif
-	sub	$22, %ebx
-	lea	22(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit23):
-	movdqu	(%esi), %xmm0
-	movlpd	15(%esi), %xmm3
-	movdqu	%xmm0, (%edi)
-	movlpd	%xmm3, 15(%edi)
-# ifdef USE_AS_STPCPY
-	lea	22(%edi), %eax
-# endif
-	sub	$23, %ebx
-	lea	23(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit24):
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movdqu	%xmm0, (%edi)
-	movlpd	%xmm2, 16(%edi)
-# ifdef USE_AS_STPCPY
-	lea	23(%edi), %eax
-# endif
-	sub	$24, %ebx
-	lea	24(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit25):
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movdqu	%xmm0, (%edi)
-	movlpd	%xmm2, 16(%edi)
-	movb	%dh, 24(%edi)
-# ifdef USE_AS_STPCPY
-	lea	24(%edi), %eax
-# endif
-	sub	$25, %ebx
-	lea	25(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit26):
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movw	24(%esi), %cx
-	movdqu	%xmm0, (%edi)
-	movlpd	%xmm2, 16(%edi)
-	movw	%cx, 24(%edi)
-# ifdef USE_AS_STPCPY
-	lea	25(%edi), %eax
-# endif
-	sub	$26, %ebx
-	lea	26(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit27):
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movl	23(%esi), %ecx
-	movdqu	%xmm0, (%edi)
-	movlpd	%xmm2, 16(%edi)
-	movl	%ecx, 23(%edi)
-# ifdef USE_AS_STPCPY
-	lea	26(%edi), %eax
-# endif
-	sub	$27, %ebx
-	lea	27(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit28):
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movl	24(%esi), %ecx
-	movdqu	%xmm0, (%edi)
-	movlpd	%xmm2, 16(%edi)
-	movl	%ecx, 24(%edi)
-# ifdef USE_AS_STPCPY
-	lea	27(%edi), %eax
-# endif
-	sub	$28, %ebx
-	lea	28(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit29):
-	movdqu	(%esi), %xmm0
-	movdqu	13(%esi), %xmm2
-	movdqu	%xmm0, (%edi)
-	movdqu	%xmm2, 13(%edi)
-# ifdef USE_AS_STPCPY
-	lea	28(%edi), %eax
-# endif
-	sub	$29, %ebx
-	lea	29(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit30):
-	movdqu	(%esi), %xmm0
-	movdqu	14(%esi), %xmm2
-	movdqu	%xmm0, (%edi)
-	movdqu	%xmm2, 14(%edi)
-# ifdef USE_AS_STPCPY
-	lea	29(%edi), %eax
-# endif
-	sub	$30, %ebx
-	lea	30(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-
-	.p2align 4
-L(Exit31):
-	movdqu	(%esi), %xmm0
-	movdqu	15(%esi), %xmm2
-	movdqu	%xmm0, (%edi)
-	movdqu	%xmm2, 15(%edi)
-# ifdef USE_AS_STPCPY
-	lea	30(%edi), %eax
-# endif
-	sub	$31, %ebx
-	lea	31(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit32):
-	movdqu	(%esi), %xmm0
-	movdqu	16(%esi), %xmm2
-	movdqu	%xmm0, (%edi)
-	movdqu	%xmm2, 16(%edi)
-# ifdef USE_AS_STPCPY
-	lea	31(%edi), %eax
-# endif
-	sub	$32, %ebx
-	lea	32(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(StrncpyExit1):
-	movb	(%esi), %dl
-	movb	%dl, (%edi)
-# ifdef USE_AS_STPCPY
-	lea	1(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit2):
-	movw	(%esi), %dx
-	movw	%dx, (%edi)
-# ifdef USE_AS_STPCPY
-	lea	2(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit3):
-	movw	(%esi), %cx
-	movb	2(%esi), %dl
-	movw	%cx, (%edi)
-	movb	%dl, 2(%edi)
-# ifdef USE_AS_STPCPY
-	lea	3(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit4):
-	movl	(%esi), %edx
-	movl	%edx, (%edi)
-# ifdef USE_AS_STPCPY
-	lea	4(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit5):
-	movl	(%esi), %ecx
-	movb	4(%esi), %dl
-	movl	%ecx, (%edi)
-	movb	%dl, 4(%edi)
-# ifdef USE_AS_STPCPY
-	lea	5(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit6):
-	movl	(%esi), %ecx
-	movw	4(%esi), %dx
-	movl	%ecx, (%edi)
-	movw	%dx, 4(%edi)
-# ifdef USE_AS_STPCPY
-	lea	6(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit7):
-	movl	(%esi), %ecx
-	movl	3(%esi), %edx
-	movl	%ecx, (%edi)
-	movl	%edx, 3(%edi)
-# ifdef USE_AS_STPCPY
-	lea	7(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit8):
-	movlpd	(%esi), %xmm0
-	movlpd	%xmm0, (%edi)
-# ifdef USE_AS_STPCPY
-	lea	8(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit9):
-	movlpd	(%esi), %xmm0
-	movb	8(%esi), %dl
-	movlpd	%xmm0, (%edi)
-	movb	%dl, 8(%edi)
-# ifdef USE_AS_STPCPY
-	lea	9(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit10):
-	movlpd	(%esi), %xmm0
-	movw	8(%esi), %dx
-	movlpd	%xmm0, (%edi)
-	movw	%dx, 8(%edi)
-# ifdef USE_AS_STPCPY
-	lea	10(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit11):
-	movlpd	(%esi), %xmm0
-	movl	7(%esi), %edx
-	movlpd	%xmm0, (%edi)
-	movl	%edx, 7(%edi)
-# ifdef USE_AS_STPCPY
-	lea	11(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit12):
-	movlpd	(%esi), %xmm0
-	movl	8(%esi), %edx
-	movlpd	%xmm0, (%edi)
-	movl	%edx, 8(%edi)
-# ifdef USE_AS_STPCPY
-	lea	12(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit13):
-	movlpd	(%esi), %xmm0
-	movlpd	5(%esi), %xmm1
-	movlpd	%xmm0, (%edi)
-	movlpd	%xmm1, 5(%edi)
-# ifdef USE_AS_STPCPY
-	lea	13(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit14):
-	movlpd	(%esi), %xmm0
-	movlpd	6(%esi), %xmm1
-	movlpd	%xmm0, (%edi)
-	movlpd	%xmm1, 6(%edi)
-# ifdef USE_AS_STPCPY
-	lea	14(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit15):
-	movlpd	(%esi), %xmm0
-	movlpd	7(%esi), %xmm1
-	movlpd	%xmm0, (%edi)
-	movlpd	%xmm1, 7(%edi)
-# ifdef USE_AS_STPCPY
-	lea	15(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit16):
-	movdqu	(%esi), %xmm0
-	movdqu	%xmm0, (%edi)
-# ifdef USE_AS_STPCPY
-	lea	16(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit17):
-	movdqu	(%esi), %xmm0
-	movb	16(%esi), %cl
-	movdqu	%xmm0, (%edi)
-	movb	%cl, 16(%edi)
-# ifdef USE_AS_STPCPY
-	lea	17(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit18):
-	movdqu	(%esi), %xmm0
-	movw	16(%esi), %cx
-	movdqu	%xmm0, (%edi)
-	movw	%cx, 16(%edi)
-# ifdef USE_AS_STPCPY
-	lea	18(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit19):
-	movdqu	(%esi), %xmm0
-	movl	15(%esi), %ecx
-	movdqu	%xmm0, (%edi)
-	movl	%ecx, 15(%edi)
-# ifdef USE_AS_STPCPY
-	lea	19(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit20):
-	movdqu	(%esi), %xmm0
-	movl	16(%esi), %ecx
-	movdqu	%xmm0, (%edi)
-	movl	%ecx, 16(%edi)
-# ifdef USE_AS_STPCPY
-	lea	20(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit21):
-	movdqu	(%esi), %xmm0
-	movl	16(%esi), %ecx
-	movb	20(%esi), %dl
-	movdqu	%xmm0, (%edi)
-	movl	%ecx, 16(%edi)
-	movb	%dl, 20(%edi)
-# ifdef USE_AS_STPCPY
-	lea	21(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit22):
-	movdqu	(%esi), %xmm0
-	movlpd	14(%esi), %xmm3
-	movdqu	%xmm0, (%edi)
-	movlpd	%xmm3, 14(%edi)
-# ifdef USE_AS_STPCPY
-	lea	22(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit23):
-	movdqu	(%esi), %xmm0
-	movlpd	15(%esi), %xmm3
-	movdqu	%xmm0, (%edi)
-	movlpd	%xmm3, 15(%edi)
-# ifdef USE_AS_STPCPY
-	lea	23(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit24):
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movdqu	%xmm0, (%edi)
-	movlpd	%xmm2, 16(%edi)
-# ifdef USE_AS_STPCPY
-	lea	24(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit25):
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movb	24(%esi), %cl
-	movdqu	%xmm0, (%edi)
-	movlpd	%xmm2, 16(%edi)
-	movb	%cl, 24(%edi)
-# ifdef USE_AS_STPCPY
-	lea	25(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit26):
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movw	24(%esi), %cx
-	movdqu	%xmm0, (%edi)
-	movlpd	%xmm2, 16(%edi)
-	movw	%cx, 24(%edi)
-# ifdef USE_AS_STPCPY
-	lea	26(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit27):
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movl	23(%esi), %ecx
-	movdqu	%xmm0, (%edi)
-	movlpd	%xmm2, 16(%edi)
-	movl	%ecx, 23(%edi)
-# ifdef USE_AS_STPCPY
-	lea	27(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit28):
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movl	24(%esi), %ecx
-	movdqu	%xmm0, (%edi)
-	movlpd	%xmm2, 16(%edi)
-	movl	%ecx, 24(%edi)
-# ifdef USE_AS_STPCPY
-	lea	28(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit29):
-	movdqu	(%esi), %xmm0
-	movdqu	13(%esi), %xmm2
-	movdqu	%xmm0, (%edi)
-	movdqu	%xmm2, 13(%edi)
-# ifdef USE_AS_STPCPY
-	lea	29(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit30):
-	movdqu	(%esi), %xmm0
-	movdqu	14(%esi), %xmm2
-	movdqu	%xmm0, (%edi)
-	movdqu	%xmm2, 14(%edi)
-# ifdef USE_AS_STPCPY
-	lea	30(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit31):
-	movdqu	(%esi), %xmm0
-	movdqu	15(%esi), %xmm2
-	movdqu	%xmm0, (%edi)
-	movdqu	%xmm2, 15(%edi)
-# ifdef USE_AS_STPCPY
-	lea	31(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit32):
-	movdqu	(%esi), %xmm0
-	movdqu	16(%esi), %xmm2
-	movdqu	%xmm0, (%edi)
-	movdqu	%xmm2, 16(%edi)
-# ifdef USE_AS_STPCPY
-	lea	32(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit33):
-	movdqu	(%esi), %xmm0
-	movdqu	16(%esi), %xmm2
-	movb	32(%esi), %cl
-	movdqu	%xmm0, (%edi)
-	movdqu	%xmm2, 16(%edi)
-	movb	%cl, 32(%edi)
-	RETURN
-
-	.p2align 4
-L(Fill0):
-	RETURN
-
-	.p2align 4
-L(Fill1):
-	movb	%dl, (%edi)
-	RETURN
-
-	.p2align 4
-L(Fill2):
-	movw	%dx, (%edi)
-	RETURN
-
-	.p2align 4
-L(Fill3):
-	movl	%edx, -1(%edi)
-	RETURN
-
-	.p2align 4
-L(Fill4):
-	movl	%edx, (%edi)
-	RETURN
-
-	.p2align 4
-L(Fill5):
-	movl	%edx, (%edi)
-	movb	%dl, 4(%edi)
-	RETURN
-
-	.p2align 4
-L(Fill6):
-	movl	%edx, (%edi)
-	movw	%dx, 4(%edi)
-	RETURN
-
-	.p2align 4
-L(Fill7):
-	movlpd	%xmm0, -1(%edi)
-	RETURN
-
-	.p2align 4
-L(Fill8):
-	movlpd	%xmm0, (%edi)
-	RETURN
-
-	.p2align 4
-L(Fill9):
-	movlpd	%xmm0, (%edi)
-	movb	%dl, 8(%edi)
-	RETURN
-
-	.p2align 4
-L(Fill10):
-	movlpd	%xmm0, (%edi)
-	movw	%dx, 8(%edi)
-	RETURN
-
-	.p2align 4
-L(Fill11):
-	movlpd	%xmm0, (%edi)
-	movl	%edx, 7(%edi)
-	RETURN
-
-	.p2align 4
-L(Fill12):
-	movlpd	%xmm0, (%edi)
-	movl	%edx, 8(%edi)
-	RETURN
-
-	.p2align 4
-L(Fill13):
-	movlpd	%xmm0, (%edi)
-	movlpd	%xmm0, 5(%edi)
-	RETURN
-
-	.p2align 4
-L(Fill14):
-	movlpd	%xmm0, (%edi)
-	movlpd	%xmm0, 6(%edi)
-	RETURN
-
-	.p2align 4
-L(Fill15):
-	movdqu	%xmm0, -1(%edi)
-	RETURN
-
-	.p2align 4
-L(Fill16):
-	movdqu	%xmm0, (%edi)
-	RETURN
-
-	.p2align 4
-L(CopyFrom1To16BytesUnalignedXmm2):
-	movdqu	%xmm2, (%edi, %ecx)
-
-	.p2align 4
-L(CopyFrom1To16BytesXmmExit):
-	bsf	%edx, %edx
-	add	$15, %ebx
-	add	%ecx, %edi
-# ifdef USE_AS_STPCPY
-	lea	(%edi, %edx), %eax
-# endif
-	sub	%edx, %ebx
-	lea	1(%edi, %edx), %edi
-
-	.p2align 4
-L(StrncpyFillTailWithZero):
-	pxor	%xmm0, %xmm0
-	xor	%edx, %edx
-	sub	$16, %ebx
-	jbe	L(StrncpyFillExit)
-
-	movdqu	%xmm0, (%edi)
-	add	$16, %edi
-
-	mov	%edi, %esi
-	and	$0xf, %esi
-	sub	%esi, %edi
-	add	%esi, %ebx
-	sub	$64, %ebx
-	jb	L(StrncpyFillLess64)
-
-L(StrncpyFillLoopMovdqa):
-	movdqa	%xmm0, (%edi)
-	movdqa	%xmm0, 16(%edi)
-	movdqa	%xmm0, 32(%edi)
-	movdqa	%xmm0, 48(%edi)
-	add	$64, %edi
-	sub	$64, %ebx
-	jae	L(StrncpyFillLoopMovdqa)
-
-L(StrncpyFillLess64):
-	add	$32, %ebx
-	jl	L(StrncpyFillLess32)
-	movdqa	%xmm0, (%edi)
-	movdqa	%xmm0, 16(%edi)
-	add	$32, %edi
-	sub	$16, %ebx
-	jl	L(StrncpyFillExit)
-	movdqa	%xmm0, (%edi)
-	add	$16, %edi
-	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
-
-L(StrncpyFillLess32):
-	add	$16, %ebx
-	jl	L(StrncpyFillExit)
-	movdqa	%xmm0, (%edi)
-	add	$16, %edi
-	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
-
-L(StrncpyFillExit):
-	add	$16, %ebx
-	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
-
-	.p2align 4
-L(UnalignedLeaveCase2OrCase3):
-	test	%edx, %edx
-	jnz	L(Unaligned64LeaveCase2)
-L(Unaligned64LeaveCase3):
-	lea	64(%ebx), %ecx
-	and	$-16, %ecx
-	add	$48, %ebx
-	jl	L(CopyFrom1To16BytesCase3)
-	movdqu	%xmm4, (%edi)
-	sub	$16, %ebx
-	jb	L(CopyFrom1To16BytesCase3)
-	movdqu	%xmm5, 16(%edi)
-	sub	$16, %ebx
-	jb	L(CopyFrom1To16BytesCase3)
-	movdqu	%xmm6, 32(%edi)
-	sub	$16, %ebx
-	jb	L(CopyFrom1To16BytesCase3)
-	movdqu	%xmm7, 48(%edi)
-# ifdef USE_AS_STPCPY
-	lea	64(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Unaligned64LeaveCase2):
-	xor	%ecx, %ecx
-	pcmpeqb	%xmm4, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$48, %ebx
-	jle	L(CopyFrom1To16BytesCase2OrCase3)
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesUnalignedXmm4)
-
-	pcmpeqb	%xmm5, %xmm0
-	pmovmskb %xmm0, %edx
-	movdqu	%xmm4, (%edi)
-	add	$16, %ecx
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesUnalignedXmm5)
-
-	pcmpeqb	%xmm6, %xmm0
-	pmovmskb %xmm0, %edx
-	movdqu	%xmm5, 16(%edi)
-	add	$16, %ecx
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesUnalignedXmm6)
-
-	pcmpeqb	%xmm7, %xmm0
-	pmovmskb %xmm0, %edx
-	movdqu	%xmm6, 32(%edi)
-	lea	16(%edi, %ecx), %edi
-	lea	16(%esi, %ecx), %esi
-	bsf	%edx, %edx
-	cmp	%ebx, %edx
-	jb	L(CopyFrom1To16BytesExit)
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-
-	.p2align 4
-L(ExitZero):
-	movl	%edi, %eax
-	RETURN
-
-END (STRCPY)
-
-	.p2align 4
-	.section .rodata
-L(ExitTable):
-	.int	JMPTBL(L(Exit1), L(ExitTable))
-	.int	JMPTBL(L(Exit2), L(ExitTable))
-	.int	JMPTBL(L(Exit3), L(ExitTable))
-	.int	JMPTBL(L(Exit4), L(ExitTable))
-	.int	JMPTBL(L(Exit5), L(ExitTable))
-	.int	JMPTBL(L(Exit6), L(ExitTable))
-	.int	JMPTBL(L(Exit7), L(ExitTable))
-	.int	JMPTBL(L(Exit8), L(ExitTable))
-	.int	JMPTBL(L(Exit9), L(ExitTable))
-	.int	JMPTBL(L(Exit10), L(ExitTable))
-	.int	JMPTBL(L(Exit11), L(ExitTable))
-	.int	JMPTBL(L(Exit12), L(ExitTable))
-	.int	JMPTBL(L(Exit13), L(ExitTable))
-	.int	JMPTBL(L(Exit14), L(ExitTable))
-	.int	JMPTBL(L(Exit15), L(ExitTable))
-	.int	JMPTBL(L(Exit16), L(ExitTable))
-	.int	JMPTBL(L(Exit17), L(ExitTable))
-	.int	JMPTBL(L(Exit18), L(ExitTable))
-	.int	JMPTBL(L(Exit19), L(ExitTable))
-	.int	JMPTBL(L(Exit20), L(ExitTable))
-	.int	JMPTBL(L(Exit21), L(ExitTable))
-	.int	JMPTBL(L(Exit22), L(ExitTable))
-	.int    JMPTBL(L(Exit23), L(ExitTable))
-	.int	JMPTBL(L(Exit24), L(ExitTable))
-	.int	JMPTBL(L(Exit25), L(ExitTable))
-	.int	JMPTBL(L(Exit26), L(ExitTable))
-	.int	JMPTBL(L(Exit27), L(ExitTable))
-	.int	JMPTBL(L(Exit28), L(ExitTable))
-	.int	JMPTBL(L(Exit29), L(ExitTable))
-	.int	JMPTBL(L(Exit30), L(ExitTable))
-	.int	JMPTBL(L(Exit31), L(ExitTable))
-	.int	JMPTBL(L(Exit32), L(ExitTable))
-
-L(ExitStrncpyTable):
-	.int	JMPTBL(L(Exit0), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))
-	.int    JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
-
-	.p2align 4
-L(FillTable):
-	.int	JMPTBL(L(Fill0), L(FillTable))
-	.int	JMPTBL(L(Fill1), L(FillTable))
-	.int	JMPTBL(L(Fill2), L(FillTable))
-	.int	JMPTBL(L(Fill3), L(FillTable))
-	.int	JMPTBL(L(Fill4), L(FillTable))
-	.int	JMPTBL(L(Fill5), L(FillTable))
-	.int	JMPTBL(L(Fill6), L(FillTable))
-	.int	JMPTBL(L(Fill7), L(FillTable))
-	.int	JMPTBL(L(Fill8), L(FillTable))
-	.int	JMPTBL(L(Fill9), L(FillTable))
-	.int	JMPTBL(L(Fill10), L(FillTable))
-	.int	JMPTBL(L(Fill11), L(FillTable))
-	.int	JMPTBL(L(Fill12), L(FillTable))
-	.int	JMPTBL(L(Fill13), L(FillTable))
-	.int	JMPTBL(L(Fill14), L(FillTable))
-	.int	JMPTBL(L(Fill15), L(FillTable))
-	.int	JMPTBL(L(Fill16), L(FillTable))
-# else
-#  define PARMS  4
-#  define ENTRANCE
-#  define RETURN  POP (%edi); ret; CFI_PUSH (%edi)
-#  define RETURN1  ret
-
-	.text
-ENTRY (STRCPY)
-	ENTRANCE
-	mov	STR1(%esp), %edx
-	mov	STR2(%esp), %ecx
-
-	cmpb	$0, (%ecx)
-	jz	L(ExitTail1)
-	cmpb	$0, 1(%ecx)
-	jz	L(ExitTail2)
-	cmpb	$0, 2(%ecx)
-	jz	L(ExitTail3)
-	cmpb	$0, 3(%ecx)
-	jz	L(ExitTail4)
-	cmpb	$0, 4(%ecx)
-	jz	L(ExitTail5)
-	cmpb	$0, 5(%ecx)
-	jz	L(ExitTail6)
-	cmpb	$0, 6(%ecx)
-	jz	L(ExitTail7)
-	cmpb	$0, 7(%ecx)
-	jz	L(ExitTail8)
-	cmpb	$0, 8(%ecx)
-	jz	L(ExitTail9)
-	cmpb	$0, 9(%ecx)
-	jz	L(ExitTail10)
-	cmpb	$0, 10(%ecx)
-	jz	L(ExitTail11)
-	cmpb	$0, 11(%ecx)
-	jz	L(ExitTail12)
-	cmpb	$0, 12(%ecx)
-	jz	L(ExitTail13)
-	cmpb	$0, 13(%ecx)
-	jz	L(ExitTail14)
-	cmpb	$0, 14(%ecx)
-	jz	L(ExitTail15)
-	cmpb	$0, 15(%ecx)
-	jz	L(ExitTail16)
-
-	PUSH	(%edi)
-	PUSH	(%ebx)
-
-	mov	%edx, %edi
-	lea	16(%ecx), %ebx
-	and	$-16, %ebx
-	pxor	%xmm0, %xmm0
-	movdqu	(%ecx), %xmm1
-	movdqu	%xmm1, (%edx)
-	pcmpeqb	(%ebx), %xmm0
-	pmovmskb %xmm0, %eax
-	sub	%ecx, %ebx
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	mov	%ecx, %eax
-	lea	16(%ecx), %ecx
-	and	$-16, %ecx
-	sub	%ecx, %eax
-	sub	%eax, %edx
-	xor	%ebx, %ebx
-
-	.p2align 4
-	movdqa	(%ecx), %xmm1
-	movaps	16(%ecx), %xmm2
-	movdqu	%xmm1, (%edx)
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	add	$16, %ebx
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %ebx), %xmm3
-	movdqu	%xmm2, (%edx, %ebx)
-	pcmpeqb	%xmm3, %xmm0
-	pmovmskb %xmm0, %eax
-	add	$16, %ebx
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %ebx), %xmm4
-	movdqu	%xmm3, (%edx, %ebx)
-	pcmpeqb	%xmm4, %xmm0
-	pmovmskb %xmm0, %eax
-	add	$16, %ebx
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %ebx), %xmm1
-	movdqu	%xmm4, (%edx, %ebx)
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %eax
-	add	$16, %ebx
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %ebx), %xmm2
-	movdqu	%xmm1, (%edx, %ebx)
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	add	$16, %ebx
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %ebx), %xmm3
-	movdqu	%xmm2, (%edx, %ebx)
-	pcmpeqb	%xmm3, %xmm0
-	pmovmskb %xmm0, %eax
-	add	$16, %ebx
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movdqu	%xmm3, (%edx, %ebx)
-	mov	%ecx, %eax
-	lea	16(%ecx, %ebx), %ecx
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	sub	%eax, %edx
-
-L(Aligned64Loop):
-	movaps	(%ecx), %xmm2
-	movaps	%xmm2, %xmm4
-	movaps	16(%ecx), %xmm5
-	movaps	32(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	48(%ecx), %xmm7
-	pminub	%xmm5, %xmm2
-	add	$64, %ecx
-	pminub	%xmm7, %xmm3
-	add	$64, %edx
-	pminub	%xmm2, %xmm3
-	pcmpeqb	%xmm0, %xmm3
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(Aligned64Leave)
-L(Aligned64Loop_start):
-	movdqu	%xmm4, -64(%edx)
-	movaps	(%ecx), %xmm2
-	movdqa	%xmm2, %xmm4
-	movdqu	%xmm5, -48(%edx)
-	movaps	16(%ecx), %xmm5
-	pminub	%xmm5, %xmm2
-	movaps	32(%ecx), %xmm3
-	movdqu	%xmm6, -32(%edx)
-	movaps	%xmm3, %xmm6
-	movdqu	%xmm7, -16(%edx)
-	movaps	48(%ecx), %xmm7
-	pminub	%xmm7, %xmm3
-	pminub	%xmm2, %xmm3
-	pcmpeqb	%xmm3, %xmm0
-	pmovmskb %xmm0, %eax
-	add	$64, %edx
-	add	$64, %ecx
-	test	%eax, %eax
-	jz	L(Aligned64Loop_start)
-L(Aligned64Leave):
-	sub	$0xa0, %ebx
-	pxor	%xmm0, %xmm0
-	pcmpeqb	%xmm4, %xmm0
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	pcmpeqb	%xmm5, %xmm0
-	pmovmskb %xmm0, %eax
-	movdqu	%xmm4, -64(%edx)
-	test	%eax, %eax
-	lea	16(%ebx), %ebx
-	jnz	L(CopyFrom1To16Bytes)
-
-	pcmpeqb	%xmm6, %xmm0
-	pmovmskb %xmm0, %eax
-	movdqu	%xmm5, -48(%edx)
-	test	%eax, %eax
-	lea	16(%ebx), %ebx
-	jnz	L(CopyFrom1To16Bytes)
-
-	movdqu	%xmm6, -32(%edx)
-	pcmpeqb	%xmm7, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%ebx), %ebx
-
-/*-----------------End of main part---------------------------*/
-
-	.p2align 4
-L(CopyFrom1To16Bytes):
-	add	%ebx, %edx
-	add	%ebx, %ecx
-
-	POP	(%ebx)
-	test	%al, %al
-	jz	L(ExitHigh)
-	test	$0x01, %al
-	jnz	L(Exit1)
-	test	$0x02, %al
-	jnz	L(Exit2)
-	test	$0x04, %al
-	jnz	L(Exit3)
-	test	$0x08, %al
-	jnz	L(Exit4)
-	test	$0x10, %al
-	jnz	L(Exit5)
-	test	$0x20, %al
-	jnz	L(Exit6)
-	test	$0x40, %al
-	jnz	L(Exit7)
-	/* Exit 8 */
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	4(%ecx), %eax
-	movl	%eax, 4(%edx)
-# ifdef USE_AS_STPCPY
-	lea	7(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(ExitHigh):
-	test	$0x01, %ah
-	jnz	L(Exit9)
-	test	$0x02, %ah
-	jnz	L(Exit10)
-	test	$0x04, %ah
-	jnz	L(Exit11)
-	test	$0x08, %ah
-	jnz	L(Exit12)
-	test	$0x10, %ah
-	jnz	L(Exit13)
-	test	$0x20, %ah
-	jnz	L(Exit14)
-	test	$0x40, %ah
-	jnz	L(Exit15)
-	/* Exit 16 */
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	8(%ecx), %xmm0
-	movlpd	%xmm0, 8(%edx)
-# ifdef USE_AS_STPCPY
-	lea	15(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit1):
-	movb	(%ecx), %al
-	movb	%al, (%edx)
-# ifdef USE_AS_STPCPY
-	lea	(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit2):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-# ifdef USE_AS_STPCPY
-	lea	1(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit3):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movb	2(%ecx), %al
-	movb	%al, 2(%edx)
-# ifdef USE_AS_STPCPY
-	lea	2(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit4):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-# ifdef USE_AS_STPCPY
-	lea	3(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit5):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movb	4(%ecx), %al
-	movb	%al, 4(%edx)
-# ifdef USE_AS_STPCPY
-	lea	4(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit6):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movw	4(%ecx), %ax
-	movw	%ax, 4(%edx)
-# ifdef USE_AS_STPCPY
-	lea	5(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit7):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	3(%ecx), %eax
-	movl	%eax, 3(%edx)
-# ifdef USE_AS_STPCPY
-	lea	6(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit9):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	4(%ecx), %eax
-	movl	%eax, 4(%edx)
-	movb	8(%ecx), %al
-	movb	%al, 8(%edx)
-# ifdef USE_AS_STPCPY
-	lea	8(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit10):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	4(%ecx), %eax
-	movl	%eax, 4(%edx)
-	movw	8(%ecx), %ax
-	movw	%ax, 8(%edx)
-# ifdef USE_AS_STPCPY
-	lea	9(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit11):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	4(%ecx), %eax
-	movl	%eax, 4(%edx)
-	movl	7(%ecx), %eax
-	movl	%eax, 7(%edx)
-# ifdef USE_AS_STPCPY
-	lea	10(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit12):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	4(%ecx), %eax
-	movl	%eax, 4(%edx)
-	movl	8(%ecx), %eax
-	movl	%eax, 8(%edx)
-# ifdef USE_AS_STPCPY
-	lea	11(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit13):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	5(%ecx), %xmm0
-	movlpd	%xmm0, 5(%edx)
-# ifdef USE_AS_STPCPY
-	lea	12(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit14):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	6(%ecx), %xmm0
-	movlpd	%xmm0, 6(%edx)
-# ifdef USE_AS_STPCPY
-	lea	13(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit15):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	7(%ecx), %xmm0
-	movlpd	%xmm0, 7(%edx)
-# ifdef USE_AS_STPCPY
-	lea	14(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-CFI_POP (%edi)
-
-	.p2align 4
-L(ExitTail1):
-	movb	(%ecx), %al
-	movb	%al, (%edx)
-	movl	%edx, %eax
-	RETURN1
-
-	.p2align 4
-L(ExitTail2):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-# ifdef USE_AS_STPCPY
-	lea	1(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail3):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movb	2(%ecx), %al
-	movb	%al, 2(%edx)
-# ifdef USE_AS_STPCPY
-	lea	2(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail4):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-# ifdef USE_AS_STPCPY
-	lea	3(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail5):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movb	4(%ecx), %al
-	movb	%al, 4(%edx)
-# ifdef USE_AS_STPCPY
-	lea	4(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail6):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movw	4(%ecx), %ax
-	movw	%ax, 4(%edx)
-# ifdef USE_AS_STPCPY
-	lea	5(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail7):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	3(%ecx), %eax
-	movl	%eax, 3(%edx)
-# ifdef USE_AS_STPCPY
-	lea	6(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail8):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	4(%ecx), %eax
-	movl	%eax, 4(%edx)
-# ifdef USE_AS_STPCPY
-	lea	7(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail9):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	4(%ecx), %eax
-	movl	%eax, 4(%edx)
-	movb	8(%ecx), %al
-	movb	%al, 8(%edx)
-# ifdef USE_AS_STPCPY
-	lea	8(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail10):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	4(%ecx), %eax
-	movl	%eax, 4(%edx)
-	movw	8(%ecx), %ax
-	movw	%ax, 8(%edx)
-# ifdef USE_AS_STPCPY
-	lea	9(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail11):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	4(%ecx), %eax
-	movl	%eax, 4(%edx)
-	movl	7(%ecx), %eax
-	movl	%eax, 7(%edx)
-# ifdef USE_AS_STPCPY
-	lea	10(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail12):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	4(%ecx), %eax
-	movl	%eax, 4(%edx)
-	movl	8(%ecx), %eax
-	movl	%eax, 8(%edx)
-# ifdef USE_AS_STPCPY
-	lea	11(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail13):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	5(%ecx), %xmm0
-	movlpd	%xmm0, 5(%edx)
-# ifdef USE_AS_STPCPY
-	lea	12(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail14):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	6(%ecx), %xmm0
-	movlpd	%xmm0, 6(%edx)
-# ifdef USE_AS_STPCPY
-	lea	13(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail15):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	7(%ecx), %xmm0
-	movlpd	%xmm0, 7(%edx)
-# ifdef USE_AS_STPCPY
-	lea	14(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail16):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	8(%ecx), %xmm0
-	movlpd	%xmm0, 8(%edx)
-# ifdef USE_AS_STPCPY
-	lea	15(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-END (STRCPY)
-# endif
-
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strcpy-ssse3.S b/sysdeps/i386/i686/multiarch/strcpy-ssse3.S
deleted file mode 100644
index 9f88e7735d..0000000000
--- a/sysdeps/i386/i686/multiarch/strcpy-ssse3.S
+++ /dev/null
@@ -1,3901 +0,0 @@
-/* strcpy with SSSE3
-   Copyright (C) 2011-2015 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-
-#if IS_IN (libc)
-
-# ifndef USE_AS_STRCAT
-#  include <sysdep.h>
-
-#  define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-#  define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-#  define PUSH(REG)	pushl REG; CFI_PUSH (REG)
-#  define POP(REG)	popl REG; CFI_POP (REG)
-
-#  ifndef STRCPY
-#   define STRCPY  __strcpy_ssse3
-#  endif
-
-#  ifdef USE_AS_STRNCPY
-#   define PARMS  8
-#   define ENTRANCE PUSH (%ebx)
-#   define RETURN  POP (%ebx); ret; CFI_PUSH (%ebx);
-#   define RETURN1  POP (%edi); POP (%ebx); ret; CFI_PUSH (%ebx); CFI_PUSH (%edi)
-#  else
-#   define PARMS  4
-#   define ENTRANCE
-#   define RETURN  ret
-#   define RETURN1  POP (%edi); ret; CFI_PUSH (%edi)
-#  endif
-
-#  ifdef USE_AS_STPCPY
-#   define SAVE_RESULT(n)  lea	n(%edx), %eax
-#   define SAVE_RESULT_TAIL(n)  lea	n(%edx), %eax
-#  else
-#   define SAVE_RESULT(n)  movl	%edi, %eax
-#   define SAVE_RESULT_TAIL(n)  movl	%edx, %eax
-#  endif
-
-#  define STR1  PARMS
-#  define STR2  STR1+4
-#  define LEN  STR2+4
-
-/* In this code following instructions are used for copying:
-	movb	- 1 byte
-	movw	- 2 byte
-	movl	- 4 byte
-	movlpd	- 8 byte
-	movaps	- 16 byte - requires 16 byte alignment
-	of	sourse and destination adresses.
-*/
-
-.text
-ENTRY (STRCPY)
-	ENTRANCE
-	mov	STR1(%esp), %edx
-	mov	STR2(%esp), %ecx
-#  ifdef USE_AS_STRNCPY
-	movl	LEN(%esp), %ebx
-	cmp	$8, %ebx
-	jbe	L(StrncpyExit8Bytes)
-#  endif
-	cmpb	$0, (%ecx)
-	jz	L(ExitTail1)
-	cmpb	$0, 1(%ecx)
-	jz	L(ExitTail2)
-	cmpb	$0, 2(%ecx)
-	jz	L(ExitTail3)
-	cmpb	$0, 3(%ecx)
-	jz	L(ExitTail4)
-	cmpb	$0, 4(%ecx)
-	jz	L(ExitTail5)
-	cmpb	$0, 5(%ecx)
-	jz	L(ExitTail6)
-	cmpb	$0, 6(%ecx)
-	jz	L(ExitTail7)
-	cmpb	$0, 7(%ecx)
-	jz	L(ExitTail8)
-#  ifdef USE_AS_STRNCPY
-	cmp	$16, %ebx
-	jb	L(StrncpyExit15Bytes)
-#  endif
-	cmpb	$0, 8(%ecx)
-	jz	L(ExitTail9)
-	cmpb	$0, 9(%ecx)
-	jz	L(ExitTail10)
-	cmpb	$0, 10(%ecx)
-	jz	L(ExitTail11)
-	cmpb	$0, 11(%ecx)
-	jz	L(ExitTail12)
-	cmpb	$0, 12(%ecx)
-	jz	L(ExitTail13)
-	cmpb	$0, 13(%ecx)
-	jz	L(ExitTail14)
-	cmpb	$0, 14(%ecx)
-	jz	L(ExitTail15)
-#  ifdef USE_AS_STRNCPY
-	cmp	$16, %ebx
-	je	L(ExitTail16)
-#  endif
-	cmpb	$0, 15(%ecx)
-	jz	L(ExitTail16)
-
-	PUSH	(%edi)
-	mov	%edx, %edi
-# endif
-	PUSH	(%esi)
-# ifdef USE_AS_STRNCPY
-	mov	%ecx, %esi
-	sub	$16, %ebx
-	and	$0xf, %esi
-
-/* add 16 bytes ecx_offset to ebx */
-
-	add	%esi, %ebx
-# endif
-	lea	16(%ecx), %esi
-	and	$-16, %esi
-	pxor	%xmm0, %xmm0
-	movlpd	(%ecx), %xmm1
-	movlpd	%xmm1, (%edx)
-
-	pcmpeqb	(%esi), %xmm0
-	movlpd	8(%ecx), %xmm1
-	movlpd	%xmm1, 8(%edx)
-
-	pmovmskb %xmm0, %eax
-	sub	%ecx, %esi
-
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	mov	%edx, %eax
-	lea	16(%edx), %edx
-	and	$-16, %edx
-	sub	%edx, %eax
-
-# ifdef USE_AS_STRNCPY
-	add	%eax, %esi
-	lea	-1(%esi), %esi
-	and	$1<<31, %esi
-	test	%esi, %esi
-	jnz	L(ContinueCopy)
-	lea	16(%ebx), %ebx
-
-L(ContinueCopy):
-# endif
-	sub	%eax, %ecx
-	mov	%ecx, %eax
-	and	$0xf, %eax
-	mov	$0, %esi
-
-/* case: ecx_offset == edx_offset */
-
-	jz	L(Align16Both)
-
-	cmp	$8, %eax
-	jae	L(ShlHigh8)
-	cmp	$1, %eax
-	je	L(Shl1)
-	cmp	$2, %eax
-	je	L(Shl2)
-	cmp	$3, %eax
-	je	L(Shl3)
-	cmp	$4, %eax
-	je	L(Shl4)
-	cmp	$5, %eax
-	je	L(Shl5)
-	cmp	$6, %eax
-	je	L(Shl6)
-	jmp	L(Shl7)
-
-L(ShlHigh8):
-	je	L(Shl8)
-	cmp	$9, %eax
-	je	L(Shl9)
-	cmp	$10, %eax
-	je	L(Shl10)
-	cmp	$11, %eax
-	je	L(Shl11)
-	cmp	$12, %eax
-	je	L(Shl12)
-	cmp	$13, %eax
-	je	L(Shl13)
-	cmp	$14, %eax
-	je	L(Shl14)
-	jmp	L(Shl15)
-
-L(Align16Both):
-	movaps	(%ecx), %xmm1
-	movaps	16(%ecx), %xmm2
-	movaps	%xmm1, (%edx)
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %esi), %xmm3
-	movaps	%xmm2, (%edx, %esi)
-	pcmpeqb	%xmm3, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %esi), %xmm4
-	movaps	%xmm3, (%edx, %esi)
-	pcmpeqb	%xmm4, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %esi), %xmm1
-	movaps	%xmm4, (%edx, %esi)
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %esi), %xmm2
-	movaps	%xmm1, (%edx, %esi)
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %esi), %xmm3
-	movaps	%xmm2, (%edx, %esi)
-	pcmpeqb	%xmm3, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	%xmm3, (%edx, %esi)
-	mov	%ecx, %eax
-	lea	16(%ecx, %esi), %ecx
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	lea	112(%ebx, %eax), %ebx
-# endif
-	mov	$-0x40, %esi
-
-L(Aligned64Loop):
-	movaps	(%ecx), %xmm2
-	movaps	32(%ecx), %xmm3
-	movaps	%xmm2, %xmm4
-	movaps	16(%ecx), %xmm5
-	movaps	%xmm3, %xmm6
-	movaps	48(%ecx), %xmm7
-	pminub	%xmm5, %xmm2
-	pminub	%xmm7, %xmm3
-	pminub	%xmm2, %xmm3
-	lea	64(%edx), %edx
-	pcmpeqb	%xmm0, %xmm3
-	lea	64(%ecx), %ecx
-	pmovmskb %xmm3, %eax
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeaveCase2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Aligned64Leave)
-	movaps	%xmm4, -64(%edx)
-	movaps	%xmm5, -48(%edx)
-	movaps	%xmm6, -32(%edx)
-	movaps	%xmm7, -16(%edx)
-	jmp	L(Aligned64Loop)
-
-L(Aligned64Leave):
-# ifdef USE_AS_STRNCPY
-	lea	48(%ebx), %ebx
-# endif
-	pcmpeqb	%xmm4, %xmm0
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	pcmpeqb	%xmm5, %xmm0
-# ifdef USE_AS_STRNCPY
-	lea	-16(%ebx), %ebx
-# endif
-	pmovmskb %xmm0, %eax
-	movaps	%xmm4, -64(%edx)
-	test	%eax, %eax
-	lea	16(%esi), %esi
-	jnz	L(CopyFrom1To16Bytes)
-
-	pcmpeqb	%xmm6, %xmm0
-# ifdef USE_AS_STRNCPY
-	lea	-16(%ebx), %ebx
-# endif
-	pmovmskb %xmm0, %eax
-	movaps	%xmm5, -48(%edx)
-	test	%eax, %eax
-	lea	16(%esi), %esi
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	%xmm6, -32(%edx)
-	pcmpeqb	%xmm7, %xmm0
-# ifdef USE_AS_STRNCPY
-	lea	-16(%ebx), %ebx
-# endif
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-	jmp	L(CopyFrom1To16Bytes)
-
-	.p2align 4
-L(Shl1):
-	movaps	-1(%ecx), %xmm1
-	movaps	15(%ecx), %xmm2
-L(Shl1Start):
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit1Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl1LoopExit)
-
-	palignr	$1, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	31(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit1Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl1LoopExit)
-
-	palignr	$1, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	31(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit1Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl1LoopExit)
-
-	palignr	$1, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	31(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit1Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl1LoopExit)
-
-	palignr	$1, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	31(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-15(%ecx), %ecx
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	add	%eax, %ebx
-# endif
-	movaps	-1(%ecx), %xmm1
-
-L(Shl1LoopStart):
-	movaps	15(%ecx), %xmm2
-	movaps	31(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	47(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	63(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqb	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$1, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$1, %xmm3, %xmm4
-	jnz	L(Shl1Start)
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeave1)
-# endif
-	palignr	$1, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$1, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl1LoopStart)
-
-L(Shl1LoopExit):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	7(%ecx), %xmm0
-	movlpd	%xmm0, 7(%edx)
-	mov	$15, %esi
-	jmp	L(CopyFrom1To16Bytes)
-
-	.p2align 4
-L(Shl2):
-	movaps	-2(%ecx), %xmm1
-	movaps	14(%ecx), %xmm2
-L(Shl2Start):
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit2Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl2LoopExit)
-
-	palignr	$2, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	30(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit2Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl2LoopExit)
-
-	palignr	$2, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	30(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit2Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl2LoopExit)
-
-	palignr	$2, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	30(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit2Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl2LoopExit)
-
-	palignr	$2, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	30(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-14(%ecx), %ecx
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	add	%eax, %ebx
-# endif
-	movaps	-2(%ecx), %xmm1
-
-L(Shl2LoopStart):
-	movaps	14(%ecx), %xmm2
-	movaps	30(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	46(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	62(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqb	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$2, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$2, %xmm3, %xmm4
-	jnz	L(Shl2Start)
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeave2)
-# endif
-	palignr	$2, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$2, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl2LoopStart)
-
-L(Shl2LoopExit):
-	movlpd	(%ecx), %xmm0
-	movlpd	6(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 6(%edx)
-	mov	$14, %esi
-	jmp	L(CopyFrom1To16Bytes)
-
-	.p2align 4
-L(Shl3):
-	movaps	-3(%ecx), %xmm1
-	movaps	13(%ecx), %xmm2
-L(Shl3Start):
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit3Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl3LoopExit)
-
-	palignr	$3, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	29(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit3Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl3LoopExit)
-
-	palignr	$3, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	29(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit3Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl3LoopExit)
-
-	palignr	$3, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	29(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit3Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl3LoopExit)
-
-	palignr	$3, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	29(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-13(%ecx), %ecx
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	add	%eax, %ebx
-# endif
-	movaps	-3(%ecx), %xmm1
-
-L(Shl3LoopStart):
-	movaps	13(%ecx), %xmm2
-	movaps	29(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	45(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	61(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqb	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$3, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$3, %xmm3, %xmm4
-	jnz	L(Shl3Start)
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeave3)
-# endif
-	palignr	$3, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$3, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl3LoopStart)
-
-L(Shl3LoopExit):
-	movlpd	(%ecx), %xmm0
-	movlpd	5(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 5(%edx)
-	mov	$13, %esi
-	jmp	L(CopyFrom1To16Bytes)
-
-	.p2align 4
-L(Shl4):
-	movaps	-4(%ecx), %xmm1
-	movaps	12(%ecx), %xmm2
-L(Shl4Start):
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit4Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl4LoopExit)
-
-	palignr	$4, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	28(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit4Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl4LoopExit)
-
-	palignr	$4, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	28(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit4Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl4LoopExit)
-
-	palignr	$4, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	28(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit4Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl4LoopExit)
-
-	palignr	$4, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	28(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-12(%ecx), %ecx
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	add	%eax, %ebx
-# endif
-	movaps	-4(%ecx), %xmm1
-
-L(Shl4LoopStart):
-	movaps	12(%ecx), %xmm2
-	movaps	28(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	44(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	60(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqb	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$4, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$4, %xmm3, %xmm4
-	jnz	L(Shl4Start)
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeave4)
-# endif
-	palignr	$4, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$4, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl4LoopStart)
-
-L(Shl4LoopExit):
-	movlpd	(%ecx), %xmm0
-	movl	8(%ecx), %esi
-	movlpd	%xmm0, (%edx)
-	movl	%esi, 8(%edx)
-	mov	$12, %esi
-	jmp	L(CopyFrom1To16Bytes)
-
-	.p2align 4
-L(Shl5):
-	movaps	-5(%ecx), %xmm1
-	movaps	11(%ecx), %xmm2
-L(Shl5Start):
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit5Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl5LoopExit)
-
-	palignr	$5, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	27(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit5Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl5LoopExit)
-
-	palignr	$5, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	27(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit5Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl5LoopExit)
-
-	palignr	$5, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	27(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit5Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl5LoopExit)
-
-	palignr	$5, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	27(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-11(%ecx), %ecx
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	add	%eax, %ebx
-# endif
-	movaps	-5(%ecx), %xmm1
-
-L(Shl5LoopStart):
-	movaps	11(%ecx), %xmm2
-	movaps	27(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	43(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	59(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqb	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$5, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$5, %xmm3, %xmm4
-	jnz	L(Shl5Start)
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeave5)
-# endif
-	palignr	$5, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$5, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl5LoopStart)
-
-L(Shl5LoopExit):
-	movlpd	(%ecx), %xmm0
-	movl	7(%ecx), %esi
-	movlpd	%xmm0, (%edx)
-	movl	%esi, 7(%edx)
-	mov	$11, %esi
-	jmp	L(CopyFrom1To16Bytes)
-
-	.p2align 4
-L(Shl6):
-	movaps	-6(%ecx), %xmm1
-	movaps	10(%ecx), %xmm2
-L(Shl6Start):
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit6Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl6LoopExit)
-
-	palignr	$6, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	26(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit6Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl6LoopExit)
-
-	palignr	$6, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	26(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit6Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl6LoopExit)
-
-	palignr	$6, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	26(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit6Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl6LoopExit)
-
-	palignr	$6, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	26(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-10(%ecx), %ecx
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	add	%eax, %ebx
-# endif
-	movaps	-6(%ecx), %xmm1
-
-L(Shl6LoopStart):
-	movaps	10(%ecx), %xmm2
-	movaps	26(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	42(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	58(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqb	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$6, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$6, %xmm3, %xmm4
-	jnz	L(Shl6Start)
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeave6)
-# endif
-	palignr	$6, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$6, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl6LoopStart)
-
-L(Shl6LoopExit):
-	movlpd	(%ecx), %xmm0
-	movl	6(%ecx), %esi
-	movlpd	%xmm0, (%edx)
-	movl	%esi, 6(%edx)
-	mov	$10, %esi
-	jmp	L(CopyFrom1To16Bytes)
-
-	.p2align 4
-L(Shl7):
-	movaps	-7(%ecx), %xmm1
-	movaps	9(%ecx), %xmm2
-L(Shl7Start):
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit7Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl7LoopExit)
-
-	palignr	$7, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	25(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit7Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl7LoopExit)
-
-	palignr	$7, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	25(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit7Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl7LoopExit)
-
-	palignr	$7, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	25(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit7Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl7LoopExit)
-
-	palignr	$7, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	25(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-9(%ecx), %ecx
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	add	%eax, %ebx
-# endif
-	movaps	-7(%ecx), %xmm1
-
-L(Shl7LoopStart):
-	movaps	9(%ecx), %xmm2
-	movaps	25(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	41(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	57(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqb	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$7, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$7, %xmm3, %xmm4
-	jnz	L(Shl7Start)
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeave7)
-# endif
-	palignr	$7, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$7, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl7LoopStart)
-
-L(Shl7LoopExit):
-	movlpd	(%ecx), %xmm0
-	movl	5(%ecx), %esi
-	movlpd	%xmm0, (%edx)
-	movl	%esi, 5(%edx)
-	mov	$9, %esi
-	jmp	L(CopyFrom1To16Bytes)
-
-	.p2align 4
-L(Shl8):
-	movaps	-8(%ecx), %xmm1
-	movaps	8(%ecx), %xmm2
-L(Shl8Start):
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit8Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl8LoopExit)
-
-	palignr	$8, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	24(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit8Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl8LoopExit)
-
-	palignr	$8, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	24(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit8Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl8LoopExit)
-
-	palignr	$8, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	24(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit8Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl8LoopExit)
-
-	palignr	$8, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	24(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-8(%ecx), %ecx
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	add	%eax, %ebx
-# endif
-	movaps	-8(%ecx), %xmm1
-
-L(Shl8LoopStart):
-	movaps	8(%ecx), %xmm2
-	movaps	24(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	40(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	56(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqb	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$8, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$8, %xmm3, %xmm4
-	jnz	L(Shl8Start)
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeave8)
-# endif
-	palignr	$8, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$8, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl8LoopStart)
-
-L(Shl8LoopExit):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	mov	$8, %esi
-	jmp	L(CopyFrom1To16Bytes)
-
-	.p2align 4
-L(Shl9):
-	movaps	-9(%ecx), %xmm1
-	movaps	7(%ecx), %xmm2
-L(Shl9Start):
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit9Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl9LoopExit)
-
-	palignr	$9, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	23(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit9Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl9LoopExit)
-
-	palignr	$9, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	23(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit9Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl9LoopExit)
-
-	palignr	$9, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	23(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit9Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl9LoopExit)
-
-	palignr	$9, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	23(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-7(%ecx), %ecx
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	add	%eax, %ebx
-# endif
-	movaps	-9(%ecx), %xmm1
-
-L(Shl9LoopStart):
-	movaps	7(%ecx), %xmm2
-	movaps	23(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	39(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	55(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqb	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$9, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$9, %xmm3, %xmm4
-	jnz	L(Shl9Start)
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeave9)
-# endif
-	palignr	$9, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$9, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl9LoopStart)
-
-L(Shl9LoopExit):
-	movlpd	-1(%ecx), %xmm0
-	movlpd	%xmm0, -1(%edx)
-	mov	$7, %esi
-	jmp	L(CopyFrom1To16Bytes)
-
-	.p2align 4
-L(Shl10):
-	movaps	-10(%ecx), %xmm1
-	movaps	6(%ecx), %xmm2
-L(Shl10Start):
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit10Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl10LoopExit)
-
-	palignr	$10, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	22(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit10Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl10LoopExit)
-
-	palignr	$10, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	22(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit10Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl10LoopExit)
-
-	palignr	$10, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	22(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit10Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl10LoopExit)
-
-	palignr	$10, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	22(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-6(%ecx), %ecx
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	add	%eax, %ebx
-# endif
-	movaps	-10(%ecx), %xmm1
-
-L(Shl10LoopStart):
-	movaps	6(%ecx), %xmm2
-	movaps	22(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	38(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	54(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqb	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$10, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$10, %xmm3, %xmm4
-	jnz	L(Shl10Start)
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeave10)
-# endif
-	palignr	$10, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$10, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl10LoopStart)
-
-L(Shl10LoopExit):
-	movlpd	-2(%ecx), %xmm0
-	movlpd	%xmm0, -2(%edx)
-	mov	$6, %esi
-	jmp	L(CopyFrom1To16Bytes)
-
-	.p2align 4
-L(Shl11):
-	movaps	-11(%ecx), %xmm1
-	movaps	5(%ecx), %xmm2
-L(Shl11Start):
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit11Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl11LoopExit)
-
-	palignr	$11, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	21(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit11Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl11LoopExit)
-
-	palignr	$11, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	21(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit11Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl11LoopExit)
-
-	palignr	$11, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	21(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit11Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl11LoopExit)
-
-	palignr	$11, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	21(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-5(%ecx), %ecx
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	add	%eax, %ebx
-# endif
-	movaps	-11(%ecx), %xmm1
-
-L(Shl11LoopStart):
-	movaps	5(%ecx), %xmm2
-	movaps	21(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	37(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	53(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqb	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$11, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$11, %xmm3, %xmm4
-	jnz	L(Shl11Start)
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeave11)
-# endif
-	palignr	$11, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$11, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl11LoopStart)
-
-L(Shl11LoopExit):
-	movlpd	-3(%ecx), %xmm0
-	movlpd	%xmm0, -3(%edx)
-	mov	$5, %esi
-	jmp	L(CopyFrom1To16Bytes)
-
-	.p2align 4
-L(Shl12):
-	movaps	-12(%ecx), %xmm1
-	movaps	4(%ecx), %xmm2
-L(Shl12Start):
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit12Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl12LoopExit)
-
-	palignr	$12, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	20(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit12Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl12LoopExit)
-
-	palignr	$12, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	20(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit12Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl12LoopExit)
-
-	palignr	$12, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	20(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit12Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl12LoopExit)
-
-	palignr	$12, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	20(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-4(%ecx), %ecx
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	add	%eax, %ebx
-# endif
-	movaps	-12(%ecx), %xmm1
-
-L(Shl12LoopStart):
-	movaps	4(%ecx), %xmm2
-	movaps	20(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	36(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	52(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqb	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$12, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$12, %xmm3, %xmm4
-	jnz	L(Shl12Start)
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeave12)
-# endif
-	palignr	$12, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$12, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl12LoopStart)
-
-L(Shl12LoopExit):
-	movl	(%ecx), %esi
-	movl	%esi, (%edx)
-	mov	$4, %esi
-	jmp	L(CopyFrom1To16Bytes)
-
-	.p2align 4
-L(Shl13):
-	movaps	-13(%ecx), %xmm1
-	movaps	3(%ecx), %xmm2
-L(Shl13Start):
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit13Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl13LoopExit)
-
-	palignr	$13, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	19(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit13Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl13LoopExit)
-
-	palignr	$13, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	19(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit13Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl13LoopExit)
-
-	palignr	$13, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	19(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit13Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl13LoopExit)
-
-	palignr	$13, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	19(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-3(%ecx), %ecx
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	add	%eax, %ebx
-# endif
-	movaps	-13(%ecx), %xmm1
-
-L(Shl13LoopStart):
-	movaps	3(%ecx), %xmm2
-	movaps	19(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	35(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	51(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqb	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$13, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$13, %xmm3, %xmm4
-	jnz	L(Shl13Start)
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeave13)
-# endif
-	palignr	$13, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$13, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl13LoopStart)
-
-L(Shl13LoopExit):
-	movl	-1(%ecx), %esi
-	movl	%esi, -1(%edx)
-	mov	$3, %esi
-	jmp	L(CopyFrom1To16Bytes)
-
-	.p2align 4
-L(Shl14):
-	movaps	-14(%ecx), %xmm1
-	movaps	2(%ecx), %xmm2
-L(Shl14Start):
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit14Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl14LoopExit)
-
-	palignr	$14, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	18(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit14Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl14LoopExit)
-
-	palignr	$14, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	18(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit14Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl14LoopExit)
-
-	palignr	$14, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	18(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit14Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl14LoopExit)
-
-	palignr	$14, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	18(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-2(%ecx), %ecx
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	add	%eax, %ebx
-# endif
-	movaps	-14(%ecx), %xmm1
-
-L(Shl14LoopStart):
-	movaps	2(%ecx), %xmm2
-	movaps	18(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	34(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	50(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqb	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$14, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$14, %xmm3, %xmm4
-	jnz	L(Shl14Start)
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeave14)
-# endif
-	palignr	$14, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$14, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl14LoopStart)
-
-L(Shl14LoopExit):
-	movl	-2(%ecx), %esi
-	movl	%esi, -2(%edx)
-	mov	$2, %esi
-	jmp	L(CopyFrom1To16Bytes)
-
-	.p2align 4
-L(Shl15):
-	movaps	-15(%ecx), %xmm1
-	movaps	1(%ecx), %xmm2
-L(Shl15Start):
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit15Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl15LoopExit)
-
-	palignr	$15, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	17(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit15Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl15LoopExit)
-
-	palignr	$15, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	17(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit15Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl15LoopExit)
-
-	palignr	$15, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	17(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit15Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl15LoopExit)
-
-	palignr	$15, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	17(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-1(%ecx), %ecx
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	add	%eax, %ebx
-# endif
-	movaps	-15(%ecx), %xmm1
-
-L(Shl15LoopStart):
-	movaps	1(%ecx), %xmm2
-	movaps	17(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	33(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	49(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqb	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$15, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$15, %xmm3, %xmm4
-	jnz	L(Shl15Start)
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeave15)
-# endif
-	palignr	$15, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$15, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl15LoopStart)
-
-L(Shl15LoopExit):
-	movl	-3(%ecx), %esi
-	movl	%esi, -3(%edx)
-	mov	$1, %esi
-# ifdef USE_AS_STRCAT
-	jmp	L(CopyFrom1To16Bytes)
-# endif
-
-
-# ifndef USE_AS_STRCAT
-
-	.p2align 4
-L(CopyFrom1To16Bytes):
-#  ifdef USE_AS_STRNCPY
-	add	$16, %ebx
-#  endif
-	add	%esi, %edx
-	add	%esi, %ecx
-
-	POP	(%esi)
-	test	%al, %al
-	jz	L(ExitHigh8)
-
-L(CopyFrom1To16BytesLess8):
-	mov	%al, %ah
-	and	$15, %ah
-	jz	L(ExitHigh4)
-
-	test	$0x01, %al
-	jnz	L(Exit1)
-	test	$0x02, %al
-	jnz	L(Exit2)
-	test	$0x04, %al
-	jnz	L(Exit3)
-
-	.p2align 4
-L(Exit4):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	SAVE_RESULT	(3)
-#  ifdef USE_AS_STRNCPY
-	sub	$4, %ebx
-	lea	4(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-	.p2align 4
-L(ExitHigh4):
-	test	$0x10, %al
-	jnz	L(Exit5)
-	test	$0x20, %al
-	jnz	L(Exit6)
-	test	$0x40, %al
-	jnz	L(Exit7)
-
-	.p2align 4
-L(Exit8):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	SAVE_RESULT	(7)
-#  ifdef USE_AS_STRNCPY
-	sub	$8, %ebx
-	lea	8(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-	.p2align 4
-L(ExitHigh8):
-	mov	%ah, %al
-	and	$15, %al
-	jz	L(ExitHigh12)
-
-	test	$0x01, %ah
-	jnz	L(Exit9)
-	test	$0x02, %ah
-	jnz	L(Exit10)
-	test	$0x04, %ah
-	jnz	L(Exit11)
-
-	.p2align 4
-L(Exit12):
-	movlpd	(%ecx), %xmm0
-	movl	8(%ecx), %eax
-	movlpd	%xmm0, (%edx)
-	movl	%eax, 8(%edx)
-	SAVE_RESULT	(11)
-#  ifdef USE_AS_STRNCPY
-	sub	$12, %ebx
-	lea	12(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-	.p2align 4
-L(ExitHigh12):
-	test	$0x10, %ah
-	jnz	L(Exit13)
-	test	$0x20, %ah
-	jnz	L(Exit14)
-	test	$0x40, %ah
-	jnz	L(Exit15)
-
-	.p2align 4
-L(Exit16):
-	movdqu	(%ecx), %xmm0
-	movdqu	%xmm0, (%edx)
-	SAVE_RESULT	(15)
-#  ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	lea	16(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-#   ifdef USE_AS_STRNCPY
-
-	CFI_PUSH(%esi)
-
-	.p2align 4
-L(CopyFrom1To16BytesCase2):
-	add	$16, %ebx
-	add	%esi, %ecx
-	add	%esi, %edx
-
-	POP	(%esi)
-
-	test	%al, %al
-	jz	L(ExitHighCase2)
-
-	cmp	$8, %ebx
-	ja	L(CopyFrom1To16BytesLess8)
-
-	test	$0x01, %al
-	jnz	L(Exit1)
-	cmp	$1, %ebx
-	je	L(Exit1)
-	test	$0x02, %al
-	jnz	L(Exit2)
-	cmp	$2, %ebx
-	je	L(Exit2)
-	test	$0x04, %al
-	jnz	L(Exit3)
-	cmp	$3, %ebx
-	je	L(Exit3)
-	test	$0x08, %al
-	jnz	L(Exit4)
-	cmp	$4, %ebx
-	je	L(Exit4)
-	test	$0x10, %al
-	jnz	L(Exit5)
-	cmp	$5, %ebx
-	je	L(Exit5)
-	test	$0x20, %al
-	jnz	L(Exit6)
-	cmp	$6, %ebx
-	je	L(Exit6)
-	test	$0x40, %al
-	jnz	L(Exit7)
-	cmp	$7, %ebx
-	je	L(Exit7)
-	jmp	L(Exit8)
-
-	.p2align 4
-L(ExitHighCase2):
-	cmp	$8, %ebx
-	jbe	L(CopyFrom1To16BytesLess8Case3)
-
-	test	$0x01, %ah
-	jnz	L(Exit9)
-	cmp	$9, %ebx
-	je	L(Exit9)
-	test	$0x02, %ah
-	jnz	L(Exit10)
-	cmp	$10, %ebx
-	je	L(Exit10)
-	test	$0x04, %ah
-	jnz	L(Exit11)
-	cmp	$11, %ebx
-	je	L(Exit11)
-	test	$0x8, %ah
-	jnz	L(Exit12)
-	cmp	$12, %ebx
-	je	L(Exit12)
-	test	$0x10, %ah
-	jnz	L(Exit13)
-	cmp	$13, %ebx
-	je	L(Exit13)
-	test	$0x20, %ah
-	jnz	L(Exit14)
-	cmp	$14, %ebx
-	je	L(Exit14)
-	test	$0x40, %ah
-	jnz	L(Exit15)
-	cmp	$15, %ebx
-	je	L(Exit15)
-	jmp	L(Exit16)
-
-	CFI_PUSH(%esi)
-
-	.p2align 4
-L(CopyFrom1To16BytesCase2OrCase3):
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-
-	.p2align 4
-L(CopyFrom1To16BytesCase3):
-	add	$16, %ebx
-	add	%esi, %edx
-	add	%esi, %ecx
-
-	POP	(%esi)
-
-	cmp	$8, %ebx
-	ja	L(ExitHigh8Case3)
-
-L(CopyFrom1To16BytesLess8Case3):
-	cmp	$4, %ebx
-	ja	L(ExitHigh4Case3)
-
-	cmp	$1, %ebx
-	je	L(Exit1)
-	cmp	$2, %ebx
-	je	L(Exit2)
-	cmp	$3, %ebx
-	je	L(Exit3)
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	SAVE_RESULT	(4)
-	RETURN1
-
-	.p2align 4
-L(ExitHigh4Case3):
-	cmp	$5, %ebx
-	je	L(Exit5)
-	cmp	$6, %ebx
-	je	L(Exit6)
-	cmp	$7, %ebx
-	je	L(Exit7)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	SAVE_RESULT	(8)
-	RETURN1
-
-	.p2align 4
-L(ExitHigh8Case3):
-	cmp	$12, %ebx
-	ja	L(ExitHigh12Case3)
-
-	cmp	$9, %ebx
-	je	L(Exit9)
-	cmp	$10, %ebx
-	je	L(Exit10)
-	cmp	$11, %ebx
-	je	L(Exit11)
-	movlpd	(%ecx), %xmm0
-	movl	8(%ecx), %eax
-	movlpd	%xmm0, (%edx)
-	movl	%eax, 8(%edx)
-	SAVE_RESULT	(12)
-	RETURN1
-
-	.p2align 4
-L(ExitHigh12Case3):
-	cmp	$13, %ebx
-	je	L(Exit13)
-	cmp	$14, %ebx
-	je	L(Exit14)
-	cmp	$15, %ebx
-	je	L(Exit15)
-	movlpd	(%ecx), %xmm0
-	movlpd	8(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 8(%edx)
-	SAVE_RESULT	(16)
-	RETURN1
-
-#  endif
-
-	.p2align 4
-L(Exit1):
-	movb	(%ecx), %al
-	movb	%al, (%edx)
-	SAVE_RESULT	(0)
-#  ifdef USE_AS_STRNCPY
-	sub	$1, %ebx
-	lea	1(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-	.p2align 4
-L(Exit2):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	SAVE_RESULT	(1)
-#  ifdef USE_AS_STRNCPY
-	sub	$2, %ebx
-	lea	2(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-	.p2align 4
-L(Exit3):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movb	2(%ecx), %al
-	movb	%al, 2(%edx)
-	SAVE_RESULT	(2)
-#  ifdef USE_AS_STRNCPY
-	sub	$3, %ebx
-	lea	3(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-	.p2align 4
-L(Exit5):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movb	4(%ecx), %al
-	movb	%al, 4(%edx)
-	SAVE_RESULT	(4)
-#  ifdef USE_AS_STRNCPY
-	sub	$5, %ebx
-	lea	5(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-	.p2align 4
-L(Exit6):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movw	4(%ecx), %ax
-	movw	%ax, 4(%edx)
-	SAVE_RESULT	(5)
-#  ifdef USE_AS_STRNCPY
-	sub	$6, %ebx
-	lea	6(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-	.p2align 4
-L(Exit7):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	3(%ecx), %eax
-	movl	%eax, 3(%edx)
-	SAVE_RESULT	(6)
-#  ifdef USE_AS_STRNCPY
-	sub	$7, %ebx
-	lea	7(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-	.p2align 4
-L(Exit9):
-	movlpd	(%ecx), %xmm0
-	movb	8(%ecx), %al
-	movlpd	%xmm0, (%edx)
-	movb	%al, 8(%edx)
-	SAVE_RESULT	(8)
-#  ifdef USE_AS_STRNCPY
-	sub	$9, %ebx
-	lea	9(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-	.p2align 4
-L(Exit10):
-	movlpd	(%ecx), %xmm0
-	movw	8(%ecx), %ax
-	movlpd	%xmm0, (%edx)
-	movw	%ax, 8(%edx)
-	SAVE_RESULT	(9)
-#  ifdef USE_AS_STRNCPY
-	sub	$10, %ebx
-	lea	10(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-	.p2align 4
-L(Exit11):
-	movlpd	(%ecx), %xmm0
-	movl	7(%ecx), %eax
-	movlpd	%xmm0, (%edx)
-	movl	%eax, 7(%edx)
-	SAVE_RESULT	(10)
-#  ifdef USE_AS_STRNCPY
-	sub	$11, %ebx
-	lea	11(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-	.p2align 4
-L(Exit13):
-	movlpd	(%ecx), %xmm0
-	movlpd	5(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 5(%edx)
-	SAVE_RESULT	(12)
-#  ifdef USE_AS_STRNCPY
-	sub	$13, %ebx
-	lea	13(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-	.p2align 4
-L(Exit14):
-	movlpd	(%ecx), %xmm0
-	movlpd	6(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 6(%edx)
-	SAVE_RESULT	(13)
-#  ifdef USE_AS_STRNCPY
-	sub	$14, %ebx
-	lea	14(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-	.p2align 4
-L(Exit15):
-	movlpd	(%ecx), %xmm0
-	movlpd	7(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 7(%edx)
-	SAVE_RESULT	(14)
-#  ifdef USE_AS_STRNCPY
-	sub	$15, %ebx
-	lea	15(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-CFI_POP	(%edi)
-
-#  ifdef USE_AS_STRNCPY
-	.p2align 4
-L(Fill0):
-	RETURN
-
-	.p2align 4
-L(Fill1):
-	movb	%dl, (%ecx)
-	RETURN
-
-	.p2align 4
-L(Fill2):
-	movw	%dx, (%ecx)
-	RETURN
-
-	.p2align 4
-L(Fill3):
-	movw	%dx, (%ecx)
-	movb	%dl, 2(%ecx)
-	RETURN
-
-	.p2align 4
-L(Fill4):
-	movl	%edx, (%ecx)
-	RETURN
-
-	.p2align 4
-L(Fill5):
-	movl	%edx, (%ecx)
-	movb	%dl, 4(%ecx)
-	RETURN
-
-	.p2align 4
-L(Fill6):
-	movl	%edx, (%ecx)
-	movw	%dx, 4(%ecx)
-	RETURN
-
-	.p2align 4
-L(Fill7):
-	movl	%edx, (%ecx)
-	movl	%edx, 3(%ecx)
-	RETURN
-
-	.p2align 4
-L(Fill8):
-	movlpd	%xmm0, (%ecx)
-	RETURN
-
-	.p2align 4
-L(Fill9):
-	movlpd	%xmm0, (%ecx)
-	movb	%dl, 8(%ecx)
-	RETURN
-
-	.p2align 4
-L(Fill10):
-	movlpd	%xmm0, (%ecx)
-	movw	%dx, 8(%ecx)
-	RETURN
-
-	.p2align 4
-L(Fill11):
-	movlpd	%xmm0, (%ecx)
-	movl	%edx, 7(%ecx)
-	RETURN
-
-	.p2align 4
-L(Fill12):
-	movlpd	%xmm0, (%ecx)
-	movl	%edx, 8(%ecx)
-	RETURN
-
-	.p2align 4
-L(Fill13):
-	movlpd	%xmm0, (%ecx)
-	movlpd	%xmm0, 5(%ecx)
-	RETURN
-
-	.p2align 4
-L(Fill14):
-	movlpd	%xmm0, (%ecx)
-	movlpd	%xmm0, 6(%ecx)
-	RETURN
-
-	.p2align 4
-L(Fill15):
-	movlpd	%xmm0, (%ecx)
-	movlpd	%xmm0, 7(%ecx)
-	RETURN
-
-	.p2align 4
-L(Fill16):
-	movlpd	%xmm0, (%ecx)
-	movlpd	%xmm0, 8(%ecx)
-	RETURN
-
-	.p2align 4
-L(StrncpyFillExit1):
-	lea	16(%ebx), %ebx
-L(FillFrom1To16Bytes):
-	test	%ebx, %ebx
-	jz	L(Fill0)
-	cmp	$16, %ebx
-	je	L(Fill16)
-	cmp	$8, %ebx
-	je	L(Fill8)
-	jg	L(FillMore8)
-	cmp	$4, %ebx
-	je	L(Fill4)
-	jg	L(FillMore4)
-	cmp	$2, %ebx
-	jl	L(Fill1)
-	je	L(Fill2)
-	jg	L(Fill3)
-L(FillMore8):	/* but less than 16 */
-	cmp	$12, %ebx
-	je	L(Fill12)
-	jl	L(FillLess12)
-	cmp	$14, %ebx
-	jl	L(Fill13)
-	je	L(Fill14)
-	jg	L(Fill15)
-L(FillMore4):	/* but less than 8 */
-	cmp	$6, %ebx
-	jl	L(Fill5)
-	je	L(Fill6)
-	jg	L(Fill7)
-L(FillLess12):	/* but more than 8 */
-	cmp	$10, %ebx
-	jl	L(Fill9)
-	je	L(Fill10)
-	jmp	L(Fill11)
-
-	CFI_PUSH(%edi)
-
-	.p2align 4
-L(StrncpyFillTailWithZero1):
-	POP	(%edi)
-L(StrncpyFillTailWithZero):
-	pxor	%xmm0, %xmm0
-	xor	%edx, %edx
-	sub	$16, %ebx
-	jbe	L(StrncpyFillExit1)
-
-	movlpd	%xmm0, (%ecx)
-	movlpd	%xmm0, 8(%ecx)
-
-	lea	16(%ecx), %ecx
-
-	mov	%ecx, %edx
-	and	$0xf, %edx
-	sub	%edx, %ecx
-	add	%edx, %ebx
-	xor	%edx, %edx
-	sub	$64, %ebx
-	jb	L(StrncpyFillLess64)
-
-L(StrncpyFillLoopMovdqa):
-	movdqa	%xmm0, (%ecx)
-	movdqa	%xmm0, 16(%ecx)
-	movdqa	%xmm0, 32(%ecx)
-	movdqa	%xmm0, 48(%ecx)
-	lea	64(%ecx), %ecx
-	sub	$64, %ebx
-	jae	L(StrncpyFillLoopMovdqa)
-
-L(StrncpyFillLess64):
-	add	$32, %ebx
-	jl	L(StrncpyFillLess32)
-	movdqa	%xmm0, (%ecx)
-	movdqa	%xmm0, 16(%ecx)
-	lea	32(%ecx), %ecx
-	sub	$16, %ebx
-	jl	L(StrncpyFillExit1)
-	movdqa	%xmm0, (%ecx)
-	lea	16(%ecx), %ecx
-	jmp	L(FillFrom1To16Bytes)
-
-L(StrncpyFillLess32):
-	add	$16, %ebx
-	jl	L(StrncpyFillExit1)
-	movdqa	%xmm0, (%ecx)
-	lea	16(%ecx), %ecx
-	jmp	L(FillFrom1To16Bytes)
-#  endif
-
-	.p2align 4
-L(ExitTail1):
-	movb	(%ecx), %al
-	movb	%al, (%edx)
-	SAVE_RESULT_TAIL (0)
-#  ifdef USE_AS_STRNCPY
-	sub	$1, %ebx
-	lea	1(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN
-
-	.p2align 4
-L(ExitTail2):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	SAVE_RESULT_TAIL (1)
-#  ifdef USE_AS_STRNCPY
-	sub	$2, %ebx
-	lea	2(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN
-
-	.p2align 4
-L(ExitTail3):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movb	2(%ecx), %al
-	movb	%al, 2(%edx)
-	SAVE_RESULT_TAIL (2)
-#  ifdef USE_AS_STRNCPY
-	sub	$3, %ebx
-	lea	3(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN
-
-	.p2align 4
-L(ExitTail4):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	SAVE_RESULT_TAIL (3)
-#  ifdef USE_AS_STRNCPY
-	sub	$4, %ebx
-	lea	4(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN
-
-	.p2align 4
-L(ExitTail5):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movb	4(%ecx), %al
-	movb	%al, 4(%edx)
-	SAVE_RESULT_TAIL (4)
-#  ifdef USE_AS_STRNCPY
-	sub	$5, %ebx
-	lea	5(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN
-
-	.p2align 4
-L(ExitTail6):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movw	4(%ecx), %ax
-	movw	%ax, 4(%edx)
-	SAVE_RESULT_TAIL (5)
-#  ifdef USE_AS_STRNCPY
-	sub	$6, %ebx
-	lea	6(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN
-
-	.p2align 4
-L(ExitTail7):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	3(%ecx), %eax
-	movl	%eax, 3(%edx)
-	SAVE_RESULT_TAIL (6)
-#  ifdef USE_AS_STRNCPY
-	sub	$7, %ebx
-	lea	7(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN
-
-	.p2align 4
-L(ExitTail8):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	SAVE_RESULT_TAIL (7)
-#  ifdef USE_AS_STRNCPY
-	sub	$8, %ebx
-	lea	8(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#  endif
-	RETURN
-
-	.p2align 4
-L(ExitTail9):
-	movlpd	(%ecx), %xmm0
-	movb	8(%ecx), %al
-	movlpd	%xmm0, (%edx)
-	movb	%al, 8(%edx)
-	SAVE_RESULT_TAIL (8)
-#  ifdef USE_AS_STRNCPY
-	sub	$9, %ebx
-	lea	9(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN
-
-	.p2align 4
-L(ExitTail10):
-	movlpd	(%ecx), %xmm0
-	movw	8(%ecx), %ax
-	movlpd	%xmm0, (%edx)
-	movw	%ax, 8(%edx)
-	SAVE_RESULT_TAIL (9)
-#  ifdef USE_AS_STRNCPY
-	sub	$10, %ebx
-	lea	10(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN
-
-	.p2align 4
-L(ExitTail11):
-	movlpd	(%ecx), %xmm0
-	movl	7(%ecx), %eax
-	movlpd	%xmm0, (%edx)
-	movl	%eax, 7(%edx)
-	SAVE_RESULT_TAIL (10)
-#  ifdef USE_AS_STRNCPY
-	sub	$11, %ebx
-	lea	11(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN
-
-	.p2align 4
-L(ExitTail12):
-	movlpd	(%ecx), %xmm0
-	movl	8(%ecx), %eax
-	movlpd	%xmm0, (%edx)
-	movl	%eax, 8(%edx)
-	SAVE_RESULT_TAIL (11)
-#  ifdef USE_AS_STRNCPY
-	sub	$12, %ebx
-	lea	12(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN
-
-	.p2align 4
-L(ExitTail13):
-	movlpd	(%ecx), %xmm0
-	movlpd	5(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 5(%edx)
-	SAVE_RESULT_TAIL (12)
-#  ifdef USE_AS_STRNCPY
-	sub	$13, %ebx
-	lea	13(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN
-
-	.p2align 4
-L(ExitTail14):
-	movlpd	(%ecx), %xmm0
-	movlpd	6(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 6(%edx)
-	SAVE_RESULT_TAIL (13)
-#  ifdef USE_AS_STRNCPY
-	sub	$14, %ebx
-	lea	14(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#  ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN
-
-	.p2align 4
-L(ExitTail15):
-	movlpd	(%ecx), %xmm0
-	movlpd	7(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 7(%edx)
-	SAVE_RESULT_TAIL (14)
-#  ifdef USE_AS_STRNCPY
-	sub	$15, %ebx
-	lea	15(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#  endif
-	RETURN
-
-	.p2align 4
-L(ExitTail16):
-	movdqu	(%ecx), %xmm0
-	movdqu	%xmm0, (%edx)
-	SAVE_RESULT_TAIL (15)
-#  ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	lea	16(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN
-# endif
-
-# ifdef USE_AS_STRNCPY
-#  ifndef USE_AS_STRCAT
-	CFI_PUSH (%esi)
-	CFI_PUSH (%edi)
-#  endif
-	.p2align 4
-L(StrncpyLeaveCase2OrCase3):
-	test	%eax, %eax
-	jnz	L(Aligned64LeaveCase2)
-
-L(Aligned64LeaveCase3):
-	add	$48, %ebx
-	jle	L(CopyFrom1To16BytesCase3)
-	movaps	%xmm4, -64(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase3)
-	movaps	%xmm5, -48(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase3)
-	movaps	%xmm6, -32(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(Aligned64LeaveCase2):
-	pcmpeqb	%xmm4, %xmm0
-	pmovmskb %xmm0, %eax
-	add	$48, %ebx
-	jle	L(CopyFrom1To16BytesCase2OrCase3)
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	pcmpeqb	%xmm5, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm4, -64(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	pcmpeqb	%xmm6, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm5, -48(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	pcmpeqb	%xmm7, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm6, -32(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-	jmp	L(CopyFrom1To16BytesCase2)
-
-/*--------------------------------------------------*/
-	.p2align 4
-L(StrncpyExit1Case2OrCase3):
-	movlpd	(%ecx), %xmm0
-	movlpd	7(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 7(%edx)
-	mov	$15, %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-	.p2align 4
-L(StrncpyExit2Case2OrCase3):
-	movlpd	(%ecx), %xmm0
-	movlpd	6(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 6(%edx)
-	mov	$14, %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-	.p2align 4
-L(StrncpyExit3Case2OrCase3):
-	movlpd	(%ecx), %xmm0
-	movlpd	5(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 5(%edx)
-	mov	$13, %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-	.p2align 4
-L(StrncpyExit4Case2OrCase3):
-	movlpd	(%ecx), %xmm0
-	movl	8(%ecx), %esi
-	movlpd	%xmm0, (%edx)
-	movl	%esi, 8(%edx)
-	mov	$12, %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-	.p2align 4
-L(StrncpyExit5Case2OrCase3):
-	movlpd	(%ecx), %xmm0
-	movl	7(%ecx), %esi
-	movlpd	%xmm0, (%edx)
-	movl	%esi, 7(%edx)
-	mov	$11, %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-	.p2align 4
-L(StrncpyExit6Case2OrCase3):
-	movlpd	(%ecx), %xmm0
-	movl	6(%ecx), %esi
-	movlpd	%xmm0, (%edx)
-	movl	%esi, 6(%edx)
-	mov	$10, %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-	.p2align 4
-L(StrncpyExit7Case2OrCase3):
-	movlpd	(%ecx), %xmm0
-	movl	5(%ecx), %esi
-	movlpd	%xmm0, (%edx)
-	movl	%esi, 5(%edx)
-	mov	$9, %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-	.p2align 4
-L(StrncpyExit8Case2OrCase3):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	mov	$8, %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-	.p2align 4
-L(StrncpyExit9Case2OrCase3):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	mov	$7, %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-	.p2align 4
-L(StrncpyExit10Case2OrCase3):
-	movlpd	-1(%ecx), %xmm0
-	movlpd	%xmm0, -1(%edx)
-	mov	$6, %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-	.p2align 4
-L(StrncpyExit11Case2OrCase3):
-	movlpd	-2(%ecx), %xmm0
-	movlpd	%xmm0, -2(%edx)
-	mov	$5, %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-	.p2align 4
-L(StrncpyExit12Case2OrCase3):
-	movl	(%ecx), %esi
-	movl	%esi, (%edx)
-	mov	$4, %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-	.p2align 4
-L(StrncpyExit13Case2OrCase3):
-	movl	-1(%ecx), %esi
-	movl	%esi, -1(%edx)
-	mov	$3, %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-	.p2align 4
-L(StrncpyExit14Case2OrCase3):
-	movl	-2(%ecx), %esi
-	movl	%esi, -2(%edx)
-	mov	$2, %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-	.p2align 4
-L(StrncpyExit15Case2OrCase3):
-	movl	-3(%ecx), %esi
-	movl	%esi, -3(%edx)
-	mov	$1, %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave1):
-	movaps	%xmm2, %xmm3
-	add	$48, %ebx
-	jle	L(StrncpyExit1)
-	palignr	$1, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	31(%ecx), %xmm2
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit1)
-	palignr	$1, %xmm3, %xmm2
-	movaps	%xmm2, 16(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit1)
-	movaps	%xmm4, 32(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit1)
-	movaps	%xmm5, 48(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-L(StrncpyExit1):
-	lea	15(%edx, %esi), %edx
-	lea	15(%ecx, %esi), %ecx
-	movdqu	-16(%ecx), %xmm0
-	xor	%esi, %esi
-	movdqu	%xmm0, -16(%edx)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave2):
-	movaps	%xmm2, %xmm3
-	add	$48, %ebx
-	jle	L(StrncpyExit2)
-	palignr	$2, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	30(%ecx), %xmm2
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit2)
-	palignr	$2, %xmm3, %xmm2
-	movaps	%xmm2, 16(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit2)
-	movaps	%xmm4, 32(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit2)
-	movaps	%xmm5, 48(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-L(StrncpyExit2):
-	lea	14(%edx, %esi), %edx
-	lea	14(%ecx, %esi), %ecx
-	movdqu	-16(%ecx), %xmm0
-	xor	%esi, %esi
-	movdqu	%xmm0, -16(%edx)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave3):
-	movaps	%xmm2, %xmm3
-	add	$48, %ebx
-	jle	L(StrncpyExit3)
-	palignr	$3, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	29(%ecx), %xmm2
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit3)
-	palignr	$3, %xmm3, %xmm2
-	movaps	%xmm2, 16(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit3)
-	movaps	%xmm4, 32(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit3)
-	movaps	%xmm5, 48(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-L(StrncpyExit3):
-	lea	13(%edx, %esi), %edx
-	lea	13(%ecx, %esi), %ecx
-	movdqu	-16(%ecx), %xmm0
-	xor	%esi, %esi
-	movdqu	%xmm0, -16(%edx)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave4):
-	movaps	%xmm2, %xmm3
-	add	$48, %ebx
-	jle	L(StrncpyExit4)
-	palignr	$4, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	28(%ecx), %xmm2
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit4)
-	palignr	$4, %xmm3, %xmm2
-	movaps	%xmm2, 16(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit4)
-	movaps	%xmm4, 32(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit4)
-	movaps	%xmm5, 48(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-L(StrncpyExit4):
-	lea	12(%edx, %esi), %edx
-	lea	12(%ecx, %esi), %ecx
-	movlpd	-12(%ecx), %xmm0
-	movl	-4(%ecx), %eax
-	movlpd	%xmm0, -12(%edx)
-	movl	%eax, -4(%edx)
-	xor	%esi, %esi
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave5):
-	movaps	%xmm2, %xmm3
-	add	$48, %ebx
-	jle	L(StrncpyExit5)
-	palignr	$5, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	27(%ecx), %xmm2
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit5)
-	palignr	$5, %xmm3, %xmm2
-	movaps	%xmm2, 16(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit5)
-	movaps	%xmm4, 32(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit5)
-	movaps	%xmm5, 48(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-L(StrncpyExit5):
-	lea	11(%edx, %esi), %edx
-	lea	11(%ecx, %esi), %ecx
-	movlpd	-11(%ecx), %xmm0
-	movl	-4(%ecx), %eax
-	movlpd	%xmm0, -11(%edx)
-	movl	%eax, -4(%edx)
-	xor	%esi, %esi
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave6):
-	movaps	%xmm2, %xmm3
-	add	$48, %ebx
-	jle	L(StrncpyExit6)
-	palignr	$6, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	26(%ecx), %xmm2
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit6)
-	palignr	$6, %xmm3, %xmm2
-	movaps	%xmm2, 16(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit6)
-	movaps	%xmm4, 32(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit6)
-	movaps	%xmm5, 48(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-L(StrncpyExit6):
-	lea	10(%edx, %esi), %edx
-	lea	10(%ecx, %esi), %ecx
-
-	movlpd	-10(%ecx), %xmm0
-	movw	-2(%ecx), %ax
-	movlpd	%xmm0, -10(%edx)
-	movw	%ax, -2(%edx)
-	xor	%esi, %esi
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave7):
-	movaps	%xmm2, %xmm3
-	add	$48, %ebx
-	jle	L(StrncpyExit7)
-	palignr	$7, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	25(%ecx), %xmm2
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit7)
-	palignr	$7, %xmm3, %xmm2
-	movaps	%xmm2, 16(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit7)
-	movaps	%xmm4, 32(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit7)
-	movaps	%xmm5, 48(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-L(StrncpyExit7):
-	lea	9(%edx, %esi), %edx
-	lea	9(%ecx, %esi), %ecx
-
-	movlpd	-9(%ecx), %xmm0
-	movb	-1(%ecx), %ah
-	movlpd	%xmm0, -9(%edx)
-	movb	%ah, -1(%edx)
-	xor	%esi, %esi
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave8):
-	movaps	%xmm2, %xmm3
-	add	$48, %ebx
-	jle	L(StrncpyExit8)
-	palignr	$8, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	24(%ecx), %xmm2
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit8)
-	palignr	$8, %xmm3, %xmm2
-	movaps	%xmm2, 16(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit8)
-	movaps	%xmm4, 32(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit8)
-	movaps	%xmm5, 48(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-L(StrncpyExit8):
-	lea	8(%edx, %esi), %edx
-	lea	8(%ecx, %esi), %ecx
-	movlpd	-8(%ecx), %xmm0
-	movlpd	%xmm0, -8(%edx)
-	xor	%esi, %esi
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave9):
-	movaps	%xmm2, %xmm3
-	add	$48, %ebx
-	jle	L(StrncpyExit9)
-	palignr	$9, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	23(%ecx), %xmm2
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit9)
-	palignr	$9, %xmm3, %xmm2
-	movaps	%xmm2, 16(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit9)
-	movaps	%xmm4, 32(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit9)
-	movaps	%xmm5, 48(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-L(StrncpyExit9):
-	lea	7(%edx, %esi), %edx
-	lea	7(%ecx, %esi), %ecx
-
-	movlpd	-8(%ecx), %xmm0
-	movlpd	%xmm0, -8(%edx)
-	xor	%esi, %esi
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave10):
-	movaps	%xmm2, %xmm3
-	add	$48, %ebx
-	jle	L(StrncpyExit10)
-	palignr	$10, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	22(%ecx), %xmm2
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit10)
-	palignr	$10, %xmm3, %xmm2
-	movaps	%xmm2, 16(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit10)
-	movaps	%xmm4, 32(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit10)
-	movaps	%xmm5, 48(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-L(StrncpyExit10):
-	lea	6(%edx, %esi), %edx
-	lea	6(%ecx, %esi), %ecx
-
-	movlpd	-8(%ecx), %xmm0
-	movlpd	%xmm0, -8(%edx)
-	xor	%esi, %esi
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave11):
-	movaps	%xmm2, %xmm3
-	add	$48, %ebx
-	jle	L(StrncpyExit11)
-	palignr	$11, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	21(%ecx), %xmm2
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit11)
-	palignr	$11, %xmm3, %xmm2
-	movaps	%xmm2, 16(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit11)
-	movaps	%xmm4, 32(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit11)
-	movaps	%xmm5, 48(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-L(StrncpyExit11):
-	lea	5(%edx, %esi), %edx
-	lea	5(%ecx, %esi), %ecx
-	movl	-5(%ecx), %esi
-	movb	-1(%ecx), %ah
-	movl	%esi, -5(%edx)
-	movb	%ah, -1(%edx)
-	xor	%esi, %esi
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave12):
-	movaps	%xmm2, %xmm3
-	add	$48, %ebx
-	jle	L(StrncpyExit12)
-	palignr	$12, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	20(%ecx), %xmm2
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit12)
-	palignr	$12, %xmm3, %xmm2
-	movaps	%xmm2, 16(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit12)
-	movaps	%xmm4, 32(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit12)
-	movaps	%xmm5, 48(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-L(StrncpyExit12):
-	lea	4(%edx, %esi), %edx
-	lea	4(%ecx, %esi), %ecx
-	movl	-4(%ecx), %eax
-	movl	%eax, -4(%edx)
-	xor	%esi, %esi
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave13):
-	movaps	%xmm2, %xmm3
-	add	$48, %ebx
-	jle	L(StrncpyExit13)
-	palignr	$13, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	19(%ecx), %xmm2
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit13)
-	palignr	$13, %xmm3, %xmm2
-	movaps	%xmm2, 16(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit13)
-	movaps	%xmm4, 32(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit13)
-	movaps	%xmm5, 48(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-L(StrncpyExit13):
-	lea	3(%edx, %esi), %edx
-	lea	3(%ecx, %esi), %ecx
-
-	movl	-4(%ecx), %eax
-	movl	%eax, -4(%edx)
-	xor	%esi, %esi
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave14):
-	movaps	%xmm2, %xmm3
-	add	$48, %ebx
-	jle	L(StrncpyExit14)
-	palignr	$14, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	18(%ecx), %xmm2
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit14)
-	palignr	$14, %xmm3, %xmm2
-	movaps	%xmm2, 16(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit14)
-	movaps	%xmm4, 32(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit14)
-	movaps	%xmm5, 48(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-L(StrncpyExit14):
-	lea	2(%edx, %esi), %edx
-	lea	2(%ecx, %esi), %ecx
-	movw	-2(%ecx), %ax
-	movw	%ax, -2(%edx)
-	xor	%esi, %esi
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave15):
-	movaps	%xmm2, %xmm3
-	add	$48, %ebx
-	jle	L(StrncpyExit15)
-	palignr	$15, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	17(%ecx), %xmm2
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit15)
-	palignr	$15, %xmm3, %xmm2
-	movaps	%xmm2, 16(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit15)
-	movaps	%xmm4, 32(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit15)
-	movaps	%xmm5, 48(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-L(StrncpyExit15):
-	lea	1(%edx, %esi), %edx
-	lea	1(%ecx, %esi), %ecx
-	movb	-1(%ecx), %ah
-	movb	%ah, -1(%edx)
-	xor	%esi, %esi
-	jmp	L(CopyFrom1To16BytesCase3)
-# endif
-
-# ifndef USE_AS_STRCAT
-#  ifdef USE_AS_STRNCPY
-	CFI_POP (%esi)
-	CFI_POP (%edi)
-
-	.p2align 4
-L(ExitTail0):
-	movl	%edx, %eax
-	RETURN
-
-	.p2align 4
-L(StrncpyExit15Bytes):
-	cmp	$12, %ebx
-	jbe	L(StrncpyExit12Bytes)
-	cmpb	$0, 8(%ecx)
-	jz	L(ExitTail9)
-	cmpb	$0, 9(%ecx)
-	jz	L(ExitTail10)
-	cmpb	$0, 10(%ecx)
-	jz	L(ExitTail11)
-	cmpb	$0, 11(%ecx)
-	jz	L(ExitTail12)
-	cmp	$13, %ebx
-	je	L(ExitTail13)
-	cmpb	$0, 12(%ecx)
-	jz	L(ExitTail13)
-	cmp	$14, %ebx
-	je	L(ExitTail14)
-	cmpb	$0, 13(%ecx)
-	jz	L(ExitTail14)
-	movlpd	(%ecx), %xmm0
-	movlpd	7(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 7(%edx)
-#   ifdef USE_AS_STPCPY
-	lea	14(%edx), %eax
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   else
-	movl	%edx, %eax
-#   endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit12Bytes):
-	cmp	$9, %ebx
-	je	L(ExitTail9)
-	cmpb	$0, 8(%ecx)
-	jz	L(ExitTail9)
-	cmp	$10, %ebx
-	je	L(ExitTail10)
-	cmpb	$0, 9(%ecx)
-	jz	L(ExitTail10)
-	cmp	$11, %ebx
-	je	L(ExitTail11)
-	cmpb	$0, 10(%ecx)
-	jz	L(ExitTail11)
-	movlpd	(%ecx), %xmm0
-	movl	8(%ecx), %eax
-	movlpd	%xmm0, (%edx)
-	movl	%eax, 8(%edx)
-	SAVE_RESULT_TAIL (11)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit8Bytes):
-	cmp	$4, %ebx
-	jbe	L(StrncpyExit4Bytes)
-	cmpb	$0, (%ecx)
-	jz	L(ExitTail1)
-	cmpb	$0, 1(%ecx)
-	jz	L(ExitTail2)
-	cmpb	$0, 2(%ecx)
-	jz	L(ExitTail3)
-	cmpb	$0, 3(%ecx)
-	jz	L(ExitTail4)
-
-	cmp	$5, %ebx
-	je	L(ExitTail5)
-	cmpb	$0, 4(%ecx)
-	jz	L(ExitTail5)
-	cmp	$6, %ebx
-	je	L(ExitTail6)
-	cmpb	$0, 5(%ecx)
-	jz	L(ExitTail6)
-	cmp	$7, %ebx
-	je	L(ExitTail7)
-	cmpb	$0, 6(%ecx)
-	jz	L(ExitTail7)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-#   ifdef USE_AS_STPCPY
-	lea	7(%edx), %eax
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   else
-	movl	%edx, %eax
-#   endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit4Bytes):
-	test	%ebx, %ebx
-	jz	L(ExitTail0)
-	cmp	$1, %ebx
-	je	L(ExitTail1)
-	cmpb	$0, (%ecx)
-	jz	L(ExitTail1)
-	cmp	$2, %ebx
-	je	L(ExitTail2)
-	cmpb	$0, 1(%ecx)
-	jz	L(ExitTail2)
-	cmp	$3, %ebx
-	je	L(ExitTail3)
-	cmpb	$0, 2(%ecx)
-	jz	L(ExitTail3)
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	SAVE_RESULT_TAIL (3)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-	RETURN
-#  endif
-
-END (STRCPY)
-# endif
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strcpy.S b/sysdeps/i386/i686/multiarch/strcpy.S
deleted file mode 100644
index e9db766347..0000000000
--- a/sysdeps/i386/i686/multiarch/strcpy.S
+++ /dev/null
@@ -1,116 +0,0 @@
-/* Multiple versions of strcpy
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2011-2015 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#if !defined (USE_AS_STPCPY) && !defined (USE_AS_STRNCPY)
-# ifndef STRCPY
-#  define STRCPY strcpy
-# endif
-#endif
-
-#ifdef USE_AS_STPCPY
-# ifdef USE_AS_STRNCPY
-#  define STRCPY_SSSE3	__stpncpy_ssse3
-#  define STRCPY_SSE2		__stpncpy_sse2
-#  define STRCPY_IA32		__stpncpy_ia32
-#  define __GI_STRCPY		__GI_stpncpy
-#  define __GI___STRCPY		__GI___stpncpy
-# else
-#  define STRCPY_SSSE3	__stpcpy_ssse3
-#  define STRCPY_SSE2		__stpcpy_sse2
-#  define STRCPY_IA32		__stpcpy_ia32
-#  define __GI_STRCPY		__GI_stpcpy
-#  define __GI___STRCPY		__GI___stpcpy
-# endif
-#else
-# ifdef USE_AS_STRNCPY
-#  define STRCPY_SSSE3	__strncpy_ssse3
-#  define STRCPY_SSE2		__strncpy_sse2
-#  define STRCPY_IA32		__strncpy_ia32
-#  define __GI_STRCPY		__GI_strncpy
-# else
-#  define STRCPY_SSSE3	__strcpy_ssse3
-#  define STRCPY_SSE2		__strcpy_sse2
-#  define STRCPY_IA32		__strcpy_ia32
-#  define __GI_STRCPY		__GI_strcpy
-# endif
-#endif
-
-
-/* Define multiple versions only for the definition in libc.  Don't
-   define multiple versions for strncpy in static library since we
-   need strncpy before the initialization happened.  */
-#if IS_IN (libc)
-
-	.text
-ENTRY(STRCPY)
-	.type	STRCPY, @gnu_indirect_function
-	LOAD_GOT_AND_RTLD_GLOBAL_RO
-	LOAD_FUNC_GOT_EAX (STRCPY_IA32)
-	HAS_CPU_FEATURE (SSE2)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (STRCPY_SSE2)
-	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
-	jnz	2f
-	HAS_CPU_FEATURE (SSSE3)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (STRCPY_SSSE3)
-2:	ret
-END(STRCPY)
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type STRCPY_IA32, @function; \
-	.align 16; \
-	.globl STRCPY_IA32; \
-	.hidden STRCPY_IA32; \
-	STRCPY_IA32: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size STRCPY_IA32, .-STRCPY_IA32
-
-# ifdef SHARED
-#  undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal strcpy calls through a PLT.
-   The speedup we get from using SSSE3 instruction is likely eaten away
-   by the indirect call in the PLT.  */
-#  define libc_hidden_builtin_def(name) \
-	.globl __GI_STRCPY; __GI_STRCPY = STRCPY_IA32
-#  undef libc_hidden_def
-#  define libc_hidden_def(name) \
-	.globl __GI___STRCPY; __GI___STRCPY = STRCPY_IA32
-
-# endif
-#endif
-
-#ifdef USE_AS_STPCPY
-# ifdef USE_AS_STRNCPY
-#  include "../../stpncpy.S"
-# else
-#  include "../../i586/stpcpy.S"
-# endif
-#else
-# ifndef USE_AS_STRNCPY
-#  include "../../i586/strcpy.S"
-# endif
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strncpy-c.c b/sysdeps/i386/i686/multiarch/strncpy-c.c
deleted file mode 100644
index 201e3f98b3..0000000000
--- a/sysdeps/i386/i686/multiarch/strncpy-c.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#define STRNCPY __strncpy_ia32
-#ifdef SHARED
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(name)  \
-    __hidden_ver1 (__strncpy_ia32, __GI_strncpy, __strncpy_ia32);
-#endif
-
-#include "string/strncpy.c"
diff --git a/sysdeps/i386/i686/multiarch/strncpy-sse2.S b/sysdeps/i386/i686/multiarch/strncpy-sse2.S
deleted file mode 100644
index bdd99239a4..0000000000
--- a/sysdeps/i386/i686/multiarch/strncpy-sse2.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_STRNCPY
-#define STRCPY __strncpy_sse2
-#include "strcpy-sse2.S"
diff --git a/sysdeps/i386/i686/multiarch/strncpy-ssse3.S b/sysdeps/i386/i686/multiarch/strncpy-ssse3.S
deleted file mode 100644
index bf82ee447d..0000000000
--- a/sysdeps/i386/i686/multiarch/strncpy-ssse3.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_STRNCPY
-#define STRCPY __strncpy_ssse3
-#include "strcpy-ssse3.S"
diff --git a/sysdeps/i386/i686/multiarch/strncpy.S b/sysdeps/i386/i686/multiarch/strncpy.S
deleted file mode 100644
index 9c257efc6e..0000000000
--- a/sysdeps/i386/i686/multiarch/strncpy.S
+++ /dev/null
@@ -1,5 +0,0 @@
-/* Multiple versions of strncpy
-   All versions must be listed in ifunc-impl-list.c.  */
-#define USE_AS_STRNCPY
-#define STRCPY strncpy
-#include "strcpy.S"